Re: [Fortran, Patch, PR118789, v1] Fix associate to void*

2025-02-27 Thread Andre Vehreschild
Hi Thomas,

thanks for the review. Committed as gcc-15-7726-g0fc1abcc46e.

Thanks again,
Andre

On Wed, 26 Feb 2025 21:40:51 +0100
Thomas Koenig  wrote:

> Hi Andre,
>
> > Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?
>
> Looks good to me.
>
> Thanks for the patch!
>
> Best regards
>
>   Thomas
>


--
Andre Vehreschild * Email: vehre ad gmx dot de


Re: [PATCH] RISC-V: Minimal support for Qualcomm uC Xqccmp extension.

2025-02-27 Thread chendongyan

This patch adds support for the Qualcomm uC Xqccmp extension[1], enabling
GCC to recognize and process the xqccmp extension correctly at compile time.


[1]https://github.com/quic/riscv-unified-db/releases/tag/Xqccmp_extension-0.1.0

Changes for v2:
- Remove the addition of xqccmp extension in 
gcc/common/config/riscv/riscv-ext-bitmask.def


gcc/ChangeLog:

    * common/config/riscv/riscv-common.cc: New extension.
    * config/riscv/riscv.opt: Ditto.

gcc/testsuite/ChangeLog:

    * gcc.target/riscv/arch-45.c: New test.

---
 gcc/common/config/riscv/riscv-common.cc  | 6 ++
 gcc/config/riscv/riscv.opt   | 5 +
 gcc/testsuite/gcc.target/riscv/arch-45.c | 5 +
 3 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/arch-45.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc

index 5038f0eb959a..6fbdb5ed2316 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -229,6 +229,8 @@ static const riscv_implied_info_t riscv_implied_info[] =

   {"xsfvcp", "zve32x"},

+  {"xqccmp", "zca"},
+
   {NULL, NULL}
 };

@@ -442,6 +444,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =

   {"xsfvqmaccdod",    ISA_SPEC_CLASS_NONE, 1, 0},
   {"xsfvfnrclipxfqf", ISA_SPEC_CLASS_NONE, 1, 0},

+  {"xqccmp", ISA_SPEC_CLASS_NONE, 0, 1},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
@@ -1778,6 +1782,8 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   RISCV_EXT_FLAG_ENTRY ("xsfvqmaccdod",    x_riscv_sifive_subext, 
MASK_XSFVQMACCDOD),
   RISCV_EXT_FLAG_ENTRY ("xsfvfnrclipxfqf", x_riscv_sifive_subext, 
MASK_XSFVFNRCLIPXFQF),


+  RISCV_EXT_FLAG_ENTRY ("xqccmp", x_riscv_qc_subext, MASK_XQCCMP),
+
   {NULL, NULL, NULL, 0}
 };

diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index 7515c8ea13dd..61cc8258e323 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -535,6 +535,11 @@ Mask(XSFVQMACCDOD) Var(riscv_sifive_subext)

 Mask(XSFVFNRCLIPXFQF) Var(riscv_sifive_subext)

+TargetVariable
+int riscv_qc_subext
+
+Mask(XQCCMP) Var(riscv_qc_subext)
+
 TargetVariable
 int riscv_fmv_priority = 0

diff --git a/gcc/testsuite/gcc.target/riscv/arch-45.c 
b/gcc/testsuite/gcc.target/riscv/arch-45.c

new file mode 100644
index ..590d4f130325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/arch-45.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_xqccmp -mabi=lp64" } */
+int foo()
+{
+}
--
2.43.0

在 2025/2/20 16:35, Yangyu Chen 写道:



On 20/2/2025 16:31, Dongyan Chen wrote:

This patch support Qualcomm uC Xqccmp extension[1].
To enable GCC to recognize and process xqccmp extension correctly at 
compile time.


[1]https://github.com/quic/riscv-unified-db/releases/tag/Xqccmp_extension-0.1.0 



gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: New extension.
* common/config/riscv/riscv-ext-bitmask.def (RISCV_EXT_BITMASK): 
Ditto.

* config/riscv/riscv.opt: Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/arch-45.c: New test.

---
  gcc/common/config/riscv/riscv-common.cc   | 6 ++
  gcc/common/config/riscv/riscv-ext-bitmask.def | 1 +
  gcc/config/riscv/riscv.opt    | 5 +
  gcc/testsuite/gcc.target/riscv/arch-45.c  | 5 +
  4 files changed, 17 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/riscv/arch-45.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc

index 5038f0eb959a..6fbdb5ed2316 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -229,6 +229,8 @@ static const riscv_implied_info_t 
riscv_implied_info[] =


    {"xsfvcp", "zve32x"},

+  {"xqccmp", "zca"},
+
    {NULL, NULL}
  };

@@ -442,6 +444,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =

    {"xsfvqmaccdod",    ISA_SPEC_CLASS_NONE, 1, 0},
    {"xsfvfnrclipxfqf", ISA_SPEC_CLASS_NONE, 1, 0},

+  {"xqccmp", ISA_SPEC_CLASS_NONE, 0, 1},
+
    /* Terminate the list.  */
    {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
  };
@@ -1778,6 +1782,8 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
    RISCV_EXT_FLAG_ENTRY ("xsfvqmaccdod", x_riscv_sifive_subext, 
MASK_XSFVQMACCDOD),
    RISCV_EXT_FLAG_ENTRY ("xsfvfnrclipxfqf", x_riscv_sifive_subext, 
MASK_XSFVFNRCLIPXFQF),


+  RISCV_EXT_FLAG_ENTRY ("xqccmp", x_riscv_qc_subext, MASK_XQCCMP),
+
    {NULL, NULL, NULL, 0}
  };

diff --git a/gcc/common/config/riscv/riscv-ext-bitmask.def 
b/gcc/common/config/riscv/riscv-ext-bitmask.def

index 8b4e6d6349a7..c2809460d582 100644
--- a/gcc/common/config/riscv/riscv-ext-bitmask.def
+++ b/gcc/common/config/riscv/riscv-ext-bitmask.def
@@ -79,5 +79,6 @@ RISCV_EXT_BITMASK ("zcd",    1,  4)
  RISCV_EXT_BITMASK ("zcf",    1,  5)
  RISCV_EXT_BITMASK ("zcmop",    1,  6)
  RISCV_EXT_BITMASK ("zawrs",    1,  7)
+RISCV_EXT_BITMASK ("xqccmp", 

[PATCH v4] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread pan2 . li
From: Pan Li 

This patch would like to fix one bug when expanding const vector for the
interleave case.  For example, we have:

base1 = 151
step = 121

For vec_series, we will generate a vector in the format v[i] = base + i * step.
The vec_series then yields the result below for HImode, and we can see
that the result overflows into the highest 8 bits of HImode.

v1.b = {151, 255, 7,  0, 119,  0, 231,  0, 87,  1, 199,  1, 55,   2, 167,   2}

Aka we expect v1.b should be:

v1.b = {151, 0, 7,  0, 119,  0, 231,  0, 87,  0, 199,  0, 55,   0, 167,   0}

After that it will perform the IOR with v2 for the base2(aka another series).

v2.b =  {0,  17, 0, 33,   0, 49,   0, 65,  0, 81,   0, 97,  0, 113,   0, 129}

Unfortunately, the base1 + i * step1 in HImode may overflow to the high
8 bits, and the high 8 bits will pollute the v2 and result in incorrect
value in const_vector.

This patch would like to perform the overflow to smode check before IOR
the base2 series, and perform the clean highest bit if the const_vector
overflow to smode occurs.  If no overflow or VLA, will do nothing here.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

PR target/118931

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Add overflow to
smode check and clean up highest bits if overflow.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr118931-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc   | 36 +++
 .../riscv/rvv/base/pr118931-run-1.c   | 19 ++
 2 files changed, 48 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7cc15f3d53c..d55f8333fb3 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1489,22 +1489,44 @@ expand_const_vector (rtx target, rtx src)
 
  EEW = 32, { 2, 4, ... }.
 
-This only works as long as the larger type does not overflow
-as we can't guarantee a zero value for each second element
-of the sequence with smaller EEW.
-??? For now we assume that no overflow happens with positive
-steps and forbid negative steps altogether.  */
+Both the series1 and series2 may overflow before taking the IOR
+to generate the final result.  However, only series1 matters
+because the series2 will shift before IOR, thus the overflow
+bits will never pollute the final result.
+
+For now we forbid the negative steps and overflow, and they
+will fall back to the default merge way to generate the
+const_vector.  */
+
  unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
  scalar_int_mode new_smode;
  machine_mode new_mode;
  poly_uint64 new_nunits
= exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
+
+ if (!step1.is_constant ())
+   overflow_smode_p = true;
+ else
+   {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = (uint64_t)step1.to_constant ();
+ uint64_t base1_val = (uint64_t)base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+ if ((elem_val >> builder.inner_bits_size ()) != 0)
+   overflow_smode_p = true;
+   }
+
  if (known_ge (step1, 0) && known_ge (step2, 0)
  && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode))
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
{
  rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
+ base1 = gen_int_mode (base1_poly, new_smode);
  expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
 
  if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
new file mode 100644
index 000..ef866a72039
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */
+
+long long m;
+char f = 151;
+char h = 103;
+unsigned char a = 109;
+
+int main() {
+  for (char l = 0; l < 255 - 241; l += h - 102)
+a *= f;
+
+  m = a;
+
+  if (m != 29)
+__builtin_abort ();
+
+  return 0;
+}
-- 
2.43.0



[PATCH] value-range: Fix up irange::union_bitmask [PR118953]

2025-02-27 Thread Jakub Jelinek
Hi!

The following testcase is miscompiled during evrp.
Before vrp, we have (from ccp):
  # RANGE [irange] long long unsigned int [0, +INF] MASK 0xffffffffffffc000 VALUE 0x2d
  _3 = _2 + 18446744073708503085;
...
  # RANGE [irange] long long unsigned int [0, +INF] MASK 0xffffffffffffc000 VALUE 0x59
  _6 = (long long unsigned int) _5;
  # RANGE [irange] int [-INF, +INF] MASK 0xffffc000 VALUE 0x34
  _7 = k_11 + -1048524;
  switch (_7)  [33.33%], case 8:  [33.33%], case 24:  
[33.33%], case 32:  [33.33%]>
...
  # RANGE [irange] long long unsigned int [0, +INF] MASK 0xffffffffffffc07d VALUE 0x0
  # i_20 = PHI <_3(4), 0(3), _6(2)>
and evrp is now trying to figure out range for i_20 in range_of_phi.

All the ranges and MASK/VALUE pairs above are correct for the testcase,
k_11 and _2 based on it is a result of multiplication by a constant with low
14 bits cleared and then some numbers are added to it.

There is an obvious missed optimization for which I've filed PR119039,
simplify_switch_using_ranges could see that all the labels but default
are unreachable because the controlling expression has
MASK 0xffffc000 VALUE 0x34 and none of 8, 24 and 32 satisfy that.

Anyway, during range_of_phi for i_20, we process the PHI arguments
in order.  For the _3(4) case, we figure out that it is reachable
through the case 24: case 32: labels only of the switch and that
0x34 - 0x2d is 7, so derive
[irange] long long unsigned int [17, 17][25, 25] MASK 0xffffffffffffc000 VALUE 0x2d
(the MASK/VALUE just got inherited from the _3 earlier range).
Now (not surprisingly because those labels aren't actually reachable),
that range is inconsistent, 0x2d is 45, so there is conflict between the
values and the irange_bitmask.
value-range.{h,cc} code differentiates between actually stored
irange_bitmask, which is that MASK 0xffffffffffffc000 VALUE 0x2d, and
semantic bitmask, which is what get_bitmask returns.  That is
  // The mask inherent in the range is calculated on-demand.  For
  // example, [0,255] does not have known bits set by default.  This
  // saves us considerable time, because setting it at creation incurs
  // a large penalty for irange::set.  At the time of writing there
  // was a 5% slowdown in VRP if we kept the mask precisely up to date
  // at all times.  Instead, we default to -1 and set it when
  // explicitly requested.  However, this function will always return
  // the correct mask.
  //
  // This also means that the mask may have a finer granularity than
  // the range and thus contradict it.  Think of the mask as an
  // enhancement to the range.  For example:
  //
  // [3, 1000] MASK 0xfffffffe VALUE 0x0
  //
  // 3 is in the range endpoints, but is excluded per the known 0 bits
  // in the mask.
  //
  // See also the note in irange_bitmask::intersect.
  irange_bitmask bm
= get_bitmask_from_range (type (), lower_bound (), upper_bound ());
  if (!m_bitmask.unknown_p ())
bm.intersect (m_bitmask);
Now, get_bitmask_from_range here is MASK 0x1f VALUE 0x0 and it intersects
that with that MASK 0xffffffffffffc000 VALUE 0x2d.
Which triggers the ugly special case in irange_bitmask::intersect:
  // If we have two known bits that are incompatible, the resulting
  // bit is undefined.  It is unclear whether we should set the entire
  // range to UNDEFINED, or just a subset of it.  For now, set the
  // entire bitmask to unknown (VARYING).
  if (wi::bit_and (~(m_mask | src.m_mask),
   m_value ^ src.m_value) != 0)
{
  unsigned prec = m_mask.get_precision ();
  m_mask = wi::minus_one (prec);
  m_value = wi::zero (prec);
}
so the semantic bitmask is actually MASK 0xffffffffffffffff VALUE 0x0.

Next, range_of_phi attempts to union it with the 0(3) PHI argument,
and during irange::union_ first adds the [0,0] to the subranges, so
[irange] long long unsigned int [0, 0][17, 17][25, 25] MASK 0xffffffffffffc000 VALUE 0x2d
and then goes on to irange::union_bitmask which does
  if (m_bitmask == r.m_bitmask)
return false;
  irange_bitmask bm = get_bitmask ();
  irange_bitmask save = bm;
  bm.union_ (r.get_bitmask ());
  if (save == bm)
return false;
  m_bitmask = bm;
  if (save == get_bitmask ())
return false;
m_bitmask MASK 0xffffffffffffc000 VALUE 0x2d isn't the same as
r.m_bitmask MASK 0x0 VALUE 0x0, so we compute the semantic bitmask
(but note, not from the original range before union, but the modified one,
dunno if that isn't a problem as well), which is still the VARYING/unknown_p
one, union_ that with MASK 0x0 VALUE 0x0 and get still
MASK 0xffffffffffffffff VALUE 0x0, so don't update anything, the semantic
bitmask didn't change, so we are fine (not!, see later).

Except then we try to union with the third PHI argument.  And, because the
edge to that comes only from case 8: label and there is a known difference
between the two, the argument is actually already from earlier replaced by
45(2) constant.  So, irange::union_ adds the [45, 45] range to the list
of subranges, but 

RE: [3/3 PATCH v3]middle-end: delay checking for alignment to load [PR118464]

2025-02-27 Thread Richard Biener
On Wed, 26 Feb 2025, Tamar Christina wrote:

> > > >
> > > > No, I don't think so.  The code that eventually performs a
> > > > contiguous sub-group access directly should never extend
> > > > the load beyond GROUP_SIZE - or should be gated on the DR
> > > > not executed speculatively.  That is, we should "fix" this
> > > > elsewhere.
> > > >
> > >
> > > It doesn't, it's just not aligned within the range of GROUP_SIZE
> > > from what I remember.
> > >
> > > > If you have an updated patch I can look at what's wrong here if you
> > > > tell me how to reproduce (after applying the patch I suppose).
> > >
> > > Yes, applying the patch and running:
> > >
> > > /work/build/gcc/xgcc -B/work/build/gcc/
> > /work/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c  -m64   
> > -fdiagnostics-
> > plain-output  -flto -ffat-lto-objects -msse2 -ftree-vectorize 
> > -fno-tree-loop-
> > distribute-patterns -fno-vect-cost-model -fno-common -O2 -fdump-tree-vect-
> > details -msse4.1  -lm  -o ./vect-early-break_26.exe
> > 
> > So it works as in executing fine.  We have a VF of 4 and
> > 
> > note:   recording new base alignment for &b
> >   alignment:32
> >   misalignment: 0
> >   based on: _1 = b[i_32];
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   recording new base alignment for &a
> >   alignment:32
> >   misalignment: 0
> >   based on: _2 = a[i_32];
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   vect_compute_data_ref_alignment:
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   alignment increased due to early break to 32 bytes.
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > missed:   misalign = 8 bytes of ref b[i_32]
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   vect_compute_data_ref_alignment:
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   alignment increased due to early break to 32 bytes.
> > 
> > so no peeling necessary.  But we also have like
> > 
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > missed:   misalign = 12 bytes of ref b[_6]
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   vect_compute_data_ref_alignment:
> > /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/vect-early-break_26.c:35:21:
> > note:   alignment increased due to early break to 32 bytes.
> > 
> > and we are correctly saying we vectorize an unaligned access.
> > 
> > The "issue" is we're having SLP nodes with a load permutation, their
> > expansion might not happen with the whole DR group in mind.  I'd say
> > we simply refuse to do early break speculative load vectorization
> > for SLP nodes with a load permutation.
> 
> This is what I was trying to say on IRC when I mentioned that the permutes
> can end up creating an unaligned access wrt to the original address.
> 
> But the reason I was still trying to allow this case is because conceptually
> my assumption was that the permutes still maintain the access within
> the group.  After all, they're just shifting elements around.
> 
> In other words, I was assuming that the group a[i] - a[i-2] still stays within
> the group alignment of 32-bytes, even if the permute can make the second
> load in the group start at say, byte 28.  My assumption was though that it 
> can't
> make it start at byte 36.
> 
> Are you saying that this is the case? that it can? Then I agree the load 
> permutations
> on group loads are unsafe to speculate for unmasked loops...

Yes, looking at the code generation the loads do not stay within the
original properly aligned boundary.

Richard.

> Thanks,
> Tamar
> > 
> > It looks like a latent issue to me which could also interfere with
> > gap peeling, I have to dig a bit further what code is responsible
> > for the current behavior ...
> > 
> > 
> > 
> > > Thanks,
> > > Tamar
> > >
> > > >
> > > > > Enforcing the alignment on every group member would be wrong I think 
> > > > > since
> > > > > that ends up higher overall alignment than they need.
> > > > >
> > > > > > So besides these issues in get_load_store_type the change looks 
> > > > > > good now.
> > > > > >
> > > > >
> > > > > Thanks for the reviews.
> > > > >
> > > > > Tamar
> > > > > > Richard.
> > > > > >
> > > > > > > +  else
> > > > > > > + *alignment_support_scheme = dr_aligned;
> > > > > > > +}
> > > > > > > +
> > > > > > >if (*alignment_support_scheme == dr_unaligned_unsupported)
> > > > > > >  {
> > > > > > >if (dump_enabled_p ())
> > > > > > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
> > > > > > > index
> > > > > >
> > > >
> > b0cb081cba0ae8b11fbfcfcb8c6d440ec451ccb5..97caf61b345735d297ec49fd6ca
> > > > > > 64797435b46fc 100644
> > > > > > > --- a/gcc/tree-vectorizer.h
> > > > > 

RE: [PATCH v4] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread Li, Pan2
Sure thing, will send the v5 for CI system and commit it if no surprise.

BTW, shall we plan a refactor of expand_const_vector in the next stage 1? It
has grown to more than 500 lines and has become rather unfriendly to debug.

Pan 

-Original Message-
From: Robin Dapp  
Sent: Thursday, February 27, 2025 5:22 PM
To: Li, Pan2 ; gcc-patches@gcc.gnu.org
Cc: juzhe.zh...@rivai.ai; kito.ch...@gmail.com; jeffreya...@gmail.com; Robin 
Dapp 
Subject: Re: [PATCH v4] RISC-V: Fix bug for expand_const_vector interleave 
[PR118931]

Hi Pan,

> +   poly_int64 base1_poly = rtx_to_poly_int64 (base1);
> +   bool overflow_smode_p = false;
> +
> +   if (!step1.is_constant ())
> + overflow_smode_p = true;
> +   else
> + {
> +   int elem_count = XVECLEN (src, 0);
> +   uint64_t step1_val = (uint64_t)step1.to_constant ();
> +   uint64_t base1_val = (uint64_t)base1_poly.to_constant ();

The casts aren't necessary anymore I suppose?

> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
> @@ -0,0 +1,19 @@
> +/* { dg-do run { target { riscv_v } } } */
> +/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */

Please change -flto to -fwhole-program still.

OK with these changed, thanks!

-- 
Regards
 Robin



Re: [PATCH v4] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread Robin Dapp

Sure thing, will send the v5 for CI system and commit it if no surprise.

BTW, shall we plan a refactor of expand_const_vector in the next stage 1? It
has grown to more than 500 lines and has become rather unfriendly to debug.


Yeah, sounds very reasonable.

--
Regards
Robin



[PATCH] driver: Fix multilib_os_dir and multiarch_dir for those target use TARGET_COMPUTE_MULTILIB

2025-02-27 Thread Kito Cheng
This patch fixes the multilib_os_dir and multiarch_dir for those targets
that use TARGET_COMPUTE_MULTILIB.  The TARGET_COMPUTE_MULTILIB hook only
updates/fixes the multilib_dir but not the multilib_os_dir and multiarch_dir,
so the multilib_os_dir and multiarch_dir are not set correctly for those
targets.

Use RISC-V linux target (riscv64-unknown-linux-gnu) as an example:

```
$ riscv64-unknown-linux-gnu-gcc -print-multi-lib
.;
lib32/ilp32;@march=rv32imac@mabi=ilp32
lib32/ilp32d;@march=rv32imafdc@mabi=ilp32d
lib64/lp64;@march=rv64imac@mabi=lp64
lib64/lp64d;@march=rv64imafdc@mabi=lp64d
```

If we use the exactly same -march and -mabi options to compile a source file,
the multilib_os_dir and multiarch_dir are set correctly:

```
$ riscv64-unknown-linux-gnu-gcc -print-multi-os-directory -march=rv64imafdc 
-mabi=lp64d
../lib64/lp64d
$ riscv64-unknown-linux-gnu-gcc -print-multi-directory -march=rv64imafdc 
-mabi=lp64d
lib64/lp64d
```

However if we use the -march=rv64imafdcv -mabi=lp64d option to compile a source
file, the multilib_os_dir and multiarch_dir are not set correctly:
```
$ riscv64-unknown-linux-gnu-gcc -print-multi-os-directory -march=rv64imafdcv 
-mabi=lp64d
lib64/lp64d
$ riscv64-unknown-linux-gnu-gcc -print-multi-directory -march=rv64imafdcv 
-mabi=lp64d
lib64/lp64d
```

That's because the TARGET_COMPUTE_MULTILIB hook only updates/fixes the
multilib_dir but not the multilib_os_dir, so the multilib_os_dir is blank and
falls back to the same value as multilib_dir, which is not correct.

So we introduce a second chance to fix the multilib_os_dir if it's not set.  We
also try to fix the multiarch_dir, because it may also not be set correctly if
multilib_os_dir is not set.

gcc/ChangeLog:

* gcc.cc (find_multilib_os_dir_by_multilib_dir): New.
(set_multilib_dir): Fix multilib_os_dir and multiarch_dir
if multilib_os_dir is not set.
---
 gcc/gcc.cc | 109 -
 1 file changed, 108 insertions(+), 1 deletion(-)

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index 04b3736a5da..6d447683b13 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -9736,6 +9736,104 @@ default_arg (const char *p, int len)
   return 0;
 }
 
+/* Use multilib_dir as key to find corresponding multilib_os_dir and
+   multiarch_dir.  */
+
+static void
+find_multilib_os_dir_by_multilib_dir (const char *multilib_dir,
+ const char **p_multilib_os_dir,
+ const char **p_multiarch_dir)
+{
+  const char *p = multilib_select;
+  unsigned int this_path_len;
+  const char *this_path;
+  int ok = 0;
+
+  while (*p != '\0')
+{
+  /* Ignore newlines.  */
+  if (*p == '\n')
+   {
+ ++p;
+ continue;
+   }
+
+  /* Get the initial path.  */
+  this_path = p;
+  while (*p != ' ')
+   {
+ if (*p == '\0')
+   {
+ fatal_error (input_location, "multilib select %qs %qs is invalid",
+  multilib_select, multilib_reuse);
+   }
+ ++p;
+   }
+  this_path_len = p - this_path;
+
+  ok = 0;
+  ++p;
+
+  /* Skip any arguments, we don't care at this stage.  */
+  while (*++p != ';');
+
+  if (this_path_len != 1
+ || this_path[0] != '.')
+   {
+ char *new_multilib_dir = XNEWVEC (char, this_path_len + 1);
+ char *q;
+
+ strncpy (new_multilib_dir, this_path, this_path_len);
+ new_multilib_dir[this_path_len] = '\0';
+ q = strchr (new_multilib_dir, ':');
+ if (q != NULL)
+   *q = '\0';
+
+ if (strcmp (new_multilib_dir, multilib_dir) == 0)
+   ok = 1;
+   }
+
+  /* Found matched multilib_dir, update multilib_os_dir and
+multiarch_dir.  */
+  if (ok)
+   {
+ const char *q = this_path, *end = this_path + this_path_len;
+
+ while (q < end && *q != ':')
+   q++;
+ if (q < end)
+   {
+ const char *q2 = q + 1, *ml_end = end;
+ char *new_multilib_os_dir;
+
+ while (q2 < end && *q2 != ':')
+   q2++;
+ if (*q2 == ':')
+   ml_end = q2;
+ if (ml_end - q == 1)
+   *p_multilib_os_dir = xstrdup (".");
+ else
+   {
+ new_multilib_os_dir = XNEWVEC (char, ml_end - q);
+ memcpy (new_multilib_os_dir, q + 1, ml_end - q - 1);
+ new_multilib_os_dir[ml_end - q - 1] = '\0';
+ *p_multilib_os_dir = new_multilib_os_dir;
+   }
+
+ if (q2 < end && *q2 == ':')
+   {
+ char *new_multiarch_dir = XNEWVEC (char, end - q2);
+ memcpy (new_multiarch_dir, q2 + 1, end - q2 - 1);
+ new_multiarch_dir[end - q2 - 1] = '\0';
+ *p_multiarch_dir = new_multiarch_dir;
+   }
+ break;
+   }
+   }
+  ++p;
+ 

Re: [PATCH 1/2] Add TARGET_COMPUTE_MULTILIB_OS hook to override multi-lib-os result.

2025-02-27 Thread Kito Cheng
Hi Jin Ma:

I realized multilib os dir is not correctly set as you described, but
I think that info should just come from multilib_select rather than
creating a new hook to do that, anyway I just wrote a fix...because we
hit that issue last week...

Hi Jeff:

Here is another approach to fix that - without introducing a new hook,
just using existing info.  It also comes with simpler steps to reproduce
and verify:

https://gcc.gnu.org/pipermail/gcc-patches/2025-February/676540.html

On Sat, Feb 22, 2025 at 10:51 PM Jeff Law  wrote:
>
>
>
> On 2/22/25 7:31 AM, Palmer Dabbelt wrote:
> > On Sat, 22 Feb 2025 02:19:03 PST (-0800), ji...@linux.alibaba.com wrote:
> >> On Fri, 14 Feb 2025 21:03:46 +0800, Jin Ma wrote:
> >>> Create a new hook to let target could override the multi-lib-os result.
> >>>
> >>> The motivation for this change arises from the fact that using
> >>> TARGET_COMPUTE_MULTILIB to override the original multilib_dir can lead
> >>> to unexpected behavior with multilib_os_dir.
> >>>
> >>> In our build scripts, we establish a connection between multilib_os_dir
> >>> and multilib_dir. For example, in gcc/config/riscv/t-linux, we set
> >>> multilib_os_dir to be the parent directory of multilib_dir. However,
> >>> when TARGET_COMPUTE_MULTILIB overrides multilib_dir and returns a reused
> >>> result for multilib_dir, multilib_os_dir ends up being identical to
> >>> multilib_dir. This discrepancy is clearly inconsistent with our
> >>> expectations.
> >>>
> >>> gcc/ChangeLog:
> >>>
> >>>   * common/common-target.def (compute_multilib_os): New.
> >>>   * common/common-targhooks.cc (default_compute_multilib_os): New.
> >>>   * common/common-targhooks.h (default_compute_multilib_os): New.
> >>>   * doc/tm.texi (TARGET_COMPUTE_MULTILIB_OS): New.
> >>>   * doc/tm.texi.in: Regen.
> >>>   * gcc.cc (set_multilib_dir): Call targetm_common.compute_multilib_os.
> >>> ---
> >>>   gcc/common/common-target.def   | 14 ++
> >>>   gcc/common/common-targhooks.cc |  9 +
> >>>   gcc/common/common-targhooks.h  |  5 +
> >>>   gcc/doc/tm.texi| 10 ++
> >>>   gcc/doc/tm.texi.in |  1 +
> >>>   gcc/gcc.cc |  4 
> >>>   6 files changed, 43 insertions(+)
> >>
> >> Ping again :)
> >>
> >> Is there any comment on this patches?
> >
> > Not yet, I've got them open but I haven't had time to figure out the
> > paths yet.  I know we screwed this up the first time and need to do
> > something, I'm just not really quite sure what the right answer is yet.
> And I'm deeply concerned about adding another overriding target hook in
> here.  It feels like we're papering over a bigger problem elsewhere, but
> I haven't had the time to really dive in to either articulate my
> concerns more clearly or alleviate them.
>
> jeff
>


Re: [PATCH] testsuite, powerpc: Fix vsx-vectorize-* after alignment peeling [PR118567]

2025-02-27 Thread Alex Coplan
On 17/02/2025 14:28, Alex Coplan wrote:
> Hi,
> 
> After the recent alignment peeling enhancements in the vectorizer we
> started vectorizing the "checking" loops (that check for the right
> result) in gcc.target/powerpc/vsx-vectorize-*.c,  thus skewing the
> expected counts of various scan-dump-times tests (causing them to FAIL).
> This adds #pragma GCC novector above the relevant loops to prevent them
> from being vectorized, thereby fixing the test failures.
> 
> Tested with RUNTESTFLAGS="powerpc.exp=vsx-vectorize-*.c" on
> powerpc64le-linux-gnu (cfarm29): no FAILs observed with the patch
> applied.  OK for trunk?

Ping.

> 
> Thanks,
> Alex
> 
> gcc/testsuite/ChangeLog:
> 
>   PR testsuite/118567
>   * gcc.target/powerpc/vsx-vectorize-1.c: Add #pragma to block
>   vectorization of result-checking loop.
>   * gcc.target/powerpc/vsx-vectorize-2.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-3.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-4.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-5.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-6.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-7.c: Likewise.
>   * gcc.target/powerpc/vsx-vectorize-8.c: Likewise.

> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-1.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-1.c
> index a0e0496d345..927a523568b 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-1.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-1.c
> @@ -30,6 +30,7 @@ main1 (struct foo * __restrict__ p)
>  }
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (p->y[i] != x[i])
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-2.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-2.c
> index 52c49b27cb7..84a63b3c42f 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-2.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-2.c
> @@ -15,6 +15,7 @@ void bar (float *pd, float *pa, float *pb, float *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] * pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c
> index f2f838a77fc..33054feef57 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-3.c
> @@ -15,6 +15,7 @@ void bar (short *pa, short *pb, short *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] * pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-4.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-4.c
> index 8bf9dff1712..05262cf76d9 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-4.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-4.c
> @@ -15,6 +15,7 @@ void bar (double *pa, double *pb, double *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] * pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c
> index 1446e40b1d3..5478390f2ec 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-5.c
> @@ -15,6 +15,7 @@ void bar (char *pa, char *pb, char *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] + pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-6.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-6.c
> index 6f49ccbbb6a..e1dc35bfd4d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-6.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-6.c
> @@ -15,6 +15,7 @@ void bar (double *pd, double *pa, double *pb, double *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] * pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-7.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-7.c
> index fde65a521d9..9a1ffd33881 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-7.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-7.c
> @@ -15,6 +15,7 @@ void bar (int *pd, int *pa, int *pb, int *pc)
>int i;
>  
>/* check results:  */
> +#pragma GCC novector
>for (i = 0; i < N; i++)
>  {
>if (pa[i] != (pb[i] * pc[i]))
> diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-8.c 
> b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-8.c
> index fb50cd54fd9..2f6fbfb443d 100644
> --- a/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-8.c
> +++ b/gcc/testsuite/gcc.target/powerpc/vsx-vectorize-8.c
> @@ -15,6 +15,7 @@ void bar (short *pd, short *pa, short *pb, 

[PATCH v5] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread pan2 . li
From: Pan Li 

This patch would like to fix one bug when expanding const vector for the
interleave case.  For example, we have:

base1 = 151
step = 121

For vec_series, we will generate a vector in the format v[i] = base + i * step.
The vec_series then yields the result below for HImode, and we can see
that the result overflows into the highest 8 bits of HImode.

v1.b = {151, 255, 7,  0, 119,  0, 231,  0, 87,  1, 199,  1, 55,   2, 167,   2}

Aka we expect v1.b should be:

v1.b = {151, 0, 7,  0, 119,  0, 231,  0, 87,  0, 199,  0, 55,   0, 167,   0}

After that it will perform the IOR with v2 for the base2(aka another series).

v2.b =  {0,  17, 0, 33,   0, 49,   0, 65,  0, 81,   0, 97,  0, 113,   0, 129}

Unfortunately, the base1 + i * step1 in HImode may overflow to the high
8 bits, and the high 8 bits will pollute the v2 and result in incorrect
value in const_vector.

This patch would like to perform the overflow to smode check before the
optimized interleave code generation.  If overflow or VLA, it will fall
back to the default merge approach.

The below test suites are passed for this patch.
* The rv64gcv fully regression test.

PR target/118931

gcc/ChangeLog:

* config/riscv/riscv-v.cc (expand_const_vector): Add overflow to
smode check and fall back to the default merge approach if overflow.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/pr118931-run-1.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-v.cc   | 36 +++
 .../riscv/rvv/base/pr118931-run-1.c   | 19 ++
 2 files changed, 48 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 7cc15f3d53c..287eb3e54cf 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1489,22 +1489,44 @@ expand_const_vector (rtx target, rtx src)
 
  EEW = 32, { 2, 4, ... }.
 
-This only works as long as the larger type does not overflow
-as we can't guarantee a zero value for each second element
-of the sequence with smaller EEW.
-??? For now we assume that no overflow happens with positive
-steps and forbid negative steps altogether.  */
+Both the series1 and series2 may overflow before taking the IOR
+to generate the final result.  However, only series1 matters
+because the series2 will shift before IOR, thus the overflow
+bits will never pollute the final result.
+
+For now we forbid the negative steps and overflow, and they
+will fall back to the default merge way to generate the
+const_vector.  */
+
  unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
  scalar_int_mode new_smode;
  machine_mode new_mode;
  poly_uint64 new_nunits
= exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
+
+ if (!step1.is_constant ())
+   overflow_smode_p = true;
+ else
+   {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = step1.to_constant ();
+ uint64_t base1_val = base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+ if ((elem_val >> builder.inner_bits_size ()) != 0)
+   overflow_smode_p = true;
+   }
+
  if (known_ge (step1, 0) && known_ge (step2, 0)
  && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode))
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
{
  rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
+ base1 = gen_int_mode (base1_poly, new_smode);
  expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
 
  if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
new file mode 100644
index 000..84c63b5040e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d -fwhole-program 
-mrvv-vector-bits=zvl" } */
+
+long long m;
+char f = 151;
+char h = 103;
+unsigned char a = 109;
+
+int main() {
+  for (char l = 0; l < 255 - 241; l += h - 102)
+a *= f;
+
+  m = a;
+
+  if (m != 29)
+__builtin_abort ();
+
+  return 0;
+}
-- 
2.43.0



Re: The COBOL front end, version 3, now in 14 easy pieces

2025-02-27 Thread James K. Lowden
On Mon, 24 Feb 2025 14:51:27 +0100
Richard Biener  wrote:

> Compiling a Cobol Hello World results in
> 
> > ./install/gcc-cobol/usr/local/bin/gcobol t.cob
> /usr/bin/ld: cannot find -lgcobol: No such file or directory
> collect2: error: ld returned 1 exit status
> 
> possibly because the 64bit libcobol is installed in
> /foo/usr/local/lib/ rather than .../lib64/ and
> the former is not in the configured search directory.

I spent all day working on this problem, and need guidance with
libtool.  The immediate problem is:

> autoreconf
> configure.ac:72: warning: LTOPTIONS_VERSION is m4_require'd but not m4_defun'd
> ../libtool.m4:67: LT_INIT is expanded from...
> configure.ac:72: the top level

The consensus on IRC is to use automake, which is fine, and libtool, about 
which I have never had anything good to say from the day I encountered it.  
But, enough about me.  Let's talk about libgcobol.  

On our parser branch I introduced automake into configure.ac and added a new 
file, Makefile.am. My goal was to make sure libgcobol can be built and 
configured independently of gcc; after that to make any needed gcc adaptations. 
 But "independent" isn't quite possible, because for consistency gcc uses its 
own in-tree libtool.  Totally understandable.  

What I don't understand is where LTOPTIONS_VERSION, LTSUGAR_VERSION, etc. are 
defined, and how to bring them into (I think) configure.ac.  

I'd really prefer starting from scratch and building up to a working version 
over cribbing another example.  Every library has its own way.  libgcobol is 
about as simple as it gets.  Better to keep it that way.  

Suggestions?  

Thanks, 

--jkl



GCC 15.0.1 Status Report (2025-02-27), Stage 4 still in effect

2025-02-27 Thread Richard Biener
Status
======

The GCC development branch which will become GCC 15 is still in
stage4, open for regression and documentation fixes only.  We've
been in this stage for 6 weeks now and are slowly progressing
towards a release.

Besides tackling the remaining P1 bugs, please also keep an eye on
the testsuite status of your target; ideally there should be
no new failures compared to what happens on the GCC 14 branch
and for new failures there should be a bugzilla tracking it.


Quality Data
============

Priority          #   Change from last report
--------        ---   -----------------------
P1               17   -  15
P2              604   -   7
P3              147   - 120
P4              229   +  21
P5               24
--------        ---   -----------------------
Total P1-P3     768   - 142
Total          1021   - 121


Previous Report
===============

https://gcc.gnu.org/pipermail/gcc/2025-January/245377.html


Re: [PATCH 0/2] i386: Adjust AVX10 related options

2025-02-27 Thread Hongtao Liu
On Mon, Feb 17, 2025 at 9:51 AM Hongtao Liu  wrote:
>
> On Thu, Feb 13, 2025 at 4:08 PM Haochen Jiang  wrote:
> >
> > Hi all,
> >
> > According to the previous feedback on our RFC for AVX10 option adjustment
> > and discussion with LLVM, we finalized how we are going to handle that.
> >
> > The overall direction is to re-alias avx10.x alias to 512 bit and only
> > using -mno-avx10.x to disable everything instead of the current confusing
> > -mno-avx10.x-[256,512], leading to deprecating -mno-avx10.x-[256,512].
> >
> > It is fine for AVX10.2 since it is just introduced. However, it will become
> > tricky for AVX10.1 introduced in GCC 14. Thus, we will deprecate
> > avx10.1 alias. For -mno- options, since we do not have avx10.1, having
> > -mno-avx10.1 would become weird. We will keep both -mno-avx10.1-256 and
> > -mno-avx10.1-512, while changing -mno-avx10.1-512 also disabling the whole
> > AVX10.1 to align with future.
> >
> > For option re-design to follow the latter length to determine the AVX10
> > size, we choose not to change that since it will break the previous
> > impression on -m options should enable everything after that. Also it
> > will make options like -mavx10.2-512 -mavx10.4-256 losing its flexibility
> > on only enabling 512 bit on AVX10.1/2 but enabling 256 bit on AVX10.3/4.
> >
> > Upcoming are the two patches, the first patch will be backported to
> > GCC 14. Ok for trunk?
> Ok.
Please also add documents to gcc15 changes.
> >
> > Thx,
> > Haochen
> >
> >
>
>
> --
> BR,
> Hongtao



-- 
BR,
Hongtao


Re: [PATCH] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

2025-02-27 Thread Jin Ma
On Thu, 27 Feb 2025 16:00:08 +0100, "Robin Dapp" wrote:
> Hi,
> 
> when merging two vsetvls that both only demand "SEW >= ..." we
> use their maximum SEW and keep the LMUL.  That may lead to invalid
> vector configurations like
>   e64, mf4.
> As we make sure that the SEW requirements overlap we can use the SEW
> and LMUL of the configuration with the larger SEW.
> 
> Ma Jin already touched this merge rule some weeks ago and fixed the
> ratio calculation (r15-6873).  Calculating the ratio from an invalid
> SEW/LMUL combination lead to an overflow in the ratio variable, though.
> I'd argue the proper fix is to update SEW and LMUL, keeping the ratio
> as before.  This breaks bug-10.c, though, and I'm not sure what it
> really tests.  SEW/LMUL actually doesn't change, we just emit a slightly
> different vsetvl.  Maybe it was reduced too far?  Jin, any insight
> there?  I changed it into a run test for now.
> 
> Regtested on rv64gcv_zvl512b.
> 
> Regards
>  Robin
> 
>   PR target/117955
> 
> gcc/ChangeLog:
> 
>   * config/riscv/riscv-v.cc (calculate_ratio): Use LMUL of vsetvl
>   with larger SEW.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/riscv/rvv/base/bug-10.c: Convert to run test.
>   * gcc.target/riscv/rvv/base/pr117955.c: New test.
> ---
>  gcc/config/riscv/riscv-vsetvl.cc  |   8 +-
>  .../gcc.target/riscv/rvv/base/bug-10.c|  32 +-
>  .../gcc.target/riscv/rvv/base/pr117955.c  | 827 ++
>  3 files changed, 861 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
> 
> diff --git a/gcc/config/riscv/riscv-vsetvl.cc 
> b/gcc/config/riscv/riscv-vsetvl.cc
> index 82284624a24..f0165f7b8c8 100644
> --- a/gcc/config/riscv/riscv-vsetvl.cc
> +++ b/gcc/config/riscv/riscv-vsetvl.cc
> @@ -1729,9 +1729,11 @@ private:
>}
>inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
>{
> -int max_sew = MAX (prev.get_sew (), next.get_sew ());
> -prev.set_sew (max_sew);
> -prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
> +bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
> +const vsetvl_info from = prev_sew_larger ? prev : next;
> +prev.set_sew (from.get_sew ());
> +prev.set_vlmul (from.get_vlmul ());
> +prev.set_ratio (from.get_ratio ());
>  use_min_of_max_sew (prev, next);
>}
>inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
> index af3a8610d63..5f7490e8a3b 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
> @@ -1,14 +1,40 @@
> -/* { dg-do compile { target { rv64 } } } */
> +/* { dg-do run { target { rv64 } } } */
> +/* { dg-require-effective-target rv64 } */
> +/* { dg-require-effective-target riscv_v } */
>  /* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2 
> --param=vsetvl-strategy=optim -fno-schedule-insns  -fno-schedule-insns2 
> -fno-schedule-fusion " } */
>  
>  #include 
>  
>  void
> -foo (uint8_t *ptr, vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t 
> vl)
> +__attribute__ ((noipa))
> +foo (vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
>  {
>*v1 = __riscv_vfmv_s_f_f16m4 (1, vl);
>*v2 = __riscv_vmv_s_x_u32m8 (2963090659u, vl);
>*v3 = __riscv_vsll_vx_u8m2 (__riscv_vid_v_u8m2 (vl), 2, vl);
>  }

This patch modifies the sequence:
vsetvli zero,a4,e32,m4,ta,ma + vsetvli zero,a4,e8,m2,ta,ma 
to:
vsetvli zero,a4,e32,m8,ta,ma + vsetvli zero,zero,e8,m2,ta,ma
Functionally, there is no difference. However, this change resolves the
issue with "e64,mf4", and allows the second vsetvli to omit a4, which is
beneficial.

> -/* { dg-final { scan-assembler-not {vsetvli.*zero,zero} } }*/
> +int
> +main ()
> +{
> +  vfloat16m4_t v1;
> +  vuint32m8_t v2;
> +  vuint8m2_t v3;
> +  int vl = 4;
> +  foo (&v1, &v2, &v3, vl);
> +
> +  _Float16 val1 = ((_Float16 *)&v1)[0];
> +  if (val1 - 1.f > 0.1f)
> +__builtin_abort ();
> +
> +  uint32_t val2 = ((uint32_t *)&v2)[0];
> +  if (val2 != 2963090659u)
> +__builtin_abort ();
> +
> +  for (int i = 0; i < vl; i++)
> +{
> +  uint8_t val = ((uint8_t *)&v3)[i];
> +  if (val != i << 2)
> +__builtin_abort ();
> +}
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
> new file mode 100644
> index 000..49ccb6097d0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
> @@ -0,0 +1,827 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv_zvfh -O3" } */

Here are three issues with this test case:
1. The test case does not seem to take effect, as it appears to pass both 
before and after applying the patch for RV64.
2. Since no mabi is specified, it consistently fails for RV32 with the error: 

Re: [PATCH] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

2025-02-27 Thread Robin Dapp

This patch modifies the sequence:
vsetvli zero,a4,e32,m4,ta,ma + vsetvli zero,a4,e8,m2,ta,ma 
to:

vsetvli zero,a4,e32,m8,ta,ma + vsetvli zero,zero,e8,m2,ta,ma
Functionally, there is no difference. However, this change resolves the
issue with "e64,mf4", and allows the second vsetvli to omit a4, which is
beneficial.


My question rather was: Why did your test check for the presence of this a4?
Did you see a different issue in an unreduced test apart from what is tested 
right now (which seems at least partially wrong)?



Here are three issues with this test case:
1. The test case does not seem to take effect, as it appears to pass both 
before and after applying the patch for RV64.
2. Since no mabi is specified, it consistently fails for RV32 with the error: 
"Excess errors: cc1: error: ABI requires '-march=rv32'."
3. The test case seems to contain a lot of unnecessary code; perhaps we can 
streamline it.


As referenced in the PR the issue is flaky and only rarely occurs, under 
specific circumstances (and is latent on trunk).  The test case was already 
reduced.


You're right about the missing -mabi of course, I keep forgetting it...

--
Regards
Robin



[PATCH] input: Fix up ICEs with --param=file-cache-files=N for N > 16 [PR118860]

2025-02-27 Thread Jakub Jelinek
Hi!

The following testcase ICEs, because we first construct file_cache object
inside of *global_dc, then process options and then call file_cache::tune.
The earlier construction allocates the m_file_slots array (using new)
based on the static data member file_cache::num_file_slots, but then tune
changes it, without actually reallocating all m_file_slots arrays in already
constructed file_cache objects.

I think it is just weird to have the count be a static data member and
the pointer be non-static data member, that is just asking for issues like
this.

So, this patch changes num_file_slots into m_num_file_slots and turns tune
into a non-static member function and changes toplev.cc to call it on the
global_dc->get_file_cache () object.  And it lets tune just delete the
array and allocate it freshly if there is a change in the number of slots
or lines.

Note, file_cache_slot has a similar problem, but because there are many, I
haven't moved the count into those objects; I just hope that when tune
is called there is exactly one file_cache constructed and all the
file_cache_slot objects constructed are pointed to by its m_file_slots member,
so also on lines change it just deletes it and allocates again.  I think
it should be unlikely that the cache actually has any used slots by the time
it is called.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-27  Jakub Jelinek  

PR middle-end/118860
* input.h (file_cache::tune): No longer static.  Rename argument
from num_file_slots_ to num_file_slots.  Formatting fix.
(file_cache::num_file_slots): Renamed to ...
(file_cache::m_num_file_slots): ... this.  No longer static.
* input.cc (file_cache_slot::tune): Change return type from void to
size_t, return previous file_cache_slot::line_record_size value.
Formatting fixes.
(file_cache::tune): Rename argument from num_file_slots_ to
num_file_slots.  Set m_num_file_slots rather than num_file_slots.
If m_num_file_slots or file_cache_slot::line_record_size changes,
delete[] m_file_slots and new it again.
(file_cache::num_file_slots): Remove definition.
(file_cache::lookup_file): Use m_num_file_slots rather than
num_file_slots.
(file_cache::evicted_cache_tab_entry): Likewise.
(file_cache::file_cache): Likewise.  Initialize m_num_file_slots
to 16.
(file_cache::dump): Use m_num_file_slots rather than num_file_slots.
(file_cache_slot::get_next_line): Formatting fixes.
(file_cache_slot::read_line_num): Likewise.
(get_source_text_between): Likewise.
* toplev.cc (toplev::main): Call global_dc->get_file_cache ().tune
rather than file_cache::tune.

* gcc.dg/pr118860.c: New test.

--- gcc/input.h.jj  2025-02-03 11:05:04.833651537 +0100
+++ gcc/input.h 2025-02-27 10:38:00.943010438 +0100
@@ -161,7 +161,7 @@ class file_cache
 const char *buffer,
 size_t sz);
 
-  static void tune(size_t num_file_slots_, size_t lines);
+  void tune (size_t num_file_slots, size_t lines);
 
  private:
   file_cache_slot *evicted_cache_tab_entry (unsigned *highest_use_count);
@@ -169,7 +169,7 @@ class file_cache
   file_cache_slot *lookup_file (const char *file_path);
 
  private:
-  static size_t num_file_slots;
+  size_t m_num_file_slots;
   file_cache_slot *m_file_slots;
   input_context m_input_context;
 };
--- gcc/input.cc.jj 2025-02-24 00:06:26.036728909 +0100
+++ gcc/input.cc2025-02-27 11:00:59.351807727 +0100
@@ -79,8 +79,11 @@ public:
   void evict ();
   void set_content (const char *buf, size_t sz);
 
-  static void tune(size_t line_record_size_) {
-  line_record_size = line_record_size_;
+  static size_t tune (size_t line_record_size_)
+  {
+size_t ret = line_record_size;
+line_record_size = line_record_size_;
+return ret;
   }
 
  private:
@@ -200,14 +203,17 @@ size_t file_cache_slot::recent_cached_li
 
 /* Tune file_cache.  */
 void
-file_cache::tune (size_t num_file_slots_, size_t lines)
+file_cache::tune (size_t num_file_slots, size_t lines)
 {
-  num_file_slots = num_file_slots_;
-  file_cache_slot::tune (lines);
+  if (file_cache_slot::tune (lines) != lines
+  || m_num_file_slots != num_file_slots)
+{
+  delete[] m_file_slots;
+  m_file_slots = new file_cache_slot[num_file_slots];
+}
+  m_num_file_slots = num_file_slots;
 }
 
-size_t file_cache::num_file_slots = 16;
-
 static const char *
 find_end_of_line (const char *s, size_t len);
 
@@ -325,7 +331,7 @@ file_cache::lookup_file (const char *fil
 
   /* This will contain the found cached file.  */
   file_cache_slot *r = NULL;
-  for (unsigned i = 0; i < num_file_slots; ++i)
+  for (unsigned i = 0; i < m_num_file_slots; ++i)
 {
   file_cache_slot *c = &m_file_slots[i];
   if (c->get_file_path () && !strcmp (c->get_file_path (), file_path))
@@ -41

[PATCH] gimple-fold: Fix a pasto in fold_truth_andor_for_ifcombine [PR119030]

2025-02-27 Thread Jakub Jelinek
Hi!

The following testcase is miscompiled since r15-7597.
The left comparison is unsigned ((x & 0x8000U) != 0) while the
right one is signed ((x >> 16) >= 0) and is actually a signbit test,
so rsignbit is 64.
After debugging this and reading the r15-7597 change, I believe there
is just a pasto, the if (lsignbit) and if (rsignbit) blocks are pretty
much identical with just the first l on all variables starting with l
replaced with r (the only difference is that if (lsignbit) has a comment
explaining the sign <<= 1; stuff, while it isn't repeated in the second one).
Except the second one was using ll_unsignedp instead of rl_unsignedp
in one spot.  I think it should use the latter, the signedness of the left
comparison doesn't affect the other one, they are basically independent
with the exception that we check that after transformations they are both
EQ or both NE and later on we try to merge them together.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-27  Jakub Jelinek  

PR tree-optimization/119030
* gimple-fold.cc (fold_truth_andor_for_ifcombine): Fix a pasto,
ll_unsignedp -> rl_unsignedp.

* gcc.c-torture/execute/pr119030.c: New test.

--- gcc/gimple-fold.cc.jj   2025-02-24 00:06:26.018729158 +0100
+++ gcc/gimple-fold.cc  2025-02-27 16:27:28.843075160 +0100
@@ -8313,7 +8313,7 @@ fold_truth_andor_for_ifcombine (enum tre
   if (rsignbit)
 {
   wide_int sign = wi::mask (rl_bitsize - 1, true, rl_bitsize);
-  if (rsignbit > rl_bitsize && ll_unsignedp)
+  if (rsignbit > rl_bitsize && rl_unsignedp)
sign <<= 1;
   if (!rl_and_mask.get_precision ())
rl_and_mask = sign;
--- gcc/testsuite/gcc.c-torture/execute/pr119030.c.jj   2025-02-27 
16:34:11.332490739 +0100
+++ gcc/testsuite/gcc.c-torture/execute/pr119030.c  2025-02-27 
16:33:49.778789792 +0100
@@ -0,0 +1,26 @@
+/* PR tree-optimization/119030 */
+
+static inline unsigned
+foo (long long x)
+{
+  return x & 0x8000;
+}
+
+static inline long long
+bar (long long x)
+{
+  if (foo (x))
+return -1000L;
+  else
+return x >> 16;
+}
+
+long long x = -0x2LL;
+
+int
+main ()
+{
+  if (bar (x) >= 0)
+__builtin_abort ();
+  return 0;
+}

Jakub



[pushed][PR118940][LRA]: Add a test

2025-02-27 Thread Vladimir Makarov

The following patch adds a test for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=118940

The PR is actually solved by a patch I submitted this week for another PR.

commit 3071eb2848a2e748cfd67e8c897890ce06c69d06
Author: Vladimir N. Makarov 
Date:   Thu Feb 27 13:39:04 2025 -0500

[PR118940][LRA]: Add a test

    The fix for PR115458 also solves the given PR.  So this patch only adds
    a test case, which can be used for testing aspects of LRA different
    from those covered by the PR115458 test case.

gcc/testsuite/ChangeLog:

PR target/118940
* gcc.target/i386/pr118940.c: New test.

diff --git a/gcc/testsuite/gcc.target/i386/pr118940.c b/gcc/testsuite/gcc.target/i386/pr118940.c
new file mode 100644
index 000..be094310173
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr118940.c
@@ -0,0 +1,127 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -Os -march=i386 -mregparm=3 -m32 -fno-PIE" } */
+
+typedef unsigned char uint8_t;
+typedef unsigned int uint32_t;
+typedef unsigned int size_t;
+typedef uint32_t bigint_element_t;
+
+/**
+ * Define a big-integer type
+ *
+ * @v size		Number of elements
+ * @ret bigint_t	Big integer type
+ */
+ #define bigint_t( size )		\
+ struct {			\
+  bigint_element_t element[ (size) ];			\
+ }
+
+/**
+* Determine number of elements required for a big-integer type
+*
+* @v len		Maximum length of big integer, in bytes
+* @ret size		Number of elements
+*/
+#define bigint_required_size( len )	\
+ ( ( (len) + sizeof ( bigint_element_t ) - 1 ) /			\
+   sizeof ( bigint_element_t ) )
+
+/**
+ * Determine number of elements in big-integer type
+ *
+ * @v bigint		Big integer
+ * @ret size		Number of elements
+ */
+ #define bigint_size( bigint )		\
+ ( sizeof ( *(bigint) ) / sizeof ( (bigint)->element[0] ) )
+
+ /**
+ * Initialise big integer
+ *
+ * @v value		Big integer to initialise
+ * @v data		Raw data
+ * @v len		Length of raw data
+ */
+#define bigint_init( value, data, len ) do {\
+	unsigned int size = bigint_size (value);			\
+	bigint_init_raw ( (value)->element, size, (data), (len) );	\
+	} while ( 0 )
+
+
+/**
+ * Calculate temporary working space required for moduluar exponentiation
+ *
+ * @v modulus		Big integer modulus
+ * @ret len		Length of temporary working space
+ */
+ #define bigint_mod_exp_tmp_len( modulus ) ( {\
+	unsigned int size = bigint_size (modulus);			\
+	sizeof ( struct {		\
+		bigint_t ( size ) temp[4];\
+	} ); } )
+
+
+/**
+ * Initialise big integer
+ *
+ * @v value0		Element 0 of big integer to initialise
+ * @v size		Number of elements
+ * @v data		Raw data
+ * @v len		Length of raw data
+ */
+ static inline __attribute__ (( always_inline )) void
+ bigint_init_raw ( uint32_t *value0, unsigned int size,
+ const void *data, size_t len ) {
+  bigint_t ( size ) __attribute__ (( may_alias )) *value =
+   ( ( void * ) value0 );
+  long pad_len = ( sizeof ( *value ) - len );
+  void *discard_D;
+  long discard_c;
+
+  /* Copy raw data in reverse order, padding with zeros */
+  __asm__ __volatile__ ( "\n1:\n\t"
+   "movb -1(%3,%1), %%al\n\t"
+   "stosb\n\t"
+   "loop 1b\n\t"
+   "xorl %%eax, %%eax\n\t"
+   "mov %4, %1\n\t"
+   "rep stosb\n\t"
+   : "=&D" ( discard_D ), "=&c" ( discard_c ),
+  "+m" ( *value )
+   : "r" ( data ), "g" ( pad_len ), "0" ( value0 ),
+  "1" ( len )
+   : "eax" );
+ }
+
+extern void touch (void *, ...);
+extern void touch3 (void *, void *, void *);
+extern void touch2 (void *, void *);
+
+/**
+ * Perform big integer self-tests
+ *
+ */
+void bigint_test_exec ( void ) {
+do{
+	static const uint8_t base_raw[3] = {0};
+	static const uint8_t modulus_raw[3] = {0};
+	static const uint8_t exponent_raw[25] = {0};
+	unsigned int size =
+		bigint_required_size ( sizeof ( base_raw ) );
+	unsigned int exponent_size =
+		bigint_required_size ( sizeof ( exponent_raw ) );
+	bigint_t ( size ) base_temp;
+	bigint_t ( size ) modulus_temp;
+	bigint_t ( exponent_size ) exponent_temp;
+	size_t tmp_len = bigint_mod_exp_tmp_len ( &modulus_temp );
+
+
+	touch ( &base_temp );
+	bigint_init ( &modulus_temp, modulus_raw,
+		  sizeof ( modulus_raw ) );
+	bigint_init ( &exponent_temp, exponent_raw,
+		  sizeof ( exponent_raw ) );
+	touch3 ( &base_temp, &modulus_temp, &exponent_temp );
+	} while ( 0 );
+}


Re: [PATCH] Fortran: fix check for non-optional arrays passed to elemental

2025-02-27 Thread Peter Hill
On Thu, 27 Feb 2025 at 18:09, Jerry D  wrote:
>
> On 2/27/25 7:38 AM, Peter Hill wrote:
> > Dear all,
> >
> > The attached patch fixes an ICE in gfc_resolve_code when passing an
> > optional array to an elemental procedure with `-pedantic` enabled.
> > PR95446 added the original check, this patch fixes the case where the
> > other actual argument is an array literal (or something else other
> > than a variable). The ICE is present since 11.1, so this could be
> > backported?
> >
> > Cheers,
> > Peter
> >
>
> Hi Peter, was there a PR associated with this one?
>
> Jerry
>
> --- snip ---

Hi Jerry,

Nope, I couldn't find one -- should I have created one first?

Cheers,
Peter


[PATCH v2] c++: ICE in replace_decl [PR118986]

2025-02-27 Thread Marek Polacek
On Thu, Feb 27, 2025 at 10:42:07AM -0500, Jason Merrill wrote:
> On 2/26/25 2:16 PM, Marek Polacek wrote:
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > Yet another problem that started with r15-6052, compile time evaluation of
> > prvalues.
> > 
> > cp_fold_r/TARGET_EXPR sees:
> > 
> >TARGET_EXPR  >  D.2701.__p = TARGET_EXPR  >3
> >f1
> >D.2684  
> > 
> > so when we call maybe_constant_init, the object we're initializing is 
> > D.2701,
> > and the init is the expr_stmt.  We unwrap the 
> > EXPR_STMT/INIT_EXPR/TARGET_EXPR
> > in maybe_constant_init_1 and so end up evaluating the f1 call.  But f1 
> > returns
> > c2 whereas the type of D.2701 is ._anon_0 -- the closure.
> 
> Sounds like the problem is with the maybe_constant_init_1 unwrapping, it
> probably shouldn't strip INIT_EXPR if the type doesn't match that of 'decl'.

Yes, that's where the types change.  I thought that skipping the unpacking
here might cause other issues but it seems not.
 
> > So then we crash in replace_decl on:
> > 
> >   gcc_checking_assert (same_type_ignoring_top_level_qualifiers_p
> >(TREE_TYPE (decl), TREE_TYPE (replacement)));
> > 
> > due to the mismatched types.
> > 
> > cxx_eval_outermost_constant_expr is already ready for the types to be
> > different, in which case the result isn't constant.  But replace_decl
> > is called before that check.
> > 
> > I'm leaving the assert in replace_decl on purpose, maybe we'll find
> > another use for it.
> > 
> > PR c++/118986
> > 
> > gcc/cp/ChangeLog:
> > 
> > * constexpr.cc (cxx_eval_call_expression): Check that the types match
> > before calling replace_decl.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp2a/constexpr-prvalue1.C: New test.
> > ---
> >   gcc/cp/constexpr.cc   |  4 +++-
> >   .../g++.dg/cpp2a/constexpr-prvalue1.C | 23 +++
> >   2 files changed, 26 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-prvalue1.C
> > 
> > diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> > index 59dd0668af3..204cda2a222 100644
> > --- a/gcc/cp/constexpr.cc
> > +++ b/gcc/cp/constexpr.cc
> > @@ -3390,7 +3390,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, 
> > tree t,
> >current object under construction.  */
> > if (!*non_constant_p && ctx->object
> > && CLASS_TYPE_P (TREE_TYPE (res))
> > -   && !is_empty_class (TREE_TYPE (res)))
> > +   && !is_empty_class (TREE_TYPE (res))
> > +   && same_type_ignoring_top_level_qualifiers_p
> > +   (TREE_TYPE (res), TREE_TYPE (ctx->object)))
> 
> If this happens, rather than just skip the replace_decl, I think we want to
> set *non_constant_p or I expect we'll end up with a wrong value somewhere.

Good point, I agree.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Yet another problem that started with r15-6052, compile time evaluation of
prvalues.

cp_fold_r/TARGET_EXPR sees:

  TARGET_EXPR >>> 

so when we call maybe_constant_init, the object we're initializing is D.2701,
and the init is the expr_stmt.  We unwrap the EXPR_STMT/INIT_EXPR/TARGET_EXPR
in maybe_constant_init_1 and so end up evaluating the f1 call.  But f1 returns
c2 whereas the type of D.2701 is ._anon_0 -- the closure.

So then we crash in replace_decl on:

  gcc_checking_assert (same_type_ignoring_top_level_qualifiers_p
   (TREE_TYPE (decl), TREE_TYPE (replacement)));

due to the mismatched types.

cxx_eval_outermost_constant_expr is already ready for the types to be
different, in which case the result isn't constant.  But replace_decl
is called before that check.

I'm leaving the assert in replace_decl on purpose, maybe we'll find
another use for it.

PR c++/118986

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_call_expression): Check that the types match
before calling replace_decl, if not, set *non_constant_p.
(maybe_constant_init_1): Don't strip INIT_EXPR if it would change the
type of the expression.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constexpr-prvalue1.C: New test.
---
 gcc/cp/constexpr.cc   | 29 +--
 .../g++.dg/cpp2a/constexpr-prvalue1.C | 23 +++
 2 files changed, 43 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-prvalue1.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index c68666cc5dd..5439b2ea8fe 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3388,16 +3388,22 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, 
tree t,
 
/* Rewrite all occurrences of the function's RESULT_DECL with the
   current object under construction.  */
-   if (!*non_constant_p && ctx->object
+   if (!*non_constant_p
+ 

Re: [PATCH] gimple-fold: Fix a pasto in fold_truth_andor_for_ifcombine [PR119030]

2025-02-27 Thread Richard Biener



> Am 27.02.2025 um 21:11 schrieb Jakub Jelinek :
> 
> Hi!
> 
> The following testcase is miscompiled since r15-7597.
> The left comparison is unsigned (x & 0x8000U) != 0) while the
> right one is signed (x >> 16) >= 0 and is actually a signbit test,
> so rsignbit is 64.
> After debugging this and reading the r15-7597 change, I believe there
> is just a pasto, the if (lsignbit) and if (rsignbit) blocks are pretty
> much identical with just the first l on all variables starting with l
> replaced with r (the only difference is that if (lsignbit) has a comment
> explaining the sign <<= 1; stuff, while it isn't repeated in the second one.
> Except the second one was using ll_unsignedp instead of rl_unsignedp
> in one spot.  I think it should use the latter, the signedness of the left
> comparison doesn't affect the other one, they are basically independent
> with the exception that we check that after transformations they are both
> EQ or both NE and later on we try to merge them together.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 

> 2025-02-27  Jakub Jelinek  
> 
>PR tree-optimization/119030
>* gimple-fold.cc (fold_truth_andor_for_ifcombine): Fix a pasto,
>ll_unsignedp -> rl_unsignedp.
> 
>* gcc.c-torture/execute/pr119030.c: New test.
> 
> --- gcc/gimple-fold.cc.jj2025-02-24 00:06:26.018729158 +0100
> +++ gcc/gimple-fold.cc2025-02-27 16:27:28.843075160 +0100
> @@ -8313,7 +8313,7 @@ fold_truth_andor_for_ifcombine (enum tre
>   if (rsignbit)
> {
>   wide_int sign = wi::mask (rl_bitsize - 1, true, rl_bitsize);
> -  if (rsignbit > rl_bitsize && ll_unsignedp)
> +  if (rsignbit > rl_bitsize && rl_unsignedp)
>sign <<= 1;
>   if (!rl_and_mask.get_precision ())
>rl_and_mask = sign;
> --- gcc/testsuite/gcc.c-torture/execute/pr119030.c.jj2025-02-27 
> 16:34:11.332490739 +0100
> +++ gcc/testsuite/gcc.c-torture/execute/pr119030.c2025-02-27 
> 16:33:49.778789792 +0100
> @@ -0,0 +1,26 @@
> +/* PR tree-optimization/119030 */
> +
> +static inline unsigned
> +foo (long long x)
> +{
> +  return x & 0x8000;
> +}
> +
> +static inline long long
> +bar (long long x)
> +{
> +  if (foo (x))
> +return -1000L;
> +  else
> +return x >> 16;
> +}
> +
> +long long x = -0x2LL;
> +
> +int
> +main ()
> +{
> +  if (bar (x) >= 0)
> +__builtin_abort ();
> +  return 0;
> +}
> 
>Jakub
> 


[PATCH] libstdc++: Improve optional's <=> constraint recursion workaround [PR104606]

2025-02-27 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
14?  Not sure about backporting further given the original fix seems
harmless.

-- >8 --

It turns out the reason the behavior of this testcase changed after CWG
2369 is because validity of the substituted return type is now checked
later, after constraints.  So a more reliable workaround for this issue
is to add a constraint to check the validity of the return type earlier,
restoring the pre-CWG 2369 semantics.

PR libstdc++/104606

libstdc++-v3/ChangeLog:

* include/std/optional (operator<=>): Revert r14-9771 change.
Add constraint checking the validity of the return type
compare_three_way_result_t before the three_way_comparable_with
constraint.
---
 libstdc++-v3/include/std/optional | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/optional 
b/libstdc++-v3/include/std/optional
index 832dc6fd84b..a616dc07b10 100644
--- a/libstdc++-v3/include/std/optional
+++ b/libstdc++-v3/include/std/optional
@@ -1685,7 +1685,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
 requires (!__is_derived_from_optional<_Up>)
-  && three_way_comparable_with<_Up, _Tp>
+  && requires { typename compare_three_way_result_t<_Tp, _Up>; }
+  && three_way_comparable_with<_Tp, _Up>
 constexpr compare_three_way_result_t<_Tp, _Up>
 operator<=> [[nodiscard]] (const optional<_Tp>& __x, const _Up& __v)
 { return bool(__x) ? *__x <=> __v : strong_ordering::less; }
-- 
2.49.0.rc0



ping Re: [patch, doc] PR108369 GCC: Documentation of -x option

2025-02-27 Thread Jerry D

On 2/26/25 5:58 PM, Jerry D wrote:
This attached patch is intended to clarify the '-x' option using '-x 
f77' as an example. I was not sure who should review.


Tested by inspecting the generated info file from make info.

OK for trunk and backport to 14?

Regards,

Jerry

Author: Jerry DeLisle 
Date:   Wed Feb 26 17:26:26 2025 -0800

     GCC: Documentation of -x option

     This change updates information about the -x option to clarify
     that it does not ensure standards compliance. Sparked by
     discussions in the following PR.

     PR fortran/108369

     gcc/ChangeLog:

     * doc/invoke.texi: Add a note to clarify. Adjust some wording.





[PATCH v3] aarch64: Ignore target pragmas while defining intrinsics

2025-02-27 Thread Andrew Carlotti
Compared to v2, this splits out the alignment switching into a new class and
merges the rest of the switching functionality into aarch64_target_switcher,
as agreed with Richard in the previous review discussion.

Bootstrapped and regression tested on aarch64. Is this ok for master?

---

Refactor the switcher classes into two separate classes:

- sve_alignment_switcher takes the alignment switching functionality,
  and is used only for ABI correctness when defining sve structure
  types.
- aarch64_target_switcher takes the rest of the functionality of
  aarch64_simd_switcher and sve_switcher, and gates simd/sve specific
  parts upon the specified feature flags.

Additionally, aarch64_target_switcher now adds dependencies of the
specified flags (which adds +fcma and +bf16 to some intrinsic
declarations), and unsets current_target_pragma.

This last change fixes an internal bug where we would sometimes add a
user specified target pragma (stored in current_target_pragma) on top of
an internally specified target architecture while initialising
intrinsics with `#pragma GCC aarch64 "arm_*.h"`.  As far as I can tell, this
has no visible impact at the moment.  However, the unintended target
feature combinations lead to unwanted behaviour in an under-development
patch.

gcc/ChangeLog:

* common/config/aarch64/aarch64-common.cc
(struct aarch64_extension_info): Add field.
(aarch64_get_required_features): New.
* config/aarch64/aarch64-builtins.cc
(aarch64_simd_switcher::aarch64_simd_switcher): Rename to...
(aarch64_target_switcher::aarch64_target_switcher): ...this,
and extend to handle sve, nosimd and target pragmas.
(aarch64_simd_switcher::~aarch64_simd_switcher): Rename to...
(aarch64_target_switcher::~aarch64_target_switcher): ...this,
and extend to handle sve, nosimd and target pragmas.
(handle_arm_acle_h): Use aarch64_target_switcher.
(handle_arm_neon_h): Rename switcher and pass explicit flags.
(aarch64_general_init_builtins): Ditto.
* config/aarch64/aarch64-protos.h
(class aarch64_simd_switcher): Rename to...
(class aarch64_target_switcher): ...this, and add new members.
(aarch64_get_required_features): New prototype.
* config/aarch64/aarch64-sve-builtins.cc
(sve_switcher::sve_switcher): Delete
(sve_switcher::~sve_switcher): Delete
(sve_alignment_switcher::sve_alignment_switcher): New
(sve_alignment_switcher::~sve_alignment_switcher): New
(register_builtin_types): Use alignment switcher
(init_builtins): Rename switcher.
(handle_arm_sve_h): Ditto.
(handle_arm_neon_sve_bridge_h): Ditto.
(handle_arm_sme_h): Ditto.
* config/aarch64/aarch64-sve-builtins.h
(class sve_switcher): Delete.
(class sme_switcher): Delete.
(class sve_alignment_switcher): New.


diff --git a/gcc/common/config/aarch64/aarch64-common.cc 
b/gcc/common/config/aarch64/aarch64-common.cc
index 
ef4458fb69308d2bb6785e97be5be85226cf0ebb..500bf784983d851c54ea4ec59cf3cad29e5e309e
 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -157,6 +157,8 @@ struct aarch64_extension_info
   aarch64_feature_flags flags_on;
   /* If this feature is turned off, these bits also need to be turned off.  */
   aarch64_feature_flags flags_off;
+  /* If this feature remains enabled, these bits must also remain enabled.  */
+  aarch64_feature_flags flags_required;
 };
 
 /* ISA extensions in AArch64.  */
@@ -164,9 +166,10 @@ static constexpr aarch64_extension_info all_extensions[] =
 {
 #define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, FEATURE_STRING) \
   {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
-   feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
+   feature_deps::get_flags_off (feature_deps::root_off_##IDENT), \
+   feature_deps::IDENT ().enable},
 #include "config/aarch64/aarch64-option-extensions.def"
-  {NULL, 0, 0, 0}
+  {NULL, 0, 0, 0, 0}
 };
 
 struct aarch64_arch_info
@@ -204,6 +207,18 @@ static constexpr aarch64_processor_info all_cores[] =
   {NULL, aarch64_no_cpu, aarch64_no_arch, 0}
 };
 
+/* Return the set of feature flags that are required to be enabled when the
+   features in FLAGS are enabled.  */
+
+aarch64_feature_flags
+aarch64_get_required_features (aarch64_feature_flags flags)
+{
+  const struct aarch64_extension_info *opt;
+  for (opt = all_extensions; opt->name != NULL; opt++)
+if (flags & opt->flag_canonical)
+  flags |= opt->flags_required;
+  return flags;
+}
 
 /* Print a list of CANDIDATES for an argument, and try to suggest a specific
close match.  */
diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index 
128cc365d3d585e01cb69668f285318ee56a36fc..93f939a9c834c664fa8f081e6a484779071503eb
 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b

Re: [PATCH] c++: ICE with GOTO_EXPR [PR118928]

2025-02-27 Thread Marek Polacek
On Thu, Feb 27, 2025 at 01:15:12PM -0500, Jason Merrill wrote:
> On 2/20/25 9:51 AM, Marek Polacek wrote:
> > Now with the test fixed.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > -- >8 --
> > In this PR we crash in cxx_eval_constant_expression/GOTO_EXPR on:
> > 
> >gcc_assert (cxx_dialect >= cxx23);
> > 
> > The code obviously doesn't expect to see a goto pre-C++23.  But we can
> > get here with the new prvalue optimization.  In this test we found
> > ourselves in synthesize_method for X::X().  This function calls:
> > 
> >   a) finish_function, which does cp_genericize -> ... -> genericize_c_loops,
> >  which creates the GOTO_EXPR;
> >   b) expand_or_defer_fn -> maybe_clone_body -> ... -> cp_fold_function
> >  where we reach the new maybe_constant_init call and crash on the
> >  goto.
> 
> Hmm, this looks like bad recursion; finish_function wants to fold before
> genericize, but then because maybe_clone_body calls finish_function, we end
> up folding again after genericize.
> 
> Maybe we should call maybe_clone_body directly from finish_function rather
> than from expand_or_defer?

Probably best to leave that to 16; I wonder what kind of trouble that
would unravel.
 
> But the patch is OK as is.

Thanks.

> > Since we can validly get to that assert, I think we should just remove
> > it.  I don't see other similar asserts like this one.
> > 
> > PR c++/118928
> > 
> > gcc/cp/ChangeLog:
> > 
> > * constexpr.cc (cxx_eval_constant_expression) <case GOTO_EXPR>: Remove
> > an assert.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/constexpr-prvalue5.C: New test.
> > ---
> >   gcc/cp/constexpr.cc   |  1 -
> >   .../g++.dg/cpp0x/constexpr-prvalue5.C | 24 +++
> >   2 files changed, 24 insertions(+), 1 deletion(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C
> > 
> > diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
> > index 59dd0668af3..c68666cc5dd 100644
> > --- a/gcc/cp/constexpr.cc
> > +++ b/gcc/cp/constexpr.cc
> > @@ -8691,7 +8691,6 @@ cxx_eval_constant_expression (const constexpr_ctx 
> > *ctx, tree t,
> > *jump_target = TREE_OPERAND (t, 0);
> > else
> > {
> > - gcc_assert (cxx_dialect >= cxx23);
> >   if (!ctx->quiet)
> > error_at (loc, "% is not a constant expression");
> >   *non_constant_p = true;
> > diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C 
> > b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C
> > new file mode 100644
> > index 000..1f847bbe183
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C
> > @@ -0,0 +1,24 @@
> > +// PR c++/118928
> > +// { dg-do compile { target c++11 } }
> > +// { dg-options "-O" }
> > +
> > +using size_t = decltype(sizeof(0));
> > +
> > +namespace std {
> > +template  struct initializer_list {
> > +  const T *_M_array;
> > +  size_t _M_len;
> > +};
> > +struct S {
> > +  constexpr S(const char *); // { dg-warning "used but never defined" }
> > +};
> > +struct vector {
> > +  constexpr vector(initializer_list) {}
> > +};
> > +}
> > +struct Y {
> > +std::vector v;
> > +};
> > +struct X {
> > +  Y y{{""}};
> > +} x;
> > 
> > base-commit: a2755339c6c9832467c573d956e91565943ecdc1
> 

Marek



[PATCH] ifcvt: Fix ICE with (fix:SI (fix:DF (reg:DF))) [PR117712]

2025-02-27 Thread Jakub Jelinek
Hi!

As documented in the manual, FIX/UNSIGNED_FIX from floating point
mode to integral mode has unspecified rounding and FIX from floating point
mode to the same floating point mode is expressing rounding toward zero.
So, some targets (arc, arm, csky, m68k, mmix, nds32, pdp11, sparc and
visium) use
(fix:SI (fix:SF (match_operand:SF 1 "..._operand")))
etc. to express the rounding toward zero during conversion to integer.
For some reason other targets don't use that.

Anyway, the 2 FIXes (or inner FIX with outer UNSIGNED_FIX) cause problems
since the r15-2890 which removed some strict checks in ifcvt.cc on what
SET_SRC can be actually conditionalized (I must say I'm still worried
about the change, don't know why one can't get e.g. inline asm or
something with UNSPEC or some complex backend specific RTLs that
force_operand can't handle), force_operand just ICEs on it, it can only
handle (through expand_fix) conversions from floating point to integral.

The following patch fixes this by detecting this case and just pretending
the inner FIX isn't there, i.e. call expand_fix with the inner FIX's
operand instead, which works and on targets like arm it will just
create the nested FIXes again.

Bootstrapped/regtested on x86_64-linux and i686-linux and Christophe
tested this on arm, ok for trunk?

2025-02-27  Jakub Jelinek  

PR rtl-optimization/117712
* expr.cc (force_operand): Handle {,UNSIGNED_}FIX with
FIX operand using expand_fix on the inner FIX operand.

* gcc.dg/pr117712.c: New test.

--- gcc/expr.cc.jj  2025-01-31 15:12:57.892489693 +0100
+++ gcc/expr.cc 2025-02-27 12:07:06.079581662 +0100
@@ -8747,7 +8747,19 @@ force_operand (rtx value, rtx target)
 {
   if (!target)
target = gen_reg_rtx (GET_MODE (value));
-  op1 = force_operand (XEXP (value, 0), NULL_RTX);
+  /* FIX or UNSIGNED_FIX with integral mode has unspecified rounding,
+while FIX with floating point mode rounds toward zero.  So, some
+targets use expressions like (fix:SI (fix:DF (reg:DF ...)))
+to express rounding toward zero during the conversion to int.
+expand_fix isn't able to handle that, it can only handle
+FIX/UNSIGNED_FIX from floating point mode to integral one.  */
+  if ((code == FIX || code == UNSIGNED_FIX)
+ && GET_CODE (XEXP (value, 0)) == FIX
+ && (GET_MODE (XEXP (value, 0))
+ == GET_MODE (XEXP (XEXP (value, 0), 0
+   op1 = force_operand (XEXP (XEXP (value, 0), 0), NULL_RTX);
+  else
+   op1 = force_operand (XEXP (value, 0), NULL_RTX);
   switch (code)
{
case ZERO_EXTEND:
--- gcc/testsuite/gcc.dg/pr117712.c.jj  2025-02-27 11:59:55.906554128 +0100
+++ gcc/testsuite/gcc.dg/pr117712.c 2025-02-27 12:04:27.397775236 +0100
@@ -0,0 +1,13 @@
+/* PR rtl-optimization/117712 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math" } */
+
+int b;
+
+int
+foo (int x)
+{
+  if (b)
+x = 0.96 * x;
+  return x;
+}

Jakub



[PATCH] c++: generic lambda, implicit 'this' capture, xobj memfn [PR119038]

2025-02-27 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
14?

-- >8 --

When a generic lambda calls an overload set containing an iobj member
function we speculatively capture 'this'.  We need to do the same
for an xobj member function.

PR c++/119038

gcc/cp/ChangeLog:

* lambda.cc (maybe_generic_this_capture): Consider xobj
member functions as well, not just iobj.

gcc/testsuite/ChangeLog:

* g++.dg/cpp23/explicit-obj-lambda15.C: New test.
---
 gcc/cp/lambda.cc   |  2 +-
 gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C | 11 +++
 2 files changed, 12 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C

diff --git a/gcc/cp/lambda.cc b/gcc/cp/lambda.cc
index 09898f6746c..9679fdc1a2d 100644
--- a/gcc/cp/lambda.cc
+++ b/gcc/cp/lambda.cc
@@ -992,7 +992,7 @@ maybe_generic_this_capture (tree object, tree fns)
for (lkp_iterator iter (fns); iter; ++iter)
  if (((!id_expr && TREE_CODE (*iter) != USING_DECL)
   || TREE_CODE (*iter) == TEMPLATE_DECL)
- && DECL_IOBJ_MEMBER_FUNCTION_P (*iter))
+ && DECL_OBJECT_MEMBER_FUNCTION_P (*iter))
{
  /* Found a non-static member.  Capture this.  */
  lambda_expr_this_capture (lam, /*maybe*/-1);
diff --git a/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C 
b/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C
new file mode 100644
index 000..369f0895ed1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp23/explicit-obj-lambda15.C
@@ -0,0 +1,11 @@
+// PR c++/119038
+// { dg-do compile { target c++23 } }
+
+struct A {
+  void f() {
+[&](auto x) { g(x); h(x); }(0);
+  }
+
+  void g(this A&, int);
+  void h(this auto&, auto);
+};
-- 
2.49.0.rc0



[committed] libstdc++: Fix outdated comment in <stacktrace>

2025-02-27 Thread Jonathan Wakely
My r15-998-g2a83084ce55363 change replaced the use of nothrow
operator new with a call to __get_temporary_buffer, so update the
comment to match.

libstdc++-v3/ChangeLog:

* include/std/stacktrace (_Impl::_M_allocate): Fix outdated
comment.
---

Lightly tested on x86_64-linux (because it's only a comment).

Pushed to trunk.

 libstdc++-v3/include/std/stacktrace | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/stacktrace 
b/libstdc++-v3/include/std/stacktrace
index f94a424e4cf..491122293c5 100644
--- a/libstdc++-v3/include/std/stacktrace
+++ b/libstdc++-v3/include/std/stacktrace
@@ -559,7 +559,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
  if constexpr (is_same_v>)
{
- // For std::allocator we use nothrow-new directly so we
+ // Use non-throwing __get_temporary_buffer, so that we
  // don't need to handle exceptions from __alloc.allocate(n).
  auto __p = __detail::__get_temporary_buffer(__n);
  if (__p == nullptr) [[unlikely]]
-- 
2.48.1



[Fortran, Patch, PR118730, v1] Ensure user-finalized type is referenced

2025-02-27 Thread Andre Vehreschild
Hi all,

attached patch fixes user defined finalizers in derived (class) types not
getting called, when the variable declared of that type was not used in the
current block. The patch ensures calling the finalizer by marking the
variable referenced, if it has not been.

Additionally I had to patch three testcases, because their tree-dump-scans did
not fit anymore. In one case a variable was not used; in the two others the
counts did not match any more.

Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?

Regards,
Andre
--
Andre Vehreschild * Email: vehre ad gmx dot de
From e86c918e59b8c1b66ce837c2b4c735204c2d5510 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild 
Date: Thu, 27 Feb 2025 12:27:10 +0100
Subject: [PATCH] Fortran: Ensure finalizer is called for unreferenced variable
 [PR118730]

	PR fortran/118730

gcc/fortran/ChangeLog:

	* resolve.cc: Mark unused derived type variable with finalizers
	referenced to execute finalizer when leaving scope.

gcc/testsuite/ChangeLog:

	* gfortran.dg/class_array_15.f03: Remove unused variable.
	* gfortran.dg/coarray_poly_7.f90: Adapt scan-tree-dump expr.
	* gfortran.dg/coarray_poly_8.f90: Same.
	* gfortran.dg/finalize_60.f90: New test.
---
 gcc/fortran/resolve.cc   |  8 +
 gcc/testsuite/gfortran.dg/class_array_15.f03 |  2 +-
 gcc/testsuite/gfortran.dg/coarray_poly_7.f90 |  2 +-
 gcc/testsuite/gfortran.dg/coarray_poly_8.f90 |  2 +-
 gcc/testsuite/gfortran.dg/finalize_60.f90| 33 
 5 files changed, 44 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/finalize_60.f90

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 6a83a7967a8..f83d122a3a2 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -17063,6 +17063,14 @@ skip_interfaces:
   return;
 }

+  /* Ensure that variables of derived or class type having a finalizer are
+ marked used even when the variable is not used anything else in the scope.
+ This fixes PR118730.  */
+  if (sym->attr.flavor == FL_VARIABLE && !sym->attr.referenced
+  && (sym->ts.type == BT_DERIVED || sym->ts.type == BT_CLASS)
+  && gfc_may_be_finalized (sym->ts))
+gfc_set_sym_referenced (sym);
+
   if (sym->attr.flavor == FL_DERIVED && !resolve_fl_derived (sym))
 return;

diff --git a/gcc/testsuite/gfortran.dg/class_array_15.f03 b/gcc/testsuite/gfortran.dg/class_array_15.f03
index 332b39833eb..f53b2356952 100644
--- a/gcc/testsuite/gfortran.dg/class_array_15.f03
+++ b/gcc/testsuite/gfortran.dg/class_array_15.f03
@@ -100,7 +100,7 @@ subroutine pr54992  ! This test remains as the original.
   implicit none
   type (tn), target  :: b
   class(ncBh), pointer :: bh
-  class(ncBh), allocatable, dimension(:) :: t
+
   allocate(b%cBh(1),source=defaultBhC)
   b%cBh(1)%hostNode => b
 ! #1 this worked
diff --git a/gcc/testsuite/gfortran.dg/coarray_poly_7.f90 b/gcc/testsuite/gfortran.dg/coarray_poly_7.f90
index d8d83aea39b..21a3054f59c 100644
--- a/gcc/testsuite/gfortran.dg/coarray_poly_7.f90
+++ b/gcc/testsuite/gfortran.dg/coarray_poly_7.f90
@@ -18,4 +18,4 @@ end
 ! { dg-final { scan-tree-dump-times "foo \\(struct __class_MAIN___T_1_1t & restrict x, void \\* restrict caf_token.., integer\\(kind=\[48\]\\) caf_offset..\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "bar \\(struct __class_MAIN___T_1_1t \\* restrict x, void \\* restrict caf_token.., integer\\(kind=\[48\]\\) caf_offset..\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "bar \\(0B, 0B, 0\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "foo \\(&class.., y._data.token, \\(integer\\(kind=\[48\]\\)\\) class..._data.data - \\(integer\\(kind=\[48\]\\)\\) y._data.data\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "foo \\(&class.\[0-9\]+, y._data.token, \\(integer\\(kind=\[48\]\\)\\) class.\[0-9\]+._data.data - \\(integer\\(kind=\[48\]\\)\\) y._data.data\\);" 1 "original" } }
diff --git a/gcc/testsuite/gfortran.dg/coarray_poly_8.f90 b/gcc/testsuite/gfortran.dg/coarray_poly_8.f90
index abdfc0ca5f8..9ceece419ae 100644
--- a/gcc/testsuite/gfortran.dg/coarray_poly_8.f90
+++ b/gcc/testsuite/gfortran.dg/coarray_poly_8.f90
@@ -18,4 +18,4 @@ end
 ! { dg-final { scan-tree-dump-times "foo \\(struct __class_MAIN___T_1_1t & restrict x, void \\* restrict caf_token.., integer\\(kind=\[48\]\\) caf_offset..\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "bar \\(struct __class_MAIN___T_1_1t \\* restrict x, void \\* restrict caf_token.., integer\\(kind=\[48\]\\) caf_offset..\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "bar \\(0B, 0B, 0\\);" 1 "original" } }
-! { dg-final { scan-tree-dump-times "foo \\(&class.., y._data.token, \\(integer\\(kind=\[48\]\\)\\) class..._data.data - \\(integer\\(kind=\[48\]\\)\\) y._data.data\\);" 1 "original" } }
+! { dg-final { scan-tree-dump-times "foo \\(&class.\[0-9\]+, y._data.token, \\(integer\\(kind=\[48\]\\)\\) class.\[0-9\]+._data.data - \\(integer\\(kind=\[48\]\\)\\) 

Re: [PUSHED] nvptx: Build libgfortran with '-mfake-ptx-alloca' [PR107635]

2025-02-27 Thread Andre Vehreschild
Hi Thomas,

are you really telling me, that gfortran's coarray test library is compiled for
offloading to GPU (or other SIMD processors)? Because that's what NVPTX is used
for most, right? In my opinion that makes no sense, because coarrays in Fortran
are used for SISD style accesses. Although the new style now extracts "kernels"
for the access (which are tiny; and therefore should not perform on any GPU),
I'd rather expect the caf_single library to be not compiled for
NVPTX-offloading. Are there different opinions?

Sorry, for disturbing the NVPTX build. I wasn't aware, that it was done for
caf_*.

Regards,
Andre

On Thu, 27 Feb 2025 21:53:42 +0100
Thomas Schwinge  wrote:

> As of recent commit 8bf0ee8d62b8a08e808344d31354ab713157e15d
> "Fortran: Add transfer_between_remotes [PR107635]", we've got 'alloca' usage
> in 'libgfortran/caf/single.c:_gfortran_caf_transfer_between_remotes', and
> the libgfortran target library fails to build for legacy configurations where
> PTX 'alloca' is not available:
> 
> ../../../../source-gcc/libgfortran/caf/single.c: In function
> ‘_gfortran_caf_transfer_between_remotes’:
> ../../../../source-gcc/libgfortran/caf/single.c:675:23: sorry, unimplemented:
> dynamic stack allocation not supported 675 |   transfer_desc =
> __builtin_alloca (desc_size); |
> ^~~~
> ../../../../source-gcc/libgfortran/caf/single.c:680:20: sorry, unimplemented:
> dynamic stack allocation not supported 680 | transfer_ptr =
> __builtin_alloca (*opt_dst_charlen * src_size); |
> ^~ make[6]: *** [Makefile:4675:
> caf/single.lo] Error 1
> 
> With '-mfake-ptx-alloca', libgfortran again succeeds to build, and compared
> to before, we've got only a small number of regressions due to nvptx 'ld'
> complaining about 'unresolved symbol __GCC_nvptx__PTX_alloca_not_supported':
> 
> [-PASS:-]{+FAIL:+} gfortran.dg/coarray/codimension_2.f90 -fcoarray=lib
> -O2  -lcaf_single (test for excess errors)
> 
> [-PASS:-]{+FAIL:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2
> -lcaf_single (test for excess errors) [-PASS:-]{+UNRESOLVED:+}
> gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2  -lcaf_single [-execution
> test-]{+compilation failed to produce executable+}
> 
> [-PASS:-]{+FAIL:+} gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib
> -O2  -lcaf_single (test for excess errors) [-PASS:-]{+UNRESOLVED:+}
> gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib  -O2  -lcaf_single
> [-execution test-]{+compilation failed to produce executable+}
> 
> [-PASS:-]{+FAIL:+} gfortran.dg/coarray/proc_pointer_assign_1.f90
> -fcoarray=lib  -O2  -lcaf_single (test for excess errors)
> [-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/proc_pointer_assign_1.f90
> -fcoarray=lib  -O2  -lcaf_single [-execution test-]{+compilation failed to
> produce executable+}
> 
> [-PASS:-]{+FAIL:+} gfortran.dg/coarray_43.f90   -O  (test for excess
> errors)
> 
> That's acceptable for such legacy PTX configurations.
> 
>   PR target/107635
>   libgfortran/
>   * config/t-nvptx: New.
>   * configure.host [nvptx] (tmake_file): Add it.
> ---
>  libgfortran/config/t-nvptx | 2 ++
>  libgfortran/configure.host | 4 
>  2 files changed, 6 insertions(+)
>  create mode 100644 libgfortran/config/t-nvptx
> 
> diff --git a/libgfortran/config/t-nvptx b/libgfortran/config/t-nvptx
> new file mode 100644
> index 000..88bd6eee982
> --- /dev/null
> +++ b/libgfortran/config/t-nvptx
> @@ -0,0 +1,2 @@
> +# Re 'alloca' usage in '../caf/single.c':
> +AM_CFLAGS += -mfake-ptx-alloca
> diff --git a/libgfortran/configure.host b/libgfortran/configure.host
> index 291188d19c2..9abd40f511a 100644
> --- a/libgfortran/configure.host
> +++ b/libgfortran/configure.host
> @@ -91,6 +91,10 @@ case "${target}" in
>   tmake_file="t-aix"
>   ;;
>  
> +  nvptx-*-none)
> + tmake_file="$tmake_file t-nvptx"
> + ;;
> +
>*)
>   ;;
>  


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


[PATCH] MAINTAINERS: add myself to write after approval and DCO

2025-02-27 Thread Giuseppe D'Angelo

Hello,

I've added myself to MAINTAINERS. Patch is attached.

Thank you,
--
Giuseppe D'Angelo
From a3f77f2528b9383c70f0361e0f3863cee58e9648 Mon Sep 17 00:00:00 2001
From: Giuseppe D'Angelo 
Date: Fri, 28 Feb 2025 08:37:25 +0100
Subject: [PATCH] MAINTAINERS: add myself to write after approval and DCO

ChangeLog:

	* MAINTAINERS: Added myself as write after approval and DCO.

Signed-off-by: Giuseppe D'Angelo 
---
 MAINTAINERS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index c423dd6e787..193cd802a07 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -428,6 +428,7 @@ Ludovic Courtès ludo
 Cary Coutant-   
 Lawrence Crowl  crowl   
 Lili Cui-   
+Giuseppe D'Angelo   peppe   
 Palmer Dabbelt  palmer  
 Ian Dalliandall 
 David Daney daney   
@@ -929,6 +930,7 @@ information.
 
 Soumya AR   
 Juergen Christ  
+Giuseppe D'Angelo   
 Robin Dapp  
 Robin Dapp  
 Aldy Hernandez  
-- 
2.34.1



smime.p7s
Description: S/MIME Cryptographic Signature


[PATCH]AArch64: force operand to fresh register to avoid subreg issues [PR118892]

2025-02-27 Thread Tamar Christina
Hi All,

When the input is already a subreg and we try to make a paradoxical
subreg out of it for copysign this can fail if it violates the subreg
relationship.

Use force_lowpart_subreg instead of lowpart_subreg to then force the
results to a register instead of ICEing.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master?

Thanks,
Tamar

gcc/ChangeLog:

PR target/118892
* config/aarch64/aarch64.md (copysign3): Use
force_lowpart_subreg instead of lowpart_subreg.

gcc/testsuite/ChangeLog:

PR target/118892
* gcc.target/aarch64/copysign-pr118892.c: New test.

---
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 
cfe730f3732ce45c914b30a908851a4a7dd77c0f..62be9713cf417922b3c06e38f12f401872751fa2
 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7479,8 +7479,8 @@ (define_expand "copysign3"
   && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
 {
   emit_insn (gen_ior3 (
-   lowpart_subreg (mode, operands[0], mode),
-   lowpart_subreg (mode, operands[1], mode),
+   force_lowpart_subreg (mode, operands[0], mode),
+   force_lowpart_subreg (mode, operands[1], mode),
v_bitmask));
   DONE;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c 
b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
new file mode 100644
index 
..adfa30dc3e2db895af4f2057bdd1011fdb7d4537
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+double l();
+double f()
+{
+  double t6[2] = {l(), l()};
+  double t7[2];
+  __builtin_memcpy(&t7, &t6, sizeof(t6));
+  return -__builtin_fabs(t7[1]);
+}


-- 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cfe730f3732ce45c914b30a908851a4a7dd77c0f..62be9713cf417922b3c06e38f12f401872751fa2 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7479,8 +7479,8 @@ (define_expand "copysign3"
   && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
 {
   emit_insn (gen_ior3 (
-	lowpart_subreg (mode, operands[0], mode),
-	lowpart_subreg (mode, operands[1], mode),
+	force_lowpart_subreg (mode, operands[0], mode),
+	force_lowpart_subreg (mode, operands[1], mode),
 	v_bitmask));
   DONE;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
new file mode 100644
index ..adfa30dc3e2db895af4f2057bdd1011fdb7d4537
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/copysign-pr118892.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast" } */
+
+double l();
+double f()
+{
+  double t6[2] = {l(), l()};
+  double t7[2];
+  __builtin_memcpy(&t7, &t6, sizeof(t6));
+  return -__builtin_fabs(t7[1]);
+}



Re: [PATCH] input: Fix up ICEs with --param=file-cache-files=N for N > 16 [PR118860]

2025-02-27 Thread Richard Biener



> Am 27.02.2025 um 20:54 schrieb Jakub Jelinek :
> 
> Hi!
> 
> The following testcase ICEs, because we first construct file_cache object
> inside of *global_dc, then process options and then call file_cache::tune.
> The earlier construction allocates the m_file_slots array (using new)
> based on the static data member file_cache::num_file_slots, but then tune
> changes it, without actually reallocating all m_file_slots arrays in already
> constructed file_cache objects.
> 
> I think it is just weird to have the count be a static data member and
> the pointer be non-static data member, that is just asking for issues like
> this.
> 
> So, this patch changes num_file_slots into m_num_file_slots and turns tune
> into a non-static member function and changes toplev.cc to call it on the
> global_dc->get_file_cache () object.  And lets tune just delete the
> array and allocate it freshly if there is a change in the number of slots
> or lines.
> 
> Note, file_cache_slot has similar problem, but because there are many, I
> haven't moved the count into those objects; I just hope that when tune
> is called there is exactly one file_cache constructed and all the
> file_cache_slot objects constructed are pointed by its m_file_slots member,
> so also on lines change it just deletes it and allocates again.  I think
> it should be unlikely that the cache actually has any used slots by the time
> it is called.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Ok

Richard 

> 2025-02-27  Jakub Jelinek  
> 
>PR middle-end/118860
>* input.h (file_cache::tune): No longer static.  Rename argument
>from num_file_slots_ to num_file_slots.  Formatting fix.
>(file_cache::num_file_slots): Renamed to ...
>(file_cache::m_num_file_slots): ... this.  No longer static.
>* input.cc (file_cache_slot::tune): Change return type from void to
>size_t, return previous file_cache_slot::line_record_size value.
>Formatting fixes.
>(file_cache::tune): Rename argument from num_file_slots_ to
>num_file_slots.  Set m_num_file_slots rather than num_file_slots.
>If m_num_file_slots or file_cache_slot::line_record_size changes,
>delete[] m_file_slots and new it again.
>(file_cache::num_file_slots): Remove definition.
>(file_cache::lookup_file): Use m_num_file_slots rather than
>num_file_slots.
>(file_cache::evicted_cache_tab_entry): Likewise.
>(file_cache::file_cache): Likewise.  Initialize m_num_file_slots
>to 16.
>(file_cache::dump): Use m_num_file_slots rather than num_file_slots.
>(file_cache_slot::get_next_line): Formatting fixes.
>(file_cache_slot::read_line_num): Likewise.
>(get_source_text_between): Likewise.
>* toplev.cc (toplev::main): Call global_dc->get_file_cache ().tune
>rather than file_cache::tune.
> 
>* gcc.dg/pr118860.c: New test.
> 
> --- gcc/input.h.jj2025-02-03 11:05:04.833651537 +0100
> +++ gcc/input.h2025-02-27 10:38:00.943010438 +0100
> @@ -161,7 +161,7 @@ class file_cache
> const char *buffer,
> size_t sz);
> 
> -  static void tune(size_t num_file_slots_, size_t lines);
> +  void tune (size_t num_file_slots, size_t lines);
> 
>  private:
>   file_cache_slot *evicted_cache_tab_entry (unsigned *highest_use_count);
> @@ -169,7 +169,7 @@ class file_cache
>   file_cache_slot *lookup_file (const char *file_path);
> 
>  private:
> -  static size_t num_file_slots;
> +  size_t m_num_file_slots;
>   file_cache_slot *m_file_slots;
>   input_context m_input_context;
> };
> --- gcc/input.cc.jj2025-02-24 00:06:26.036728909 +0100
> +++ gcc/input.cc2025-02-27 11:00:59.351807727 +0100
> @@ -79,8 +79,11 @@ public:
>   void evict ();
>   void set_content (const char *buf, size_t sz);
> 
> -  static void tune(size_t line_record_size_) {
> -  line_record_size = line_record_size_;
> +  static size_t tune (size_t line_record_size_)
> +  {
> +size_t ret = line_record_size;
> +line_record_size = line_record_size_;
> +return ret;
>   }
> 
>  private:
> @@ -200,14 +203,17 @@ size_t file_cache_slot::recent_cached_li
> 
> /* Tune file_cache.  */
> void
> -file_cache::tune (size_t num_file_slots_, size_t lines)
> +file_cache::tune (size_t num_file_slots, size_t lines)
> {
> -  num_file_slots = num_file_slots_;
> -  file_cache_slot::tune (lines);
> +  if (file_cache_slot::tune (lines) != lines
> +  || m_num_file_slots != num_file_slots)
> +{
> +  delete[] m_file_slots;
> +  m_file_slots = new file_cache_slot[num_file_slots];
> +}
> +  m_num_file_slots = num_file_slots;
> }
> 
> -size_t file_cache::num_file_slots = 16;
> -
> static const char *
> find_end_of_line (const char *s, size_t len);
> 
> @@ -325,7 +331,7 @@ file_cache::lookup_file (const char *fil
> 
>   /* This will contain the found cached file.  */
>   file_cache_slot *r = NULL;
> -  for (unsigned i = 0; i < num_file_slots; ++i)
> +  for (unsigned i = 0; i < m_num_file_slots; ++i)
>   

[PATCH] c++: more overeager use of deleted function before ADL [PR119034]

2025-02-27 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, does this look OK for stage 1?

-- >8 --

The PR68942 fix used the tf_conv flag to disable mark_used when
substituting a FUNCTION_DECL callee of an ADL-enabled call.  In this
slightly more elaborate testcase, we end up prematurely calling
mark_used anyway on the FUNCTION_DECL directly from the CALL_EXPR case
of tsubst_expr during partial instantiation, leading to a bogus "use of
deleted function" error.

This patch fixes the general problem in a more robust way by ensuring
the callee of an ADL-enabled call is wrapped in an OVERLOAD, so that
tsubst_expr leaves it alone.

PR c++/119034
PR c++/68942

gcc/cp/ChangeLog:

* pt.cc (tsubst_expr) <case CALL_EXPR>: Revert PR68942 fix.
* semantics.cc (finish_call_expr): Ensure the callee of an
ADL-enabled call is wrapped in an OVERLOAD.

gcc/testsuite/ChangeLog:

* g++.dg/template/koenig13.C: New test.
---
 gcc/cp/pt.cc |  8 +---
 gcc/cp/semantics.cc  |  8 
 gcc/testsuite/g++.dg/template/koenig13.C | 16 
 3 files changed, 25 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/koenig13.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 62d91a2dd15..4b69b26808b 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -21290,13 +21290,7 @@ tsubst_expr (tree t, tree args, tsubst_flags_t 
complain, tree in_decl)
  /* Avoid error about taking the address of a constructor.  */
  function = TREE_OPERAND (function, 0);
 
-   tsubst_flags_t subcomplain = complain;
-   if (koenig_p && TREE_CODE (function) == FUNCTION_DECL)
- /* When KOENIG_P, we don't want to mark_used the callee before
-augmenting the overload set via ADL, so during this initial
-substitution we disable mark_used by setting tf_conv (68942).  
*/
- subcomplain |= tf_conv;
-   function = tsubst_expr (function, args, subcomplain, in_decl);
+   function = tsubst_expr (function, args, complain, in_decl);
 
if (BASELINK_P (function))
  qualified_p = true;
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 7c7d3e3c432..9e13b690631 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -3321,6 +3321,14 @@ finish_call_expr (tree fn, vec **args, bool 
disallow_virtual,
   if (type_dependent_expression_p (fn)
  || any_type_dependent_arguments_p (*args))
{
+ if (koenig_p
+ && TREE_CODE (orig_fn) == FUNCTION_DECL
+ && !fndecl_built_in_p (orig_fn))
+   /* For an ADL-enabled call where unqualified lookup found a
+  single non-template function, wrap it in an OVERLOAD so
+  that later substitution doesn't greedily mark the function
+  as used.  */
+   orig_fn = ovl_make (orig_fn, NULL_TREE);
  result = build_min_nt_call_vec (orig_fn, *args);
  SET_EXPR_LOCATION (result, cp_expr_loc_or_input_loc (fn));
  KOENIG_LOOKUP_P (result) = koenig_p;
diff --git a/gcc/testsuite/g++.dg/template/koenig13.C 
b/gcc/testsuite/g++.dg/template/koenig13.C
new file mode 100644
index 000..75c9d95df7e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/koenig13.C
@@ -0,0 +1,16 @@
+// PR c++/119034
+// { dg-do compile { target c++14 } }
+// A version of koenig12.C involving partial instantiation of a generic lambda.
+
+void foo(...) = delete;
+
+template<class T> void lookup(T t) { [&](auto u) { foo(u); }(t); }
+
+namespace N {
+ struct A { };
+ int foo(A);
+}
+
+int main() {
+  lookup(N::A{});
+}
-- 
2.49.0.rc0



Re: [PATCH] c++: ICE with GOTO_EXPR [PR118928]

2025-02-27 Thread Jason Merrill

On 2/20/25 9:51 AM, Marek Polacek wrote:

Now with the test fixed.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
In this PR we crash in cxx_eval_constant_expression/GOTO_EXPR on:

   gcc_assert (cxx_dialect >= cxx23);

The code obviously doesn't expect to see a goto pre-C++23.  But we can
get here with the new prvalue optimization.  In this test we found
ourselves in synthesize_method for X::X().  This function calls:

  a) finish_function, which does cp_genericize -> ... -> genericize_c_loops,
 which creates the GOTO_EXPR;
  b) expand_or_defer_fn -> maybe_clone_body -> ... -> cp_fold_function
 where we reach the new maybe_constant_init call and crash on the
 goto.


Hmm, this looks like bad recursion; finish_function wants to fold before
genericize, but then because maybe_clone_body calls finish_function, we
end up folding again after genericize.


Maybe we should call maybe_clone_body directly from finish_function 
rather than from expand_or_defer?


But the patch is OK as is.


Since we can validly get to that assert, I think we should just remove
it.  I don't see other similar asserts like this one.

PR c++/118928

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_constant_expression) <case GOTO_EXPR>: Remove
an assert.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-prvalue5.C: New test.
---
  gcc/cp/constexpr.cc   |  1 -
  .../g++.dg/cpp0x/constexpr-prvalue5.C | 24 +++
  2 files changed, 24 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 59dd0668af3..c68666cc5dd 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8691,7 +8691,6 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
*jump_target = TREE_OPERAND (t, 0);
else
{
- gcc_assert (cxx_dialect >= cxx23);
  if (!ctx->quiet)
error_at (loc, "%<goto%> is not a constant expression");
  *non_constant_p = true;
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C
new file mode 100644
index 000..1f847bbe183
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-prvalue5.C
@@ -0,0 +1,24 @@
+// PR c++/118928
+// { dg-do compile { target c++11 } }
+// { dg-options "-O" }
+
+using size_t = decltype(sizeof(0));
+
+namespace std {
+template <class T> struct initializer_list {
+  const T *_M_array;
+  size_t _M_len;
+};
+struct S {
+  constexpr S(const char *); // { dg-warning "used but never defined" }
+};
+struct vector {
+  constexpr vector(initializer_list<S>) {}
+};
+}
+struct Y {
+std::vector v;
+};
+struct X {
+  Y y{{""}};
+} x;

base-commit: a2755339c6c9832467c573d956e91565943ecdc1




[pushed][PR116336][LRA]: Add a test

2025-02-27 Thread Vladimir Makarov

This is a test taken from

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=116336

The PR was actually solved a few weeks ago.

commit e59dd301aea9c8d5a5d04e808c87d591a26e85bb
Author: Vladimir N. Makarov 
Date:   Thu Feb 27 13:05:44 2025 -0500

[PR116336][LRA]: Add a test

  Patch for PR116234 also solves the given PR116336.  So the patch adds only the
test case, which is very different from the PR116234 one.

gcc/testsuite/ChangeLog:

PR rtl-optimization/116336
* gcc.dg/pr116336.c: New test.

diff --git a/gcc/testsuite/gcc.dg/pr116336.c b/gcc/testsuite/gcc.dg/pr116336.c
new file mode 100644
index 000..3e4f3d329c1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr116336.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -gno-statement-frontiers -fcompare-debug" } */
+
+double duk_js_execute_bytecode_duk__tv_0_0;
+double duk_double_div();
+void duk_handle_call_unprotected();
+void duk_js_execute_bytecode() {
+  double du_0;
+  long opcode_shifted;
+  switch (opcode_shifted)
+  case 2:
+du_0 = duk_double_div();
+  duk_js_execute_bytecode_duk__tv_0_0 = du_0;
+  duk_handle_call_unprotected();
+  duk_js_execute_bytecode();
+}


[PUSHED] nvptx: Build libgfortran with '-mfake-ptx-alloca' [PR107635]

2025-02-27 Thread Thomas Schwinge
As of recent commit 8bf0ee8d62b8a08e808344d31354ab713157e15d
"Fortran: Add transfer_between_remotes [PR107635]", we've got 'alloca' usage
in 'libgfortran/caf/single.c:_gfortran_caf_transfer_between_remotes', and
the libgfortran target library fails to build for legacy configurations where
PTX 'alloca' is not available:

../../../../source-gcc/libgfortran/caf/single.c: In function 
‘_gfortran_caf_transfer_between_remotes’:
../../../../source-gcc/libgfortran/caf/single.c:675:23: sorry, 
unimplemented: dynamic stack allocation not supported
  675 |   transfer_desc = __builtin_alloca (desc_size);
  |   ^~~~
../../../../source-gcc/libgfortran/caf/single.c:680:20: sorry, 
unimplemented: dynamic stack allocation not supported
  680 | transfer_ptr = __builtin_alloca (*opt_dst_charlen * src_size);
  |^~
make[6]: *** [Makefile:4675: caf/single.lo] Error 1

With '-mfake-ptx-alloca', libgfortran again succeeds to build, and compared
to before, we've got only a small number of regressions due to nvptx 'ld'
complaining about 'unresolved symbol __GCC_nvptx__PTX_alloca_not_supported':

[-PASS:-]{+FAIL:+} gfortran.dg/coarray/codimension_2.f90 -fcoarray=lib  -O2 
 -lcaf_single (test for excess errors)

[-PASS:-]{+FAIL:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2  
-lcaf_single (test for excess errors)
[-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/event_4.f08 -fcoarray=lib  -O2 
 -lcaf_single [-execution test-]{+compilation failed to produce executable+}

[-PASS:-]{+FAIL:+} gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib  -O2  
-lcaf_single (test for excess errors)
[-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/fail_image_2.f08 -fcoarray=lib 
 -O2  -lcaf_single [-execution test-]{+compilation failed to produce 
executable+}

[-PASS:-]{+FAIL:+} gfortran.dg/coarray/proc_pointer_assign_1.f90 
-fcoarray=lib  -O2  -lcaf_single (test for excess errors)
[-PASS:-]{+UNRESOLVED:+} gfortran.dg/coarray/proc_pointer_assign_1.f90 
-fcoarray=lib  -O2  -lcaf_single [-execution test-]{+compilation failed to 
produce executable+}

[-PASS:-]{+FAIL:+} gfortran.dg/coarray_43.f90   -O  (test for excess errors)

That's acceptable for such legacy PTX configurations.

PR target/107635
libgfortran/
* config/t-nvptx: New.
* configure.host [nvptx] (tmake_file): Add it.
---
 libgfortran/config/t-nvptx | 2 ++
 libgfortran/configure.host | 4 
 2 files changed, 6 insertions(+)
 create mode 100644 libgfortran/config/t-nvptx

diff --git a/libgfortran/config/t-nvptx b/libgfortran/config/t-nvptx
new file mode 100644
index 000..88bd6eee982
--- /dev/null
+++ b/libgfortran/config/t-nvptx
@@ -0,0 +1,2 @@
+# Re 'alloca' usage in '../caf/single.c':
+AM_CFLAGS += -mfake-ptx-alloca
diff --git a/libgfortran/configure.host b/libgfortran/configure.host
index 291188d19c2..9abd40f511a 100644
--- a/libgfortran/configure.host
+++ b/libgfortran/configure.host
@@ -91,6 +91,10 @@ case "${target}" in
tmake_file="t-aix"
;;
 
+  nvptx-*-none)
+   tmake_file="$tmake_file t-nvptx"
+   ;;
+
   *)
;;
 
-- 
2.34.1



[PATCH] libstdc++: Fix constraint recursion in basic_const_iterator relops [PR112490]

2025-02-27 Thread Patrick Palka
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
14?

-- >8 --

Here for

  using RCI = reverse_iterator<basic_const_iterator<vector<int>::iterator>>
  static_assert(std::totally_ordered<RCI>);

we effectively need to check the requirement

  requires (RCI x) { x RELOP x; }  for each RELOP in {<, >, <=, >=}

which we expect to be straightforwardly satisfied by reverse_iterator's
namespace-scope relops.  But due to ADL we find ourselves also
considering the basic_const_iterator relop friends, which before CWG
2369 would be quickly discarded since RCI clearly isn't convertible to
basic_const_iterator.  After CWG 2369 though we must first check these
relops' constraints (with _It = vector<int>::iterator and _It2 = RCI),
which entails checking totally_ordered recursively.

This patch fixes this by turning the problematic non-dependent parameter
of type basic_const_iterator<_It> into a dependent parameter of type
basic_const_iterator<_It3> where _It3 is constrained to match _It.
Thus the basic_const_iterator relop friends now get quickly discarded
during deduction since RCI isn't a specialization of basic_const_iterator
(or derived from one) and so _It3 is not deduced.

PR libstdc++/112490

libstdc++-v3/ChangeLog:

* include/bits/stl_iterator.h (basic_const_iterator::operator<):
Replace non-dependent basic_const_iterator parameter with
a dependent one of type basic_const_iterator<_It3>.  Constrain
_It3 to match _It.
(basic_const_iterator::operator>): Likewise.
(basic_const_iterator::operator<=): Likewise.
(basic_const_iterator::operator>=): Likewise.
* testsuite/24_iterators/const_iterator/112490.cc: New test.
---
 libstdc++-v3/include/bits/stl_iterator.h | 16 
 .../24_iterators/const_iterator/112490.cc| 12 
 2 files changed, 20 insertions(+), 8 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc

diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
b/libstdc++-v3/include/bits/stl_iterator.h
index 3e025342fb5..33732b1a428 100644
--- a/libstdc++-v3/include/bits/stl_iterator.h
+++ b/libstdc++-v3/include/bits/stl_iterator.h
@@ -2881,30 +2881,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
&& three_way_comparable_with<_It, _It2>
   { return _M_current <=> __y; }
 
-template<__detail::__not_a_const_iterator _It2>
+template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
   friend constexpr bool
-  operator<(const _It2& __x, const basic_const_iterator& __y)
+  operator<(const _It2& __x, const basic_const_iterator<_It3>& __y)
   noexcept(noexcept(__x < __y._M_current))
   requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
   { return __x < __y._M_current; }
 
-template<__detail::__not_a_const_iterator _It2>
+template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
   friend constexpr bool
-  operator>(const _It2& __x, const basic_const_iterator& __y)
+  operator>(const _It2& __x, const basic_const_iterator<_It3>& __y)
   noexcept(noexcept(__x > __y._M_current))
   requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
   { return __x > __y._M_current; }
 
-template<__detail::__not_a_const_iterator _It2>
+template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
   friend constexpr bool
-  operator<=(const _It2& __x, const basic_const_iterator& __y)
+  operator<=(const _It2& __x, const basic_const_iterator<_It3>& __y)
   noexcept(noexcept(__x <= __y._M_current))
   requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
   { return __x <= __y._M_current; }
 
-template<__detail::__not_a_const_iterator _It2>
+template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
   friend constexpr bool
-  operator>=(const _It2& __x, const basic_const_iterator& __y)
+  operator>=(const _It2& __x, const basic_const_iterator<_It3>& __y)
   noexcept(noexcept(__x >= __y._M_current))
   requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
   { return __x >= __y._M_current; }
diff --git a/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc 
b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
new file mode 100644
index 000..9bb154847cf
--- /dev/null
+++ b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
@@ -0,0 +1,12 @@
+// { dg-do compile { target c++23 } }
+
+// PR libstdc++/112490 - infinite meta error in
+// reverse_iterator::iterator>>
+
+#include 
+#include 
+
+using I = std::vector::iterator;
+using CI = std::basic_const_iterator;
+using RCI = std::reverse_iterator;
+static_assert(std::totally_ordered);
-- 
2.49.0.rc0



[PUSHED 2/2] nvptx: '#define MAX_FIXED_MODE_SIZE 128'

2025-02-27 Thread Thomas Schwinge
... instead of 64 via 'gcc/defaults.h':

MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (DImode)

This fixes ICEs:

[-FAIL: c-c++-common/pr111309-1.c  -Wc++-compat  (internal compiler error: 
in expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} c-c++-common/pr111309-1.c  -Wc++-compat  (test for 
excess errors)
[-UNRESOLVED:-]{+PASS:+} c-c++-common/pr111309-1.c  -Wc++-compat  
[-compilation failed to produce executable-]{+execution test+}

[-FAIL: c-c++-common/pr111309-1.c  -std=gnu++17 (internal compiler error: 
in expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++17 (test for excess 
errors)
[-UNRESOLVED:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++17 
[-compilation failed to produce executable-]{+execution test+}
[-FAIL: c-c++-common/pr111309-1.c  -std=gnu++26 (internal compiler error: 
in expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++26 (test for excess 
errors)
[-UNRESOLVED:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++26 
[-compilation failed to produce executable-]{+execution test+}
[-FAIL: c-c++-common/pr111309-1.c  -std=gnu++98 (internal compiler error: 
in expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++98 (test for excess 
errors)
[-UNRESOLVED:-]{+PASS:+} c-c++-common/pr111309-1.c  -std=gnu++98 
[-compilation failed to produce executable-]{+execution test+}

[-FAIL: gcc.dg/torture/pr116480-1.c   -O0  (internal compiler error: in 
expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} gcc.dg/torture/pr116480-1.c   -O0  (test for excess 
errors)
[-FAIL: gcc.dg/torture/pr116480-1.c   -O1  (internal compiler error: in 
expand_fn_using_insn, at internal-fn.cc:268)-]
[-FAIL:-]{+PASS:+} gcc.dg/torture/pr116480-1.c   -O1  (test for excess 
errors)
PASS: gcc.dg/torture/pr116480-1.c   -O2  (test for excess errors)
PASS: gcc.dg/torture/pr116480-1.c   -O3 -g  (test for excess errors)
PASS: gcc.dg/torture/pr116480-1.c   -Os  (test for excess errors)

..., where we ran into 'gcc_assert (icode != CODE_FOR_nothing);' in
'gcc/internal-fn.cc:expand_fn_using_insn' for '__int128' '__builtin_clzg' etc.:

during RTL pass: expand
[...]/c-c++-common/pr111309-1.c: In function 'clzI':
[...]/c-c++-common/pr111309-1.c:69:10: internal compiler error: in 
expand_fn_using_insn, at internal-fn.cc:268
0x120ec2cf internal_error(char const*, ...)
[...]/gcc/diagnostic-global-context.cc:517
0x102c7c5b fancy_abort(char const*, int, char const*)
[...]/gcc/diagnostic.cc:1722
0x109708eb expand_fn_using_insn
[...]/gcc/internal-fn.cc:268
0x1098114f expand_internal_call(internal_fn, gcall*)
[...]/gcc/internal-fn.cc:5273
0x1098114f expand_internal_call(gcall*)
[...]/gcc/internal-fn.cc:5281
0x10594fc7 expand_call_stmt
[...]/gcc/cfgexpand.cc:3049
[...]

Likewise, as of commit e8ad697a75b0870a833366daf687668a57cabb6e
"libstdc++: Use new type-generic built-ins in <bit> [PR118855]",
the libstdc++ target library build ICEd in the same way.

Additionally, this change fixes:

[-FAIL:-]{+PASS:+} gcc.dg/pr105094.c (test for excess errors)

..., which was:

[...]/gcc.dg/pr105094.c: In function 'foo':
[...]/gcc.dg/pr105094.c:11:12: error: size of variable 's' is too large

And, finally, regarding 'gcc.target/nvptx/stack_frame-1.c'.  Before, in
'gcc/cfgexpand.cc': 'expand_used_vars' -> 'expand_used_vars_for_block' ->
'expand_one_var' for 'ww' -> 'gcc/function.cc:use_register_for_decl' due to
'DECL_MODE (decl) == BLKmode' did 'return false;', thus -> 'add_stack_var'
(even if 'ww' wasn't then actually living on the stack).  Now, 'ww' has
'TImode' and 'use_register_for_decl' does 'return true;', thus ->
'expand_one_register_var', and therefore no unused stack frame emitted.

gcc/
* config/nvptx/nvptx.h (MAX_FIXED_MODE_SIZE): '#define'.
gcc/testsuite/
* gcc.target/nvptx/stack_frame-1.c: Adjust.
---
 gcc/config/nvptx/nvptx.h   | 2 ++
 gcc/testsuite/gcc.target/nvptx/stack_frame-1.c | 7 ++-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index c21e7cb960d..1ec5f60fea7 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -77,6 +77,8 @@
 #define LONG_LONG_TYPE_SIZE 64
 #define TARGET_SUPPORTS_WIDE_INT 1
 
+#define MAX_FIXED_MODE_SIZE 128
+
 #undef SIZE_TYPE
 #define SIZE_TYPE (TARGET_ABI64 ? "long unsigned int" : "unsigned int")
 #undef PTRDIFF_TYPE
diff --git a/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c 
b/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c
index 476d0ac93ed..cca89b530a8 100644
--- a/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c
+++ b/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c
@@ -1,11 +1,11 @@
-/* GCC emits an unused s

[PUSHED 1/2] Add 'gcc.target/nvptx/stack_frame-1.c'

2025-02-27 Thread Thomas Schwinge
gcc/testsuite/
* gcc.target/nvptx/stack_frame-1.c: New.
---
 .../gcc.target/nvptx/stack_frame-1.c  | 34 +++
 1 file changed, 34 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/nvptx/stack_frame-1.c

diff --git a/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c 
b/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c
new file mode 100644
index 000..476d0ac93ed
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/stack_frame-1.c
@@ -0,0 +1,34 @@
+/* GCC emits an unused stack frame.  */
+
+/* { dg-do assemble } */
+/* { dg-options {-O2 -mno-soft-stack} } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+/* Greatly reduced from libgcc code, where this issue is visible for
+   '_divdi3.o:__divti3', '_divmoddi4.o:__divmodti4', '_moddi3.o:__modti3',
+   '_udivdi3.o:__udivti3', '_udivmoddi4.o:__udivmodti4',
+   '_umoddi3.o:__umodti3'.  */
+
+int f (int n)
+{
+  const union {
+struct { long low, high; };
+__int128 ll;
+  } ww = {{.low = n, .high = 0}};
+  return (int) ww.ll;
+}
+/*
+** f:
+** \.visible \.func \(\.param\.u32 %value_out\) f \(\.param\.u32 %in_ar0\)
+** {
+** \.reg\.u32 %value;
+** \.reg\.u32 %ar0;
+** ld\.param\.u32 %ar0, \[%in_ar0\];
+** \.local \.align 16 \.b8 %frame_ar\[16\];
+** \.reg\.u64 %frame;
+** cvta\.local\.u64 %frame, %frame_ar;
+** mov\.u32%value, %ar0;
+** st\.param\.u32  \[%value_out\], %value;
+** ret;
+*/
-- 
2.34.1



Re: [PATCH] Fortran: fix check for non-optional arrays passed to elemental

2025-02-27 Thread Jerry D

On 2/27/25 12:33 PM, Peter Hill wrote:

On Thu, 27 Feb 2025 at 18:09, Jerry D  wrote:


On 2/27/25 7:38 AM, Peter Hill wrote:

Dear all,

The attached patch fixes an ICE in gfc_resolve_code when passing an
optional array to an elemental procedure with `-pedantic` enabled.
PR95446 added the original check, this patch fixes the case where the
other actual argument is an array literal (or something else other
than a variable). The ICE is present since 11.1, so this could be
backported?

Cheers,
Peter



Hi Peter, was there a PR associated with this one?

Jerry

--- snip ---


Hi Jerry,

Nope, I couldn't find one -- should I have created one first?

Cheers,
Peter


Yes please, with a testcase that illustrates the problem. This way we
capture some history, if you don't mind.


Jerry


[PUSHED] nvptx: Delay 'sorry, unimplemented: dynamic stack allocation not supported' from expansion time to code generation

2025-02-27 Thread Thomas Schwinge
This gives the back end a chance to clean out a few more unnecessary instances
of dynamic stack allocation.  This progresses:

PASS: gcc.dg/pr78902.c  (test for warnings, line 7)
PASS: gcc.dg/pr78902.c  (test for warnings, line 8)
PASS: gcc.dg/pr78902.c  (test for warnings, line 9)
PASS: gcc.dg/pr78902.c  (test for warnings, line 10)
PASS: gcc.dg/pr78902.c  (test for warnings, line 11)
PASS: gcc.dg/pr78902.c  (test for warnings, line 12)
PASS: gcc.dg/pr78902.c  (test for warnings, line 13)
PASS: gcc.dg/pr78902.c strndup excessive bound at line 14 (test for 
warnings, line 13)
[-UNSUPPORTED: gcc.dg/pr78902.c: dynamic stack allocation not supported-]
{+PASS: gcc.dg/pr78902.c (test for excess errors)+}

UNSUPPORTED: gcc.dg/torture/pr71901.c   -O0 : dynamic stack allocation not 
supported
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr71901.c   -O1  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}
UNSUPPORTED: gcc.dg/torture/pr71901.c   -O2 : dynamic stack allocation not 
supported
UNSUPPORTED: gcc.dg/torture/pr71901.c   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions : dynamic stack 
allocation not supported
UNSUPPORTED: gcc.dg/torture/pr71901.c   -O3 -g : dynamic stack allocation 
not supported
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr71901.c   -Os  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}

UNSUPPORTED: gcc.dg/torture/pr78742.c   -O0 : dynamic stack allocation not 
supported
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr78742.c   -O1  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr78742.c   -O2  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr78742.c   -O3 
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  
[-: dynamic stack allocation not supported-]{+(test for excess errors)+}
[-UNSUPPORTED:-]{+PASS:+} gcc.dg/torture/pr78742.c   -O3 -g  [-: dynamic 
stack allocation not supported-]{+(test for excess errors)+}
UNSUPPORTED: gcc.dg/torture/pr78742.c   -Os : dynamic stack allocation not 
supported

[-UNSUPPORTED:-]{+PASS:+} gfortran.dg/pr101267.f90   -O  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}

[-UNSUPPORTED:-]{+PASS:+} gfortran.dg/pr112404.f90   -O  [-: dynamic stack 
allocation not supported-]{+(test for excess errors)+}

gcc/
* config/nvptx/nvptx.md (define_expand "allocate_stack")
[!TARGET_SOFT_STACK]: Move
'sorry ("dynamic stack allocation not supported");'...
(define_insn "@nvptx_alloca_<mode>"): ... here.
gcc/testsuite/
* gcc.target/nvptx/alloca-1-unused-O0-sm_30.c: Adjust.
---
 gcc/config/nvptx/nvptx.md | 42 ++-
 .../nvptx/alloca-1-unused-O0-sm_30.c  | 12 +-
 2 files changed, 32 insertions(+), 22 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index e7d3630d4c4..d08afd4378b 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1671,15 +1671,8 @@
(match_operand 1 "nvptx_register_operand")]
   ""
 {
-  if (!TARGET_SOFT_STACK
-  && TARGET_PTX_7_3
-  && TARGET_SM52)
+  if (!TARGET_SOFT_STACK)
 emit_insn (gen_nvptx_alloca (Pmode, operands[0], operands[1]));
-  else if (!TARGET_SOFT_STACK)
-{
-  sorry ("dynamic stack allocation not supported");
-  emit_insn (gen_nop ());
-}
   else if (TARGET_SOFT_STACK)
 {
   emit_move_insn (stack_pointer_rtx,
@@ -1696,19 +1689,28 @@
   [(set (match_operand:P 0 "nvptx_register_operand" "=R")
 (unspec:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")]
  UNSPEC_ALLOCA))]
-  "TARGET_PTX_7_3
-   && TARGET_SM52"
+  ""
   {
-/* Convert the address from '.local' state space to generic.  That way,
-   we don't have to use 'st.local', 'ld.local', and can easily pass the
-   address to other "generic functions".
-   TODO 'gcc.target/nvptx/alloca-5.c' */
-output_asm_insn ("{", NULL);
-output_asm_insn ("\\t.reg%t0\\t%0_local;", operands);
-output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands);
-output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands);
-output_asm_insn ("}", NULL);
-return "";
+if (TARGET_PTX_7_3
+   && TARGET_SM52)
+  {
+   /* Convert the address from '.local' state space to generic.  That way,
+  we don't have to use 'st.local', 'ld.local', and can easily pass the
+  address to other "generic functions".
+  TODO 'gcc.target/nvptx/alloca-5.c' */
+   output_asm_insn ("{", NULL);
+   output_asm_insn ("\\t.reg%t0\\t%0_local;", operands);
+   output_asm_insn ("\\talloca%u0\\t%0_local, %1;", operands);
+   output_asm_insn ("\\tcvta.local%u0\\t%0, %0_local;", operands);
+  

[PUSHED] nvptx: Add test cases for dead/unused 'alloca'/VLA

2025-02-27 Thread Thomas Schwinge
gcc/testsuite/
* gcc.target/nvptx/alloca-1-dead-O0-sm_30.c: New.
* gcc.target/nvptx/alloca-1-dead-O0.c: Likewise.
* gcc.target/nvptx/alloca-1-dead-O1-sm_30.c: Likewise.
* gcc.target/nvptx/alloca-1-dead-O1.c: Likewise.
* gcc.target/nvptx/alloca-1-unused-O0-sm_30.c: Likewise.
* gcc.target/nvptx/alloca-1-unused-O0.c: Likewise.
* gcc.target/nvptx/alloca-1-unused-O1-sm_30.c: Likewise.
* gcc.target/nvptx/alloca-1-unused-O1.c: Likewise.
* gcc.target/nvptx/vla-1-dead-O0-sm_30.c: Likewise.
* gcc.target/nvptx/vla-1-dead-O0.c: Likewise.
* gcc.target/nvptx/vla-1-dead-O1-sm_30.c: Likewise.
* gcc.target/nvptx/vla-1-dead-O1.c: Likewise.
* gcc.target/nvptx/vla-1-unused-O0-sm_30.c: Likewise.
* gcc.target/nvptx/vla-1-unused-O0.c: Likewise.
* gcc.target/nvptx/vla-1-unused-O1-sm_30.c: Likewise.
* gcc.target/nvptx/vla-1-unused-O1.c: Likewise.
---
 .../gcc.target/nvptx/alloca-1-dead-O0-sm_30.c | 10 
 .../gcc.target/nvptx/alloca-1-dead-O0.c   | 48 +++
 .../gcc.target/nvptx/alloca-1-dead-O1-sm_30.c | 17 +++
 .../gcc.target/nvptx/alloca-1-dead-O1.c   | 17 +++
 .../nvptx/alloca-1-unused-O0-sm_30.c  |  9 
 .../gcc.target/nvptx/alloca-1-unused-O0.c | 16 +++
 .../nvptx/alloca-1-unused-O1-sm_30.c  | 16 +++
 .../gcc.target/nvptx/alloca-1-unused-O1.c | 16 +++
 .../gcc.target/nvptx/vla-1-dead-O0-sm_30.c| 10 
 .../gcc.target/nvptx/vla-1-dead-O0.c  | 27 +++
 .../gcc.target/nvptx/vla-1-dead-O1-sm_30.c| 19 
 .../gcc.target/nvptx/vla-1-dead-O1.c  | 19 
 .../gcc.target/nvptx/vla-1-unused-O0-sm_30.c  |  9 
 .../gcc.target/nvptx/vla-1-unused-O0.c| 26 ++
 .../gcc.target/nvptx/vla-1-unused-O1-sm_30.c  | 18 +++
 .../gcc.target/nvptx/vla-1-unused-O1.c| 18 +++
 16 files changed, 295 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O1-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O1.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-unused-O0-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-unused-O0.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-unused-O1-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/alloca-1-unused-O1.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-dead-O0-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-dead-O0.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-dead-O1-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-dead-O1.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-unused-O0-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-unused-O0.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-unused-O1-sm_30.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-unused-O1.c

diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0-sm_30.c 
b/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0-sm_30.c
new file mode 100644
index 000..2d002b57579
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0-sm_30.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-additional-options -march=sm_30 } */
+
+void f(void)
+{
+  char *a = __builtin_alloca(123);
+  /* { dg-message {sorry, unimplemented: dynamic stack allocation not 
supported} {} { target *-*-* } .-1 } */
+  a[0] = 0;
+}
diff --git a/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0.c 
b/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0.c
new file mode 100644
index 000..ecc47e04e72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/alloca-1-dead-O0.c
@@ -0,0 +1,48 @@
+/* { dg-do assemble } */
+/* { dg-options {-O0 -mno-soft-stack} } */
+/* { dg-add-options nvptx_alloca_ptx } */
+/* { dg-additional-options -save-temps } */
+/* { dg-final { check-function-bodies {** } {} } } */
+
+void f(void)
+{
+  char *a = __builtin_alloca(123);
+  a[0] = 0;
+}
+/*
+** f:
+** \.visible \.func f
+** {
+** \.local \.align 16 \.b8 %frame_ar\[16\];
+** \.reg\.u64 %frame;
+** cvta\.local\.u64 %frame, %frame_ar;
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** \.reg\.u32 (%r[0-9]+);
+** \.reg\.u64 (%r[0-9]+);
+** mov\.u64\11, 16;
+** add\.u64\1, \11, -1;
+** add\.u64\2, \1, 123;
+** div\.u64\3, \2, 16;
+** mul\.lo\.u64\4, \3, 16;
+** {
+** \.

[PUSHED] nvptx: Support '-mfake-ptx-alloca'

2025-02-27 Thread Thomas Schwinge
With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only
for configurations where PTX 'alloca' is not available.  Rather than a
compile-time 'sorry, unimplemented: dynamic stack allocation not supported'
in presence of dynamic stack allocation, compilation and assembly then
succeeds.  However, attempting to link in such '*.o' files then fails due
to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'.

This is meant to be used in scenarios where large volumes of code are
compiled, a small fraction of which runs into dynamic stack allocation, but
these parts are not important for specific use cases, and we'd thus like the
build to succeed, and error out just upon actual, very rare use of the
offending '*.o' files.

gcc/
* config/nvptx/nvptx.opt (-mfake-ptx-alloca): New.
* config/nvptx/nvptx-protos.h (nvptx_output_fake_ptx_alloca):
Declare.
* config/nvptx/nvptx.cc (nvptx_output_fake_ptx_alloca): New.
* config/nvptx/nvptx.md (define_insn "@nvptx_alloca_<mode>")
[!(TARGET_PTX_7_3 && TARGET_SM52)]: Use it for
'-mfake-ptx-alloca'.
gcc/testsuite/
* gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c: New.
* gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c: Likewise.
* gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c: Likewise.
* gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c: Likewise.
* gcc.target/nvptx/alloca-4-O3.c:
'dg-additional-options -mfake-ptx-alloca'.
---
 gcc/config/nvptx/nvptx-protos.h   |  1 +
 gcc/config/nvptx/nvptx.cc | 21 
 gcc/config/nvptx/nvptx.md |  4 ++
 gcc/config/nvptx/nvptx.opt| 15 ++
 .../nvptx/alloca-1-O0_-mfake-ptx-alloca.c | 49 +++
 .../nvptx/alloca-2-O0_-mfake-ptx-alloca.c | 18 +++
 gcc/testsuite/gcc.target/nvptx/alloca-4-O3.c  |  2 +
 .../nvptx/alloca-4-O3_-mfake-ptx-alloca.c | 48 ++
 .../nvptx/vla-1-O0_-mfake-ptx-alloca.c| 29 +++
 9 files changed, 187 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/nvptx/alloca-1-O0_-mfake-ptx-alloca.c
 create mode 100644 
gcc/testsuite/gcc.target/nvptx/alloca-2-O0_-mfake-ptx-alloca.c
 create mode 100644 
gcc/testsuite/gcc.target/nvptx/alloca-4-O3_-mfake-ptx-alloca.c
 create mode 100644 gcc/testsuite/gcc.target/nvptx/vla-1-O0_-mfake-ptx-alloca.c

diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h
index e85e758322d..ca1fed6d992 100644
--- a/gcc/config/nvptx/nvptx-protos.h
+++ b/gcc/config/nvptx/nvptx-protos.h
@@ -55,6 +55,7 @@ extern rtx nvptx_expand_compare (rtx);
 extern const char *nvptx_ptx_type_from_mode (machine_mode, bool);
 extern const char *nvptx_output_mov_insn (rtx, rtx);
 extern const char *nvptx_output_call_insn (rtx_insn *, rtx, rtx);
+extern const char *nvptx_output_fake_ptx_alloca (void);
 extern const char *nvptx_output_return (void);
 extern const char *nvptx_output_set_softstack (unsigned);
 extern const char *nvptx_output_simt_enter (rtx, rtx, rtx);
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 64de2b1cc9b..d1648d52f2f 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -1758,6 +1758,27 @@ nvptx_output_set_softstack (unsigned src_regno)
 }
   return "";
 }
+
+/* Output a fake PTX 'alloca'.  */
+
+const char *
+nvptx_output_fake_ptx_alloca (void)
+{
+#define FAKE_PTX_ALLOCA_NAME "__GCC_nvptx__PTX_alloca_not_supported"
+  static tree decl;
+  if (!decl)
+{
+  tree alloca_type = TREE_TYPE (builtin_decl_explicit (BUILT_IN_ALLOCA));
+  decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
+get_identifier (FAKE_PTX_ALLOCA_NAME), alloca_type);
+  DECL_EXTERNAL (decl) = 1;
+  TREE_PUBLIC (decl) = 1;
+  nvptx_record_needed_fndecl (decl);
+}
+  return "\tcall\t(%0), " FAKE_PTX_ALLOCA_NAME ", (%1);";
+#undef FAKE_PTX_ALLOCA_NAME
+}
+
 /* Output a return instruction.  Also copy the return value to its outgoing
location.  */
 
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index d08afd4378b..3201247c5fb 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -1705,6 +1705,8 @@
output_asm_insn ("}", NULL);
return "";
   }
+else if (nvptx_fake_ptx_alloca)
+  return nvptx_output_fake_ptx_alloca ();
 else
   {
sorry_at (INSN_LOCATION (insn),
@@ -1733,6 +1735,7 @@
   gcc_checking_assert (REG_P (operands[0]));
   emit_insn (gen_nvptx_stacksave (Pmode, operands[0], operands[1]));
 }
+  /* We don't bother to special-case '-mfake-ptx-alloca' here.  */
   else
 {
   /* The concept of a '%stack' pointer doesn't apply like this.
@@ -1765,6 +1768,7 @@
   operands[1] = force_reg (Pmode, operands[1]);
   emit_insn (gen_nvptx_stackrestore (Pmode, operands[0], operands[1]));
 }
+  /* We don't bother to special-case '-mfake-ptx-al

Re: [PATCH] Simplify _Hashtable::_M_merge_multi

2025-02-27 Thread François Dumont
For the record, this small change has been committed as part of another 
larger one:


https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bcc8dea6a45b46febfa76df6f5e3e5b13f3b4a58


On 19/02/2025 13:49, Jonathan Wakely wrote:

On Mon, 17 Feb 2025 at 11:59, François Dumont  wrote:


On 16/02/2025 23:14, Jonathan Wakely wrote:

On Sun, 16 Feb 2025 at 21:15, François Dumont  wrote:

Hi

A minor simplification.

libstdc++: Simplify _Hashtable::_M_merge_multi

When merging two hashtable instances of the same type we do not need
to go through _M_src_hash_code that also checks for identical Hash functor
type.

But that check is very cheap, do we really gain much here?

No runtime gain no, just limit instantiations.


If we're getting rid of _M_src_hash_code in this overload of
_M_merge_multi, should we get rid of it in the other overload too,
since that's the only other place that uses _M_src_hash_code?

Seems complicated, so maybe we can avoid is_same_v and is_empty_v with
this smaller patch ?

is_same_v and is_empty_v are extremely cheap, they are variable
templates that use a compiler intrinsic.


  libstdc++: [_Hashtable] Avoid is_same_v and is_empty_v instantiations

  There is no need to check for same Hash functor type and this type
being stateless
  if there is no hash code cached to reuse.

  libstdc++-v3/ChangeLog:

  * include/bits/hashtable.h (_Hashtable<>::_M_src_hash_code):
  Add __hash_cached::value check.

Let me know if interested.

OK for trunk



[committed] libphobos: Run unittest tests with dg-runtest.

2025-02-27 Thread Iain Buclaw
Hi,

This patch fixes the libphobos unittest testsuite to use `dg-runtest'
test driver rather than `dg-test', same as all other libphobos tests.
This prevents the tests from being run multiple times when parallelized.

Set `libphobos_test_name' as well so that all tests get a unique name.

Regression tested on x86_64-linux-gnu/-m32, committed to mainline.

Regards,
Iain.

---
libphobos/ChangeLog:

* testsuite/libphobos.unittest/unittest.exp: Use `dg-runtest' rather
than `dg-test'.  Set `libphobos_test_name'.
---
 libphobos/testsuite/libphobos.unittest/unittest.exp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/libphobos/testsuite/libphobos.unittest/unittest.exp 
b/libphobos/testsuite/libphobos.unittest/unittest.exp
index 3e2d3b84cdc..0b0e3e3ce09 100644
--- a/libphobos/testsuite/libphobos.unittest/unittest.exp
+++ b/libphobos/testsuite/libphobos.unittest/unittest.exp
@@ -42,8 +42,10 @@ foreach unit_test $unit_test_list {
 set expected_fail [lindex $unit_test 1]
 
 foreach test $tests {
+set libphobos_test_name "[dg-trim-dirname $srcdir $test] $test_flags"
 set shouldfail $expected_fail
-dg-test $test "" $test_flags
+dg-runtest $test "" $test_flags
+set libphobos_test_name ""
 }
 
 set shouldfail 0
-- 
2.43.0



Re: [PATCH] x86: Move TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P to i386.cc

2025-02-27 Thread Hongtao Liu
On Wed, Feb 26, 2025 at 6:01 AM H.J. Lu  wrote:
>
> Move the TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P target hook from
> i386.h to i386.cc.
Ok for the patch, looks obvious.
>
> * config/i386/i386.h (TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P):
> Moved to ...
> * config/i386/i386.cc (TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P):
> Here.
>
> --
> H.J.



-- 
BR,
Hongtao


Re: [PATCH] libstdc++: Fix constraint recursion in basic_const_iterator relops [PR112490]

2025-02-27 Thread Jonathan Wakely
On Thu, 27 Feb 2025 at 20:52, Patrick Palka  wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 14?

OK for both.

Do we want a library issue for this? The "buggy" code you're changing
is exactly what the standard says.


>
> -- >8 --
>
> Here for
>
>   using RCI = reverse_iterator<basic_const_iterator<vector<int>::iterator>>
>   static_assert(std::totally_ordered<RCI>);
>
> we effectively need to check the requirement
>
>   requires (RCI x) { x RELOP x; }  for each RELOP in {<, >, <=, >=}
>
> which we expect to be straightforwardly satisfied by reverse_iterator's
> namespace-scope relops.  But due to ADL we find ourselves also
> considering the basic_const_iterator relop friends, which before CWG
> 2369 would be quickly discarded since RCI clearly isn't convertible to
> basic_const_iterator.  After CWG 2369 though we must first check these
> relops' constraints (with _It = vector<int>::iterator and _It2 = RCI),
> which entails checking totally_ordered<RCI> recursively.
>
> This patch fixes this by turning the problematic non-dependent parameter
> of type basic_const_iterator<_It> into a dependent parameter of type
> basic_const_iterator<_It3> where _It3 is constrained to match _It.
> Thus the basic_const_iterator relop friends now get quickly discarded
> during deduction since RCI isn't a specialization of basic_const_iterator
> (or derived from one) and so _It3 is not deduced.
>
> PR libstdc++/112490
>
> libstdc++-v3/ChangeLog:
>
> * include/bits/stl_iterator.h (basic_const_iterator::operator<):
> Replace non-dependent basic_const_iterator parameter with
> a dependent one of type basic_const_iterator<_It3>.  Constrain
> _It3 to match _It.
> (basic_const_iterator::operator>): Likewise.
> (basic_const_iterator::operator<=): Likewise.
> (basic_const_iterator::operator>=): Likewise.
> * testsuite/24_iterators/const_iterator/112490.cc: New test.
> ---
>  libstdc++-v3/include/bits/stl_iterator.h | 16 
>  .../24_iterators/const_iterator/112490.cc| 12 
>  2 files changed, 20 insertions(+), 8 deletions(-)
>  create mode 100644 
> libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
>
> diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
> b/libstdc++-v3/include/bits/stl_iterator.h
> index 3e025342fb5..33732b1a428 100644
> --- a/libstdc++-v3/include/bits/stl_iterator.h
> +++ b/libstdc++-v3/include/bits/stl_iterator.h
> @@ -2881,30 +2881,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> && three_way_comparable_with<_It, _It2>
>{ return _M_current <=> __y; }
>
> -template<__detail::__not_a_const_iterator _It2>
> +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
>friend constexpr bool
> -  operator<(const _It2& __x, const basic_const_iterator& __y)
> +  operator<(const _It2& __x, const basic_const_iterator<_It3>& __y)
>noexcept(noexcept(__x < __y._M_current))
>requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
>{ return __x < __y._M_current; }
>
> -template<__detail::__not_a_const_iterator _It2>
> +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
>friend constexpr bool
> -  operator>(const _It2& __x, const basic_const_iterator& __y)
> +  operator>(const _It2& __x, const basic_const_iterator<_It3>& __y)
>noexcept(noexcept(__x > __y._M_current))
>requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
>{ return __x > __y._M_current; }
>
> -template<__detail::__not_a_const_iterator _It2>
> +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
>friend constexpr bool
> -  operator<=(const _It2& __x, const basic_const_iterator& __y)
> +  operator<=(const _It2& __x, const basic_const_iterator<_It3>& __y)
>noexcept(noexcept(__x <= __y._M_current))
>requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
>{ return __x <= __y._M_current; }
>
> -template<__detail::__not_a_const_iterator _It2>
> +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
>friend constexpr bool
> -  operator>=(const _It2& __x, const basic_const_iterator& __y)
> +  operator>=(const _It2& __x, const basic_const_iterator<_It3>& __y)
>noexcept(noexcept(__x >= __y._M_current))
>requires random_access_iterator<_It> && totally_ordered_with<_It, _It2>
>{ return __x >= __y._M_current; }
> diff --git a/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc 
> b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
> new file mode 100644
> index 000..9bb154847cf
> --- /dev/null
> +++ b/libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
> @@ -0,0 +1,12 @@
> +// { dg-do compile { target c++23 } }
> +
> +// PR libstdc++/112490 - infinite meta error in
> +// reverse_iterator<basic_const_iterator<vector<int>::iterator>>
> +
> +#include 
> +#include 

[PATCH] libstdc++: Fix ranges::iter_move handling of rvalues [PR106612]

2025-02-27 Thread Jonathan Wakely
The specification for std::ranges::iter_move apparently requires us to
handle types which do not satisfy std::indirectly_readable, for example
with overloaded operator* which behaves differently for different value
categories.

libstdc++-v3/ChangeLog:

PR libstdc++/106612
* include/bits/iterator_concepts.h (_IterMove::__iter_ref_t):
New alias template.
(_IterMove::__result): Use __iter_ref_t instead of
std::iter_reference_t.
(_IterMove::__type): Remove incorrect __dereferenceable
constraint.
(_IterMove::operator()): Likewise. Add correct constraints. Use
__iter_ref_t instead of std::iter_reference_t. Forward parameter
as correct value category.
(iter_swap): Add comments.
* testsuite/24_iterators/customization_points/iter_move.cc: Test
that iter_move is found by ADL and that rvalue arguments are
handled correctly.
---

Tested x86_64-linux.

I think the spec is silly to require this, but here we are.

 libstdc++-v3/include/bits/iterator_concepts.h | 33 +--
 .../customization_points/iter_move.cc | 95 +++
 2 files changed, 119 insertions(+), 9 deletions(-)

diff --git a/libstdc++-v3/include/bits/iterator_concepts.h 
b/libstdc++-v3/include/bits/iterator_concepts.h
index 4265c475273..555af3bdb38 100644
--- a/libstdc++-v3/include/bits/iterator_concepts.h
+++ b/libstdc++-v3/include/bits/iterator_concepts.h
@@ -103,32 +103,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   namespace ranges
   {
 /// @cond undocumented
+// Implementation of std::ranges::iter_move, [iterator.cust.move].
 namespace __imove
 {
   void iter_move() = delete;
 
+  // Satisfied if _Tp is a class or enumeration type and iter_move
+  // can be found by argument-dependent lookup.
   template
concept __adl_imove
  = (std::__detail::__class_or_enum>)
- && requires(_Tp&& __t) { iter_move(static_cast<_Tp&&>(__t)); };
+ && requires(_Tp&& __t) { iter_move(static_cast<_Tp&&>(__t)); };
 
   struct _IterMove
   {
   private:
+   // The type returned by dereferencing a value of type _Tp.
+   // Unlike iter_reference_t this preserves the value category of _Tp.
+   template requires requires { *std::declval<_Tp>(); }
+ using __iter_ref_t = decltype(*std::declval<_Tp>());
+
template
  struct __result
- { using type = iter_reference_t<_Tp>; };
+ { using type = __iter_ref_t<_Tp>; };
 
+   // Use iter_move(E) if that works.
template
  requires __adl_imove<_Tp>
  struct __result<_Tp>
  { using type = decltype(iter_move(std::declval<_Tp>())); };
 
+   // Otherwise, if *E is an lvalue, use std::move(*E).
template
  requires (!__adl_imove<_Tp>)
- && is_lvalue_reference_v>
+   && is_lvalue_reference_v<__iter_ref_t<_Tp>>
  struct __result<_Tp>
- { using type = remove_reference_t>&&; };
+ { using type = remove_reference_t<__iter_ref_t<_Tp>>&&; };
 
template
  static constexpr bool
@@ -142,10 +152,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   public:
// The result type of iter_move(std::declval<_Tp>())
-   template
+   template
  using __type = typename __result<_Tp>::type;
 
-   template
+   template
+ requires __adl_imove<_Tp> || requires { *std::declval<_Tp>(); }
  [[nodiscard]]
  constexpr __type<_Tp>
  operator()(_Tp&& __e) const
@@ -153,10 +164,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  {
if constexpr (__adl_imove<_Tp>)
  return iter_move(static_cast<_Tp&&>(__e));
-   else if constexpr (is_lvalue_reference_v>)
- return static_cast<__type<_Tp>>(*__e);
+   else if constexpr (is_lvalue_reference_v<__iter_ref_t<_Tp>>)
+ return std::move(*static_cast<_Tp&&>(__e));
else
- return *__e;
+ return *static_cast<_Tp&&>(__e);
  }
   };
 } // namespace __imove
@@ -167,6 +178,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 }
   } // namespace ranges
 
+  /// The result type of ranges::iter_move(std::declval<_Tp&>())
   template<__detail::__dereferenceable _Tp>
 requires 
__detail::__can_reference>
 using iter_rvalue_reference_t = ranges::__imove::_IterMove::__type<_Tp&>;
@@ -873,11 +885,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 namespace ranges
 {
   /// @cond undocumented
+  // Implementation of std::ranges::iter_swap, [iterator.cust.swap].
   namespace __iswap
   {
 template
   void iter_swap(_It1, _It2) = delete;
 
+// Satisfied if _Tp and _Up are class or enumeration types and iter_swap
+// can be found by argument-dependent lookup.
 template
   concept __adl_iswap
= (std::__detail::__class_or_enum>
diff --git 
a/libstdc++-v3/testsuite/24_iterators/customization_points/iter_move.cc 
b/l

[PATCH] libstdc++: Add static_assertions to ranges::to adaptor factory [PR112803]

2025-02-27 Thread Jonathan Wakely
The standard requires that we reject attempts to create a ranges::to
adaptor for cv-qualified types and non-class types. Currently we only
diagnose it once the adaptor is used in a pipeline.

This adds static assertions to diagnose it immediately.

libstdc++-v3/ChangeLog:

PR libstdc++/112803
* include/std/ranges (ranges::to): Add static assertions to
enforce Mandates conditions.
* testsuite/std/ranges/conv/112803.cc: New test.
---

Tested x86_64-linux.

 libstdc++-v3/include/std/ranges   |  3 +++
 .../testsuite/std/ranges/conv/112803.cc   | 20 +++
 2 files changed, 23 insertions(+)
 create mode 100644 libstdc++-v3/testsuite/std/ranges/conv/112803.cc

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 6c65722b687..c0b1134ab32 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -9569,6 +9569,9 @@ namespace __detail
 constexpr auto
 to [[nodiscard]] (_Args&&... __args)
 {
+  static_assert(!is_const_v<_Cont> && !is_volatile_v<_Cont>);
+  static_assert(is_class_v<_Cont>);
+
   using __detail::_To;
   using views::__adaptor::_Partial;
   return _Partial<_To<_Cont>, decay_t<_Args>...>{0, 
std::forward<_Args>(__args)...};
diff --git a/libstdc++-v3/testsuite/std/ranges/conv/112803.cc 
b/libstdc++-v3/testsuite/std/ranges/conv/112803.cc
new file mode 100644
index 000..0a73b0200b0
--- /dev/null
+++ b/libstdc++-v3/testsuite/std/ranges/conv/112803.cc
@@ -0,0 +1,20 @@
+// { dg-do compile { target c++23 } }
+
+// Bug 112803 - <ranges>: to<C>(Args&&... args) is missing Mandates
+
+#include 
+
+void
+test()
+{
+  struct C { };
+
+  (void) std::ranges::to(); // { dg-error "here" }
+  (void) std::ranges::to(); // { dg-error "here" }
+  (void) std::ranges::to(); // { dg-error "here" }
+  (void) std::ranges::to(); // { dg-error "here" }
+  (void) std::ranges::to(); // { dg-error "here" }
+  (void) std::ranges::to(); // { dg-error "here" }
+}
+
+// { dg-error "static assertion failed" "" { target *-*-* } 0 }
-- 
2.48.1



Re: [PATCH] libstdc++: Improve optional's <=> constraint recursion workaround [PR104606]

2025-02-27 Thread Jonathan Wakely
On Thu, 27 Feb 2025 at 21:03, Patrick Palka  wrote:
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 14?  Not sure about backporting further given the original fix seems
> harmless.

Yeah, trunk and 14 seems good enough. Thanks.


>
> -- >8 --
>
> It turns out the reason the behavior of this testcase changed after CWG
> 2369 is because validity of the substituted return type is now checked
> later, after constraints.  So a more reliable workaround for this issue
> is to add a constraint to check the validity of the return type earlier,
> restoring the pre-CWG 2369 semantics.
>
> PR libstdc++/104606
>
> libstdc++-v3/ChangeLog:
>
> * include/std/optional (operator<=>): Revert r14-9771 change.
> Add constraint checking the validity of the return type
> compare_three_way_result_t before the three_way_comparable_with
> constraint.
> ---
>  libstdc++-v3/include/std/optional | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/libstdc++-v3/include/std/optional 
> b/libstdc++-v3/include/std/optional
> index 832dc6fd84b..a616dc07b10 100644
> --- a/libstdc++-v3/include/std/optional
> +++ b/libstdc++-v3/include/std/optional
> @@ -1685,7 +1685,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
>
>template
>  requires (!__is_derived_from_optional<_Up>)
> -  && three_way_comparable_with<_Up, _Tp>
> +  && requires { typename compare_three_way_result_t<_Tp, _Up>; }
> +  && three_way_comparable_with<_Tp, _Up>
>  constexpr compare_three_way_result_t<_Tp, _Up>
>  operator<=> [[nodiscard]] (const optional<_Tp>& __x, const _Up& __v)
>  { return bool(__x) ? *__x <=> __v : strong_ordering::less; }
> --
> 2.49.0.rc0
>



Re: [patch, doc] PR108369 GCC: Documentation of -x option

2025-02-27 Thread Jerry D

On 2/27/25 11:31 AM, Harald Anlauf wrote:

Am 27.02.25 um 02:58 schrieb Jerry D:

This attached patch is intended to clarify the '-x' option using '-x
f77' as an example. I was not sure who should review.

Tested by inspecting the generated info file from make info.

OK for trunk and backport to 14?


OK for trunk.

I don't think we backport documentation fixes.


No prob,

Committed

commit cfc9f42122f41668b986126ba7e65e3dd8bf1e9e (HEAD -> master, 
origin/master, origin/HEAD)

Author: Jerry DeLisle 
Date:   Wed Feb 26 17:26:26 2025 -0800

Thanks,

Jerry


Re: [PATCH] Fortran: fix check for non-optional arrays passed to elemental

2025-02-27 Thread Jerry D

On 2/27/25 7:38 AM, Peter Hill wrote:

Dear all,

The attached patch fixes an ICE in gfc_resolve_code when passing an
optional array to an elemental procedure with `-pedantic` enabled.
PR95446 added the original check, this patch fixes the case where the
other actual argument is an array literal (or something else other
than a variable). The ICE is present since 11.1, so this could be
backported?

Cheers,
Peter



Hi Peter, was there a PR associated with this one?

Jerry

--- snip ---


Re: [patch, doc] PR108369 GCC: Documentation of -x option

2025-02-27 Thread Harald Anlauf

Am 27.02.25 um 02:58 schrieb Jerry D:
This attached patch is intended to clarify the '-x' option using '-x 
f77' as an example. I was not sure who should review.


Tested by inspecting the generated info file from make info.

OK for trunk and backport to 14?


OK for trunk.

I don't think we backport documentation fixes.


Regards,

Jerry

Author: Jerry DeLisle 
Date:   Wed Feb 26 17:26:26 2025 -0800

     GCC: Documentation of -x option

     This change updates information about the -x option to clarify
     that it does not ensure standards compliance. Sparked by
     discussions in the following PR.

     PR fortran/108369

     gcc/ChangeLog:

     * doc/invoke.texi: Add a note to clarify. Adjust some wording.






[PATCH] c++: Fix cxx_eval_store_expression {REAL,IMAG}PART_EXPR handling [PR119045]

2025-02-27 Thread Jakub Jelinek
Hi!

I've added the asserts that probe == target because {REAL,IMAG}PART_EXPR
always implies a scalar type and so applying ARRAY_REF/COMPONENT_REF
etc. on it further doesn't make sense and the later code relies on it
to be the last one in refs array.  But as the following testcase shows,
we can fail those assertions in case there is a reference or pointer
to the __real__ or __imag__ part, in that case we just evaluate the
constant expression and so probe won't be the same as target.
That case doesn't push anything into the refs array though.

The following patch changes those asserts to verify that refs is still
empty, which fixes it.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-27  Jakub Jelinek  

PR c++/119045
* constexpr.cc (cxx_eval_store_expression) <case REALPART_EXPR>:
Assert that refs->is_empty () rather than probe == target.
(cxx_eval_store_expression) <case IMAGPART_EXPR>: Likewise.

* g++.dg/cpp1y/constexpr-complex2.C: New test.

--- gcc/cp/constexpr.cc.jj  2025-02-24 00:06:25.652734218 +0100
+++ gcc/cp/constexpr.cc 2025-02-27 16:59:23.966507094 +0100
@@ -6415,7 +6415,7 @@ cxx_eval_store_expression (const constex
  break;
 
case REALPART_EXPR:
- gcc_assert (probe == target);
+ gcc_assert (refs->is_empty ());
  vec_safe_push (refs, NULL_TREE);
  vec_safe_push (refs, probe);
  vec_safe_push (refs, TREE_TYPE (probe));
@@ -6423,7 +6423,7 @@ cxx_eval_store_expression (const constex
  break;
 
case IMAGPART_EXPR:
- gcc_assert (probe == target);
+ gcc_assert (refs->is_empty ());
  vec_safe_push (refs, NULL_TREE);
  vec_safe_push (refs, probe);
  vec_safe_push (refs, TREE_TYPE (probe));
--- gcc/testsuite/g++.dg/cpp1y/constexpr-complex2.C.jj  2025-02-27 
17:01:18.613919510 +0100
+++ gcc/testsuite/g++.dg/cpp1y/constexpr-complex2.C 2025-02-27 
17:01:47.631517685 +0100
@@ -0,0 +1,18 @@
+// PR c++/119045
+// { dg-do compile { target c++14 } }
+
+constexpr float
+foo ()
+{
+  __complex__ float f {1, 2};
+  float s = __real__ f + __imag__ f;
+  float &r = __real__ f;
+  float &i = __imag__ f;
+  r = 42;
+  s += __real__ f;
+  i = 3;
+  s += __imag__ f;
+  return s;
+}
+
+static_assert (foo () == 48.0f, "");

Jakub



Re: [Fortran, Patch, PR118730, v1] Ensure user-finalized type is referenced

2025-02-27 Thread Harald Anlauf

Hi Andre,

Am 27.02.25 um 18:36 schrieb Andre Vehreschild:

Hi all,

attached patch fixes user defined finalizers in derived (class) types not
getting called, when the variable declared of that type was not used in the
current block. The patch ensures calling the finalizer by marking the
variable referenced, if it has not been.

Additionally I had to patch three testcases, because their tree-dump-scans did
not fit anymore. In one case a variable was not used; in the two others the
counts did not match any more.

Regtests ok on x86_64-pc-linux-gnu / F41. Ok for mainline?


this LGTM.

Thanks for the patch!

Harald


Regards,
Andre
--
Andre Vehreschild * Email: vehre ad gmx dot de




[PATCH] libstdc++: Fix ranges::move and ranges::move_backward to use iter_move [PR105609]

2025-02-27 Thread Jonathan Wakely
The ranges::move and ranges::move_backward algorithms are supposed to
use ranges::iter_move(iter) instead of std::move(*iter), which matters
for an iterator type with an iter_move overload findable by ADL.

Currently those algorithms use std::__assign_one which uses std::move,
so define a new ranges::__detail::__assign_one helper function that uses
ranges::iter_move.

libstdc++-v3/ChangeLog:

PR libstdc++/105609
* include/bits/ranges_algobase.h (__detail::__assign_one): New
helper function.
(__copy_or_move, __copy_or_move_backward): Use new function
instead of std::__assign_one.
* testsuite/25_algorithms/move/constrained.cc: Check that
ADL iter_move is used in preference to std::move.
* testsuite/25_algorithms/move_backward/constrained.cc:
Likewise.
---

Tested x86_64-linux.

 libstdc++-v3/include/bits/ranges_algobase.h   | 26 +
 .../25_algorithms/move/constrained.cc | 29 +++
 .../move_backward/constrained.cc  | 29 +++
 3 files changed, 78 insertions(+), 6 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algobase.h 
b/libstdc++-v3/include/bits/ranges_algobase.h
index eceb859e88b..a08f659b3ae 100644
--- a/libstdc++-v3/include/bits/ranges_algobase.h
+++ b/libstdc++-v3/include/bits/ranges_algobase.h
@@ -188,6 +188,20 @@ namespace ranges
 
   inline constexpr __equal_fn equal{};
 
+namespace __detail
+{
+  template
+[[__gnu__::__always_inline__]]
+constexpr void
+__assign_one(_OutIter& __out, _InIter& __in)
+{
+  if constexpr (_IsMove)
+   *__out = ranges::iter_move(__in);
+  else
+   *__out = *__in;
+}
+} // namespace __detail
+
   template
 struct in_out_result
 {
@@ -291,14 +305,14 @@ namespace ranges
__builtin_memmove(__result, __first,
  sizeof(_ValueTypeI) * __num);
  else if (__num == 1)
-   std::__assign_one<_IsMove>(__result, __first);
+   __detail::__assign_one<_IsMove>(__result, __first);
  return {__first + __num, __result + __num};
}
}
 
  for (auto __n = __last - __first; __n > 0; --__n)
{
- std::__assign_one<_IsMove>(__result, __first);
+ __detail::__assign_one<_IsMove>(__result, __first);
  ++__first;
  ++__result;
}
@@ -308,7 +322,7 @@ namespace ranges
{
  while (__first != __last)
{
- std::__assign_one<_IsMove>(__result, __first);
+ __detail::__assign_one<_IsMove>(__result, __first);
  ++__first;
  ++__result;
}
@@ -420,7 +434,7 @@ namespace ranges
__builtin_memmove(__result, __first,
  sizeof(_ValueTypeI) * __num);
  else if (__num == 1)
-   std::__assign_one<_IsMove>(__result, __first);
+   __detail::__assign_one<_IsMove>(__result, __first);
  return {__first + __num, __result};
}
}
@@ -432,7 +446,7 @@ namespace ranges
{
  --__tail;
  --__result;
- std::__assign_one<_IsMove>(__result, __tail);
+ __detail::__assign_one<_IsMove>(__result, __tail);
}
  return {std::move(__lasti), std::move(__result)};
}
@@ -445,7 +459,7 @@ namespace ranges
{
  --__tail;
  --__result;
- std::__assign_one<_IsMove>(__result, __tail);
+ __detail::__assign_one<_IsMove>(__result, __tail);
}
  return {std::move(__lasti), std::move(__result)};
}
diff --git a/libstdc++-v3/testsuite/25_algorithms/move/constrained.cc 
b/libstdc++-v3/testsuite/25_algorithms/move/constrained.cc
index 587b2f3728b..e2b45b070ef 100644
--- a/libstdc++-v3/testsuite/25_algorithms/move/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/move/constrained.cc
@@ -204,6 +204,35 @@ test05()
   VERIFY( ranges::equal(v, (int[]){1,2,3,0}) );
 }
 
+namespace pr105609
+{
+  struct I {
+using value_type = int;
+using difference_type = std::ptrdiff_t;
+int operator*() const;
+I& operator++();
+I operator++(int);
+I& operator--();
+I operator--(int);
+bool operator==(I) const;
+friend int& iter_move(const I&);
+  };
+}
+
+void
+test06(pr105609::I i)
+{
+  // PR libstdc++/105609
+  // ranges::move should use ranges::iter_move instead of std::move
+  struct O {
+O(int&) { }
+O(int&&) = delete;
+  };
+
+  O* o = nullptr;
+  std::ranges::move(i, i, o);
+}
+
 int
 main()
 {
diff --git a/libstdc++-v3/testsuite/25_algorithms/move_backward/constrained.cc 
b/libstdc++-v3/testsuite/25_algorithms/move_backward/constrained.cc
index 8f6fd455b4b..4d94d386dd0 100644
--- a/libstdc++-v3/testsuite/25_algorith

Re: [PATCH] libstdc++: Fix constraint recursion in basic_const_iterator relops [PR112490]

2025-02-27 Thread Patrick Palka
On Thu, 27 Feb 2025, Jonathan Wakely wrote:

> On Thu, 27 Feb 2025 at 20:52, Patrick Palka  wrote:
> >
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > 14?
> 
> OK for both.

Thanks.

> 
> Do we want a library issue for this? The "buggy" code you're changing
> is exactly what the standard says.

I think so, I'll submit one.  It seems like a GCC specific issue
only because we're still the only ones to implement CWG 2369 AFAICT.

> 
> 
> >
> > -- >8 --
> >
> > Here for
> >
> >   using RCI = reverse_iterator<basic_const_iterator<vector<int>::iterator>>
> >   static_assert(std::totally_ordered<RCI>);
> >
> > we effectively need to check the requirement
> >
> >   requires (RCI x) { x RELOP x; }  for each RELOP in {<, >, <=, >=}
> >
> > which we expect to be straightforwardly satisfied by reverse_iterator's
> > namespace-scope relops.  But due to ADL we find ourselves also
> > considering the basic_const_iterator relop friends, which before CWG
> > 2369 would be quickly discarded since RCI clearly isn't convertible to
> > basic_const_iterator.  After CWG 2369 though we must first check these
> > relops' constraints (with _It = vector<int>::iterator and _It2 = RCI),
> > which entails checking totally_ordered<RCI> recursively.
> >
> > This patch fixes this by turning the problematic non-dependent parameter
> > of type basic_const_iterator<_It> into a dependent parameter of type
> > basic_const_iterator<_It3> where _It3 is constrained to match _It.
> > Thus the basic_const_iterator relop friends now get quickly discarded
> > during deduction since RCI isn't a specialization of basic_const_iterator
> > (or derived from one) and so _It3 is not deduced.
> >
> > PR libstdc++/112490
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/bits/stl_iterator.h (basic_const_iterator::operator<):
> > Replace non-dependent basic_const_iterator parameter with
> > a dependent one of type basic_const_iterator<_It3>.  Constrain
> > _It3 to match _It.
> > (basic_const_iterator::operator>): Likewise.
> > (basic_const_iterator::operator<=): Likewise.
> > (basic_const_iterator::operator>=): Likewise.
> > * testsuite/24_iterators/const_iterator/112490.cc: New test.
> > ---
> >  libstdc++-v3/include/bits/stl_iterator.h | 16 
> >  .../24_iterators/const_iterator/112490.cc| 12 
> >  2 files changed, 20 insertions(+), 8 deletions(-)
> >  create mode 100644 
> > libstdc++-v3/testsuite/24_iterators/const_iterator/112490.cc
> >
> > diff --git a/libstdc++-v3/include/bits/stl_iterator.h 
> > b/libstdc++-v3/include/bits/stl_iterator.h
> > index 3e025342fb5..33732b1a428 100644
> > --- a/libstdc++-v3/include/bits/stl_iterator.h
> > +++ b/libstdc++-v3/include/bits/stl_iterator.h
> > @@ -2881,30 +2881,30 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
> > && three_way_comparable_with<_It, _It2>
> >{ return _M_current <=> __y; }
> >
> > -template<__detail::__not_a_const_iterator _It2>
> > +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
> >friend constexpr bool
> > -  operator<(const _It2& __x, const basic_const_iterator& __y)
> > +  operator<(const _It2& __x, const basic_const_iterator<_It3>& __y)
> >noexcept(noexcept(__x < __y._M_current))
> >requires random_access_iterator<_It> && totally_ordered_with<_It, 
> > _It2>
> >{ return __x < __y._M_current; }
> >
> > -template<__detail::__not_a_const_iterator _It2>
> > +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
> >friend constexpr bool
> > -  operator>(const _It2& __x, const basic_const_iterator& __y)
> > +  operator>(const _It2& __x, const basic_const_iterator<_It3>& __y)
> >noexcept(noexcept(__x > __y._M_current))
> >requires random_access_iterator<_It> && totally_ordered_with<_It, 
> > _It2>
> >{ return __x > __y._M_current; }
> >
> > -template<__detail::__not_a_const_iterator _It2>
> > +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
> >friend constexpr bool
> > -  operator<=(const _It2& __x, const basic_const_iterator& __y)
> > +  operator<=(const _It2& __x, const basic_const_iterator<_It3>& __y)
> >noexcept(noexcept(__x <= __y._M_current))
> >requires random_access_iterator<_It> && totally_ordered_with<_It, 
> > _It2>
> >{ return __x <= __y._M_current; }
> >
> > -template<__detail::__not_a_const_iterator _It2>
> > +template<__detail::__not_a_const_iterator _It2, same_as<_It> _It3>
> >friend constexpr bool
> > -  operator>=(const _It2& __x, const basic_const_iterator& __y)
> > +  operator>=(const _It2& __x, const basic_const_iterator<_It3>& __y)
> >noexcept(noexcept(__x >= __y._M_current))
> >requires random_access_iterator<_It> && totally_ordered_with<_It, 
> > _It2>
> >{ return __x >= __y._M_current; }
> > diff --git a/libstdc++-v3/testsuite/24_iterato

Re: [PATCH v4] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread Robin Dapp

+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */


Ah, the CI flagged the test in previous versions.  It's missing the usual 
-mabi=...  I keep forgetting this...


--
Regards
Robin



Re: [PATCH v4] RISC-V: Fix bug for expand_const_vector interleave [PR118931]

2025-02-27 Thread Robin Dapp

Hi Pan,


+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
+
+ if (!step1.is_constant ())
+   overflow_smode_p = true;
+ else
+   {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = (uint64_t)step1.to_constant ();
+ uint64_t base1_val = (uint64_t)base1_poly.to_constant ();


The casts aren't necessary anymore I suppose?


+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr118931-run-1.c
@@ -0,0 +1,19 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-options "-O3 -march=rv64gcv -flto -mrvv-vector-bits=zvl" } */


Please change -flto to -fwhole-program still.

OK with these changed, thanks!

--
Regards
Robin



Re: [committed] d: Increase max parallelism of the D testsuite

2025-02-27 Thread Iain Buclaw
Excerpts from Lewis Hyatt's message of Februar 26, 2025 2:04 am:
> On Tue, Feb 25, 2025 at 12:00 PM Iain Buclaw  wrote:
>>
>> Hi,
>>
>> It was noticed that when running the testsuite for gdc and libphobos in
>> parallel, this was capped at 10 simultaneous jobs each.  Increase this
>> limit to 128, which enables running for example `make check-d -j48` to
>> complete in half the time.
>>
>> Bootstrapped and regression tested on x86_64-linux-gnu.
>>
>> Committed to mainline.
>>
>> Regards,
>> Iain.
> 
> Might it be a good time to revisit the issue with the libphobos
> unittest.exp, that it runs the tests too many times (and a variable
> number of them) in parallel mode? It was last discussed a few years
> ago here:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-July/598487.html
> 
> But I never followed back up on it basically the patch you
> proposed in that thread worked fine when I tested it, FWIW.
> 
> 
> -Lewis
> 

Hi Lewis,

Good spot! Was a PR created for this? I don't think I ever got round to 
testing this at the time, but checked just now and confirmed what you're 
seeing.

Iain.


[OG14] OpenMP/Fortran: extend 'adjust_args' clause, fixes for it and declare variant [PR115271]: avoid 'error: variable ‘saved_loc2’ set but not used [-Werror=unused-but-set-variable]' (was: [Patch] O

2025-02-27 Thread Thomas Schwinge
Hi Tobias!

On 2025-02-17T19:49:43+0100, Tobias Burnus  wrote:
> OpenMP/Fortran: extend 'adjust_args' clause, fixes for it and declare variant 
> [PR115271]

The OG14 "cherry-pick" of this apparently has some different code at
least in one place; trunk code:

> --- a/gcc/fortran/openmp.cc
> +++ b/gcc/fortran/openmp.cc

> @@ -6766,32 +6766,160 @@ gfc_match_omp_declare_variant (void)

> +   if (!p->expr && gfc_match ("omp_num_args") == MATCH_YES)
> + {
> +   if (!have_range)
> + p->u.adj_args.range_start = need_range = true;
> +   else
> + need_range = false;
> +
> +   locus saved_loc2 = gfc_current_locus;
> +   gfc_gobble_whitespace ();
> +   char c = gfc_peek_ascii_char ();
> +   if (c == '+' || c == '-')
> + {
> +   if (gfc_match ("+ %e", &p->expr) == MATCH_YES)
> + p->u.adj_args.omp_num_args_plus = true;
> +   else if (gfc_match ("- %e", &p->expr) == MATCH_YES)
> + p->u.adj_args.omp_num_args_minus = true;
> +   else if (!gfc_error_check ())
> + {
> +   gfc_error ("expected constant integer expression "
> +  "at %C");
> +   p->u.adj_args.error_p = true;
> +   return MATCH_ERROR;
> + }
> +   p->where = gfc_get_location_range (&saved_loc, 1,
> +  &saved_loc, 1,
> +  &gfc_current_locus);
> + }
> +   else
> + {
> +   p->where = gfc_get_location_range (&saved_loc, 1,
> +  &saved_loc, 1,
> +  &saved_loc2);

OG14 doesn't ever use 'saved_loc2', and therefore
'error: variable ‘saved_loc2’ set but not used 
[-Werror=unused-but-set-variable]'.
Pushed to OG14 branch commit f2ab2de88082d8c52bded3725aa78610085e072c
"OpenMP/Fortran: extend 'adjust_args' clause, fixes for it and declare variant 
[PR115271]: avoid 'error: variable ‘saved_loc2’ set but not used 
[-Werror=unused-but-set-variable]'",
see attached.


Grüße
 Thomas


>From f2ab2de88082d8c52bded3725aa78610085e072c Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Sun, 23 Feb 2025 10:34:39 +0100
Subject: [PATCH] =?UTF-8?q?OpenMP/Fortran:=20extend=20'adjust=5Fargs'=20cl?=
 =?UTF-8?q?ause,=20fixes=20for=20it=20and=20declare=20variant=20[PR115271]?=
 =?UTF-8?q?:=20avoid=20'error:=20variable=20=E2=80=98saved=5Floc2=E2=80=99?=
 =?UTF-8?q?=20set=20but=20not=20used=20[-Werror=3Dunused-but-set-variable]?=
 =?UTF-8?q?'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[...]/source-gcc/gcc/fortran/openmp.cc: In function ‘match gfc_match_omp_declare_variant()’:
[...]/source-gcc/gcc/fortran/openmp.cc:7127:25: error: variable ‘saved_loc2’ set but not used [-Werror=unused-but-set-variable]
 7127 |   locus saved_loc2 = gfc_current_locus;
  | ^~

Fix-up for OG14 commit e0246616020e95e74afa9d504c8848e6e905fab4
"OpenMP/Fortran: extend 'adjust_args' clause, fixes for it and declare variant [PR115271]".

	PR fortran/115271
	gcc/fortran/
	* openmp.cc (gfc_match_omp_declare_variant): Remove
	'locus saved_loc2'.
---
 gcc/fortran/openmp.cc | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/fortran/openmp.cc b/gcc/fortran/openmp.cc
index 16b255ec1062..421e5276d339 100644
--- a/gcc/fortran/openmp.cc
+++ b/gcc/fortran/openmp.cc
@@ -7124,7 +7124,6 @@ gfc_match_omp_declare_variant (void)
 		  else
 		need_range = false;
 
-		  locus saved_loc2 = gfc_current_locus;
 		  gfc_gobble_whitespace ();
 		  char c = gfc_peek_ascii_char ();
 		  if (c == '+' || c == '-')
-- 
2.45.2



[PATCH] RISC-V: Adjust LMUL when using maximum SEW [PR117955].

2025-02-27 Thread Robin Dapp

Hi,

when merging two vsetvls that both only demand "SEW >= ..." we
use their maximum SEW and keep the LMUL.  That may lead to invalid
vector configurations like
 e64, mf4.
As we make sure that the SEW requirements overlap we can use the SEW
and LMUL of the configuration with the larger SEW.

Ma Jin already touched this merge rule some weeks ago and fixed the
ratio calculation (r15-6873).  Calculating the ratio from an invalid
SEW/LMUL combination led to an overflow in the ratio variable, though.
I'd argue the proper fix is to update SEW and LMUL, keeping the ratio
as before.  This breaks bug-10.c, though, and I'm not sure what it
really tests.  SEW/LMUL actually doesn't change, we just emit a slightly
different vsetvl.  Maybe it was reduced too far?  Jin, any insight
there?  I changed it into a run test for now.

Regtested on rv64gcv_zvl512b.

Regards
Robin

PR target/117955

gcc/ChangeLog:

* config/riscv/riscv-vsetvl.cc (use_max_sew): Use LMUL of vsetvl
with larger SEW.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/bug-10.c: Convert to run test.
* gcc.target/riscv/rvv/base/pr117955.c: New test.
---
gcc/config/riscv/riscv-vsetvl.cc  |   8 +-
.../gcc.target/riscv/rvv/base/bug-10.c|  32 +-
.../gcc.target/riscv/rvv/base/pr117955.c  | 827 ++
3 files changed, 861 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c

diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 82284624a24..f0165f7b8c8 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -1729,9 +1729,11 @@ private:
  }
  inline void use_max_sew (vsetvl_info &prev, const vsetvl_info &next)
  {
-int max_sew = MAX (prev.get_sew (), next.get_sew ());
-prev.set_sew (max_sew);
-prev.set_ratio (calculate_ratio (prev.get_sew (), prev.get_vlmul ()));
+bool prev_sew_larger = prev.get_sew () >= next.get_sew ();
+const vsetvl_info from = prev_sew_larger ? prev : next;
+prev.set_sew (from.get_sew ());
+prev.set_vlmul (from.get_vlmul ());
+prev.set_ratio (from.get_ratio ());
use_min_of_max_sew (prev, next);
  }
  inline void use_next_sew_lmul (vsetvl_info &prev, const vsetvl_info &next)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
index af3a8610d63..5f7490e8a3b 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/bug-10.c
@@ -1,14 +1,40 @@
-/* { dg-do compile { target { rv64 } } } */
+/* { dg-do run { target { rv64 } } } */
+/* { dg-require-effective-target rv64 } */
+/* { dg-require-effective-target riscv_v } */
/* { dg-options " -march=rv64gcv_zvfh -mabi=lp64d -O2 --param=vsetvl-strategy=optim 
-fno-schedule-insns  -fno-schedule-insns2 -fno-schedule-fusion " } */

#include 

void
-foo (uint8_t *ptr, vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t 
vl)
+__attribute__ ((noipa))
+foo (vfloat16m4_t *v1, vuint32m8_t *v2, vuint8m2_t *v3, size_t vl)
{
  *v1 = __riscv_vfmv_s_f_f16m4 (1, vl);
  *v2 = __riscv_vmv_s_x_u32m8 (2963090659u, vl);
  *v3 = __riscv_vsll_vx_u8m2 (__riscv_vid_v_u8m2 (vl), 2, vl);
}

-/* { dg-final { scan-assembler-not {vsetvli.*zero,zero} } }*/
+int
+main ()
+{
+  vfloat16m4_t v1;
+  vuint32m8_t v2;
+  vuint8m2_t v3;
+  int vl = 4;
+  foo (&v1, &v2, &v3, vl);
+
+  _Float16 val1 = ((_Float16 *)&v1)[0];
+  if (val1 - 1.f > 0.1f)
+__builtin_abort ();
+
+  uint32_t val2 = ((uint32_t *)&v2)[0];
+  if (val2 != 2963090659u)
+__builtin_abort ();
+
+  for (int i = 0; i < vl; i++)
+{
+  uint8_t val = ((uint8_t *)&v3)[i];
+  if (val != i << 2)
+__builtin_abort ();
+}
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
new file mode 100644
index 000..49ccb6097d0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr117955.c
@@ -0,0 +1,827 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvfh -O3" } */
+
+#include 
+
+#define dataLen 100
+#define isNaNF16UI( a ) (((~(a) & 0x7C00) == 0) && ((a) & 0x03FF))
+#define isNaNF32UI( a ) (((~(a) & 0x7F80) == 0) && ((a) & 0x007F))
+#define isNaNF64UI( a ) (((~(a) & UINT64_C( 0x7FF0 )) == 0) && ((a) 
& UINT64_C( 0x000F )))
+typedef _Float16 float16_t;
+typedef float float32_t;
+typedef double float64_t;
+
+float16_t convert_binary_u16_f16(uint16_t u16){
+  union { float16_t f16; uint16_t u16; } converter;
+  converter.u16 = u16;
+  if(isNaNF16UI(converter.u16)) return 0;
+  return converter.f16;
+}
+float32_t convert_binary_u32_f32(uint32_t u32){
+  union { float32_t f32; uint32_t u32; } converter;
+  converter.u32 = u32;
+  if(isNaNF32UI(converter.u32)) return 0;
+  return converter.f32;
+}
+float64_t convert_binary_u64_f64(uint64_t u64){
+  union { float64_t f64; uint64_t u64; } convert

Re: [PATCH] c: Assorted fixes for flexible array members in unions [PR119001]

2025-02-27 Thread Qing Zhao
Hi, Jakub,

Thanks a lot for fixing these issues.

Qing

> On Feb 26, 2025, at 03:53, Jakub Jelinek  wrote:
> 
> Hi!
> 
> r15-209 allowed flexible array members inside of unions, but as the
> following testcase shows, not everything has been adjusted for that.
> Unlike structures, in unions flexible array member (as an extension)
> can be any of the members, not just the last one, as in union all
> members are effectively last.
> The first hunk is about an ICE on the initialization of the FAM
> in union which is not the last FIELD_DECL with a string literal,
> the second hunk just formatting fix, third hunk fixes a bug in which
> we were just throwing away the initializers (except for with string literal)
> of FAMs in unions which aren't the last FIELD_DECL, and the last hunk
> is to diagnose FAM errors in unions the same as for structures, in
> particular trying to initialize a FAM with non-constant or initialization
> in nested context.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> 2025-02-26  Jakub Jelinek  
> 
> PR c/119001
> gcc/
> * varasm.cc (output_constructor_regular_field): Don't fail
> assertion if next is non-NULL and FIELD_DECL if
> TREE_CODE (local->type) is UNION_TYPE.
> gcc/c/
> * c-typeck.cc (pop_init_level): Don't clear constructor_type
> if DECL_CHAIN of constructor_fields is NULL but p->type is UNION_TYPE.
> Formatting fix.
> (process_init_element): Diagnose non-static initialization of flexible
> array member in union or FAM in union initialization in nested context.
> gcc/testsuite/
> * gcc.dg/pr119001-1.c: New test.
> * gcc.dg/pr119001-2.c: New test.
> 
> --- gcc/varasm.cc.jj 2025-01-03 17:59:48.816160159 +0100
> +++ gcc/varasm.cc 2025-02-25 10:52:08.043968775 +0100
> @@ -5827,10 +5827,13 @@ output_constructor_regular_field (oc_loc
> and the FE splits them into dynamic initialization.  */
>  gcc_checking_assert (fieldsize >= fldsize);
>  /* Given a non-empty initialization, this field had better
> - be last.  Given a flexible array member, the next field
> - on the chain is a TYPE_DECL of the enclosing struct.  */
> + be last except in unions.  Given a flexible array member, the next
> + field on the chain is a TYPE_DECL of the enclosing struct.  */
>  const_tree next = DECL_CHAIN (local->field);
> -  gcc_assert (!fieldsize || !next || TREE_CODE (next) != FIELD_DECL);
> +  gcc_assert (!fieldsize
> +  || !next
> +  || TREE_CODE (next) != FIELD_DECL
> +  || TREE_CODE (local->type) == UNION_TYPE);
> }
>   else
> fieldsize = tree_to_uhwi (DECL_SIZE_UNIT (local->field));
> --- gcc/c/c-typeck.cc.jj 2025-02-13 14:10:52.934623189 +0100
> +++ gcc/c/c-typeck.cc 2025-02-25 11:54:39.857363690 +0100
> @@ -10270,7 +10270,8 @@ pop_init_level (location_t loc, int impl
>  gcc_assert (!TYPE_SIZE (constructor_type));
> 
>  if (constructor_depth > 2)
> -error_init (loc, "initialization of flexible array member in a nested 
> context");
> +error_init (loc, "initialization of flexible array member "
> + "in a nested context");
>  else
>pedwarn_init (loc, OPT_Wpedantic,
>  "initialization of a flexible array member");
> @@ -10278,7 +10279,8 @@ pop_init_level (location_t loc, int impl
>  /* We have already issued an error message for the existence
> of a flexible array member not at the end of the structure.
> Discard the initializer so that we do not die later.  */
> -  if (DECL_CHAIN (constructor_fields) != NULL_TREE)
> +  if (DECL_CHAIN (constructor_fields) != NULL_TREE
> +  && (!p->type || TREE_CODE (p->type) != UNION_TYPE))
>constructor_type = NULL_TREE;
> }
> }
> @@ -12124,6 +12126,42 @@ retry:
>warning (OPT_Wtraditional, "traditional C rejects initialization "
> "of unions");
> 
> +  /* Error for non-static initialization of a flexible array member.  */
> +  if (fieldcode == ARRAY_TYPE
> +  && !require_constant_value
> +  && TYPE_SIZE (fieldtype) == NULL_TREE)
> +{
> +  error_init (loc, "non-static initialization of a flexible "
> +  "array member");
> +  break;
> +}
> +
> +  /* Error for initialization of a flexible array member with
> + a string constant if the structure is in an array.  E.g.:
> + union U { int x; char y[]; };
> + union U s[] = { { 1, "foo" } };
> + is invalid.  */
> +  if (string_flag
> +  && fieldcode == ARRAY_TYPE
> +  && constructor_depth > 1
> +  && TYPE_SIZE (fieldtype) == NULL_TREE)
> +{
> +  bool in_array_p = false;
> +  for (struct constructor_stack *p = constructor_stack;
> +   p && p->type; p = p->next)
> + if (TREE_CODE (p->type) == ARRAY_TYPE)
> +  {
> +in_array_p = true;
> +break;
> +  }
> +  if (in_array_p)
> + {
> +  error_init (loc, "initialization of flexible array "
> +  "member in a nested context");
> +  break;
> + }
> +}
> +
>  /* Accept a string constant to initialize a subarray.  */
>  if (value.value != NULL_TREE
>  && fieldcode == ARRAY_TYPE
> --- gcc/testsuit

Re: [PATCH v3] libstdc++: implement constexpr memory algorithms

2025-02-27 Thread Jonathan Wakely

On 26/02/25 17:27 +0100, Giuseppe D'Angelo wrote:

On 26/02/2025 16:33, Giuseppe D'Angelo wrote:

Whoops, sorry, missed this sub-thread (while replying to the other one).
Change of plans then, I'll amend and remove the ad-hoc constexpr macro.


Done, v3 attached.

Thanks,
--
Giuseppe D'Angelo




From de3751a38330f508be9f08b77136a31481018828 Mon Sep 17 00:00:00 2001
From: Giuseppe D'Angelo 
Date: Sun, 16 Feb 2025 19:37:07 +0100
Subject: [PATCH] libstdc++: implement constexpr memory algorithms

This commit adds support for C++26's constexpr specialized memory
algorithms, introduced by P2283R2, P3508R0, P3369R0.

The uninitialized_default, value, copy, move and fill algorithms are
affected, in all of their variants (iterator-based, range-based and _n
versions.)

The changes are mostly mechanical -- add `constexpr` to a number of
signatures when compiling in C++26 and above modes. The internal helper
guard class for range algorithms instead can be marked unconditionally.

The only "real" change to the implementation of the algorithms is that
during constant evaluation I need to dispatch to a constexpr-friendly
version of them.

For each algorithm family I've added only one test to cover it and its
variants; the idea is to avoid too much repetition and simplify future
maintenance.


The patch itself looks good, but I have some comments on the ChangeLog
part. These are things I would have tweaked myself before pushing, but
now that you have commit access ...


libstdc++-v3/ChangeLog:

* include/bits/ranges_uninitialized.h: Mark the specialized
memory algorithms as constexpr in C++26. Also mark the members
of the _DestroyGuard helper class.
* include/bits/stl_uninitialized.h: Ditto.
* include/bits/stl_construct.h: Mark _Construct_novalue (which
uses placement new to do default initialization) as constexpr
in C++26. This is possible due to P2747R2, which GCC already
implements; other compilers in C++26 modes already implement
P2448R2, so there should be no issues there.


Per-file ChangeLog entries should say "what, not why" ... for reasons.
That makes old GNU-style ChangeLog files sometimes not very useful.
But at least we now have Git commit messages where we can put all the
detailed background, rationale etc.

So the explanation that it's possible to make it constexpr due to
P2448R2 should be in the free text part of the commit message
above, along with the parenthetical saying what the function does.

Also, please mention the changed function by name, i.e.

* include/bits/stl_construct.h (_Construct_novalue): Mark
constexpr for C++26.

See
https://www.gnu.org/prep/standards/html_node/Style-of-Change-Logs.html
for the format.

For the changes in ranges_uninitialized.h and stl_uninitialized.h it's
basically "all the functions" so I think it's OK not to name them all.
(It would also be OK if you did name them all individually, but I
don't think it's necessary.)


* include/bits/version.def: Bump the feature-testing macro.


Please mention which macro, e.g. something like:

* include/bits/version.def (raw_memory_algorithms): Bump
value.


* include/bits/version.h: Regenerate.


(No need to mention the specific macro by name here, because the whole
file was regenerated so this is fine.)


* testsuite/20_util/specialized_algorithms/feature_test_macro.cc: New 
test.


Please add a line break after the colon here (and the lines below). We
try to keep ChangeLog entries below about 78 columns, but for
libstdc++ tests that's often impossible because the pathname is
already longer than that! But we can still put the "New test." on a
new line.

Feel free to push to trunk with those changes to the ChangeLog lines
(and add Reviewed-by: tags for me and/or Patrick if you like). If
you'd feel more comfortable sending it to the mailing list again for a
final check, that's fine too.

Either way, the traditional first commit when you've got write after
approval access is to add yourself to the MAINTAINERS file, as per:
https://gcc.gnu.org/gitwrite.html#authenticated
If you add yourself to the DCO section at the end of the file, you can
omit the Signed-off-by: line in future commit messages (it's implied
if your name is in that section of the MAINTAINERS file).



* 
testsuite/20_util/specialized_algorithms/uninitialized_copy/constexpr.cc: New 
test.
* 
testsuite/20_util/specialized_algorithms/uninitialized_default_construct/constexpr.cc:
New test.
* 
testsuite/20_util/specialized_algorithms/uninitialized_fill/constexpr.cc: New 
test.
* 
testsuite/20_util/specialized_algorithms/uninitialized_move/constexpr.cc: New 
test.
* 
testsuite/20_util/specialized_algorithms/uninitialized_value_construct/constexpr.cc:
New test.

Signed-off-by: Giuseppe D'Angelo 
---
.../include/bits/ranges_uninitialized.h   | 21 ++
libstdc++-v3/include/bits/stl_cons

[committed] testsuite: arm: Avoid incremental link warnings in pr61123-enum-size

2025-02-27 Thread Richard Earnshaw
This test uses incremental linking, but that can generate warnings if
the LTO step contains a mix of LTO and non-LTO object files (this can
happen when there's a testglue file that is normally included during
linking).

We don't care about the testglue, though, so just tell the LTO
optimizer to generate nolto-rel output, which is what it is falling
back to anyway.

gcc/testsuite:
* gcc.target/arm/lto/pr61123-enum-size_0.c: (dg-lto-options) Move
linker related options to ...
(dg-extra-ld-options): ... here.  Add -flinker-output=nolto-rel.
---
 gcc/testsuite/gcc.target/arm/lto/pr61123-enum-size_0.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/arm/lto/pr61123-enum-size_0.c 
b/gcc/testsuite/gcc.target/arm/lto/pr61123-enum-size_0.c
index c23f9d85760..4ccbeb39f43 100644
--- a/gcc/testsuite/gcc.target/arm/lto/pr61123-enum-size_0.c
+++ b/gcc/testsuite/gcc.target/arm/lto/pr61123-enum-size_0.c
@@ -1,5 +1,6 @@
 /* { dg-lto-do link } */
-/* { dg-lto-options { { -fno-short-enums -Wl,-Ur,--no-enum-size-warning -Os 
-nostdlib -flto } } } */
+/* { dg-lto-options { { -fno-short-enums -Os -flto } } } */
+/* { dg-extra-ld-options "-flinker-output=nolto-rel 
-Wl,-Ur,--no-enum-size-warning -nostdlib" } */
 
 #include 
 
-- 
2.34.1



Re: [PATCH] testsuite: arm: Prune incremental link warning

2025-02-27 Thread Richard Earnshaw
On 17/12/2024 15:04, Richard Earnshaw (lists) wrote:
> On 15/11/2024 10:15, Christophe Lyon wrote:
>> On Thu, 14 Nov 2024 at 18:33, Torbjorn SVENSSON
>>  wrote:
>>>
>>>
>>>
>>> On 2024-11-14 16:53, Christophe Lyon wrote:
 On Sun, 10 Nov 2024 at 17:44, Torbjörn SVENSSON
  wrote:
>
> Ok for trunk and releases/gcc-14?
>
> --
>
> When the feature "needs_status_wrapper" in dejagnu is used, the
> resulting gcc_tg.o file is a regular object file and thus the following
> warning will be emitted if doing an incremental link:
>
> .../ld: warning: incremental linking of LTO and non-LTO objects; using
> -flinker-output=nolto-rel which will bypass whole program optimization
>
> Since the warning causes test cases, like pr61123-enum-size, to fail,
> prune it.

 This makes sense. Just to be sure that -flinker-output=nolto-rel does
 not avoid the problem described in PR61123,
 any chance you could try reverting SVN r211832 (well just remove "LTO"
 from short-enums entry in c.opt) and see the original bug with the
 warning (pruned)?
>>>
>>> If I build r15-5047-g7e1d9f58858 with the following change (I obviously
>>> have my patch applied when testing it...):
>>>
>>> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
>>> index 9b9f5e744f6..80d934b516d 100644
>>> --- a/gcc/c-family/c.opt
>>> +++ b/gcc/c-family/c.opt
>>> @@ -2277,7 +2277,7 @@ C++ ObjC++ Optimization Var(flag_rtti) Init(1)
>>>   Generate run time type descriptor information.
>>>
>>>   fshort-enums
>>> -C ObjC C++ ObjC++ LTO Optimization Var(flag_short_enums)
>>> +C ObjC C++ ObjC++ Optimization Var(flag_short_enums)
>>>   Use the narrowest integer type possible for enumeration types.
>>>
>>>   fshort-wchar
>>>
>>>
>>>
>>> For Cortex-A7, Cortex-M0/3/4/7/33/55/85, I get the following failure:
>>>
>>> FAIL: object-readelf Tag_ABI_enum_size size is incorrect.
>>>
>>> I suppose this is what you wanted to get confirmed, right?
>>>
>>> We could, instead of pruning, add the -flinker-output=nolto-rel, but
>>> that fails to link as there is no main function etc.
>>>
>>> I'm not sure that this answers your question, so if it's not what you had
>>> in mind, please let me know. :)
>>>
>>
>> Yes exactly: I wanted to make sure that we'd still have caught the
>> original bug even when using the status_wrapper (and that
>> -flinker-output=nolto-rel didn't have the side effect of hiding the
>> bug).
>>
>> Thanks for the clarification.
> 
> I wonder if we could address this by building the wrapper as a fat LTO 
> object.  It would then be compatible with both LTO and non-LTO compilations.
> 
> We could experiment with this by setting [target_info wrap_compile_flags] to 
> return "-flto -ffat-lto-objects".
> 

After discussing this on IRC I pushed a different patch.  Since the warning 
we're trying to prune tells us what it's falling back to, we might as well just 
tell it to do that anyway, then we won't get the warning.

R.

https://gcc.gnu.org/pipermail/gcc-patches/2025-February/676548.html

> R.
> 
>>
>> Christophe
>>
>>>
>>> Kind regards,
>>> Torbjörn
>>>

 Thanks,

 Christophe

>
> gcc/testsuite/ChangeLog:
>
>  * gcc.target/arm/lto/lto.exp: Prune incremental link warning if
>  status wrapper is used.
>
> Signed-off-by: Torbjörn SVENSSON 
> ---
>   gcc/testsuite/gcc.target/arm/lto/lto.exp | 9 +
>   1 file changed, 9 insertions(+)
>
> diff --git a/gcc/testsuite/gcc.target/arm/lto/lto.exp 
> b/gcc/testsuite/gcc.target/arm/lto/lto.exp
> index 4ccb0737253..3f8377bdd3e 100644
> --- a/gcc/testsuite/gcc.target/arm/lto/lto.exp
> +++ b/gcc/testsuite/gcc.target/arm/lto/lto.exp
> @@ -43,6 +43,14 @@ if { ![check_effective_target_lto] } {
>   return
>   }
>
> +# This variable should only apply to tests called in this exp file.
> +global dg_runtest_extra_prunes
> +set dg_runtest_extra_prunes ""
> +if { ![check_effective_target_unwrapped] } {
> +# The status wrapper is a regular object file
> +lappend dg_runtest_extra_prunes "warning: incremental linking of LTO 
> and non-LTO objects"
> +}
> +
>   gcc_init
>   lto_init no-mathlib
>
> @@ -60,4 +68,5 @@ foreach src [lsort [find $srcdir/$subdir *_0.c]] {
>   lto-execute $src $sid
>   }
>
> +set dg_runtest_extra_prunes ""
>   lto_finish
> --
> 2.25.1
>
>>>
> 




Re: [PATCH] c++: ICE in replace_decl [PR118986]

2025-02-27 Thread Jason Merrill

On 2/26/25 2:16 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Yet another problem that started with r15-6052, compile time evaluation of
prvalues.

cp_fold_r/TARGET_EXPR sees:

   TARGET_EXPR >>> 

so when we call maybe_constant_init, the object we're initializing is D.2701,
and the init is the expr_stmt.  We unwrap the EXPR_STMT/INIT_EXPR/TARGET_EXPR
in maybe_constant_init_1 and so end up evaluating the f1 call.  But f1 returns
c2 whereas the type of D.2701 is ._anon_0 -- the closure.


Sounds like the problem is with the maybe_constant_init_1 unwrapping, it 
probably shouldn't strip INIT_EXPR if the type doesn't match that of 'decl'.



So then we crash in replace_decl on:

  gcc_checking_assert (same_type_ignoring_top_level_qualifiers_p
   (TREE_TYPE (decl), TREE_TYPE (replacement)));

due to the mismatched types.

cxx_eval_outermost_constant_expr is already ready for the types to be
different, in which case the result isn't constant.  But replace_decl
is called before that check.

I'm leaving the assert in replace_decl on purpose, maybe we'll find
another use for it.

PR c++/118986

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_call_expression): Check that the types match
before calling replace_decl.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/constexpr-prvalue1.C: New test.
---
  gcc/cp/constexpr.cc   |  4 +++-
  .../g++.dg/cpp2a/constexpr-prvalue1.C | 23 +++
  2 files changed, 26 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/constexpr-prvalue1.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 59dd0668af3..204cda2a222 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -3390,7 +3390,9 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree 
t,
   current object under construction.  */
if (!*non_constant_p && ctx->object
&& CLASS_TYPE_P (TREE_TYPE (res))
-   && !is_empty_class (TREE_TYPE (res)))
+   && !is_empty_class (TREE_TYPE (res))
+   && same_type_ignoring_top_level_qualifiers_p
+   (TREE_TYPE (res), TREE_TYPE (ctx->object)))


If this happens, rather than just skip the replace_decl, I think we want 
to set *non_constant_p or I expect we'll end up with a wrong value 
somewhere.


Jason



Re: [PATCH] testsuite: arm: Prune incremental link warning

2025-02-27 Thread Torbjorn SVENSSON




On 2025-02-27 16:36, Richard Earnshaw wrote:

On 17/12/2024 15:04, Richard Earnshaw (lists) wrote:

On 15/11/2024 10:15, Christophe Lyon wrote:

On Thu, 14 Nov 2024 at 18:33, Torbjorn SVENSSON
 wrote:




On 2024-11-14 16:53, Christophe Lyon wrote:

On Sun, 10 Nov 2024 at 17:44, Torbjörn SVENSSON
 wrote:


Ok for trunk and releases/gcc-14?

--

When the feature "needs_status_wrapper" in dejagnu is used, the
resulting gcc_tg.o file is a regular object file and thus the following
warning will be emitted if doing an incremental link:

.../ld: warning: incremental linking of LTO and non-LTO objects; using
-flinker-output=nolto-rel which will bypass whole program optimization

Since the warning causes test cases, like pr61123-enum-size, to fail,
prune it.


This makes sense. Just to be sure that -flinker-output=nolto-rel does
not avoid the problem described in PR61123,
any chance you could try reverting SVN r211832 (well just remove "LTO"
from short-enums entry in c.opt) and see the original bug with the
warning (pruned)?


If I build r15-5047-g7e1d9f58858 with the following change (I obviously
have my patch applied when testing it...):

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 9b9f5e744f6..80d934b516d 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -2277,7 +2277,7 @@ C++ ObjC++ Optimization Var(flag_rtti) Init(1)
   Generate run time type descriptor information.

   fshort-enums
-C ObjC C++ ObjC++ LTO Optimization Var(flag_short_enums)
+C ObjC C++ ObjC++ Optimization Var(flag_short_enums)
   Use the narrowest integer type possible for enumeration types.

   fshort-wchar



For Cortex-A7, Cortex-M0/3/4/7/33/55/85, I get the following failure:

FAIL: object-readelf Tag_ABI_enum_size size is incorrect.

I suppose this is what you wanted to get confirmed, right?

We could, instead of pruning, add the -flinker-output=nolto-rel, but
that fails to link as there is no main function etc.

I'm not sure that this answers your question, so if it's not what you had
in mind, please let me know. :)



Yes exactly: I wanted to make sure that we'd still have caught the
original bug even when using the status_wrapper (and that
-flinker-output=nolto-rel didn't have the side effect of hiding the
bug).

Thanks for the clarification.


I wonder if we could address this by building the wrapper as a fat LTO object.  
It would then be compatible with both LTO and non-LTO compilations.

We could experiment with this by setting [target_info wrap_compile_flags] to return 
"-flto -ffat-lto-objects".



After discussing this on IRC I pushed a different patch.  Since the warning 
we're trying to prune tells us what it's falling back to, we might as well just 
tell it to do that anyway, then we won't get the warning.

R.

https://gcc.gnu.org/pipermail/gcc-patches/2025-February/676548.html


I just tested this approach and it works fine for me.
Thanks for fixing it Richard!

Kind regards,
Torbjörn




R.



Christophe



Kind regards,
Torbjörn



Thanks,

Christophe



gcc/testsuite/ChangeLog:

  * gcc.target/arm/lto/lto.exp: Prune incremental link warning if
  status wrapper is used.

Signed-off-by: Torbjörn SVENSSON 
---
   gcc/testsuite/gcc.target/arm/lto/lto.exp | 9 +
   1 file changed, 9 insertions(+)

diff --git a/gcc/testsuite/gcc.target/arm/lto/lto.exp 
b/gcc/testsuite/gcc.target/arm/lto/lto.exp
index 4ccb0737253..3f8377bdd3e 100644
--- a/gcc/testsuite/gcc.target/arm/lto/lto.exp
+++ b/gcc/testsuite/gcc.target/arm/lto/lto.exp
@@ -43,6 +43,14 @@ if { ![check_effective_target_lto] } {
   return
   }

+# This variable should only apply to tests called in this exp file.
+global dg_runtest_extra_prunes
+set dg_runtest_extra_prunes ""
+if { ![check_effective_target_unwrapped] } {
+# The status wrapper is a regular object file
+lappend dg_runtest_extra_prunes "warning: incremental linking of LTO and 
non-LTO objects"
+}
+
   gcc_init
   lto_init no-mathlib

@@ -60,4 +68,5 @@ foreach src [lsort [find $srcdir/$subdir *_0.c]] {
   lto-execute $src $sid
   }

+set dg_runtest_extra_prunes ""
   lto_finish
--
2.25.1












Re: [PATCH] c++: Adjust #embed support for P1967R14

2025-02-27 Thread Jason Merrill

On 2/24/25 4:31 AM, Jakub Jelinek wrote:

Hi!

Now that the #embed paper has been voted in, the following patch
removes the pedwarn for C++26 on it (and adjusts pedwarn warning for
older C++ versions) and predefines __cpp_pp_embed FTM.

I believe we otherwise implement everything in the paper already,
except I'm really confused by the
[Example:

#embed <data.dat> limit(__has_include("a.h"))

#if __has_embed(<data.dat> limit(__has_include("a.h")))
// ill-formed: __has_include [cpp.cond] cannot appear here
#endif

— end example]
part.  My reading of both C23 and C++ with the P1967R14 paper in
is that the first case (#embed with __has_include or __has_embed in its
clauses) is what is clearly invalid and so the ill-formed note should be
for #embed.  And the __has_include/__has_embed in __has_embed is actually
questionable.
Both C and C++ have something like
"The identifiers __has_include, __has_embed, and __has_c_attribute
shall not appear in any context not mentioned in this subclause."
or
"The identifiers __has_include and __has_cpp_attribute shall not appear
in any context not mentioned in this subclause."
(into which P1967R14 adds __has_embed) in the conditional inclusion
subclause.
#embed is defined in a different one, so using those in there is invalid
(unless "using the rules specified for conditional inclusion" wording
e.g. in limit clause overrides that).
The reason why I think it is fuzzy for __has_embed is that __has_embed
is actually defined in the Conditional inclusion subclause (so that
would mean one can use __has_include, __has_embed and __has_*attribute
in there) but its clauses are described in a different one.

GCC currently accepts
#embed __FILE__ limit (__has_include ())
#if __has_embed (__FILE__ limit (__has_include ()))
#endif
#embed __FILE__ limit (__has_embed ("a.c"))
#if __has_embed (__FILE__ limit (__has_embed ("a.c")))
#endif
with the exception of __has_embed in #embed which results in a strange
message.
Note, it isn't just about limit clause, but also about
prefix/suffix/if_empty, except that in those cases the "using the rules
specified for conditional inclusion" doesn't apply.

In any case, I'd hope that can be dealt with incrementally (and should
be handled the same for both C and C++).


I agree.


Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2025-02-24  Jakub Jelinek  

libcpp/
* init.cc (lang_defaults): Set embed for GNUCXX26 and CXX26.
* directives.cc (do_embed): Adjust pedwarn wording for embed in C++.
gcc/c-family/
* c-cppbuiltin.cc (c_cpp_builtins): Predefine __cpp_pp_embed=202502
for C++26.
gcc/testsuite/
* g++.dg/cpp/embed-1.C: Adjust for pedwarn wording change and don't
expect any error for C++26.
* g++.dg/cpp/embed-2.C: Adjust for pedwarn wording change and don't
expect any warning for C++26.
* g++.dg/cpp26/feat-cxx26.C: Test __cpp_pp_embed value.

--- libcpp/init.cc.jj   2025-02-13 19:59:56.204572159 +0100
+++ libcpp/init.cc  2025-02-20 21:28:07.340156536 +0100
@@ -149,8 +149,8 @@ static const struct lang_flags lang_defa
/* CXX20*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
/* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
/* CXX23*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
-  /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
-  /* CXX26*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+  /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
+  /* CXX26*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
/* ASM  */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
  };
  
--- libcpp/directives.cc.jj	2025-02-13 19:59:56.202572170 +0100

+++ libcpp/directives.cc2025-02-20 21:40:56.379899457 +0100
@@ -1367,7 +1367,7 @@ do_embed (cpp_reader *pfile)
  {
if (CPP_OPTION (pfile, cplusplus))
cpp_error (pfile, CPP_DL_PEDWARN,
-  "%<#%s%> is a GCC extension", "embed");
+  "%<#%s%> before C++26 is a GCC extension", "embed");


Please change this to cpp_pedwarning/CPP_W_CXX26_EXTENSIONS.


else
cpp_error (pfile, CPP_DL_PEDWARN,
   "%<#%s%> before C23 is a GCC extension", "embed");
--- gcc/c-family/c-cppbuiltin.cc.jj 2025-02-13 19:59:55.144578075 +0100
+++ gcc/c-family/c-cppbuiltin.cc2025-02-20 21:34:18.626480792 +0100
@@ -1093,6 +1093,7 @@ c_cpp_builtins (cpp_reader *pfile)
  cpp_define (pfile, "__cpp_deleted_function=202403L");
  cpp_define (pfile, "__cpp_variadic_friend=202403L");
  cpp_define (pfile, "__cpp_pack_indexing=202311L");
+ cpp_define (pfile, "__cpp_pp_embed=202502L");
}
if (flag_concepts && cxx_dialect > cxx14)
cpp_define (pfile, "__cpp_concepts=202002L");
--- gcc/testsuite/g++.dg/cpp/embed-1.C.jj   2024-10-13 18:47:45.508432900 
+0200
+++ gcc/t

Re: [PATCH] c++: too many errors with sneaky template [PR118516]

2025-02-27 Thread Jason Merrill

On 2/20/25 5:53 PM, Marek Polacek wrote:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
Since C++20 P0846, a name followed by a < can be treated as a template-name
even though name lookup did not find a template-name.  That happens
in this test with "i < foo ()":

   for (int id = 0; i < foo(); ++id);

and results in a raft of errors about non-constant foo().  The problem
is that the require_potential_constant_expression call in
cp_parser_template_argument emits errors even when we're parsing
tentatively.  So we repeat the error when we're trying to parse
as a nested-name-specifier, type-name, etc.

Guarding the call with !cp_parser_uncommitted_to_tentative_parse_p would
mean that require_potential_constant_expression never gets called.  But
we don't need the call at all as far as I can tell.  Stuff like

   template<int N> struct S { };
   int foo () { return 4; }
   void
   g ()
   {
 S<foo()> s;
   }

gets diagnosed in convert_nontype_argument.  In fact, with this patch,
we only emit "call to non-constexpr function" once.  (That is, in C++17
only; C++14 uses a different path.)

PR c++/118516

gcc/cp/ChangeLog:

* parser.cc (cp_parser_template_argument): Don't call
require_potential_constant_expression.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/fn-template11.C: Remove dg-error.
* g++.dg/template/fn-template1.C: New test.
* g++.dg/template/fn-template2.C: New test.
---
  gcc/cp/parser.cc |  1 -
  gcc/testsuite/g++.dg/cpp2a/fn-template11.C   |  2 +-
  gcc/testsuite/g++.dg/template/fn-template1.C | 12 
  gcc/testsuite/g++.dg/template/fn-template2.C |  9 +
  4 files changed, 22 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/fn-template1.C
  create mode 100644 gcc/testsuite/g++.dg/template/fn-template2.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 0578aad1b1c..84b36a21767 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -20315,7 +20315,6 @@ cp_parser_template_argument (cp_parser* parser)
/* With C++17 generalized non-type template arguments we need to handle
 lvalue constant expressions, too.  */
argument = cp_parser_assignment_expression (parser);
-  require_potential_constant_expression (argument);
  }
  
if (!maybe_type_id)

diff --git a/gcc/testsuite/g++.dg/cpp2a/fn-template11.C 
b/gcc/testsuite/g++.dg/cpp2a/fn-template11.C
index 1a6b6882900..ca25403f39b 100644
--- a/gcc/testsuite/g++.dg/cpp2a/fn-template11.C
+++ b/gcc/testsuite/g++.dg/cpp2a/fn-template11.C
@@ -7,5 +7,5 @@ int nonconst ();
  int foo ()
  {
return blah < // { dg-error "not declared" }
-nonconst (), nonconst (); // { dg-error "call to non-.constexpr. function" 
}
+nonconst (), nonconst ();
  }
diff --git a/gcc/testsuite/g++.dg/template/fn-template1.C 
b/gcc/testsuite/g++.dg/template/fn-template1.C
new file mode 100644
index 000..14b98836880
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/fn-template1.C
@@ -0,0 +1,12 @@
+// PR c++/118516
+// { dg-do compile }
+// Like cpp2a/fn-template11.C but with blah declared.
+
+int nonconst ();
+
+int foo ()
+{
+  int blah = 20;
+  return blah <
+nonconst (), nonconst ();
+}
diff --git a/gcc/testsuite/g++.dg/template/fn-template2.C 
b/gcc/testsuite/g++.dg/template/fn-template2.C
new file mode 100644
index 000..c7c31dd9b30
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/fn-template2.C
@@ -0,0 +1,9 @@
+// PR c++/118516
+// { dg-do compile }
+
+int foo();
+int main()
+{
+for (int id = 0; i 

[PATCH] Fortran: fix check for non-optional arrays passed to elemental

2025-02-27 Thread Peter Hill
Dear all,

The attached patch fixes an ICE in gfc_resolve_code when passing an
optional array to an elemental procedure with `-pedantic` enabled.
PR95446 added the original check; this patch fixes the case where the
other actual argument is an array literal (or anything else other
than a variable). The ICE has been present since 11.1, so could this be
backported?

Cheers,
Peter

gcc/fortran/ChangeLog

  * resolve.cc (resolve_elemental_actual): When checking other
  actual arguments to elemental procedures, don't check
  attributes of literals and function calls.

gcc/testsuite/ChangeLog

  * gfortran.dg/pr95446.f90: Expand test case to literals and
  function calls.

Signed-off-by: Peter Hill 
---
gcc/fortran/resolve.cc|  4 +++-
gcc/testsuite/gfortran.dg/pr95446.f90 | 14 ++
2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/resolve.cc b/gcc/fortran/resolve.cc
index 6a83a7967a8..bf602389d5b 100644
--- a/gcc/fortran/resolve.cc
+++ b/gcc/fortran/resolve.cc
@@ -2429,7 +2429,9 @@ resolve_elemental_actual (gfc_expr *expr, gfc_code *c)
 for (a = arg0; a; a = a->next)
   if (a != arg
   && a->expr->rank == arg->expr->rank
-   && !a->expr->symtree->n.sym->attr.optional)
+   && (a->expr->expr_type != EXPR_VARIABLE
+   || (a->expr->expr_type == EXPR_VARIABLE
+   && !a->expr->symtree->n.sym->attr.optional)))
 {
   t = true;
   break;
diff --git a/gcc/testsuite/gfortran.dg/pr95446.f90
b/gcc/testsuite/gfortran.dg/pr95446.f90
index 86e1019d7af..0787658813a 100644
--- a/gcc/testsuite/gfortran.dg/pr95446.f90
+++ b/gcc/testsuite/gfortran.dg/pr95446.f90
@@ -22,6 +22,20 @@ program elemental_optional

  end function outer

+  function outer_literal(o) result(l)
+integer, intent(in), optional :: o(5)
+integer :: l(5)
+
+l = inner(o, [1,2,3,4,5])
+  end function outer_literal
+
+  function outer_func(o) result(l)
+integer, intent(in), optional :: o(5)
+integer :: l(5)
+
+l = inner(o, outer())
+  end function outer_func
+
  elemental function inner(a,b) result(x)
integer, intent(in), optional :: a
integer, intent(in) :: b
--
2.48.1


Re: [PATCH] ipa-sra: Avoid clashes with ipa-cp when pulling accesses across calls (PR 118243)

2025-02-27 Thread Jan Hubicka
> gcc/ChangeLog:
>
> 2025-02-10  Martin Jambor  
>
>   PR ipa/118243
>   * ipa-sra.cc (pull_accesses_from_callee): New parameters
>   caller_ipcp_ts and param_idx.  Check that scalar pulled accesses would
>   not clash with a known IPA-CP aggregate constant.
>   (param_splitting_across_edge): Pass IPA-CP transformation summary and
>   caller parameter index to pull_accesses_from_callee.
>
> gcc/testsuite/ChangeLog:
>
> 2025-02-10  Martin Jambor  
>
>   PR ipa/118243
>   * g++.dg/ipa/pr118243.C: New test.

OK,
thanks!
Honza


Re: [PATCH] ipa-vr: Handle non-conversion unary ops separately from conversions (PR 118756)

2025-02-27 Thread Jan Hubicka
> gcc/ChangeLog:
> 
> 2025-02-24  Martin Jambor  
> 
>   PR ipa/118785
> 
>   * ipa-cp.cc (ipa_vr_intersect_with_arith_jfunc): Handle non-conversion
>   unary operations separately before doing any conversions.  Check
>   expr_type_first_operand_type_p for non-unary operations too.  Fix type
>   of op_res.
> 
> gcc/testsuite/ChangeLog:
> 
> 2025-02-24  Martin Jambor  
> 
>   PR ipa/118785
>   * g++.dg/lto/pr118785_0.C: New test.
> ---
>  gcc/ipa-cp.cc | 34 ---
>  gcc/testsuite/g++.dg/lto/pr118785_0.C | 14 +++
>  2 files changed, 45 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/lto/pr118785_0.C
> 
> diff --git a/gcc/ipa-cp.cc b/gcc/ipa-cp.cc
> index 68959f2677b..0dd65a707b6 100644
> --- a/gcc/ipa-cp.cc
> +++ b/gcc/ipa-cp.cc
> @@ -1720,8 +1720,34 @@ ipa_vr_intersect_with_arith_jfunc (vrange &vr,
>enum tree_code operation = ipa_get_jf_pass_through_operation (jfunc);
>if (TREE_CODE_CLASS (operation) == tcc_unary)
>  {
> +  value_range op_res;
> +  const value_range *inter_vr;
> +  if (operation != NOP_EXPR)
> + {
> +   tree operation_type;
> +   if (expr_type_first_operand_type_p (operation))
> + operation_type = src_type;
> +   else if (operation == ABSU_EXPR)
> + operation_type = unsigned_type_for (src_type);
> +   else
> + return;

I would say this is fine for stage4, but I think we should add a comment
explaining why this somewhat ugly logic is here and how to fix it later.

Patch is OK with this change.
Thanks,
Honza


[PATCH] c++, v2: Adjust #embed support for P1967R14

2025-02-27 Thread Jakub Jelinek
On Thu, Feb 27, 2025 at 10:48:14AM -0500, Jason Merrill wrote:
> > --- libcpp/directives.cc.jj 2025-02-13 19:59:56.202572170 +0100
> > +++ libcpp/directives.cc2025-02-20 21:40:56.379899457 +0100
> > @@ -1367,7 +1367,7 @@ do_embed (cpp_reader *pfile)
> >   {
> > if (CPP_OPTION (pfile, cplusplus))
> > cpp_error (pfile, CPP_DL_PEDWARN,
> > -  "%<#%s%> is a GCC extension", "embed");
> > +  "%<#%s%> before C++26 is a GCC extension", "embed");
> 
> Please change this to cpp_pedwarning/CPP_W_CXX26_EXTENSIONS.

Ok.  I've changed it for C too and also added a -Wc11-c23-compat warning
for it (the same way cpp_pedwarning is used for other preprocessor
extensions).

Here is an updated patch, so far tested with
GXX_TESTSUITE_STDS=98,11,14,17,20,23,26 make check-gcc check-g++ 
RUNTESTFLAGS='dg.exp=*embed* cpp.exp=*embed*'
ok for trunk if it passes full bootstrap/regtest?

2025-02-27  Jakub Jelinek  

libcpp/
* include/cpplib.h (enum cpp_warning_reason): Add
CPP_W_CXX26_EXTENSIONS enumerator.
* init.cc (lang_defaults): Set embed for GNUCXX26 and CXX26.
* directives.cc (do_embed): Adjust pedwarn wording for embed in C++,
use cpp_pedwarning instead of cpp_error and add CPP_W_C11_C23_COMPAT
warning if cpp_pedwarning hasn't diagnosed anything.
gcc/c-family/
* c.opt (Wc++26-extensions): Add CppReason(CPP_W_CXX26_EXTENSIONS).
* c-cppbuiltin.cc (c_cpp_builtins): Predefine __cpp_pp_embed=202502
for C++26.
gcc/testsuite/
* g++.dg/cpp/embed-1.C: Adjust for pedwarn wording change and don't
expect any error for C++26.
* g++.dg/cpp/embed-2.C: Adjust for pedwarn wording change and don't
expect any warning for C++26.
* g++.dg/cpp26/feat-cxx26.C: Test __cpp_pp_embed value.
* gcc.dg/cpp/embed-17.c: New test.

--- libcpp/include/cpplib.h.jj  2025-01-02 11:47:49.482953335 +0100
+++ libcpp/include/cpplib.h 2025-02-27 17:09:20.684244029 +0100
@@ -749,6 +749,7 @@ enum cpp_warning_reason {
   CPP_W_CXX17_EXTENSIONS,
   CPP_W_CXX20_EXTENSIONS,
   CPP_W_CXX23_EXTENSIONS,
+  CPP_W_CXX26_EXTENSIONS,
   CPP_W_EXPANSION_TO_DEFINED,
   CPP_W_BIDIRECTIONAL,
   CPP_W_INVALID_UTF8,
--- libcpp/init.cc.jj   2025-01-02 11:47:49.417954243 +0100
+++ libcpp/init.cc  2025-02-27 17:07:24.319855385 +0100
@@ -149,8 +149,8 @@ static const struct lang_flags lang_defa
   /* CXX20*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,0,0,0,0,0,0,1,0,0,1 },
   /* GNUCXX23 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
   /* CXX23*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
-  /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
-  /* CXX26*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,0,0,1 },
+  /* GNUCXX26 */ { 1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
+  /* CXX26*/ { 1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,0,1,1,1,1,1,0,1,1,0,1 },
   /* ASM  */ { 0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }
 };
 
--- libcpp/directives.cc.jj 2025-01-02 11:47:49.306955792 +0100
+++ libcpp/directives.cc2025-02-27 17:28:27.421351577 +0100
@@ -1349,7 +1349,7 @@ do_embed (cpp_reader *pfile)
 {
   int angle_brackets;
   struct cpp_embed_params params = {};
-  bool ok;
+  bool ok, warned = false;
   const char *fname = NULL;
 
   /* Tell the lexer this is an embed directive.  */
@@ -1366,12 +1366,17 @@ do_embed (cpp_reader *pfile)
   if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, embed))
 {
   if (CPP_OPTION (pfile, cplusplus))
-   cpp_error (pfile, CPP_DL_PEDWARN,
-  "%<#%s%> is a GCC extension", "embed");
+   warned = cpp_pedwarning (pfile, CPP_W_CXX26_EXTENSIONS,
+"%<#%s%> before C++26 is a GCC extension",
+"embed");
   else
-   cpp_error (pfile, CPP_DL_PEDWARN,
-  "%<#%s%> before C23 is a GCC extension", "embed");
+   warned = cpp_pedwarning (pfile, CPP_W_PEDANTIC,
+"%<#%s%> before C23 is a GCC extension",
+"embed");
 }
+  if (!warned && CPP_OPTION (pfile, cpp_warn_c11_c23_compat) > 0)
+cpp_warning (pfile, CPP_W_C11_C23_COMPAT,
+"%<#%s%> is a C23 feature", "embed");
 
   fname = parse_include (pfile, &angle_brackets, NULL, &params.loc);
   if (!fname)
--- gcc/c-family/c.opt.jj   2025-02-13 14:10:52.904623608 +0100
+++ gcc/c-family/c.opt  2025-02-27 17:08:33.198901580 +0100
@@ -513,7 +513,7 @@ C++ ObjC++ Var(warn_cxx23_extensions) Wa
 Warn about C++23 constructs in code compiled with an older standard.
 
 Wc++26-extensions
-C++ ObjC++ Var(warn_cxx26_extensions) Warning Init(1)
+C++ ObjC++ Var(warn_cxx26_extensions) Warning Init(1) 
CppReason(CPP_W_CXX26_EXTENSIONS)
 Warn about C++26 constructs in code compiled with an older standard.
 
 Wcalloc-transposed-args
--- gcc/c-family/c-cppbuiltin.cc.jj