Re: [PATCH 1/2] Handle component_ref to a structure/union field including flexible array member [PR101832]

2023-02-02 Thread Richard Biener via Gcc-patches
On Wed, 1 Feb 2023, Qing Zhao wrote:

> 
> 
> > On Feb 1, 2023, at 6:41 AM, Richard Biener  wrote:
> > 
> > On Tue, 31 Jan 2023, Qing Zhao wrote:
> > 
> >> GCC extension accepts the case when a struct with a flexible array member
> >> is embedded into another struct (possibly recursively).
> >> __builtin_object_size should treat such struct as flexible size per
> >> -fstrict-flex-arrays.
> >> 
> >>PR tree-optimization/101832
> >> 
> >> gcc/ChangeLog:
> >> 
> >>PR tree-optimization/101832
> >>* tree-object-size.cc (flexible_size_type_p): New function.
> >>(addr_object_size): Handle structure/union type when it has
> >>flexible size.
> >> 
> >> gcc/testsuite/ChangeLog:
> >> 
> >>PR tree-optimization/101832
> >>* gcc.dg/builtin-object-size-pr101832-2.c: New test.
> >>* gcc.dg/builtin-object-size-pr101832-3.c: New test.
> >>* gcc.dg/builtin-object-size-pr101832-4.c: New test.
> >>* gcc.dg/builtin-object-size-pr101832.c: New test.
> >> ---
> >> .../gcc.dg/builtin-object-size-pr101832-2.c   | 135 ++
> >> .../gcc.dg/builtin-object-size-pr101832-3.c   | 135 ++
> >> .../gcc.dg/builtin-object-size-pr101832-4.c   | 135 ++
> >> .../gcc.dg/builtin-object-size-pr101832.c | 119 +++
> >> gcc/tree-object-size.cc   | 115 +++
> >> 5 files changed, 611 insertions(+), 28 deletions(-)
> >> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
> >> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-3.c
> >> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-4.c
> >> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> >> 
> >> diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c 
> >> b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
> >> new file mode 100644
> >> index 000..f38babc5415
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
> >> @@ -0,0 +1,135 @@
> >> +/* PR 101832: 
> >> +   GCC extension accepts the case when a struct with a flexible array 
> >> member
> >> +   is embedded into another struct (possibly recursively).
> >> +   __builtin_object_size will treat such struct as flexible size per
> >> +   -fstrict-flex-arrays.  */ 
> >> +/* { dg-do run } */
> >> +/* { dg-options "-O2 -fstrict-flex-arrays=1" } */
> >> +
> >> +#include <stdio.h>
> >> +
> >> +unsigned n_fails = 0;
> >> +
> >> +#define expect(p, _v) do { \
> >> +  size_t v = _v; \
> >> +  if (p == v) \
> >> +printf("ok:  %s == %zd\n", #p, p); \
> >> +  else {\
> >> +printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
> >> +n_fails++; \
> >> +  } \
> >> +} while (0);
> >> +
> >> +struct A {
> >> +  int n;
> >> +  char data[];/* Content following header */
> >> +};
> >> +
> >> +struct B {
> >> +  int m;
> >> +  struct A a;
> >> +};
> >> +
> >> +struct C {
> >> +  int q;
> >> +  struct B b;
> >> +};
> >> +
> >> +struct A0 {
> >> +  int n;
> >> +  char data[0];/* Content following header */
> >> +};
> >> +
> >> +struct B0 {
> >> +  int m;
> >> +  struct A0 a;
> >> +};
> >> +
> >> +struct C0 {
> >> +  int q;
> >> +  struct B0 b;
> >> +};
> >> +
> >> +struct A1 {
> >> +  int n;
> >> +  char data[1];/* Content following header */
> >> +};
> >> +
> >> +struct B1 {
> >> +  int m;
> >> +  struct A1 a;
> >> +};
> >> +
> >> +struct C1 {
> >> +  int q;
> >> +  struct B1 b;
> >> +};
> >> +
> >> +struct An {
> >> +  int n;
> >> +  char data[8];/* Content following header */
> >> +};
> >> +
> >> +struct Bn {
> >> +  int m;
> >> +  struct An a;
> >> +};
> >> +
> >> +struct Cn {
> >> +  int q;
> >> +  struct Bn b;
> >> +};
> >> +
> >> +volatile void *magic1, *magic2;
> >> +
> >> +int main(int argc, char *argv[])
> >> +{
> >> +struct B *outer;
> >> +struct C *outest;
> >> +
> >> +/* Make sure optimization can't find some other object size. */
> >> +outer = (void *)magic1;
> >> +outest = (void *)magic2;
> >> +
> >> +expect(__builtin_object_size(&outer->a, 1), -1);
> >> +expect(__builtin_object_size(&outest->b, 1), -1);
> >> +expect(__builtin_object_size(&outest->b.a, 1), -1);
> >> +
> >> +struct B0 *outer0;
> >> +struct C0 *outest0;
> >> +
> >> +/* Make sure optimization can't find some other object size. */
> >> +outer0 = (void *)magic1;
> >> +outest0 = (void *)magic2;
> >> +
> >> +expect(__builtin_object_size(&outer0->a, 1), -1);
> >> +expect(__builtin_object_size(&outest0->b, 1), -1);
> >> +expect(__builtin_object_size(&outest0->b.a, 1), -1);
> >> +
> >> +struct B1 *outer1;
> >> +struct C1 *outest1;
> >> +
> >> +/* Make sure optimization can't find some other object size. */
> >> +outer1 = (void *)magic1;
> >> +outest1 = (void *)magic2;
> >> +
> >> +expect(__builtin_object_size(&outer1->a, 1), -1);
> >> +expect(__builtin_object_size(&outest1->b, 1), -1);
> >> +expect(__builtin_object_size(&outest1->b.a, 1), 

Re: [PATCH 2/2] Documentation Update.

2023-02-02 Thread Richard Biener via Gcc-patches
On Wed, 1 Feb 2023, Siddhesh Poyarekar wrote:

> On 2023-02-01 13:24, Qing Zhao wrote:
> > 
> > 
> >> On Feb 1, 2023, at 11:55 AM, Siddhesh Poyarekar 
> >> wrote:
> >>
> >> On 2023-01-31 09:11, Qing Zhao wrote:
> >>> Update documentation to clarify a GCC extension on structure with
> >>> flexible array member being nested in another structure.
> >>> gcc/ChangeLog:
> >>>  * doc/extend.texi: Document GCC extension on a structure containing
> >>>  a flexible array member to be a member of another structure.
> >>
> >> Should this resolve pr#77650 since the proposed action there appears to be
> >> to document these semantics?
> > 
> > My understanding of pr77650 is specifically for documentation on the
> > following case:
> > 
> > The structure with a flexible array member is the middle field of another
> > structure.
> > 
> > Which I added in the documentation as the 2nd situation.
> > However, I am still not very comfortable with my current wording for this
> > situation: how should we document
> > the expected GCC behavior in such a situation?
> 
> I reckon wording that dissuades programmers from using this might be
> appropriate, i.e. don't rely on this and if you already have such nested flex
> arrays, change code to remove them.
> 
> >>> +In the above, @code{flex_data.data[]} is allowed to be extended flexibly
> >>> to
> >>> +the padding. E.g., up to 4 elements.
> 
> """
> ... Relying on space in struct padding is bad programming practice and any
> code relying on this behaviour should be modified to ensure that flexible
> array members only end up at the ends of structures.  The `-pedantic` flag
> should help identify such uses.
> """
> 
> Although -pedantic will also flag on flex arrays nested in structs even if
> they're at the end of the parent struct, so my suggestion on the warning is
> not really perfect.

Wow, so I checked and we indeed accept

struct X { int n; int data[]; };
struct Y { struct X x; int end; };

and -pedantic says

t.c:2:21: warning: invalid use of structure with flexible array member 
[-Wpedantic]
2 | struct Y { struct X x; int end; };
  |   

and clang reports

t.c:2:21: warning: field 'x' with variable sized type 'struct X' not at 
the end of a struct or class is a GNU extension 
[-Wgnu-variable-sized-type-not-at-end]
struct Y { struct X x; int end; };
^

Looking at PR77650, what seems missing there is the semantics of this
extension as expected/required by the glibc use.  Comment #5 seems
to suggest that for my example above it's expected that
Y.x.data[0] aliases Y.end?!  There must be a better way to write
the glibc code and IMHO it would be best to deprecate this extension.
Definitely the middle-end wouldn't consider this aliasing for
my example - maybe it "works" when wrapped inside a union but
then for sure only when the union is visible in all accesses ...

typedef union
{
  struct __gconv_info __cd;
  struct
  {
struct __gconv_info __cd;
struct __gconv_step_data __data;
  } __combined;
} _G_iconv_t;

could be written as

typedef union
{
  struct __gconv_info __cd;
  char __dummy[sizeof(struct __gconv_info) + sizeof(struct 
__gconv_step_data)];
} _G_iconv_t;

in case the intent is to provide a complete type with space for
a single __gconv_step_data.

Richard.


[PATCH] s390: Add LEN_LOAD/LEN_STORE support.

2023-02-02 Thread Robin Dapp via Gcc-patches
Hi,

this patch adds LEN_LOAD/LEN_STORE support for z14 and newer.
It defines a bias value of -1 and implements the LEN_LOAD and LEN_STORE
optabs.

It also includes various vll/vstl testcases adapted from Kewen Lin's patch
for Power.

Bootstrapped and regtested on z13-z16.

Is it OK?

Regards
 Robin

gcc/ChangeLog:

* config/s390/predicates.md (vll_bias_operand): Add -1 bias.
* config/s390/s390.cc (s390_option_override_internal): Make
partial vector usage the default from z13 on.
* config/s390/vector.md (len_load_v16qi): Add.
(len_store_v16qi): Add.

gcc/testsuite/ChangeLog:

* gcc.target/s390/s390.exp: Add partial subdirectory.
* gcc.target/s390/vector/vec-nopeel-2.c: Change test
expectation.
* lib/target-supports.exp: Add s390.
* gcc.target/s390/vector/partial/s390-vec-length-1.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-2.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-3.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-7.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-1.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-2.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-3.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-7.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-run-1.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-run-2.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-run-3.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-epil-run-7.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-1.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-2.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-3.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-7.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-run-1.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-run-2.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-run-3.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-full-run-7.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length-run-1.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-run-2.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-run-3.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-run-7.h: New test.
* gcc.target/s390/vector/partial/s390-vec-length-small.c: New test.
* gcc.target/s390/vector/partial/s390-vec-length.h: New test.
---
 gcc/config/s390/predicates.md |  8 
 gcc/config/s390/s390.cc   |  8 
 gcc/config/s390/vector.md | 39 +++
 gcc/testsuite/gcc.target/s390/s390.exp|  3 ++
 .../s390/vector/partial/s390-vec-length-1.h   | 18 +
 .../s390/vector/partial/s390-vec-length-2.h   | 18 +
 .../s390/vector/partial/s390-vec-length-3.h   | 31 +++
 .../s390/vector/partial/s390-vec-length-7.h   | 17 
 .../vector/partial/s390-vec-length-epil-1.c   | 13 +++
 .../vector/partial/s390-vec-length-epil-2.c   | 13 +++
 .../vector/partial/s390-vec-length-epil-3.c   | 16 
 .../vector/partial/s390-vec-length-epil-7.c   | 11 ++
 .../partial/s390-vec-length-epil-run-1.c  |  7 
 .../partial/s390-vec-length-epil-run-2.c  |  7 
 .../partial/s390-vec-length-epil-run-3.c  |  7 
 .../partial/s390-vec-length-epil-run-7.c  |  7 
 .../vector/partial/s390-vec-length-full-1.c   | 12 ++
 .../vector/partial/s390-vec-length-full-2.c   | 12 ++
 .../vector/partial/s390-vec-length-full-3.c   | 13 +++
 .../vector/partial/s390-vec-length-full-7.c   | 14 +++
 .../partial/s390-vec-length-full-run-1.c  |  7 
 .../partial/s390-vec-length-full-run-2.c  |  7 
 .../partial/s390-vec-length-full-run-3.c  |  7 
 .../partial/s390-vec-length-full-run-7.c  |  7 
 .../vector/partial/s390-vec-length-run-1.h| 34 
 .../vector/partial/s390-vec-length-run-2.h| 36 +
 .../vector/partial/s390-vec-length-run-3.h| 34 
 .../vector/partial/s390-vec-length-run-7.h| 16 
 .../vector/partial/s390-vec-length-small.c| 15 +++
 .../s390/vector/partial/s390-vec-length.h | 14 +++
 .../gcc.target/s390/vector/vec-nopeel-2.c |  2 +-
 gcc/testsuite/lib/target-supports.exp |  3 +-
 32 files changed, 454 insertions(+), 2 deletions(-)
 create mode 100644 
gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-1.h
 create mode 100644 
gcc/testsuite/gcc.target/s390/vector/partial/s390-vec-length-2.h
 create mode 100644 
gcc/test

Re: [PATCH] [vect] Don't peel nonlinear iv(mult or shift) for epilog when vf is not constant.

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, liuhongt wrote:

> Normally when vf is not constant, it will be prevented by
> vectorizable_nonlinear_inductions, but for this case, it failed going
> into
> 
> if (STMT_VINFO_RELEVANT_P (stmt_info))
>   {
>   need_to_vectorize = true;
>   if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
>  && ! PURE_SLP_STMT (stmt_info))
> ok = vectorizable_induction (loop_vinfo,
>  stmt_info, NULL, NULL,
>  &cost_vec);
> 
> since the iv is never used outside of the loop and will be DCE'd later, so
> the vectorizer doesn't bother checking whether it's vectorizable. That is
> true, but it then hits the gcc_assert in vect_can_peel_nonlinear_iv_p when
> vf is not constant. One solution is to ignore the nonlinear iv peeling if it's
> !STMT_VINFO_RELEVANT_P (stmt_info), just like the code above; the other
> solution is to return false earlier in
> vect_can_peel_nonlinear_iv_p when vf is not constant. The patch chooses
> the second in case there are other callers of vect_can_advance_ivs_p, which
> calls vect_can_peel_nonlinear_iv_p.
> 
> Also remove vect_can_peel_nonlinear_iv_p from 
> vectorizable_nonlinear_inductions.
> 
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} and 
> aarch64-linux-gnu{-m32,}
> Ok for trunk?

OK.

Thanks,
Richard.

> gcc/ChangeLog:
> 
>   PR tree-optimization/108601
>   * tree-vectorizer.h (vect_can_peel_nonlinear_iv_p): Remove declaration.
>   * tree-vect-loop.cc
>   (vectorizable_nonlinear_induction): Remove
>   vect_can_peel_nonlinear_iv_p.
>   (vect_can_peel_nonlinear_iv_p): Don't peel
>   nonlinear iv(mult or shift) for epilog when vf is not
>   constant and moved the definition to ..
>   * tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p):
>   .. Here.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/aarch64/pr108601.c: New test.
> ---
>  gcc/testsuite/gcc.target/aarch64/pr108601.c | 11 +
>  gcc/tree-vect-loop-manip.cc | 44 
>  gcc/tree-vect-loop.cc   | 46 -
>  gcc/tree-vectorizer.h   |  3 --
>  4 files changed, 55 insertions(+), 49 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr108601.c
> 
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr108601.c 
> b/gcc/testsuite/gcc.target/aarch64/pr108601.c
> new file mode 100644
> index 000..deb8b3061d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr108601.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fprofile-generate -mcpu=neoverse-v1" } */
> +
> +int
> +foo() {
> +  int flag = 1;
> +  for (; flag <= 1 << 21; flag <<= 1)
> +;
> +  return 0;
> +}
> +
> diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
> index b5c5f859144..c04fcf40c44 100644
> --- a/gcc/tree-vect-loop-manip.cc
> +++ b/gcc/tree-vect-loop-manip.cc
> @@ -1390,6 +1390,50 @@ iv_phi_p (stmt_vec_info stmt_info)
>return true;
>  }
>  
> +/* Return true if vectorizer can peel for nonlinear iv.  */
> +static bool
> +vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
> +   enum vect_induction_op_type induction_type)
> +{
> +  tree niters_skip;
> +  /* Init_expr will be updated by vect_update_ivs_after_vectorizer,
> + if niters or vf is unknown:
> + For shift, when shift amount >= precision, there would be UB.
> + For mult, we don't know how to generate
> + init_expr * pow (step, niters) for variable niters.
> + For neg, it should be ok, since niters of vectorized main loop
> + will always be multiple of 2.  */
> +  if ((!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
> +   || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant ())
> +  && induction_type != vect_step_op_neg)
> +{
> +  if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "Peeling for epilogue is not supported"
> +  " for nonlinear induction except neg"
> +  " when iteration count is unknown.\n");
> +  return false;
> +}
> +
> +  /* Also doesn't support peel for neg when niter is variable.
> + ??? generate something like niter_expr & 1 ? init_expr : -init_expr?  */
> +  niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
> +  if ((niters_skip != NULL_TREE
> +   && TREE_CODE (niters_skip) != INTEGER_CST)
> +  || (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
> +   && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
> +{
> +  if (dump_enabled_p ())
> + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +  "Peeling for alignement is not supported"
> +  " for nonlinear induction when niters_skip"
> +  " is not constant.\n");
> +  return false;
> +}
> +
> +  return true;
> +}
> +
>  /* Function vect_can_advance_ivs_p

[committed] nested, openmp: Wrap OMP_CLAUSE_*_GIMPLE_SEQ into GIMPLE_BIND for declare_vars [PR108435]

2023-02-02 Thread Jakub Jelinek via Gcc-patches
Hi!

When gimplifying OMP_CLAUSE_{LASTPRIVATE,LINEAR}_STMT, we wrap it always
into a GIMPLE_BIND, but when putting statements directly into
OMP_CLAUSE_{LASTPRIVATE,LINEAR}_GIMPLE_SEQ, we do it only if needed (there
are any temporaries that need to be declared in the sequence).
convert_nonlocal_omp_clauses was relying on the GIMPLE_BIND to be there always
because it called declare_vars on it.

The following patch wraps it into GIMPLE_BIND in tree-nested if we need to
declare_vars on it on demand.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2023-02-02  Jakub Jelinek  

PR middle-end/108435
* tree-nested.cc (convert_nonlocal_omp_clauses)
<case OMP_CLAUSE_LASTPRIVATE>: If info->new_local_var_chain and
*seq is not a GIMPLE_BIND, wrap the sequence into a new GIMPLE_BIND
before calling declare_vars.
(convert_nonlocal_omp_clauses) <case OMP_CLAUSE_LINEAR>: Merge
with the OMP_CLAUSE_LASTPRIVATE handling except for whether
seq is initialized to &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause)
or &OMP_CLAUSE_LINEAR_GIMPLE_SEQ (clause).

* gcc.dg/gomp/pr108435.c: New test.

--- gcc/tree-nested.cc.jj   2023-01-02 09:32:40.110029184 +0100
+++ gcc/tree-nested.cc  2023-02-01 16:22:33.272248502 +0100
@@ -1566,29 +1566,29 @@ convert_nonlocal_omp_clauses (tree *pcla
  break;
 
case OMP_CLAUSE_LASTPRIVATE:
- {
-   tree save_local_var_chain = info->new_local_var_chain;
-   info->new_local_var_chain = NULL;
-   gimple_seq *seq = &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause);
-   walk_body (convert_nonlocal_reference_stmt,
-  convert_nonlocal_reference_op, info, seq);
-   if (info->new_local_var_chain)
- declare_vars (info->new_local_var_chain,
-   gimple_seq_first_stmt (*seq), false);
-   info->new_local_var_chain = save_local_var_chain;
- }
- break;
-
case OMP_CLAUSE_LINEAR:
  {
tree save_local_var_chain = info->new_local_var_chain;
info->new_local_var_chain = NULL;
-   gimple_seq *seq = &OMP_CLAUSE_LINEAR_GIMPLE_SEQ (clause);
+   gimple_seq *seq;
+   if (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_LASTPRIVATE)
+ seq = &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (clause);
+   else
+ seq = &OMP_CLAUSE_LINEAR_GIMPLE_SEQ (clause);
walk_body (convert_nonlocal_reference_stmt,
   convert_nonlocal_reference_op, info, seq);
if (info->new_local_var_chain)
- declare_vars (info->new_local_var_chain,
-   gimple_seq_first_stmt (*seq), false);
+ {
+   gimple *g = gimple_seq_first_stmt (*seq);
+   if (gimple_code (g) != GIMPLE_BIND)
+ {
+   g = gimple_build_bind (NULL_TREE, *seq, NULL_TREE);
+   *seq = NULL;
+   gimple_seq_add_stmt_without_update (seq, g);
+ }
+   declare_vars (info->new_local_var_chain,
+ gimple_seq_first_stmt (*seq), false);
+ }
info->new_local_var_chain = save_local_var_chain;
  }
  break;
--- gcc/testsuite/gcc.dg/gomp/pr108435.c.jj 2023-02-01 16:24:56.180152165 
+0100
+++ gcc/testsuite/gcc.dg/gomp/pr108435.c2023-02-01 16:24:36.150445983 
+0100
@@ -0,0 +1,18 @@
+/* PR middle-end/108435 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fopenmp" } */
+
+int
+main ()
+{
+  int i, j;
+  void
+  bar (void)
+  {
+#pragma omp for simd collapse(2)
+ for (i = 1; i <= 16; i++)
+   for (j = 1; j <= 16; j++)
+;
+  }
+  bar ();
+}

Jakub



[PATCH] Replace IFN_TRAP with BUILT_IN_UNREACHABLE_TRAP [PR107300]

2023-02-02 Thread Jakub Jelinek via Gcc-patches
Hi!

For PR106099 I've added IFN_TRAP as an alternative to __builtin_trap
meant for __builtin_unreachable purposes (e.g. with -funreachable-traps
or some sanitizers) which doesn't need vops because __builtin_unreachable
doesn't need them either.  This works in various cases, but unfortunately
IPA likes to decide on the redirection to unreachable just by tweaking
the cgraph edge to point to a different FUNCTION_DECL.  As internal
functions don't have a decl, this causes problems like in the following
testcase.

The following patch fixes it by removing IFN_TRAP again and replacing
it with user inaccessible BUILT_IN_UNREACHABLE_TRAP, so that e.g.
builtin_decl_unreachable can return it directly and we don't need to tweak
it later in wherever we actually replace the call stmt.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-02-02  Jakub Jelinek  

PR ipa/107300
* builtins.def (BUILT_IN_UNREACHABLE_TRAP): New builtin.
* internal-fn.def (TRAP): Remove.
* internal-fn.cc (expand_TRAP): Remove.
* tree.cc (build_common_builtin_nodes): Define
BUILT_IN_UNREACHABLE_TRAP if not yet defined.
(builtin_decl_unreachable): Use BUILT_IN_UNREACHABLE_TRAP
instead of BUILT_IN_TRAP.
* gimple.cc (gimple_build_builtin_unreachable): Remove
emitting internal function for BUILT_IN_TRAP.
* asan.cc (maybe_instrument_call): Handle BUILT_IN_UNREACHABLE_TRAP.
* cgraph.cc (cgraph_edge::verify_corresponds_to_fndecl): Handle
BUILT_IN_UNREACHABLE_TRAP instead of BUILT_IN_TRAP.
* ipa-devirt.cc (possible_polymorphic_call_target_p): Handle
BUILT_IN_UNREACHABLE_TRAP.
* builtins.cc (expand_builtin, is_inexpensive_builtin): Likewise.
* tree-cfg.cc (verify_gimple_call,
pass_warn_function_return::execute): Likewise.
* attribs.cc (decl_attributes): Don't report exclusions on
BUILT_IN_UNREACHABLE_TRAP either.

* gcc.dg/pr107300.c: New test.

--- gcc/builtins.def.jj 2023-01-02 09:32:37.988059844 +0100
+++ gcc/builtins.def2023-02-01 19:19:15.382475912 +0100
@@ -1048,6 +1048,7 @@ DEF_GCC_BUILTIN(BUILT_IN_SETJMP,
 DEF_EXT_LIB_BUILTIN(BUILT_IN_STRFMON, "strfmon", 
BT_FN_SSIZE_STRING_SIZE_CONST_STRING_VAR, ATTR_FORMAT_STRFMON_NOTHROW_3_4)
 DEF_LIB_BUILTIN(BUILT_IN_STRFTIME, "strftime", 
BT_FN_SIZE_STRING_SIZE_CONST_STRING_CONST_TM_PTR, 
ATTR_FORMAT_STRFTIME_NOTHROW_3_0)
 DEF_GCC_BUILTIN(BUILT_IN_TRAP, "trap", BT_FN_VOID, 
ATTR_NORETURN_NOTHROW_LEAF_COLD_LIST)
+DEF_GCC_BUILTIN(BUILT_IN_UNREACHABLE_TRAP, "unreachable trap", 
BT_FN_VOID, ATTR_CONST_NORETURN_NOTHROW_LEAF_COLD_LIST)
 DEF_GCC_BUILTIN(BUILT_IN_UNREACHABLE, "unreachable", BT_FN_VOID, 
ATTR_CONST_NORETURN_NOTHROW_LEAF_COLD_LIST)
 DEF_GCC_BUILTIN(BUILT_IN_UNWIND_INIT, "unwind_init", BT_FN_VOID, 
ATTR_NULL)
 DEF_GCC_BUILTIN(BUILT_IN_UPDATE_SETJMP_BUF, "update_setjmp_buf", 
BT_FN_VOID_PTR, ATTR_NULL)
--- gcc/internal-fn.def.jj  2023-01-02 09:32:38.771048530 +0100
+++ gcc/internal-fn.def 2023-02-01 19:19:52.239936390 +0100
@@ -457,11 +457,6 @@ DEF_INTERNAL_FN (SHUFFLEVECTOR, ECF_CONS
 /* <=> optimization.  */
 DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 
-/* __builtin_trap created from/for __builtin_unreachable.  */
-DEF_INTERNAL_FN (TRAP, ECF_CONST | ECF_LEAF | ECF_NORETURN
-  | ECF_NOTHROW | ECF_COLD | ECF_LOOPING_CONST_OR_PURE,
-NULL)
-
 /* [[assume (cond)]].  */
 DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW
 | ECF_LOOPING_CONST_OR_PURE, NULL)
--- gcc/internal-fn.cc.jj   2023-01-02 09:32:22.206287869 +0100
+++ gcc/internal-fn.cc  2023-02-01 19:16:52.939561023 +0100
@@ -4518,12 +4518,6 @@ expand_SPACESHIP (internal_fn, gcall *st
 }
 
 void
-expand_TRAP (internal_fn, gcall *)
-{
-  expand_builtin_trap ();
-}
-
-void
 expand_ASSUME (internal_fn, gcall *)
 {
 }
--- gcc/tree.cc.jj  2023-01-27 20:09:16.151971051 +0100
+++ gcc/tree.cc 2023-02-01 20:37:01.287315982 +0100
@@ -9758,6 +9758,7 @@ build_common_builtin_nodes (void)
 
   if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE)
   || !builtin_decl_explicit_p (BUILT_IN_TRAP)
+  || !builtin_decl_explicit_p (BUILT_IN_UNREACHABLE_TRAP)
   || !builtin_decl_explicit_p (BUILT_IN_ABORT))
 {
   ftype = build_function_type (void_type_node, void_list_node);
@@ -9767,6 +9768,12 @@ build_common_builtin_nodes (void)
  "__builtin_unreachable",
  ECF_NOTHROW | ECF_LEAF | ECF_NORETURN
  | ECF_CONST | ECF_COLD);
+  if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE_TRAP))
+   local_define_builtin ("__builtin_unreachable trap", ftype,
+ BUILT_IN_UNREACHABLE_TRAP,
+ "__builtin_unreachable trap",
+ ECF_NOTHROW | ECF_LE

Re: [PATCH] Replace IFN_TRAP with BUILT_IN_UNREACHABLE_TRAP [PR107300]

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, Jakub Jelinek wrote:

> Hi!
> 
> For PR106099 I've added IFN_TRAP as an alternative to __builtin_trap
> meant for __builtin_unreachable purposes (e.g. with -funreachable-traps
> or some sanitizers) which doesn't need vops because __builtin_unreachable
> doesn't need them either.  This works in various cases, but unfortunately
> IPA likes to decide on the redirection to unreachable just by tweaking
> the cgraph edge to point to a different FUNCTION_DECL.  As internal
> functions don't have a decl, this causes problems like in the following
> testcase.
> 
> The following patch fixes it by removing IFN_TRAP again and replacing
> it with user inaccessible BUILT_IN_UNREACHABLE_TRAP, so that e.g.
> builtin_decl_unreachable can return it directly and we don't need to tweak
> it later in wherever we actually replace the call stmt.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

Thanks,
Richard.

> 2023-02-02  Jakub Jelinek  
> 
>   PR ipa/107300
>   * builtins.def (BUILT_IN_UNREACHABLE_TRAP): New builtin.
>   * internal-fn.def (TRAP): Remove.
>   * internal-fn.cc (expand_TRAP): Remove.
>   * tree.cc (build_common_builtin_nodes): Define
>   BUILT_IN_UNREACHABLE_TRAP if not yet defined.
>   (builtin_decl_unreachable): Use BUILT_IN_UNREACHABLE_TRAP
>   instead of BUILT_IN_TRAP.
>   * gimple.cc (gimple_build_builtin_unreachable): Remove
>   emitting internal function for BUILT_IN_TRAP.
>   * asan.cc (maybe_instrument_call): Handle BUILT_IN_UNREACHABLE_TRAP.
>   * cgraph.cc (cgraph_edge::verify_corresponds_to_fndecl): Handle
>   BUILT_IN_UNREACHABLE_TRAP instead of BUILT_IN_TRAP.
>   * ipa-devirt.cc (possible_polymorphic_call_target_p): Handle
>   BUILT_IN_UNREACHABLE_TRAP.
>   * builtins.cc (expand_builtin, is_inexpensive_builtin): Likewise.
>   * tree-cfg.cc (verify_gimple_call,
>   pass_warn_function_return::execute): Likewise.
>   * attribs.cc (decl_attributes): Don't report exclusions on
>   BUILT_IN_UNREACHABLE_TRAP either.
> 
>   * gcc.dg/pr107300.c: New test.
> 
> --- gcc/builtins.def.jj   2023-01-02 09:32:37.988059844 +0100
> +++ gcc/builtins.def  2023-02-01 19:19:15.382475912 +0100
> @@ -1048,6 +1048,7 @@ DEF_GCC_BUILTIN(BUILT_IN_SETJMP,
>  DEF_EXT_LIB_BUILTIN(BUILT_IN_STRFMON, "strfmon", 
> BT_FN_SSIZE_STRING_SIZE_CONST_STRING_VAR, ATTR_FORMAT_STRFMON_NOTHROW_3_4)
>  DEF_LIB_BUILTIN(BUILT_IN_STRFTIME, "strftime", 
> BT_FN_SIZE_STRING_SIZE_CONST_STRING_CONST_TM_PTR, 
> ATTR_FORMAT_STRFTIME_NOTHROW_3_0)
>  DEF_GCC_BUILTIN(BUILT_IN_TRAP, "trap", BT_FN_VOID, 
> ATTR_NORETURN_NOTHROW_LEAF_COLD_LIST)
> +DEF_GCC_BUILTIN(BUILT_IN_UNREACHABLE_TRAP, "unreachable trap", 
> BT_FN_VOID, ATTR_CONST_NORETURN_NOTHROW_LEAF_COLD_LIST)
>  DEF_GCC_BUILTIN(BUILT_IN_UNREACHABLE, "unreachable", BT_FN_VOID, 
> ATTR_CONST_NORETURN_NOTHROW_LEAF_COLD_LIST)
>  DEF_GCC_BUILTIN(BUILT_IN_UNWIND_INIT, "unwind_init", BT_FN_VOID, 
> ATTR_NULL)
>  DEF_GCC_BUILTIN(BUILT_IN_UPDATE_SETJMP_BUF, "update_setjmp_buf", 
> BT_FN_VOID_PTR, ATTR_NULL)
> --- gcc/internal-fn.def.jj2023-01-02 09:32:38.771048530 +0100
> +++ gcc/internal-fn.def   2023-02-01 19:19:52.239936390 +0100
> @@ -457,11 +457,6 @@ DEF_INTERNAL_FN (SHUFFLEVECTOR, ECF_CONS
>  /* <=> optimization.  */
>  DEF_INTERNAL_FN (SPACESHIP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
>  
> -/* __builtin_trap created from/for __builtin_unreachable.  */
> -DEF_INTERNAL_FN (TRAP, ECF_CONST | ECF_LEAF | ECF_NORETURN
> -| ECF_NOTHROW | ECF_COLD | ECF_LOOPING_CONST_OR_PURE,
> -  NULL)
> -
>  /* [[assume (cond)]].  */
>  DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW
>| ECF_LOOPING_CONST_OR_PURE, NULL)
> --- gcc/internal-fn.cc.jj 2023-01-02 09:32:22.206287869 +0100
> +++ gcc/internal-fn.cc2023-02-01 19:16:52.939561023 +0100
> @@ -4518,12 +4518,6 @@ expand_SPACESHIP (internal_fn, gcall *st
>  }
>  
>  void
> -expand_TRAP (internal_fn, gcall *)
> -{
> -  expand_builtin_trap ();
> -}
> -
> -void
>  expand_ASSUME (internal_fn, gcall *)
>  {
>  }
> --- gcc/tree.cc.jj2023-01-27 20:09:16.151971051 +0100
> +++ gcc/tree.cc   2023-02-01 20:37:01.287315982 +0100
> @@ -9758,6 +9758,7 @@ build_common_builtin_nodes (void)
>  
>if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE)
>|| !builtin_decl_explicit_p (BUILT_IN_TRAP)
> +  || !builtin_decl_explicit_p (BUILT_IN_UNREACHABLE_TRAP)
>|| !builtin_decl_explicit_p (BUILT_IN_ABORT))
>  {
>ftype = build_function_type (void_type_node, void_list_node);
> @@ -9767,6 +9768,12 @@ build_common_builtin_nodes (void)
> "__builtin_unreachable",
> ECF_NOTHROW | ECF_LEAF | ECF_NORETURN
> | ECF_CONST | ECF_COLD);
> +  if (!builtin_decl_explicit_p (BUILT_IN_UNREACHABLE_TRAP))

[PATCH] rtl-ssa: Fix splitting of clobber groups [PR108508]

2023-02-02 Thread Richard Sandiford via Gcc-patches
Since rtl-ssa isn't a real/native SSA representation, it has
to honour the constraints of the underlying rtl representation.
Part of this involves maintaining an rpo list of definitions
for each rtl register, backed by a splay tree where necessary
for quick lookup/insertion.

However, clobbers of a register don't act as barriers to
other clobbers of a register.  E.g. it's possible to move one
flag-clobbering instruction across an arbitrary number of other
flag-clobbering instructions.  In order to allow passes to do
that without quadratic complexity, the splay tree groups all
consecutive clobbers into groups, with only the group being
entered into the splay tree.  These groups in turn have an
internal splay tree of clobbers where necessary.

This means that, if we insert a new definition and use into
the middle of a sea of clobbers, we need to split the clobber
group into two groups.  This was quite a difficult condition
to trigger during development, and the PR shows that the code
to handle it had (at least) two bugs.

First, the process involves searching the clobber tree for
the split point.  This search can give either the previous
clobber (which will belong to the first of the split groups)
or the next clobber (which will belong to the second of the
split groups).  The code for the former case handled the
split correctly but the code for the latter case didn't.

Second, I'd forgotten to add the second clobber group to the
main splay tree. :-(

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK for trunk
& GCC 12?  Although the testcase is "only" a regression from GCC 12,
I think the rtl-ssa patch should be backported to GCC 11 too.

Richard


gcc/
PR rtl-optimization/108508
* rtl-ssa/accesses.cc (function_info::split_clobber_group): When
the splay tree search gives the first clobber in the second group,
make sure that the root of the first clobber group is updated
correctly.  Enter the new clobber group into the definition splay
tree.

gcc/testsuite/
PR rtl-optimization/108508
* gcc.target/aarch64/pr108508.c: New test.
---
 gcc/rtl-ssa/accesses.cc | 14 ---
 gcc/testsuite/gcc.target/aarch64/pr108508.c | 28 +
 2 files changed, 38 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr108508.c

diff --git a/gcc/rtl-ssa/accesses.cc b/gcc/rtl-ssa/accesses.cc
index 03b9a475d3b..f12b5f4dd77 100644
--- a/gcc/rtl-ssa/accesses.cc
+++ b/gcc/rtl-ssa/accesses.cc
@@ -794,23 +794,26 @@ function_info::merge_clobber_groups (clobber_info 
*clobber1,
 // GROUP spans INSN, and INSN now sets the resource that GROUP clobbers.
 // Split GROUP around INSN and return the clobber that comes immediately
 // before INSN.
+//
+// The resource that GROUP clobbers is known to have an associated
+// splay tree.
 clobber_info *
 function_info::split_clobber_group (clobber_group *group, insn_info *insn)
 {
   // Search for either the previous or next clobber in the group.
   // The result is less than zero if CLOBBER should come before NEIGHBOR
   // or greater than zero if CLOBBER should come after NEIGHBOR.
-  int comparison = lookup_clobber (group->m_clobber_tree, insn);
+  clobber_tree &tree1 = group->m_clobber_tree;
+  int comparison = lookup_clobber (tree1, insn);
   gcc_checking_assert (comparison != 0);
-  clobber_info *neighbor = group->m_clobber_tree.root ();
+  clobber_info *neighbor = tree1.root ();
 
-  clobber_tree tree1, tree2;
+  clobber_tree tree2;
   clobber_info *prev;
   clobber_info *next;
   if (comparison > 0)
 {
   // NEIGHBOR is the last clobber in what will become the first group.
-  tree1 = neighbor;
   tree2 = tree1.split_after_root ();
   prev = neighbor;
   next = as_a<clobber_info *> (prev->next_def ());
@@ -843,6 +846,9 @@ function_info::split_clobber_group (clobber_group *group, 
insn_info *insn)
   tree2->set_group (group2);
   last_clobber->set_group (group2);
 
+  // Insert GROUP2 into the splay tree as an immediate successor of GROUP1.
+  def_splay_tree::insert_child (group1, 1, group2);
+
   return prev;
 }
 
diff --git a/gcc/testsuite/gcc.target/aarch64/pr108508.c 
b/gcc/testsuite/gcc.target/aarch64/pr108508.c
new file mode 100644
index 000..e97896b6a1b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr108508.c
@@ -0,0 +1,28 @@
+/* { dg-options "-O3 -fharden-conditional-branches -fno-dce 
-fno-guess-branch-probability" } */
+
+#include <arm_neon.h>
+
+int
+test_vld3q_lane_f64 (void)
+{
+  float64x2x3_t vectors;
+  float64_t temp[2];
+  int i, j;
+
+  for (i = 0; i < 3; i++)
+  {
+vst1q_f64 (temp, vectors.val[i]);
+for (j = 0; j < 2; j++)
+  if (temp[j])
+return 1;
+  }
+
+  return 0;
+}
+
+void
+foo (void)
+{
+  if (test_vld3q_lane_f64 () || test_vld3q_lane_f64 ())
+__builtin_abort ();
+}
-- 
2.25.1



Re: [PATCH] gomp: Various fixes for SVE types [PR101018]

2023-02-02 Thread Richard Sandiford via Gcc-patches
Ping^2

Richard Sandiford  writes:
> [I posted this late in stage 4 as an RFC, but it wasn't suitable for
> GCC 12 at that point.  I kind-of dropped the ball after that, sorry.]
>
> Various parts of the omp code checked whether the size of a decl
> was an INTEGER_CST in order to determine whether the decl was
> variable-sized or not.  If it was variable-sized, it was expected
> to have a DECL_VALUE_EXPR replacement, as for VLAs.
>
> This patch uses poly_int_tree_p instead, so that variable-length
> SVE vectors are treated like constant-length vectors.  This means
> that some structures become poly_int-sized, with some fields at
> poly_int offsets, but we already have code to handle that.
>
> An alternative would have been to handle the data via indirection
> instead.  However, that's likely to be more complicated, and it
> would contradict is_variable_sized, which already uses a check
> for TREE_CONSTANT rather than INTEGER_CST.
>
> gimple_add_tmp_var should probably not add a safelen of 1
> for SVE vectors, but that's really a separate thing and might
> be hard to test.
>
> Tested on aarch64-linux-gnu.  OK to install?
>
> Richard
>
>
> gcc/
>   PR middle-end/101018
>   * poly-int.h (can_and_p): New function.
>   * fold-const.cc (poly_int_binop): Use it to optimize BIT_AND_EXPRs
>   involving POLY_INT_CSTs.
>   * expr.cc (get_inner_reference): Fold poly_uint64 size_trees
>   into the constant bitsize.
>   * gimplify.cc (gimplify_bind_expr): Use poly_int_tree_p instead
>   of INTEGER_CST when checking for constant-sized omp data.
>   (omp_add_variable): Likewise.
>   (omp_notice_variable): Likewise.
>   (gimplify_adjust_omp_clauses_1): Likewise.
>   (gimplify_adjust_omp_clauses): Likewise.
>   * omp-low.cc (scan_sharing_clauses): Likewise.
>   (lower_omp_target): Likewise.
>
> gcc/testsuite/
>   PR middle-end/101018
>   * gcc.target/aarch64/sve/acle/general/pr101018-1.c: New test.
>   * gcc.target/aarch64/sve/acle/general/pr101018-2.c: Likewise.
> ---
>  gcc/expr.cc   |  4 +--
>  gcc/fold-const.cc |  7 +
>  gcc/gimplify.cc   | 23 
>  gcc/omp-low.cc| 10 +++
>  gcc/poly-int.h| 19 +
>  .../aarch64/sve/acle/general/pr101018-1.c | 27 +++
>  .../aarch64/sve/acle/general/pr101018-2.c | 23 
>  7 files changed, 94 insertions(+), 19 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-1.c
>  create mode 100644 
> gcc/testsuite/gcc.target/aarch64/sve/acle/general/pr101018-2.c
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d9407432ea5..a304c583d16 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -7941,10 +7941,10 @@ get_inner_reference (tree exp, poly_int64_pod 
> *pbitsize,
>  
>if (size_tree != 0)
>  {
> -  if (! tree_fits_uhwi_p (size_tree))
> +  if (! tree_fits_poly_uint64_p (size_tree))
>   mode = BLKmode, *pbitsize = -1;
>else
> - *pbitsize = tree_to_uhwi (size_tree);
> + *pbitsize = tree_to_poly_uint64 (size_tree);
>  }
>  
>*preversep = reverse_storage_order_for_component_p (exp);
> diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> index b89cac91cae..000600017e2 100644
> --- a/gcc/fold-const.cc
> +++ b/gcc/fold-const.cc
> @@ -1183,6 +1183,13 @@ poly_int_binop (poly_wide_int &res, enum tree_code 
> code,
>   return false;
>break;
>  
> +case BIT_AND_EXPR:
> +  if (TREE_CODE (arg2) != INTEGER_CST
> +   || !can_and_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
> +  &res))
> + return false;
> +  break;
> +
>  case BIT_IOR_EXPR:
>if (TREE_CODE (arg2) != INTEGER_CST
> || !can_ior_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index f06ce3cc77a..096738c8ed4 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -7352,7 +7352,7 @@ omp_add_variable (struct gimplify_omp_ctx *ctx, tree 
> decl, unsigned int flags)
>/* When adding a variable-sized variable, we have to handle all sorts
>   of additional bits of data: the pointer replacement variable, and
>   the parameters of the type.  */
> -  if (DECL_SIZE (decl) && TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
> +  if (DECL_SIZE (decl) && !poly_int_tree_p (DECL_SIZE (decl)))
>  {
>/* Add the pointer replacement variable as PRIVATE if the variable
>replacement is private, else FIRSTPRIVATE since we'll need the
> @@ -8002,7 +8002,8 @@ omp_notice_variable (struct gimplify_omp_ctx *ctx, tree 
> decl, bool in_code)
>&& (flags & (GOVD_SEEN | GOVD_LOCAL)) == GOVD_SEEN
>&& DECL_SIZE (decl))
>  {
> -  if (TREE_CODE (DECL_SIZE (decl)) != INTEGER_CST)
> +  tree size;
> +  if (!poly_int_tree_p (DECL_SIZE (decl)))
>   {

Re: [PATCH] sched-deps, cselib: Fix up some -fcompare-debug issues and regressions [PR108463]

2023-02-02 Thread Alexandre Oliva via Gcc-patches
On Jan 27, 2023, Jakub Jelinek  wrote:

> Now, 1) is precondition of 2), we can only subst the VALUEs if we
> have actually looked the address up, but as can be seen on that testcase,
> we are relying on at least the 1) to be done because we subst the values
> later on even on DEBUG_INSNs and actually use those when needed.

Ugh.  That definitely rings a bell, now that you mention it.  I wish I
had recalled that when I saw the "obvious" opportunity for optimization
:-/

> So, I (as done in the patch below) reinstalled the 1) and not 2) for
> DEBUG_INSNs.

Thanks!

> I've spent a day debugging that and found the problem is that as documented
> in a large comment in cselib.cc above n_useless_values variable definition,
> we spend quite a bit of effort on making sure that VALUEs created on
> DEBUG_INSNs don't affect the cselib decisions for non-DEBUG_INSNs such as
> pruning of useless values etc., but if a VALUE created that way is then
> looked up/needed from non-DEBUG_INSNs, we promote it to non-debug.

*nod*

> The reason for -fcompare-debug failure is that there is one large DEBUG_INSN
> with 16 MEMs in it mostly with addresses that so far didn't appear in the IL
> otherwise.  Later on, we see an instruction storing into MEM destination
> and invalidate that MEM.

Aha!

> Unfortunately, n_useless_values which in my understanding should be always
> the same between -g and -g0 compilations diverges, has 3 more useless values
> for -g.

Yeah, that's not good.

> Now, these were initially VALUEs created for DEBUG_INSN lookups.  As I said,
> cselib.cc has code to promote such VALUEs (well, their location elements) to
> non-debug if they are looked up from non-DEBUG_INSNs.  The problem is that
> when looking up some completely unrelated MEM from a non-DEBUG_INSN we run into
> a hash collision and so call cselib_hasher::equal to check if the unrelated
> MEM is equal to the one from DEBUG_INSN only element.  The equal static
> member function calls rtx_equal_for_cselib_1 and if that returns true,
> promotes the location to non-DEBUG, otherwise returns false.  So far so
> good.  But rtx_equal_for_cselib_1 internally performs various other cselib
> lookups, all done with the non-DEBUG_INSN cselib_current_insn, so they
> all promote to non-debug.

Good catch!

> So, I think we need to pretend
> that such lookup which only happens with -g and not -g0 actually comes
> from some DEBUG_INSN (note, the lookups rtx_equal_for_cselib_1 does
> are always with create = 0).
> The cselib.cc part of the patch does that.

Agreed, that makes sense to me, thanks!

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

FWIW, I'd approve it if I had the authority to do so :-)


> I'd think we would need to differentiate between num_debug_mems and
> num_mems depending on if setting_insn is non-NULL DEBUG_INSN or not.

*nod*, I concur.

Thanks!

-- 
Alexandre Oliva, happy hacker    https://FSFLA.org/blogs/lxo/
   Free Software Activist   GNU Toolchain Engineer
Disinformation flourishes because many people care deeply about injustice
but very few check the facts.  Ask me about 


[committed] libgomp.texi (OpenMP TR11 impl. status): Fix 'strict' item

2023-02-02 Thread Tobias Burnus

There is less new in TR11 than claimed ... 'strict' on grainsize/num_tasks is
already
in OpenMP 5.1, it is implemented and also listed as 'Y' under 5.1.
Only 'num_threads(strict: int-expr)' is new in TR11.

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
commit 8da7476c5fa8870c2fcded48d3de95978434c1be
Author: Tobias Burnus 
Date:   Thu Feb 2 12:05:58 2023 +0100

libgomp.texi (OpenMP TR11 impl. status): Fix 'strict' item

Fix the 'strict' modifier status: it is already listed (as 'Y') for OpenMP
    5.1 for num_tasks and grainsize; only strict on num_threads is new with TR11.

libgomp/
* libgomp.texi (OpenMP TR11): Fix item for 'strict' modifier.

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 0aa653d3ace..1f84b050eb2 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -469,8 +469,7 @@ Technical Report (TR) 11 is the first preview for OpenMP 6.0.
 @item @code{omp_curr_progress_width} identifier @tab N @tab
 @item @code{safesync} clause to the @code{parallel} construct @tab N @tab
 @item @code{omp_get_max_progress_width} runtime routine @tab N @tab
-@item @code{strict} modifier keyword to @code{num_threads}, @code{num_tasks}
-  and @code{grainsize} @tab N @tab
+@item @code{strict} modifier keyword to @code{num_threads} @tab N @tab
 @item @code{memscope} clause to @code{atomic} and @code{flush} @tab N @tab
 @item Routines for obtaining memory spaces/allocators for shared/device memory
   @tab N @tab


Re: [PATCH] amdgcn: Add instruction pattern for conditional shift operations

2023-02-02 Thread Andrew Stubbs

On 01/02/2023 15:35, Paul-Antoine Arras wrote:
This patch introduces an instruction pattern for conditional shift 
operations (cond_{ashl|ashr|lshr}) in the GCN machine description.

Tested on GCN3 Fiji gfx803.

OK to commit?


The changelog will need to be wrapped to 80 columns.

OK otherwise.

Andrew


[PATCH] middle-end/108625 - wrong folding due to misinterpreted !

2023-02-02 Thread Richard Biener via Gcc-patches
The following fixes a problem with ! handling in genmatch which isn't
conservative enough when intermediate simplifications push to the
sequence but the final operation appears to just pick an existing
(but in this case newly defined in the sequence) operand.  The easiest
fix is to disallow adding to the sequence when processing !.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR middle-end/108625
* genmatch.cc (expr::gen_transform): Also disallow resimplification
from pushing to lseq with force_leaf.
(dt_simplify::gen_1): Likewise.

* gcc.dg/pr108625.c: New testcase.
---
 gcc/genmatch.cc |  6 --
 gcc/testsuite/gcc.dg/pr108625.c | 14 ++
 2 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr108625.c

diff --git a/gcc/genmatch.cc b/gcc/genmatch.cc
index e147ab9db7a..43bd0212d0e 100644
--- a/gcc/genmatch.cc
+++ b/gcc/genmatch.cc
@@ -2541,7 +2541,8 @@ expr::gen_transform (FILE *f, int indent, const char 
*dest, bool gimple,
   for (unsigned i = 0; i < ops.length (); ++i)
fprintf (f, ", _o%d[%u]", depth, i);
   fprintf (f, ");\n");
-  fprintf_indent (f, indent, "tem_op.resimplify (lseq, valueize);\n");
+  fprintf_indent (f, indent, "tem_op.resimplify (%s, valueize);\n",
+ !force_leaf ? "lseq" : "NULL");
   fprintf_indent (f, indent,
  "_r%d = maybe_push_res_to_seq (&tem_op, %s);\n", depth,
  !force_leaf ? "lseq" : "NULL");
@@ -3451,7 +3452,8 @@ dt_simplify::gen_1 (FILE *f, int indent, bool gimple, 
operand *result)
  if (!is_predicate)
{
  fprintf_indent (f, indent,
- "res_op->resimplify (lseq, valueize);\n");
+ "res_op->resimplify (%s, valueize);\n",
+ !e->force_leaf ? "lseq" : "NULL");
  if (e->force_leaf)
fprintf_indent (f, indent,
"if (!maybe_push_res_to_seq (res_op, NULL)) "
diff --git a/gcc/testsuite/gcc.dg/pr108625.c b/gcc/testsuite/gcc.dg/pr108625.c
new file mode 100644
index 000..03fc2889c4f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr108625.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1 -fdump-tree-optimized" } */
+
+unsigned char foo(int x)
+{
+  int t = -x;
+  unsigned char t1 = t;
+  unsigned char t2 = t;
+  /* We may not rewrite this as (unsigned char)(t - x).  */
+  return t1 + t2;
+}
+
+/* { dg-final { scan-tree-dump-times "x_" 1 "forwprop1" } } */
+/* { dg-final { scan-tree-dump-times "x_" 1 "optimized" } } */
-- 
2.35.3


Re: [PATCH] driver, toplevel: Avoid emitting the version information twice.

2023-02-02 Thread Iain Sandoe



> On 30 Jan 2023, at 07:48, Richard Biener  wrote:
> 
> On Sun, Jan 29, 2023 at 12:35 PM Iain Sandoe via Gcc-patches
>  wrote:
>> 
>> Technically, this seems to be a regression somewhere between 4.2 and
>> 4.6 but, it seems, not enough for anyone to care too much.  Tested on
>> various Darwin versions and x86_64, powerpc64 linux,
>> OK for trunk {now,stage1}?
> 
> This will elide the earlier printing, right? 

Yes.

> eliding the 2nd would be preferred so the info comes first?

Indeed; that is better .. 

how about this update then?
OK for trunk now/stage1?

— 8< ---

For a regular compile job, with -v we emit the GCC version information
twice - once from main() and once from process_options().

We do not need to output the second header.

Signed-off-by: Iain Sandoe 

gcc/ChangeLog:

* toplev.cc (toplev::main): Only print the version information header
from toplevel main().
---
 gcc/toplev.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/toplev.cc b/gcc/toplev.cc
index 42937f0ba00..4c15d4f542e 100644
--- a/gcc/toplev.cc
+++ b/gcc/toplev.cc
@@ -1358,7 +1358,7 @@ process_options (bool no_backend)
  option flags in use.  */
   if (version_flag)
 {
-  print_version (stderr, "", true);
+  /* We already printed the version header in main ().  */
   if (!quiet_flag)
{
  fputs ("options passed: ", stderr);
-- 
2.37.1 (Apple Git-137.1)




[PATCH] RISC-V: Fix bug of TARGET_COMPUTE_MULTILIB implemented in riscv.

2023-02-02 Thread Jin Ma via Gcc-patches
MAX_MATCH_SCORE is not assigned anywhere except initialized to 0,
causing BEST_MATCH_MULTI_LIB to always be 0 or -1, which will
cause the result of TARGET_COMPUTE_MULTILIB hook to fail.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_compute_multilib): Update
max_match_score when a better match is found.
---
 gcc/common/config/riscv/riscv-common.cc | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 616e2f897b9..787674003cb 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -1700,7 +1700,10 @@ riscv_compute_multilib (
 
   /* Record highest match score multi-lib setting.  */
   if (match_score > max_match_score)
-   best_match_multi_lib = i;
+   {
+ best_match_multi_lib = i;
+ max_match_score = match_score;
+   }
 }
 
   if (best_match_multi_lib == -1)
-- 
2.17.1



[committed] amdgcn, libgomp: Manually allocated stacks

2023-02-02 Thread Andrew Stubbs
I've committed this patch to change the way stacks are initialized on
amdgcn. The patch only touches GCN files, or the GCN-only portions of
libgomp files, so I'm allowing it despite stage 4 because I want the ABI
change done for GCC 13, and because it enables Tobias's reverse-offload
patch that has already been approved, I think.


The stacks used to be placed in the "private segment" provided for the 
purpose by the GPU drivers, but those addresses are not accessible from 
the host, not even by the HSA API, which was a problem for reverse offload.


The new scheme allocates space in the same way as we do the heap space, 
except that each kernel has its own instance. We were already doing that 
for the "team arena" ephemeral heap, so I have unified the two 
implementations.


While the change does not alter the procedure call standard, it does 
alter the kernel entry ABI and requires any code using the compiler 
builtins for kernel properties to be rebuilt. A recent version of Newlib 
is required (version 4.3.0.20230120 has the necessary changes).


Benchmarking shows no significant change in performance.

The __builtin_apply tests fail because they attempt to access memory in 
parent stack frames (I think), but that causes a memory fault when they 
don't exist (stack underflow; if I modify the testcase to include extra 
call depth it passes fine). In any case, the behaviour of
__builtin_apply has not changed, only the device has become less forgiving.


I will back-port this to OG12 shortly.

Andrew

amdgcn, libgomp: Manually allocated stacks

Switch from using stacks in the "private segment" to using a memory block
allocated on the host side.  The primary reason is to permit the reverse
offload implementation to access values located on the device stack, but
there may also be performance benefits, especially with repeated kernel
invocations.

This implementation unifies the stacks with the "team arena" optimization
feature, and now allows both to have run-time configurable sizes.

A new ABI is needed, so all libraries must be rebuilt, and newlib must be
version 4.3.0.20230120 or newer.

gcc/ChangeLog:

* config/gcn/gcn-run.cc: Include libgomp-gcn.h.
(struct kernargs): Replace the common content with kernargs_abi.
(struct heap): Delete.
(main): Read GCN_STACK_SIZE envvar.
Allocate space for the device stacks.
Write the new kernargs fields.
* config/gcn/gcn.cc (gcn_option_override): Remove stack_size_opt.
(default_requested_args): Remove PRIVATE_SEGMENT_BUFFER_ARG and
PRIVATE_SEGMENT_WAVE_OFFSET_ARG.
(gcn_addr_space_convert): Mask the QUEUE_PTR_ARG content.
(gcn_expand_prologue): Move the TARGET_PACKED_WORK_ITEMS to the top.
Set up the stacks from the values in the kernargs, not private.
(gcn_expand_builtin_1): Match the stack configuration in the prologue.
(gcn_hsa_declare_function_name): Turn off the private segment.
(gcn_conditional_register_usage): Ensure QUEUE_PTR is fixed.
* config/gcn/gcn.h (FIXED_REGISTERS): Fix the QUEUE_PTR register.
* config/gcn/gcn.opt (mstack-size): Change the description.

include/ChangeLog:

* gomp-constants.h (GOMP_VERSION_GCN): Bump.

libgomp/ChangeLog:

* config/gcn/libgomp-gcn.h (DEFAULT_GCN_STACK_SIZE): New define.
(DEFAULT_TEAM_ARENA_SIZE): New define.
(struct heap): Move to this file.
(struct kernargs_abi): Likewise.
* config/gcn/team.c (gomp_gcn_enter_kernel): Use team arena size from
the kernargs.
* libgomp.h: Include libgomp-gcn.h.
(TEAM_ARENA_SIZE): Remove.
(team_malloc): Update the error message.
* plugin/plugin-gcn.c (struct kernargs): Move common content to
struct kernargs_abi.
(struct agent_info): Rename team arenas to ephemeral memories.
(struct team_arena_list): Rename ...
(struct ephemeral_memories_list): ... to this.
(struct heap): Delete.
(team_arena_size): New variable.
(stack_size): New variable.
(print_kernel_dispatch): Update debug messages.
(init_environment_variables): Read GCN_TEAM_ARENA_SIZE.
Read GCN_STACK_SIZE.
(get_team_arena): Rename ...
(configure_ephemeral_memories): ... to this, and set up stacks.
(release_team_arena): Rename ...
(release_ephemeral_memories): ... to this.
(destroy_team_arenas): Rename ...
(destroy_ephemeral_memories): ... to this.
(create_kernel_dispatch): Add num_threads parameter.
Adjust for kernargs_abi refactor and ephemeral memories.
(release_kernel_dispatch): Adjust for ephemeral memories.
(run_kernel): Pass thread-count to create_kernel_dispatch.
(GOMP_OFFLOAD_init_device): Adjust for ephemeral memories.
(GOMP_OFFLOAD_fini_device): Adjust for ephemeral memories.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/pr4

Re: [PATCH] sched-deps, cselib: Fix up some -fcompare-debug issues and regressions [PR108463]

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, Alexandre Oliva wrote:

> On Jan 27, 2023, Jakub Jelinek  wrote:
> 
> > Now, 1) is precondition of 2), we can only subst the VALUEs if we
> > have actually looked the address up, but as can be seen on that testcase,
> > we are relying on at least the 1) to be done because we subst the values
> > later on even on DEBUG_INSNs and actually use those when needed.
> 
> Ugh.  That definitely rings a bell, now that you mention it.  I wish I
> had recalled that when I saw the "obvious" opportunity for optimization
> :-/
> 
> > So, I (as done in the patch below) reinstalled the 1) and not 2) for
> > DEBUG_INSNs.
> 
> Thanks!
> 
> > I've spent a day debugging that and found the problem is that as documented
> > in a large comment in cselib.cc above n_useless_values variable definition,
> > we spend quite a bit of effort on making sure that VALUEs created on
> > DEBUG_INSNs don't affect the cselib decisions for non-DEBUG_INSNs such as
> > pruning of useless values etc., but if a VALUE created that way is then
> > looked up/needed from non-DEBUG_INSNs, we promote it to non-debug.
> 
> *nod*
> 
> > The reason for -fcompare-debug failure is that there is one large DEBUG_INSN
> > with 16 MEMs in it mostly with addresses that so far didn't appear in the IL
> > otherwise.  Later on, we see an instruction storing into MEM destination
> > and invalidate that MEM.
> 
> Aha!
> 
> > Unfortunately, n_useless_values which in my understanding should be always
> > the same between -g and -g0 compilations diverges, has 3 more useless values
> > for -g.
> 
> Yeah, that's not good.
> 
> > Now, these were initially VALUEs created for DEBUG_INSN lookups.  As I said,
> > cselib.cc has code to promote such VALUEs (well, their location elements) to
> > non-debug if they are looked up from non-DEBUG_INSNs.  The problem is that
> > when looking up some completely unrelated MEM from a non-DEBUG_INSN we run into
> > a hash collision and so call cselib_hasher::equal to check if the unrelated
> > MEM is equal to the one from DEBUG_INSN only element.  The equal static
> > member function calls rtx_equal_for_cselib_1 and if that returns true,
> > promotes the location to non-DEBUG, otherwise returns false.  So far so
> > good.  But rtx_equal_for_cselib_1 internally performs various other cselib
> > lookups, all done with the non-DEBUG_INSN cselib_current_insn, so they
> > all promote to non-debug.
> 
> Good catch!
> 
> > So, I think we need to pretend
> > that such lookup which only happens with -g and not -g0 actually comes
> > from some DEBUG_INSN (note, the lookups rtx_equal_for_cselib_1 does
> > are always with create = 0).
> > The cselib.cc part of the patch does that.
> 
> Agreed, that makes sense to me, thanks!
> 
> > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
> 
> FWIW, I'd approve it if I had the authority to do so :-)

OK.

Thanks,
Richard.


Re: [PATCH][GCC] arm: Optimize arm-mlib.h header inclusion (pr108505).

2023-02-02 Thread Srinath Parvathaneni via Gcc-patches
Ping!!

From: Gcc-patches 
 on behalf of 
Srinath Parvathaneni via Gcc-patches 
Sent: 27 January 2023 17:44
To: gcc-patches@gcc.gnu.org 
Cc: nd ; Richard Earnshaw ; Kyrylo 
Tkachov 
Subject: [PATCH][GCC] arm: Optimize arm-mlib.h header inclusion (pr108505).

Hello,

I have committed a fix [1] into gcc trunk for a build issue mentioned in 
pr108505 and
later received a few upstream comments proposing a more robust fix for this issue.

In this patch I'm addressing those comments and sending this as a followup 
patch.

Regression tested on arm-none-eabi target and found no regressions.

Ok for master?

[1] https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610513.html

Regards,
Srinath.

gcc/ChangeLog:

2023-01-27  Srinath Parvathaneni  

PR target/108505
* config.gcc (tm_mlib_file): Define new variable.


### Attachment also inlined for ease of reply###


diff --git a/gcc/config.gcc b/gcc/config.gcc
index 
89f56047cfe3126bc6c8e90c8b4840dea13538f9..2aab92bbfd8b4088259ebf9b565af8e8bbef1122
 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4355,6 +4355,7 @@ case "${target}" in
 case ${arm_multilib} in
 aprofile|rmprofile)
 
tmake_profile_file="arm/t-multilib"
+   tm_mlib_file="arm/arm-mlib.h"
 ;;
 @*)
 ml=`echo "X$arm_multilib" | 
sed '1s,^X@,,'`
@@ -4393,7 +4394,7 @@ case "${target}" in
 # through to the multilib selector
 with_float="soft"
 tmake_file="${tmake_file} 
${tmake_profile_file}"
-   tm_file="$tm_file arm/arm-mlib.h"
+   tm_file="$tm_file $tm_mlib_file"
 TM_MULTILIB_CONFIG="$with_multilib_list"
 fi
 fi





Re: [PATCH] PR tree-optimization/107570 - Reset SCEV after folding in VRP.

2023-02-02 Thread Richard Biener via Gcc-patches
On Wed, Feb 1, 2023 at 7:12 PM Andrew MacLeod via Gcc-patches
 wrote:
>
> We can reset SCEV after we fold, then SCEVs cache shouldn't have
> anything in it when we go to remove ssa-names in remove_unreachable().
>
> We were resetting it later sometimes if we were processing the array
> bounds warning, so I removed that call and just always reset it now.
>
> Bootstraps on x86_64-pc-linux-gnu. Testing running. Assuming no
> regressions,  OK for trunk?

+
+  // SCEV needs to be reset for array bounds, and we do not wish to trigger
+  // any SCEV lookups when removing unreachable globals, so reset it here.
+  scev_reset ();

the comment suggests that SCEV queries (aka analyze_scalar_evolution)
won't return anything after a scev_reset ().  That's not true - instead what
it does is nuke the SCEV cache.  That's necessary when you
release SSA names or alter the CFG and you want to avoid followup
SCEV queries to pick up stale data.

So if remove_and_update_globals performs SCEV queries and eventually
releases SSA names you cannot remove the second call to scev_reset.

But yes, it's probably substitute_and_fold_engine::substitute_and_fold
itself that should do a

  if (scev_initialized_p ())
scev_reset ();

possibly only in the case it released an SSA name, or removed an
edge (but that's maybe premature optimization).

Richard.

>
> Andrew


Re: Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, juzhe.zh...@rivai.ai wrote:

> Yeah, Thanks. You are right. CSE should do the job. 
> Now I know the reason CSE failed to optimize is I include 
> VL_REGNUM(66)/VTYPE_RENUM(67) hard reg
> as the dependency of pred_broadcast:
> (insn 19 18 20 4 (set (reg:VNx1DI 152)
> > (if_then_else:VNx1DI (unspec:VNx1BI [
> > (const_vector:VNx1BI repeat [
> > (const_int 1 [0x1])
> > ])
> > (const_int 4 [0x4])
> > (const_int 2 [0x2]) repeated x2
> > (const_int 0 [0])
> > (reg:SI 66 vl)
> > (reg:SI 67 vtype)
> > ] UNSPEC_VPREDICATE)
> > (vec_duplicate:VNx1DI (reg/v:DI 148 [ x ]))
> > (unspec:VNx1DI [
> > (const_int 0 [0])
> > ] UNSPEC_VUNDEF))) "rvv.c":22:23 695 {pred_broadcastvnx1di}
> >  (nil))
> Then CSE failed to set the 152 as copy.
> 
> VL_REGNUM(66)/VTYPE_RENUM(67) are the global hard reg that I should make each 
> RVV instruction depend on them.
> Since we use vsetvl instruction (which is setting global 
> VL_REGNUM(66)/VTYPE_RENUM(67) status) to set the global status for
> each RVV instruction. 
> Including the dependency here is to make sure the global VL/VTYPE status is 
> correct of each RVV instruction. (If we don't include
> such dependency in RVV instruction, instruction scheduling may move the RVV 
> instructions and vsetvl instructions randomly then
> produce incorrect vsetvl configuration)
> 
> The original reg_class of VL_REGNUM(66)/VTYPE_RENUM(67) I set here:
> riscv_regno_to_class [VL_REGNUM] = VL_REGS;
> riscv_regno_to_class [VTYPE_RENUM] = VTYPE_REGS;
> Such configuration make CSE failed.
> 
> However, if I change the reg_class :
> riscv_regno_to_class [VL_REGNUM] = NO_REGS;
> riscv_regno_to_class [VTYPE_RENUM] = NO_REGS;
> The CSE now can do the optimization now!
> 
> 1) Would you mind telling me the difference between them?

No idea.  I think CSE avoids touching hard register references because
eliding them to copies can increase register pressure.

> 2) If I set these 2 global status register as NO_REGS, will it create 
>issues for the global status configuration of each RVV instructions ?

No idea either.  Usually these kind of dependences are introduced
by targets at the point the VL setting is introduced to avoid
pessimizing optimizations earlier.  Often, for cases like a VL
register, this is done after register allocation only and indeed
necessary to avoid the second scheduling pass from breaking things.

Richard.


Re: [PATCH] rtl-ssa: Fix splitting of clobber groups [PR108508]

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, Feb 2, 2023 at 10:49 AM Richard Sandiford via Gcc-patches
 wrote:
>
> Since rtl-ssa isn't a real/native SSA representation, it has
> to honour the constraints of the underlying rtl representation.
> Part of this involves maintaining an rpo list of definitions
> for each rtl register, backed by a splay tree where necessary
> for quick lookup/insertion.
>
> However, clobbers of a register don't act as barriers to
> other clobbers of a register.  E.g. it's possible to move one
> flag-clobbering instruction across an arbitrary number of other
> flag-clobbering instructions.  In order to allow passes to do
> that without quadratic complexity, the splay tree groups all
> consecutive clobbers into groups, with only the group being
> entered into the splay tree.  These groups in turn have an
> internal splay tree of clobbers where necessary.
>
> This means that, if we insert a new definition and use into
> the middle of a sea of clobbers, we need to split the clobber
> group into two groups.  This was quite a difficult condition
> to trigger during development, and the PR shows that the code
> to handle it had (at least) two bugs.
>
> First, the process involves searching the clobber tree for
> the split point.  This search can give either the previous
> clobber (which will belong to the first of the split groups)
> or the next clobber (which will belong to the second of the
> split groups).  The code for the former case handled the
> split correctly but the code for the latter case didn't.
>
> Second, I'd forgotten to add the second clobber group to the
> main splay tree. :-(
>
> Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK for trunk
> & GCC 12?  Although the testcase is "only" a regression from GCC 12,
> I think the rtl-ssa patch should be backported to GCC 11 too.

OK.

Thanks,
Richard.

> Richard
>
>
> gcc/
> PR rtl-optimization/108508
> * rtl-ssa/accesses.cc (function_info::split_clobber_group): When
> the splay tree search gives the first clobber in the second group,
> make sure that the root of the first clobber group is updated
> correctly.  Enter the new clobber group into the definition splay
> tree.
>
> gcc/testsuite/
> PR rtl-optimization/108508
> * gcc.target/aarch64/pr108508.c: New test.
> ---
>  gcc/rtl-ssa/accesses.cc | 14 ---
>  gcc/testsuite/gcc.target/aarch64/pr108508.c | 28 +
>  2 files changed, 38 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr108508.c
>
> diff --git a/gcc/rtl-ssa/accesses.cc b/gcc/rtl-ssa/accesses.cc
> index 03b9a475d3b..f12b5f4dd77 100644
> --- a/gcc/rtl-ssa/accesses.cc
> +++ b/gcc/rtl-ssa/accesses.cc
> @@ -794,23 +794,26 @@ function_info::merge_clobber_groups (clobber_info 
> *clobber1,
>  // GROUP spans INSN, and INSN now sets the resource that GROUP clobbers.
>  // Split GROUP around INSN and return the clobber that comes immediately
>  // before INSN.
> +//
> +// The resource that GROUP clobbers is known to have an associated
> +// splay tree.
>  clobber_info *
>  function_info::split_clobber_group (clobber_group *group, insn_info *insn)
>  {
>// Search for either the previous or next clobber in the group.
>// The result is less than zero if CLOBBER should come before NEIGHBOR
>// or greater than zero if CLOBBER should come after NEIGHBOR.
> -  int comparison = lookup_clobber (group->m_clobber_tree, insn);
> +  clobber_tree &tree1 = group->m_clobber_tree;
> +  int comparison = lookup_clobber (tree1, insn);
>gcc_checking_assert (comparison != 0);
> -  clobber_info *neighbor = group->m_clobber_tree.root ();
> +  clobber_info *neighbor = tree1.root ();
>
> -  clobber_tree tree1, tree2;
> +  clobber_tree tree2;
>clobber_info *prev;
>clobber_info *next;
>if (comparison > 0)
>  {
>// NEIGHBOR is the last clobber in what will become the first group.
> -  tree1 = neighbor;
>tree2 = tree1.split_after_root ();
>prev = neighbor;
>next = as_a (prev->next_def ());
> @@ -843,6 +846,9 @@ function_info::split_clobber_group (clobber_group *group, 
> insn_info *insn)
>tree2->set_group (group2);
>last_clobber->set_group (group2);
>
> +  // Insert GROUP2 into the splay tree as an immediate successor of GROUP1.
> +  def_splay_tree::insert_child (group1, 1, group2);
> +
>return prev;
>  }
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr108508.c 
> b/gcc/testsuite/gcc.target/aarch64/pr108508.c
> new file mode 100644
> index 000..e97896b6a1b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr108508.c
> @@ -0,0 +1,28 @@
> +/* { dg-options "-O3 -fharden-conditional-branches -fno-dce 
> -fno-guess-branch-probability" } */
> +
> +#include <arm_neon.h>
> +
> +int
> +test_vld3q_lane_f64 (void)
> +{
> +  float64x2x3_t vectors;
> +  float64_t temp[2];
> +  int i, j;
> +
> +  for (i = 0; i < 3; i++)
> +  {
> +vst1q_f64 (temp, vectors.val[i]);
> +   

Re: Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread juzhe.zh...@rivai.ai
Thank you so much. Kito helped me fix it already.
RVV instruction patterns can have CSE optimizations now.



juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-02-02 20:26
To: juzhe.zh...@rivai.ai
CC: gcc-patches; kito.cheng; richard.sandiford; jeffreyalaw; apinski
Subject: Re: Re: [PATCH] CPROP: Allow cprop optimization when the function has 
a single block
On Thu, 2 Feb 2023, juzhe.zh...@rivai.ai wrote:
 
> Yeah, Thanks. You are right. CSE should do the job. 
> Now I know the reason CSE failed to optimize is I include 
> VL_REGNUM(66)/VTYPE_RENUM(67) hard reg
> as the dependency of pred_broadcast:
> (insn 19 18 20 4 (set (reg:VNx1DI 152)
> > (if_then_else:VNx1DI (unspec:VNx1BI [
> > (const_vector:VNx1BI repeat [
> > (const_int 1 [0x1])
> > ])
> > (const_int 4 [0x4])
> > (const_int 2 [0x2]) repeated x2
> > (const_int 0 [0])
> > (reg:SI 66 vl)
> > (reg:SI 67 vtype)
> > ] UNSPEC_VPREDICATE)
> > (vec_duplicate:VNx1DI (reg/v:DI 148 [ x ]))
> > (unspec:VNx1DI [
> > (const_int 0 [0])
> > ] UNSPEC_VUNDEF))) "rvv.c":22:23 695 {pred_broadcastvnx1di}
> >  (nil))
> Then CSE failed to set the 152 as copy.
> 
> VL_REGNUM(66)/VTYPE_RENUM(67) are the global hard reg that I should make each 
> RVV instruction depend on them.
> Since we use vsetvl instruction (which is setting global 
> VL_REGNUM(66)/VTYPE_RENUM(67) status) to set the global status for
> each RVV instruction. 
> Including the dependency here is to make sure the global VL/VTYPE status is 
> correct of each RVV instruction. (If we don't include
> such dependency in RVV instruction, instruction scheduling may move the RVV 
> instructions and vsetvl instructions randomly then
> produce incorrect vsetvl configuration)
> 
> The original reg_class of VL_REGNUM(66)/VTYPE_RENUM(67) I set here:
> riscv_regno_to_class [VL_REGNUM] = VL_REGS;
> riscv_regno_to_class [VTYPE_RENUM] = VTYPE_REGS;
> Such configuration make CSE failed.
> 
> However, if I change the reg_class :
> riscv_regno_to_class [VL_REGNUM] = NO_REGS;
> riscv_regno_to_class [VTYPE_RENUM] = NO_REGS;
> The CSE now can do the optimization now!
> 
> 1) Would you mind telling me the difference between them?
 
No idea.  I think CSE avoids touching hard register references because
eliding them to copies can increase register pressure.
 
> 2) If I set these 2 global status register as NO_REGS, will it create 
>issues for the global status configuration of each RVV instructions ?
 
No idea either.  Usually these kind of dependences are introduced
by targets at the point the VL setting is introduced to avoid
pessimizing optimizations earlier.  Often, for cases like a VL
register, this is done after register allocation only and indeed
necessary to avoid the second scheduling pass from breaking things.
 
Richard.
 


Re: [committed] libgomp.texi (OpenMP TR11 impl. status): Fix 'strict' item

2023-02-02 Thread Jakub Jelinek via Gcc-patches
On Thu, Feb 02, 2023 at 12:19:56PM +0100, Tobias Burnus wrote:
> There is less new in TR11 than claimed ... 'strict' on grainsize/num_tasks is 
> already
> in OpenMP 5.1, it is implemented and also listed as 'Y' under 5.1.
> Only 'num_threads(strict: int-expr)' is new in TR11.
> 
> Tobias
> -
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
> München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
> Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
> München, HRB 106955

> commit 8da7476c5fa8870c2fcded48d3de95978434c1be
> Author: Tobias Burnus 
> Date:   Thu Feb 2 12:05:58 2023 +0100
> 
> libgomp.texi (OpenMP TR11 impl. status): Fix 'strict' item
> 
> Fix the 'strict' modifier status: it is already listed (as 'Y') for OpenMP
> 5.1 for num_tasks and grainsize; only strict on num_threads is new with 
> TR11.
> 
> libgomp/
> * libgomp.texi (OpenMP TR11): Fix item for 'strict' modifier.

Ok.

Jakub



Re: [PATCH] RISC-V: Fix bug of TARGET_COMPUTE_MULTILIB implemented in riscv.

2023-02-02 Thread Kito Cheng
Good catch! thanks for fixing that, committed to trunk :)

On Thu, Feb 2, 2023 at 7:46 PM Jin Ma  wrote:
>
> MAX_MATCH_SCORE is not assigned anywhere except initialized to 0,
> causing BEST_MATCH_MULTI_LIB to always be 0 or -1, which will
> cause the result of TARGET_COMPUTE_MULTILIB hook to fail.
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc:
> ---
>  gcc/common/config/riscv/riscv-common.cc | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index 616e2f897b9..787674003cb 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -1700,7 +1700,10 @@ riscv_compute_multilib (
>
>/* Record highest match score multi-lib setting.  */
>if (match_score > max_match_score)
> -   best_match_multi_lib = i;
> +   {
> + best_match_multi_lib = i;
> + max_match_score = match_score;
> +   }
>  }
>
>if (best_match_multi_lib == -1)
> --
> 2.17.1
>


Re: [PATCH] driver, toplevel: Avoid emitting the version information twice.

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, Feb 2, 2023 at 12:41 PM Iain Sandoe  wrote:
>
>
>
> > On 30 Jan 2023, at 07:48, Richard Biener  wrote:
> >
> > On Sun, Jan 29, 2023 at 12:35 PM Iain Sandoe via Gcc-patches
> >  wrote:
> >>
> >> Technically, this seems to be a regression somewhere between 4.2 and
> >> 4.6 but, it seems, not enough for anyone to care too much.  Tested on
> >> various Darwin versions and x86_64, powerpc64 linux,
> >> OK for trunk {now,stage1}?
> >
> > This will elide the earlier printing, right?
>
> Yes.
>
> > eliding the 2nd would be preferred so the info comes first?
>
> Indeed; that is better ..
>
> how about this update then?
> OK for trunk now/stage1?

OK now.

Richard.

> — 8< ---
>
> For a regular compile job, with -v we emit the GCC version information
> twice - once from main() and once from process_options().
>
> We do not need to output the second header.
>
> Signed-off-by: Iain Sandoe 
>
> gcc/ChangeLog:
>
> * toplev.cc (toplev::main): Only print the version information header
> from toplevel main().
> ---
>  gcc/toplev.cc | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/toplev.cc b/gcc/toplev.cc
> index 42937f0ba00..4c15d4f542e 100644
> --- a/gcc/toplev.cc
> +++ b/gcc/toplev.cc
> @@ -1358,7 +1358,7 @@ process_options (bool no_backend)
>   option flags in use.  */
>if (version_flag)
>  {
> -  print_version (stderr, "", true);
> +  /* We already printed the version header in main ().  */
>if (!quiet_flag)
> {
>   fputs ("options passed: ", stderr);
> --
> 2.37.1 (Apple Git-137.1)
>
>


Re: [PATCH v5] [RISCV] Add 'Zfa' extension according to riscv-isa-manual

2023-02-02 Thread Kito Cheng
ack, just let you know reviewing this patch is on my todo list :)

My first impression is...you need to write something in your
changelog; it seems to have been generated by contrib/git-commit-mklog.py
without any modification.

On Thu, Feb 2, 2023 at 1:46 PM Jin Ma  wrote:
>
> This patch adds the 'Zfa' extension for riscv, which is based on:
> ( 
> https://github.com/riscv/riscv-isa-manual/commit/d74d99e22d5f68832f70982d867614e2149a3bd7
>  )
> latest 'Zfa' change on the master branch of the RISC-V ISA Manual as
> of this writing.
>
> The Wiki Page (details):
> ( https://github.com/a4lg/binutils-gdb/wiki/riscv_zfa )
>
> The binutils-gdb for 'Zfa' extension:
> ( https://sourceware.org/pipermail/binutils/2022-September/122938.html )
>
> gcc/ChangeLog:
>
> * common/config/riscv/riscv-common.cc:
> * config/riscv/constraints.md (Zf):
> * config/riscv/predicates.md:
> * config/riscv/riscv-builtins.cc (RISCV_FTYPE_NAME2):
> (AVAIL):
> (RISCV_ATYPE_SF):
> (RISCV_ATYPE_DF):
> (RISCV_FTYPE_ATYPES2):
> * config/riscv/riscv-ftypes.def (2):
> * config/riscv/riscv-opts.h (MASK_ZFA):
> (TARGET_ZFA):
> * config/riscv/riscv-protos.h (riscv_float_const_rtx_index_for_fli):
> * config/riscv/riscv.cc (riscv_float_const_rtx_index_for_fli):
> (riscv_cannot_force_const_mem):
> (riscv_const_insns):
> (riscv_legitimize_const_move):
> (riscv_split_64bit_move_p):
> (riscv_output_move):
> (riscv_memmodel_needs_release_fence):
> (riscv_print_operand):
> (riscv_secondary_memory_needed):
> * config/riscv/riscv.h (GP_REG_RTX_P):
> * config/riscv/riscv.md (riscv_fminm3):
> (riscv_fmaxm3):
> (fix_truncdfsi2_zfa):
> (round2):
> (rint2):
> (f_quiet4_zfa):
> * config/riscv/riscv.opt:
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/zfa-fcvtmod.c: New test.
> * gcc.target/riscv/zfa-fleq-fltq.c: New test.
> * gcc.target/riscv/zfa-fli-zfh.c: New test.
> * gcc.target/riscv/zfa-fli.c: New test.
> * gcc.target/riscv/zfa-fminm-fmaxm.c: New test.
> * gcc.target/riscv/zfa-fmovh-fmovp.c: New test.
> * gcc.target/riscv/zfa-fround.c: New test.
> ---
>  gcc/common/config/riscv/riscv-common.cc   |   4 +
>  gcc/config/riscv/constraints.md   |   7 ++
>  gcc/config/riscv/predicates.md|   4 +
>  gcc/config/riscv/riscv-builtins.cc|  11 ++
>  gcc/config/riscv/riscv-ftypes.def |   2 +
>  gcc/config/riscv/riscv-opts.h |   3 +
>  gcc/config/riscv/riscv-protos.h   |   1 +
>  gcc/config/riscv/riscv.cc | 113 -
>  gcc/config/riscv/riscv.h  |   1 +
>  gcc/config/riscv/riscv.md | 114 ++
>  gcc/config/riscv/riscv.opt|   4 +
>  gcc/testsuite/gcc.target/riscv/zfa-fcvtmod.c  |  12 ++
>  .../gcc.target/riscv/zfa-fleq-fltq.c  |  20 +++
>  gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c  |  42 +++
>  gcc/testsuite/gcc.target/riscv/zfa-fli.c  |  80 
>  .../gcc.target/riscv/zfa-fminm-fmaxm.c|  25 
>  .../gcc.target/riscv/zfa-fmovh-fmovp.c|  11 ++
>  gcc/testsuite/gcc.target/riscv/zfa-fround.c   |  25 
>  18 files changed, 456 insertions(+), 23 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fcvtmod.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fleq-fltq.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fminm-fmaxm.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fround.c
>
> diff --git a/gcc/common/config/riscv/riscv-common.cc 
> b/gcc/common/config/riscv/riscv-common.cc
> index 616e2f897b9..977c8a9acf7 100644
> --- a/gcc/common/config/riscv/riscv-common.cc
> +++ b/gcc/common/config/riscv/riscv-common.cc
> @@ -217,6 +217,8 @@ static const struct riscv_ext_version 
> riscv_ext_version_table[] =
>{"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
>{"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
>
> +  {"zfa", ISA_SPEC_CLASS_NONE, 1, 0},
> +
>{"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
>
>{"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
> @@ -1242,6 +1244,8 @@ static const riscv_ext_flag_table_t 
> riscv_ext_flag_table[] =
>{"zfhmin",&gcc_options::x_riscv_zf_subext, MASK_ZFHMIN},
>{"zfh",   &gcc_options::x_riscv_zf_subext, MASK_ZFH},
>
> +  {"zfa",   &gcc_options::x_riscv_zf_subext, MASK_ZFA},
> +
>{"zmmul", &gcc_options::x_riscv_zm_subext, MASK_ZMMUL},
>
>{"svinval", &gcc_options::x_riscv_sv_subext, MASK_SVINVAL},
> diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
> i

Re: [PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-02-02 Thread Qing Zhao via Gcc-patches


> On Feb 2, 2023, at 3:07 AM, Richard Biener  wrote:
> 
> On Wed, 1 Feb 2023, Qing Zhao wrote:
> 
>> 
>> 
>>> On Feb 1, 2023, at 6:41 AM, Richard Biener  wrote:
>>> 
>>> On Tue, 31 Jan 2023, Qing Zhao wrote:
>>> 
 GCC extension accepts the case when a struct with a flexible array member
 is embedded into another struct (possibly recursively).
 __builtin_object_size should treat such struct as flexible size per
 -fstrict-flex-arrays.
 
PR tree-optimization/101832
 
 gcc/ChangeLog:
 
PR tree-optimization/101832
* tree-object-size.cc (flexible_size_type_p): New function.
(addr_object_size): Handle structure/union type when it has
flexible size.
 
 gcc/testsuite/ChangeLog:
 
PR tree-optimization/101832
* gcc.dg/builtin-object-size-pr101832-2.c: New test.
* gcc.dg/builtin-object-size-pr101832-3.c: New test.
* gcc.dg/builtin-object-size-pr101832-4.c: New test.
* gcc.dg/builtin-object-size-pr101832.c: New test.
 ---
 .../gcc.dg/builtin-object-size-pr101832-2.c   | 135 ++
 .../gcc.dg/builtin-object-size-pr101832-3.c   | 135 ++
 .../gcc.dg/builtin-object-size-pr101832-4.c   | 135 ++
 .../gcc.dg/builtin-object-size-pr101832.c | 119 +++
 gcc/tree-object-size.cc   | 115 +++
 5 files changed, 611 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-3.c
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-4.c
 create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
 
 diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c 
 b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
 new file mode 100644
 index 000..f38babc5415
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
 @@ -0,0 +1,135 @@
 +/* PR 101832: 
 +   GCC extension accepts the case when a struct with a flexible array 
 member
 +   is embedded into another struct (possibly recursively).
 +   __builtin_object_size will treat such struct as flexible size per
 +   -fstrict-flex-arrays.  */ 
 +/* { dg-do run } */
 +/* { dg-options "-O2 -fstrict-flex-arrays=1" } */
 +
 +#include 
 +
 +unsigned n_fails = 0;
 +
 +#define expect(p, _v) do { \
 +  size_t v = _v; \
 +  if (p == v) \
 +printf("ok:  %s == %zd\n", #p, p); \
 +  else {\
 +printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
 +n_fails++; \
 +  } \
 +} while (0);
 +
 +struct A {
 +  int n;
 +  char data[];/* Content following header */
 +};
 +
 +struct B {
 +  int m;
 +  struct A a;
 +};
 +
 +struct C {
 +  int q;
 +  struct B b;
 +};
 +
 +struct A0 {
 +  int n;
 +  char data[0];/* Content following header */
 +};
 +
 +struct B0 {
 +  int m;
 +  struct A0 a;
 +};
 +
 +struct C0 {
 +  int q;
 +  struct B0 b;
 +};
 +
 +struct A1 {
 +  int n;
 +  char data[1];/* Content following header */
 +};
 +
 +struct B1 {
 +  int m;
 +  struct A1 a;
 +};
 +
 +struct C1 {
 +  int q;
 +  struct B1 b;
 +};
 +
 +struct An {
 +  int n;
 +  char data[8];/* Content following header */
 +};
 +
 +struct Bn {
 +  int m;
 +  struct An a;
 +};
 +
 +struct Cn {
 +  int q;
 +  struct Bn b;
 +};
 +
 +volatile void *magic1, *magic2;
 +
 +int main(int argc, char *argv[])
 +{
 +struct B *outer;
 +struct C *outest;
 +
 +/* Make sure optimization can't find some other object size. */
 +outer = (void *)magic1;
 +outest = (void *)magic2;
 +
 +expect(__builtin_object_size(&outer->a, 1), -1);
 +expect(__builtin_object_size(&outest->b, 1), -1);
 +expect(__builtin_object_size(&outest->b.a, 1), -1);
 +
 +struct B0 *outer0;
 +struct C0 *outest0;
 +
 +/* Make sure optimization can't find some other object size. */
 +outer0 = (void *)magic1;
 +outest0 = (void *)magic2;
 +
 +expect(__builtin_object_size(&outer0->a, 1), -1);
 +expect(__builtin_object_size(&outest0->b, 1), -1);
 +expect(__builtin_object_size(&outest0->b.a, 1), -1);
 +
 +struct B1 *outer1;
 +struct C1 *outest1;
 +
 +/* Make sure optimization can't find some other object size. */
 +outer1 = (void *)magic1;
 +outest1 = (void *)magic2;
 +
 +expect(__builtin_object_size(&outer1->a, 1), -1);
 +expect(__builtin_object_size(&outest1->b, 1),

Re: [PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, Qing Zhao wrote:

> 
> 
> > On Feb 2, 2023, at 3:07 AM, Richard Biener  wrote:
> > 
> > On Wed, 1 Feb 2023, Qing Zhao wrote:
> > 
> >> 
> >> 
> >>> On Feb 1, 2023, at 6:41 AM, Richard Biener  wrote:
> >>> 
> >>> On Tue, 31 Jan 2023, Qing Zhao wrote:
> >>> 
>  GCC extension accepts the case when a struct with a flexible array member
>  is embedded into another struct (possibly recursively).
>  __builtin_object_size should treat such struct as flexible size per
>  -fstrict-flex-arrays.
>  
>   PR tree-optimization/101832
>  
>  gcc/ChangeLog:
>  
>   PR tree-optimization/101832
>   * tree-object-size.cc (flexible_size_type_p): New function.
>   (addr_object_size): Handle structure/union type when it has
>   flexible size.
>  
>  gcc/testsuite/ChangeLog:
>  
>   PR tree-optimization/101832
>   * gcc.dg/builtin-object-size-pr101832-2.c: New test.
>   * gcc.dg/builtin-object-size-pr101832-3.c: New test.
>   * gcc.dg/builtin-object-size-pr101832-4.c: New test.
>   * gcc.dg/builtin-object-size-pr101832.c: New test.
>  ---
>  .../gcc.dg/builtin-object-size-pr101832-2.c   | 135 ++
>  .../gcc.dg/builtin-object-size-pr101832-3.c   | 135 ++
>  .../gcc.dg/builtin-object-size-pr101832-4.c   | 135 ++
>  .../gcc.dg/builtin-object-size-pr101832.c | 119 +++
>  gcc/tree-object-size.cc   | 115 +++
>  5 files changed, 611 insertions(+), 28 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-3.c
>  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-4.c
>  create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
>  
>  diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c 
>  b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>  new file mode 100644
>  index 000..f38babc5415
>  --- /dev/null
>  +++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>  @@ -0,0 +1,135 @@
>  +/* PR 101832: 
>  +   GCC extension accepts the case when a struct with a flexible array 
>  member
>  +   is embedded into another struct (possibly recursively).
>  +   __builtin_object_size will treat such struct as flexible size per
>  +   -fstrict-flex-arrays.  */ 
>  +/* { dg-do run } */
>  +/* { dg-options "-O2 -fstrict-flex-arrays=1" } */
>  +
>  +#include 
>  +
>  +unsigned n_fails = 0;
>  +
>  +#define expect(p, _v) do { \
>  +  size_t v = _v; \
>  +  if (p == v) \
>  +printf("ok:  %s == %zd\n", #p, p); \
>  +  else {\
>  +printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
>  +n_fails++; \
>  +  } \
>  +} while (0);
>  +
>  +struct A {
>  +  int n;
>  +  char data[];/* Content following header */
>  +};
>  +
>  +struct B {
>  +  int m;
>  +  struct A a;
>  +};
>  +
>  +struct C {
>  +  int q;
>  +  struct B b;
>  +};
>  +
>  +struct A0 {
>  +  int n;
>  +  char data[0];/* Content following header */
>  +};
>  +
>  +struct B0 {
>  +  int m;
>  +  struct A0 a;
>  +};
>  +
>  +struct C0 {
>  +  int q;
>  +  struct B0 b;
>  +};
>  +
>  +struct A1 {
>  +  int n;
>  +  char data[1];/* Content following header */
>  +};
>  +
>  +struct B1 {
>  +  int m;
>  +  struct A1 a;
>  +};
>  +
>  +struct C1 {
>  +  int q;
>  +  struct B1 b;
>  +};
>  +
>  +struct An {
>  +  int n;
>  +  char data[8];/* Content following header */
>  +};
>  +
>  +struct Bn {
>  +  int m;
>  +  struct An a;
>  +};
>  +
>  +struct Cn {
>  +  int q;
>  +  struct Bn b;
>  +};
>  +
>  +volatile void *magic1, *magic2;
>  +
>  +int main(int argc, char *argv[])
>  +{
>  +struct B *outer;
>  +struct C *outest;
>  +
>  +/* Make sure optimization can't find some other object size. */
>  +outer = (void *)magic1;
>  +outest = (void *)magic2;
>  +
>  +expect(__builtin_object_size(&outer->a, 1), -1);
>  +expect(__builtin_object_size(&outest->b, 1), -1);
>  +expect(__builtin_object_size(&outest->b.a, 1), -1);
>  +
>  +struct B0 *outer0;
>  +struct C0 *outest0;
>  +
>  +/* Make sure optimization can't find some other object size. */
>  +outer0 = (void *)magic1;
>  +outest0 = (void *)magic2;
>  +
>  +expect(__builtin_object_size(&outer0->a, 1), -1);
>  +expect(__builtin_object_size(&outest0->b, 1), -1);
>  +expect(__builtin_object_size(&outest0->b.a, 1), -1);
> 

Re: [PATCH v5 0/5] P1689R5 support

2023-02-02 Thread Ben Boeckel via Gcc-patches
On Wed, Jan 25, 2023 at 16:06:31 -0500, Ben Boeckel wrote:
> This patch series adds initial support for ISO C++'s [P1689R5][], a
> format for describing C++ module requirements and provisions based on
> the source code. This is required because compiling C++ with modules is
> not embarrassingly parallel and needs to be ordered to ensure that
> `import some_module;` can be satisfied in time by making sure that any
> TU with `export import some_module;` is compiled first.
> 
> [P1689R5]: https://isocpp.org/files/papers/P1689R5.html
> 
> I've also added patches to include imported module CMI files and the
> module mapper file as dependencies of the compilation. I briefly looked
> into adding dependencies on response files as well, but that appeared to
> need some code contortions to have a `class mkdeps` available before
> parsing the command line or to keep the information around until one was
> made.
> 
> I'd like feedback on the approach taken here with respect to the
> user-visible flags. I'll also note that header units are not supported
> at this time because the current `-E` behavior with respect to `import
> <header>;` is to search for an appropriate `.gcm` file which is not
> something such a "scan" can support. A new mode will likely need to be
> created (e.g., replacing `-E` with `-fc++-module-scanning` or something)
> where headers are looked up "normally" and processed only as much as
> scanning requires.
> 
> FWIW, Clang as taken an alternate approach with its `clang-scan-deps`
> tool rather than using the compiler directly.

Ping? It'd be nice to have this supported in at least GCC 14 (since it
missed 13).

Thanks,

--Ben


[Patch] libgomp: Fix reverse offload issues

2023-02-02 Thread Tobias Burnus

Found when testing AMD GCN offloading, the second issue came up with
libgomp.fortran/reverse-offload-5.f90. (But oddly not with nvptx.)

While the first one (new test: libgomp.fortran/reverse-offload-6.f90)
came up when debugging the issue.

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: Fix reverse offload issues

If there is nothing to map, skip the mapping and attempting to copy
addrs, sizes and kinds which may have issues for size = 0.

Additionally, it could happen that a non-allocated address was deallocated,
e.g. a pointer set - such that there was a double free for the actual data
or in multiple other ways.

libgomp/
	* target.c (gomp_target_rev): Handle mapnum == 0 and avoid
	freeing not allocated memory.
	* testsuite/libgomp.fortran/reverse-offload-6.f90: New test.

 libgomp/target.c   |  8 +++---
 .../libgomp.fortran/reverse-offload-6.f90  | 32 ++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/libgomp/target.c b/libgomp/target.c
index b16ee761a95..c1682caea13 100644
--- a/libgomp/target.c
+++ b/libgomp/target.c
@@ -3324,7 +3324,7 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
 gomp_fatal ("Cannot find reverse-offload function");
   void (*host_fn)() = (void (*)()) n->k->host_start;
 
-  if (devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM)
+  if ((devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) || mapnum == 0)
 {
   devaddrs = (uint64_t *) (uintptr_t) devaddrs_ptr;
   sizes = (uint64_t *) (uintptr_t) sizes_ptr;
@@ -3402,7 +3402,7 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
 	  }
 }
 
-  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
 {
   size_t j, struct_cpy = 0;
   splay_tree_key n2;
@@ -3638,7 +3638,7 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
 
   host_fn (devaddrs);
 
-  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM))
+  if (!(devicep->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) && mapnum > 0)
 {
   uint64_t struct_cpy = 0;
   bool clean_struct = false;
@@ -3680,7 +3680,7 @@ gomp_target_rev (uint64_t fn_ptr, uint64_t mapnum, uint64_t devaddrs_ptr,
 	  clean_struct = true;
 	  struct_cpy = sizes[i];
 	}
-	  else if (cdata[i].aligned)
+	  else if (!cdata[i].present && cdata[i].aligned)
 	gomp_aligned_free ((void *) (uintptr_t) devaddrs[i]);
 	  else if (!cdata[i].present)
 	free ((void *) (uintptr_t) devaddrs[i]);
diff --git a/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90 b/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90
new file mode 100644
index 000..04866edbba7
--- /dev/null
+++ b/libgomp/testsuite/libgomp.fortran/reverse-offload-6.f90
@@ -0,0 +1,32 @@
+!
+! Ensure that a mapping with no argument works
+!
+
+module m
+  implicit none (type, external)
+  integer :: x = 32
+  integer :: dev_num2 = -1
+contains
+subroutine  foo()
+  use omp_lib, only: omp_get_device_num
+  x = x + 10
+  dev_num2 = omp_get_device_num()
+end
+end module m
+
+use m
+use omp_lib
+!$omp requires reverse_offload
+implicit none (type, external)
+integer :: dev_num = -1
+!$omp target map(from:dev_num)
+  dev_num = omp_get_device_num()
+  ! This calls GOMP_target_ext with number of maps = 0
+  !$omp target device(ancestor:1)
+call foo
+  !$omp end target
+!$omp end target
+
+if (omp_get_num_devices() > 0 .and.  dev_num2 == dev_num) stop 1
+if (x /= 42) stop 2
+end


[pushed] analyzer: add deref-before-check-qemu-qtest_rsp_args.c test case

2023-02-02 Thread David Malcolm via Gcc-patches
Successfully regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-5654-g598e10cf415f0a.

gcc/testsuite/ChangeLog:
* gcc.dg/analyzer/deref-before-check-qemu-qtest_rsp_args.c: New test.

Signed-off-by: David Malcolm 
---
 .../deref-before-check-qemu-qtest_rsp_args.c  | 73 +++
 1 file changed, 73 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.dg/analyzer/deref-before-check-qemu-qtest_rsp_args.c

diff --git 
a/gcc/testsuite/gcc.dg/analyzer/deref-before-check-qemu-qtest_rsp_args.c 
b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-qemu-qtest_rsp_args.c
new file mode 100644
index 000..2b3ad8c5fb3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/deref-before-check-qemu-qtest_rsp_args.c
@@ -0,0 +1,73 @@
+/* Reduced from qemu-7.2.0's tests/qtest/libqtest.c.  */
+
+#define TRUE 1
+#define NULL ((void *)0)
+
+#define g_assert(expr) \
+  do { \
+if (expr) ; else /* { dg-warning "check of '\\*words' for NULL after 
already dereferencing it" } */ \
+  g_assertion_message_expr (#expr);\
+} while (0)
+
+voidg_assertion_message_expr(const char *expr) 
__attribute__((noreturn));
+
+extern int strcmp (const char *__s1, const char *__s2)
+  __attribute__ ((__nothrow__ , __leaf__, __pure__, __nonnull__ (1, 2)));
+typedef char gchar;
+typedef int gint;
+typedef gint gboolean;
+typedef struct _GString GString;
+
+struct _GString
+{
+  gchar *str;
+  /* [...snip...] */
+};
+
+extern
+gchar* g_string_free (GString *string,
+ gboolean free_segment);
+extern
+gchar** g_strsplit (const gchar *string,
+   const gchar *delimiter,
+   gint max_tokens);
+extern
+void g_strfreev (gchar **str_array);
+
+typedef struct QTestState QTestState;
+typedef GString* (*QTestRecvFn)(QTestState *);
+
+typedef struct QTestClientTransportOps {
+/* [...snip...] */
+QTestRecvFn recv_line;
+} QTestTransportOps;
+
+struct QTestState
+{
+/* [...snip...] */
+QTestTransportOps ops;
+/* [...snip...] */
+};
+
+gchar **qtest_rsp_args(QTestState *s, int expected_args)
+{
+GString *line;
+gchar **words;
+/* [...snip...] */
+
+redo:
+line = s->ops.recv_line(s);
+words = g_strsplit(line->str, " ", 0);
+g_string_free(line, TRUE);
+
+if (strcmp(words[0], "IRQ") == 0) { /* { dg-message "pointer '\\*words' is 
dereferenced here" } */
+/* [...snip...] */
+g_strfreev(words);
+goto redo;
+}
+
+g_assert(words[0] != NULL); /* { dg-message "in expansion of macro 
'g_assert'" } */
+/* [...snip...] */
+
+return words;
+}
-- 
2.26.3



[pushed] analyzer: fix -Wanalyzer-fd-type-mismatch false +ve on "listen" [PR108633]

2023-02-02 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Integration testing shows it fixes a false positive seen on qemu, and
has no other changes.
Pushed to trunk as r13-5655-gd84dc419e692d4.

gcc/analyzer/ChangeLog:
PR analyzer/108633
* sm-fd.cc (fd_state_machine::check_for_fd_attrs): Add missing
"continue".
(fd_state_machine::on_listen): Don't issue phase-mismatch or
type-mismatch warnings for the "invalid" state.

gcc/testsuite/ChangeLog:
PR analyzer/108633
* gcc.dg/analyzer/fd-pr108633.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/sm-fd.cc   |  8 ++-
 gcc/testsuite/gcc.dg/analyzer/fd-pr108633.c | 79 +
 2 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/fd-pr108633.c

diff --git a/gcc/analyzer/sm-fd.cc b/gcc/analyzer/sm-fd.cc
index 9225ac4acc4..494d802a1d4 100644
--- a/gcc/analyzer/sm-fd.cc
+++ b/gcc/analyzer/sm-fd.cc
@@ -1339,11 +1339,14 @@ fd_state_machine::check_for_fd_attrs (
  if (!(is_valid_fd_p (state) || (state == m_stop)))
{
  if (!is_constant_fd_p (state))
-   sm_ctxt->warn (node, stmt, arg,
-  make_unique
+   {
+ sm_ctxt->warn (node, stmt, arg,
+make_unique
 (*this, diag_arg,
  callee_fndecl, attr_name,
  arg_idx));
+ continue;
+   }
}
 
  switch (fd_attr_access_dir)
@@ -1906,6 +1909,7 @@ fd_state_machine::on_listen (const call_details &cd,
   if (!(old_state == m_start
|| old_state == m_constant_fd
|| old_state == m_stop
+   || old_state == m_invalid
|| old_state == m_bound_stream_socket
|| old_state == m_bound_unknown_socket
/* Assume it's OK to call "listen" more than once.  */
diff --git a/gcc/testsuite/gcc.dg/analyzer/fd-pr108633.c 
b/gcc/testsuite/gcc.dg/analyzer/fd-pr108633.c
new file mode 100644
index 000..6d923b7cfc0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/fd-pr108633.c
@@ -0,0 +1,79 @@
+/* Reduced from qemu-7.2.0's tests/qtest/libqtest.c.  */
+
+#defineEINTR 4
+
+#define g_assert_cmpint(n1, cmp, n2)   \
+  do { \
+gint64 __n1 = (n1), __n2 = (n2);   \
+if (__n1 cmp __n2) ; else  \
+  g_assertion_message_cmpnum ("", __FILE__, __LINE__, __func__, \
+ #n1 " " #cmp " " #n2, (long double) __n1, 
#cmp, (long double) __n2, 'i'); \
+  } while (0)
+
+typedef __SIZE_TYPE__ size_t;
+typedef unsigned int __socklen_t;
+extern int snprintf (char *__restrict __s, size_t __maxlen,
+   const char *__restrict __format, ...)
+ __attribute__ ((__nothrow__)) __attribute__ ((__format__ (__printf__, 3, 
4)));
+typedef __socklen_t socklen_t;
+extern int *__errno_location (void) __attribute__ ((__nothrow__ , __leaf__)) 
__attribute__ ((__const__));
+#define errno (*__errno_location ())
+typedef signed long gint64;
+typedef char gchar;
+extern
+void g_assertion_message_cmpnum (const char *domain,
+const char *file,
+int line,
+const char *func,
+const char *expr,
+long double arg1,
+const char *cmp,
+long double arg2,
+char numtype);
+enum __socket_type
+{
+  SOCK_STREAM = 1,
+  /* [...snip...] */
+};
+
+typedef unsigned short int sa_family_t;
+
+typedef union {
+  const struct sockaddr *__restrict __sockaddr__;
+  /* [...snip...] */
+} __CONST_SOCKADDR_ARG __attribute__ ((__transparent_union__));
+
+extern int socket (int __domain, int __type, int __protocol)
+  __attribute__ ((__nothrow__ , __leaf__));
+extern int bind (int __fd, __CONST_SOCKADDR_ARG __addr, socklen_t __len)
+  __attribute__ ((__nothrow__ , __leaf__));
+extern int listen (int __fd, int __n)
+  __attribute__ ((__nothrow__ , __leaf__));
+
+struct sockaddr_un
+{
+  sa_family_t sun_family;
+  char sun_path[108];
+};
+
+int qtest_socket_server(const char *socket_path)
+{
+struct sockaddr_un addr;
+int sock;
+int ret;
+
+sock = socket(1, SOCK_STREAM, 0); /* { dg-message "when 'socket' fails" } 
*/
+g_assert_cmpint(sock, !=, -1); /* this isn't marked "noreturn" */
+
+addr.sun_family = 1;
+snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", socket_path);
+
+do {
+ret = bind(sock, (struct sockaddr *)&addr, sizeof(addr));
+} while (ret == -1 && errno == EINTR);
+g_assert_cmpint(ret, !=, -1);
+ret = listen(sock, 1); /* { dg-war

[PATCH] tree: Use comdat tree_code_{type,length} even for C++11/14 [PR108634]

2023-02-02 Thread Jakub Jelinek via Gcc-patches
Hi!

The recent change to undo the tree_code_type/tree_code_length
excessive duplication apparently broke building the Linux kernel
plugin.  While it is certainly desirable that GCC plugins are built
with the same compiler as GCC has been built and with the same options
(at least the important ones), it might be hard to arrange that,
e.g. if gcc is built using a cross-compiler but the plugin then built
natively, or GCC isn't bootstrapped for other reasons, or just as in
the kernel case they were building the plugin with -std=gnu++11 while
the bootstrapped GCC has been built without any such option and so with
whatever the compiler defaulted to.

For C++17 and later tree_code_{type,length} are UNIQUE symbols with
those assembler names, while for C++11/14 they were
_ZL14tree_code_type and _ZL16tree_code_length.

The following patch uses a comdat var for those even for C++11/14
as suggested by Maciej Cencora.  Relying on weak attribute is not an
option because not all hosts support it and there are non-GNU system
compilers.  While we could use it unconditionally,
I think defining a template just to make it comdat is weird, and
the compiler itself is always built with the same compiler.
Plugins, being separate shared libraries, will have a separate copy of
the arrays if they are ODR-used in the plugin, so there is not a big
deal if e.g. cc1plus uses tree_code_type while plugin uses
_ZN19tree_code_type_tmplILi0EE14tree_code_typeE or vice versa.

Tested in non-bootstrapped build with both -std=gnu++17 and -std=gnu++11,
ok for trunk if it passes full bootstrap/regtest?

2023-02-02  Jakub Jelinek  

PR plugins/108634
* tree-core.h (tree_code_type, tree_code_length): For C++11 or
C++14, don't declare as extern const arrays.
(tree_code_type_tmpl, tree_code_length_tmpl): New types with
static constexpr member arrays for C++11 or C++14.
* tree.h (TREE_CODE_CLASS): For C++11 or C++14 use
tree_code_type_tmpl <0>::tree_code_type instead of tree_code_type.
(TREE_CODE_LENGTH): For C++11 or C++14 use
tree_code_length_tmpl <0>::tree_code_length instead of
tree_code_length.
* tree.cc (tree_code_type, tree_code_length): Remove.

--- gcc/tree-core.h.jj  2023-01-27 10:51:27.575399052 +0100
+++ gcc/tree-core.h 2023-02-02 15:06:05.048665279 +0100
@@ -2285,19 +2285,27 @@ struct floatn_type_info {
 extern bool tree_contains_struct[MAX_TREE_CODES][64];
 
 /* Class of tree given its code.  */
-#if __cpp_inline_variables >= 201606L
 #define DEFTREECODE(SYM, NAME, TYPE, LENGTH) TYPE,
 #define END_OF_BASE_TREE_CODES tcc_exceptional,
 
+#if __cpp_inline_variables < 201606L
+template 
+struct tree_code_type_tmpl {
+  static constexpr enum tree_code_class tree_code_type[] = {
+#include "all-tree.def"
+  };
+};
+
+template 
+constexpr enum tree_code_class tree_code_type_tmpl::tree_code_type[];
+#else
 constexpr inline enum tree_code_class tree_code_type[] = {
 #include "all-tree.def"
 };
+#endif
 
 #undef DEFTREECODE
 #undef END_OF_BASE_TREE_CODES
-#else
-extern const enum tree_code_class tree_code_type[];
-#endif
 
 /* Each tree code class has an associated string representation.
These must correspond to the tree_code_class entries.  */
@@ -2305,18 +2313,27 @@ extern const char *const tree_code_class
 
 /* Number of argument-words in each kind of tree-node.  */
 
-#if __cpp_inline_variables >= 201606L
 #define DEFTREECODE(SYM, NAME, TYPE, LENGTH) LENGTH,
 #define END_OF_BASE_TREE_CODES 0,
+
+#if __cpp_inline_variables < 201606L
+template 
+struct tree_code_length_tmpl {
+  static constexpr unsigned char tree_code_length[] = {
+#include "all-tree.def"
+  };
+};
+
+template 
+constexpr unsigned char tree_code_length_tmpl::tree_code_length[];
+#else
 constexpr inline unsigned char tree_code_length[] = {
 #include "all-tree.def"
 };
+#endif
 
 #undef DEFTREECODE
 #undef END_OF_BASE_TREE_CODES
-#else
-extern const unsigned char tree_code_length[];
-#endif
 
 /* Vector of all alias pairs for global symbols.  */
 extern GTY(()) vec *alias_pairs;
--- gcc/tree.h.jj   2023-01-27 20:09:16.183970583 +0100
+++ gcc/tree.h  2023-02-02 14:37:17.255004291 +0100
@@ -177,7 +177,12 @@ code_helper::is_builtin_fn () const
 #define TREE_CODE_CLASS_STRING(CLASS)\
 tree_code_class_strings[(int) (CLASS)]
 
+#if __cpp_inline_variables < 201606L
+#define TREE_CODE_CLASS(CODE)  \
+  tree_code_type_tmpl <0>::tree_code_type[(int) (CODE)]
+#else
 #define TREE_CODE_CLASS(CODE)  tree_code_type[(int) (CODE)]
+#endif
 
 /* Nonzero if NODE represents an exceptional code.  */
 
@@ -271,7 +276,12 @@ code_helper::is_builtin_fn () const
 
 #define EXPR_P(NODE) IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (TREE_CODE (NODE)))
 
+#if __cpp_inline_variables < 201606L
+#define TREE_CODE_LENGTH(CODE) \
+  tree_code_length_tmpl <0>::tree_code_length[(int) (CODE)]
+#else
 #define TREE_CODE_LENGTH(CODE) tree_code_length[(int) (CODE)]
+#endif
 
 
 /* Helper macros for math builtins.  */
--- gc

[Patch] libgomp: enable reverse offload for AMDGCN

2023-02-02 Thread Tobias Burnus

Now that the stack handling has been changed for AMDGCN, this patch enables 
reverse offload.
(cf. today's "[committed] amdgcn, libgomp: Manually allocated stacks" patch 
email/commit
by Andrew).

Any comments, suggestions?

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH 2/2] Documentation Update.

2023-02-02 Thread Qing Zhao via Gcc-patches


> On Feb 2, 2023, at 3:33 AM, Richard Biener  wrote:
> 
> On Wed, 1 Feb 2023, Siddhesh Poyarekar wrote:
> 
>> On 2023-02-01 13:24, Qing Zhao wrote:
>>> 
>>> 
 On Feb 1, 2023, at 11:55 AM, Siddhesh Poyarekar 
 wrote:
 
 On 2023-01-31 09:11, Qing Zhao wrote:
> Update documentation to clarify a GCC extension on structure with
> flexible array member being nested in another structure.
> gcc/ChangeLog:
> * doc/extend.texi: Document GCC extension on a structure containing
> a flexible array member to be a member of another structure.
 
 Should this resolve pr#77650 since the proposed action there appears to be
 to document these semantics?
>>> 
>>> My understanding of pr77650 is specifically for documentation on the
>>> following case:
>>> 
>>> The structure with a flexible array member is the middle field of another
>>> structure.
>>> 
>>> Which I added in the documentation as the 2nd situation.
>>> However, I am still not very comfortable on my current clarification on this
>>> situation: how should we document on
>>> the expected gcc behavior to handle such situation?
>> 
>> I reckon wording that dissuades programmers from using this might be
>> appropriate, i.e. don't rely on this and if you already have such nested flex
>> arrays, change code to remove them.
>> 
> +In the above, @code{flex_data.data[]} is allowed to be extended flexibly
> to
> +the padding. E.g, up to 4 elements.
>> 
>> """
>> ... Relying on space in struct padding is bad programming practice and any
>> code relying on this behaviour should be modified to ensure that flexible
>> array members only end up at the ends of arrays.  The `-pedantic` flag should
>> help identify such uses.
>> """
>> 
>> Although -pedantic will also flag on flex arrays nested in structs even if
>> they're at the end of the parent struct, so my suggestion on the warning is
>> not really perfect.
> 
> Wow, so I checked and we indeed accept
> 
> struct X { int n; int data[]; };
> struct Y { struct X x; int end; };
> 
> and -pedantic says
> 
> t.c:2:21: warning: invalid use of structure with flexible array member 
> [-Wpedantic]
>2 | struct Y { struct X x; int end; };
>  |

Currently, -pedantic reports the same message for flex arrays nested in structs
at the end of the parent struct AND in the middle of the parent struct. 
Shall we distinguish them and report different warning messages in order to 
discourage the latter case? 

And at the same time, in the documentation, clarify these two situations, and 
discourage the latter case at the same time as well?
>   
> 
> and clang reports
> 
> t.c:2:21: warning: field 'x' with variable sized type 'struct X' not at 
> the end of a struct or class is a GNU extension 
> [-Wgnu-variable-sized-type-not-at-end]
> struct Y { struct X x; int end; };
>  
>  ^

Clang’s warning message is clearer. 
> 
> looking at PR77650 what seems missing there is the semantics of this
> extension as expected/required by the glibc use.  comment#5 seems
> to suggest that for my example above its expected that
> Y.x.data[0] aliases Y.end?!

Should we mention this alias relationship in the doc?

>  There must be a better way to write
> the glibc code and IMHO it would be best to deprecate this extension.

Agreed. This is really a bad practice and should be deprecated.
We can give a warning first in this release, and then deprecate this extension in
a later release.

> Definitely the middle-end wouldn't consider this aliasing for
> my example - maybe it "works" when wrapped inside a union but
> then for sure only when the union is visible in all accesses ...
> 
> typedef union
> {
>  struct __gconv_info __cd;
>  struct
>  {
>struct __gconv_info __cd;
>struct __gconv_step_data __data;
>  } __combined;
> } _G_iconv_t;
> 
> could be written as
> 
> typedef union
> {
>  struct __gconv_info __cd;
>  char __dummy[sizeof(struct __gconv_info) + sizeof(struct 
> __gconv_step_data)];
> } _G_iconv_t;
> 
> in case the intent is to provide a complete type with space for
> a single __gconv_step_data.

Since the current middle end doesn’t handle such a case consistently, how should
we document this case?
Or should we just mention that this case is not handled consistently in the
compiler and will be deprecated in the future, and that
users should not depend on it and should rewrite their code?

I don’t think it is worth the effort to update GCC to consistently handle this
case in general.

What’s your opinion?

Qing


> 
> Richard.



Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread Jeff Law via Gcc-patches




On 2/2/23 05:26, Richard Biener wrote:

On Thu, 2 Feb 2023, juzhe.zh...@rivai.ai wrote:


Yeah, Thanks. You are right. CSE should do the job.
Now I know the reason CSE failed to optimize is I include 
VL_REGNUM(66)/VTYPE_RENUM(67) hard reg
as the dependency of pred_broadcast:
(insn 19 18 20 4 (set (reg:VNx1DI 152)

 (if_then_else:VNx1DI (unspec:VNx1BI [
 (const_vector:VNx1BI repeat [
 (const_int 1 [0x1])
 ])
 (const_int 4 [0x4])
 (const_int 2 [0x2]) repeated x2
 (const_int 0 [0])
 (reg:SI 66 vl)
 (reg:SI 67 vtype)
 ] UNSPEC_VPREDICATE)
 (vec_duplicate:VNx1DI (reg/v:DI 148 [ x ]))
 (unspec:VNx1DI [
 (const_int 0 [0])
 ] UNSPEC_VUNDEF))) "rvv.c":22:23 695 {pred_broadcastvnx1di}
  (nil))

Then CSE failed to set the 152 as copy.

VL_REGNUM(66)/VTYPE_RENUM(67) are the global hard reg that I should make each 
RVV instruction depend on them.
Since we use vsetvl instruction (which is setting global 
VL_REGNUM(66)/VTYPE_RENUM(67) status) to set the global status for
each RVV instruction.
Including the dependency here is to make sure the global VL/VTYPE status is 
correct of each RVV instruction. (If we don't include
such dependency in RVV instruction, instruction scheduling may move the RVV 
instructions and vsetvl instructions randomly then
produce incorrect vsetvl configuration)

The original reg_class of VL_REGNUM(66)/VTYPE_RENUM(67) I set here:
riscv_regno_to_class [VL_REGNUM] = VL_REGS;
riscv_regno_to_class [VTYPE_RENUM] = VTYPE_REGS;
Such a configuration makes CSE fail.

However, if I change the reg_class :
riscv_regno_to_class [VL_REGNUM] = NO_REGS;
riscv_regno_to_class [VTYPE_RENUM] = NO_REGS;
CSE can do the optimization now!

1) Would you mind telling me the difference between them?


No idea.  I think CSE avoids to touch hard register references because
eliding them to copies can increase register pressure.
IIRC the costing is set up differently and for a given partition a
pseudo will be preferred over a hard reg.  This is in addition to other 
places that test the small register class hooks.







2) If I set these 2 global status register as NO_REGS, will it create
issues for the global status configuration of each RVV instructions ?


No idea either.  Usually these kind of dependences are introduced
by targets at the point the VL setting is introduced to avoid
pessimizing optimizations earlier.  Often, for cases like a VL
register, this is done after register allocation only and indeed
necessary to avoid the second scheduling pass from breaking things.
Yea.  I'm wondering about when the right place to introduce these 
dependencies might be.  I'm still a few months out from worrying about 
RVV, but it's not too far away.

jeff


Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread Jeff Law via Gcc-patches




On 2/2/23 05:35, juzhe.zh...@rivai.ai wrote:

Thank you so much. Kito helped me fix it already.
RVV instruction patterns can have CSE optimizations now.

What was the issue?

jeff


Re: [PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-02-02 Thread Qing Zhao via Gcc-patches



> On Feb 2, 2023, at 8:54 AM, Richard Biener  wrote:
> 
> On Thu, 2 Feb 2023, Qing Zhao wrote:
> 
>> 
>> 
>>> On Feb 2, 2023, at 3:07 AM, Richard Biener  wrote:
>>> 
>>> On Wed, 1 Feb 2023, Qing Zhao wrote:
>>> 
 
 
> On Feb 1, 2023, at 6:41 AM, Richard Biener  wrote:
> 
> On Tue, 31 Jan 2023, Qing Zhao wrote:
> 
>> GCC extension accepts the case when a struct with a flexible array member
>> is embedded into another struct (possibly recursively).
>> __builtin_object_size should treat such struct as flexible size per
>> -fstrict-flex-arrays.
>> 
>>  PR tree-optimization/101832
>> 
>> gcc/ChangeLog:
>> 
>>  PR tree-optimization/101832
>>  * tree-object-size.cc (flexible_size_type_p): New function.
>>  (addr_object_size): Handle structure/union type when it has
>>  flexible size.
>> 
>> gcc/testsuite/ChangeLog:
>> 
>>  PR tree-optimization/101832
>>  * gcc.dg/builtin-object-size-pr101832-2.c: New test.
>>  * gcc.dg/builtin-object-size-pr101832-3.c: New test.
>>  * gcc.dg/builtin-object-size-pr101832-4.c: New test.
>>  * gcc.dg/builtin-object-size-pr101832.c: New test.
>> ---
>> .../gcc.dg/builtin-object-size-pr101832-2.c   | 135 ++
>> .../gcc.dg/builtin-object-size-pr101832-3.c   | 135 ++
>> .../gcc.dg/builtin-object-size-pr101832-4.c   | 135 ++
>> .../gcc.dg/builtin-object-size-pr101832.c | 119 +++
>> gcc/tree-object-size.cc   | 115 +++
>> 5 files changed, 611 insertions(+), 28 deletions(-)
>> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-3.c
>> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832-4.c
>> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
>> 
>> diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c 
>> b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>> new file mode 100644
>> index 000..f38babc5415
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832-2.c
>> @@ -0,0 +1,135 @@
>> +/* PR 101832: 
>> +   GCC extension accepts the case when a struct with a flexible array 
>> member
>> +   is embedded into another struct (possibly recursively).
>> +   __builtin_object_size will treat such struct as flexible size per
>> +   -fstrict-flex-arrays.  */ 
>> +/* { dg-do run } */
>> +/* { dg-options "-O2 -fstrict-flex-arrays=1" } */
>> +
>> +#include 
>> +
>> +unsigned n_fails = 0;
>> +
>> +#define expect(p, _v) do { \
>> +  size_t v = _v; \
>> +  if (p == v) \
>> +printf("ok:  %s == %zd\n", #p, p); \
>> +  else {\
>> +printf("WAT: %s == %zd (expected %zd)\n", #p, p, v); \
>> +n_fails++; \
>> +  } \
>> +} while (0);
>> +
>> +struct A {
>> +  int n;
>> +  char data[];/* Content following header */
>> +};
>> +
>> +struct B {
>> +  int m;
>> +  struct A a;
>> +};
>> +
>> +struct C {
>> +  int q;
>> +  struct B b;
>> +};
>> +
>> +struct A0 {
>> +  int n;
>> +  char data[0];/* Content following header */
>> +};
>> +
>> +struct B0 {
>> +  int m;
>> +  struct A0 a;
>> +};
>> +
>> +struct C0 {
>> +  int q;
>> +  struct B0 b;
>> +};
>> +
>> +struct A1 {
>> +  int n;
>> +  char data[1];/* Content following header */
>> +};
>> +
>> +struct B1 {
>> +  int m;
>> +  struct A1 a;
>> +};
>> +
>> +struct C1 {
>> +  int q;
>> +  struct B1 b;
>> +};
>> +
>> +struct An {
>> +  int n;
>> +  char data[8];/* Content following header */
>> +};
>> +
>> +struct Bn {
>> +  int m;
>> +  struct An a;
>> +};
>> +
>> +struct Cn {
>> +  int q;
>> +  struct Bn b;
>> +};
>> +
>> +volatile void *magic1, *magic2;
>> +
>> +int main(int argc, char *argv[])
>> +{
>> +struct B *outer;
>> +struct C *outest;
>> +
>> +/* Make sure optimization can't find some other object size. */
>> +outer = (void *)magic1;
>> +outest = (void *)magic2;
>> +
>> +expect(__builtin_object_size(&outer->a, 1), -1);
>> +expect(__builtin_object_size(&outest->b, 1), -1);
>> +expect(__builtin_object_size(&outest->b.a, 1), -1);
>> +
>> +struct B0 *outer0;
>> +struct C0 *outest0;
>> +
>> +/* Make sure optimization can't find some other object size. */
>> +outer0 = (void *)magic1;
>> +outest0 = (void *)magic2;
>> +
>> +expect(__builtin_object_size(&outer0->a, 1), -1);
>> +expect(__builtin_object_size(&outest0->b, 1), -1);
>>

Re: Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread juzhe.zhong
We set VL/VTYPE, these 2 implicit global status dependency registers, as fixed regs.
Then CSE can do the optimization now.

>> Yea.  I'm wondering about when the right place to introduce these
>>dependencies might be.  I'm still a few months out from worrying about
>>RVV, but it's not too far away.
You don't need to worry about RVV. I can promise you that RVV support in GCC 
will be solid and
optimal. You can just try. For example, try VSETVL PASS,  this PASS implemented 
in GCC is much better
than LLVM. I have included so many fancy optimizations there.


juzhe.zh...@rivai.ai
 
From: Jeff Law
Date: 2023-02-02 22:36
To: juzhe.zh...@rivai.ai; rguenther
CC: gcc-patches; kito.cheng; richard.sandiford; apinski
Subject: Re: [PATCH] CPROP: Allow cprop optimization when the function has a 
single block
 
 
On 2/2/23 05:35, juzhe.zh...@rivai.ai wrote:
> Thank you so much. Kito helped me fix it already.
> RVV instruction patterns can have CSE optimizations now.
What was the issue?
 
jeff
 


Re: [aarch64] Use dup and zip1 for interleaving elements in initializing vector

2023-02-02 Thread Prathamesh Kulkarni via Gcc-patches
On Wed, 1 Feb 2023 at 21:56, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> > On Thu, 12 Jan 2023 at 21:21, Richard Sandiford
> >  wrote:
> >>
> >> Prathamesh Kulkarni  writes:
> >> > On Tue, 6 Dec 2022 at 07:01, Prathamesh Kulkarni
> >> >  wrote:
> >> >>
> >> >> On Mon, 5 Dec 2022 at 16:50, Richard Sandiford
> >> >>  wrote:
> >> >> >
> >> >> > Richard Sandiford via Gcc-patches  writes:
> >> >> > > Prathamesh Kulkarni  writes:
> >> >> > >> Hi,
> >> >> > >> For the following test-case:
> >> >> > >>
> >> >> > >> int16x8_t foo(int16_t x, int16_t y)
> >> >> > >> {
> >> >> > >>   return (int16x8_t) { x, y, x, y, x, y, x, y };
> >> >> > >> }
> >> >> > >>
> >> >> > >> Code gen at -O3:
> >> >> > >> foo:
> >> >> > >> dupv0.8h, w0
> >> >> > >> ins v0.h[1], w1
> >> >> > >> ins v0.h[3], w1
> >> >> > >> ins v0.h[5], w1
> >> >> > >> ins v0.h[7], w1
> >> >> > >> ret
> >> >> > >>
> >> >> > >> For 16 elements, it results in 8 ins instructions which might not 
> >> >> > >> be
> >> >> > >> optimal perhaps.
> >> >> > >> I guess, the above code-gen would be equivalent to the following ?
> >> >> > >> dup v0.8h, w0
> >> >> > >> dup v1.8h, w1
> >> >> > >> zip1 v0.8h, v0.8h, v1.8h
> >> >> > >>
> >> >> > >> I have attached patch to do the same, if number of elements >= 8,
> >> >> > >> which should be possibly better compared to current code-gen ?
> >> >> > >> Patch passes bootstrap+test on aarch64-linux-gnu.
> >> >> > >> Does the patch look OK ?
> >> >> > >>
> >> >> > >> Thanks,
> >> >> > >> Prathamesh
> >> >> > >>
> >> >> > >> diff --git a/gcc/config/aarch64/aarch64.cc 
> >> >> > >> b/gcc/config/aarch64/aarch64.cc
> >> >> > >> index c91df6f5006..e5dea70e363 100644
> >> >> > >> --- a/gcc/config/aarch64/aarch64.cc
> >> >> > >> +++ b/gcc/config/aarch64/aarch64.cc
> >> >> > >> @@ -22028,6 +22028,39 @@ aarch64_expand_vector_init (rtx target, 
> >> >> > >> rtx vals)
> >> >> > >>return;
> >> >> > >>  }
> >> >> > >>
> >> >> > >> +  /* Check for interleaving case.
> >> >> > >> + For eg if initializer is (int16x8_t) {x, y, x, y, x, y, x, 
> >> >> > >> y}.
> >> >> > >> + Generate following code:
> >> >> > >> + dup v0.h, x
> >> >> > >> + dup v1.h, y
> >> >> > >> + zip1 v0.h, v0.h, v1.h
> >> >> > >> + for "large enough" initializer.  */
> >> >> > >> +
> >> >> > >> +  if (n_elts >= 8)
> >> >> > >> +{
> >> >> > >> +  int i;
> >> >> > >> +  for (i = 2; i < n_elts; i++)
> >> >> > >> +if (!rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, i % 
> >> >> > >> 2)))
> >> >> > >> +  break;
> >> >> > >> +
> >> >> > >> +  if (i == n_elts)
> >> >> > >> +{
> >> >> > >> +  machine_mode mode = GET_MODE (target);
> >> >> > >> +  rtx dest[2];
> >> >> > >> +
> >> >> > >> +  for (int i = 0; i < 2; i++)
> >> >> > >> +{
> >> >> > >> +  rtx x = copy_to_mode_reg (GET_MODE_INNER (mode), 
> >> >> > >> XVECEXP (vals, 0, i));
> >> >> > >
> >> >> > > Formatting nit: long line.
> >> >> > >
> >> >> > >> +  dest[i] = gen_reg_rtx (mode);
> >> >> > >> +  aarch64_emit_move (dest[i], gen_vec_duplicate (mode, 
> >> >> > >> x));
> >> >> > >> +}
> >> >> > >
> >> >> > > This could probably be written:
> >> >> > >
> >> >> > > for (int i = 0; i < 2; i++)
> >> >> > >   {
> >> >> > > rtx x = expand_vector_broadcast (mode, XVECEXP (vals, 
> >> >> > > 0, i));
> >> >> > > dest[i] = force_reg (GET_MODE_INNER (mode), x);
> >> >> >
> >> >> > Oops, I meant "mode" rather than "GET_MODE_INNER (mode)", sorry.
> >> >> Thanks, I have pushed the change in
> >> >> 769370f3e2e04823c8a621d8ffa756dd83ebf21e after running
> >> >> bootstrap+test on aarch64-linux-gnu.
> >> > Hi Richard,
> >> > I have attached a patch that extends the transform if one half is dup
> >> > and other is set of constants.
> >> > For eg:
> >> > int8x16_t f(int8_t x)
> >> > {
> >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4, x, 5, x, 6, x, 7, x, 8 };
> >> > }
> >> >
> >> > code-gen trunk:
> >> > f:
> >> > adrpx1, .LC0
> >> > ldr q0, [x1, #:lo12:.LC0]
> >> > ins v0.b[0], w0
> >> > ins v0.b[2], w0
> >> > ins v0.b[4], w0
> >> > ins v0.b[6], w0
> >> > ins v0.b[8], w0
> >> > ins v0.b[10], w0
> >> > ins v0.b[12], w0
> >> > ins v0.b[14], w0
> >> > ret
> >> >
> >> > code-gen with patch:
> >> > f:
> >> > dup v0.16b, w0
> >> > adrpx0, .LC0
> >> > ldr q1, [x0, #:lo12:.LC0]
> >> > zip1v0.16b, v0.16b, v1.16b
> >> > ret
> >> >
> >> > Bootstrapped+tested on aarch64-linux-gnu.
> >> > Does it look OK ?
> >>
> >> Looks like a nice improvement.  It'll need to wait for GCC 14 now though.
> >>
> >> However, rather than handle this case specially, I think we should instead
> >> take a divide-and-conquer approach: split the initialis

[pushed] testsuite: Add case-values-threshold to pr107876.C

2023-02-02 Thread Richard Sandiford via Gcc-patches
This test was failing on aarch64 because aarch64 overrides
TARGET_CASE_VALUES_THRESHOLD.  The maximum value that allows
the test to pass appears to be 6, but the default threshold
is 4 or 5 (depending on whether casesi is provided).
Going for 4 seemed safest, in case any target-specific
features force the maximum passing threshold to be lower
on some targets.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  Pushed as obvious.

Richard


gcc/testsuite/
* g++.dg/tree-ssa/pr107876.C: Add --param case-values-threshold=4.
---
 gcc/testsuite/g++.dg/tree-ssa/pr107876.C | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr107876.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr107876.C
index 6cff2f1b166..d920ba050ea 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr107876.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr107876.C
@@ -1,6 +1,6 @@
 // { dg-do compile }
 // { dg-require-effective-target c++11 }
-// { dg-options "-O2 -funswitch-loops --param max-unswitch-insns=5 
-fdump-tree-unswitch-details" }
+// { dg-options "-O2 -funswitch-loops --param max-unswitch-insns=5 --param 
case-values-threshold=4 -fdump-tree-unswitch-details" }
 
 class X {
 public:
-- 
2.25.1



[pushed] rtl-ssa: Extend m_num_defs to a full unsigned int [PR108086]

2023-02-02 Thread Richard Sandiford via Gcc-patches
insn_info tried to save space by storing the number of
definitions in a 16-bit bitfield.  The justification was:

  // ...  FIRST_PSEUDO_REGISTER + 1
  // is the maximum number of accesses to hard registers and memory, and
  // MAX_RECOG_OPERANDS is the maximum number of pseudos that can be
  // defined by an instruction, so the number of definitions should fit
  // easily in 16 bits.

But while that reasoning holds (I think) for real instructions,
it doesn't hold for artificial instructions.  I don't think there's
any sensible higher limit we can use, so this patch goes for a full
unsigned int.

Tested on aarch64-linux-gnu.  Pushed (as obvious) to trunk so far,
will backport to GCC 12 and GCC 11 too.

Richard


gcc/
PR rtl-optimization/108086
* rtl-ssa/insns.h (insn_info): Make m_num_defs a full unsigned int.
Adjust size-related commentary accordingly.
---
 gcc/rtl-ssa/insns.h | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/gcc/rtl-ssa/insns.h b/gcc/rtl-ssa/insns.h
index ffaf22d4b8e..a604fe295cd 100644
--- a/gcc/rtl-ssa/insns.h
+++ b/gcc/rtl-ssa/insns.h
@@ -141,7 +141,7 @@ using insn_call_clobbers_tree = 
default_splay_tree;
 // of "notes", a bit like REG_NOTES for the underlying RTL insns.
 class insn_info
 {
-  // Size: 8 LP64 words.
+  // Size: 9 LP64 words.
   friend class ebb_info;
   friend class function_info;
 
@@ -401,10 +401,11 @@ private:
   // The number of definitions and the number uses.  FIRST_PSEUDO_REGISTER + 1
   // is the maximum number of accesses to hard registers and memory, and
   // MAX_RECOG_OPERANDS is the maximum number of pseudos that can be
-  // defined by an instruction, so the number of definitions should fit
-  // easily in 16 bits.
+  // defined by an instruction, so the number of definitions in a real
+  // instruction should fit easily in 16 bits.  However, there are no
+  // limits on the number of definitions in artifical instructions.
   unsigned int m_num_uses;
-  unsigned int m_num_defs : 16;
+  unsigned int m_num_defs;
 
   // Flags returned by the accessors above.
   unsigned int m_is_debug_insn : 1;
@@ -414,7 +415,7 @@ private:
   unsigned int m_has_volatile_refs : 1;
 
   // For future expansion.
-  unsigned int m_spare : 11;
+  unsigned int m_spare : 27;
 
   // The program point at which the instruction occurs.
   //
@@ -431,6 +432,9 @@ private:
   // instruction.
   mutable int m_cost_or_uid;
 
+  // On LP64 systems, there's a gap here that could be used for future
+  // expansion.
+
   // The list of notes that have been attached to the instruction.
   insn_note *m_first_note;
 };
-- 
2.25.1



Re: [Patch] libgomp: enable reverse offload for AMDGCN

2023-02-02 Thread Tobias Burnus

Maybe it becomes better reviewable with an attached patch ...

On 02.02.23 15:31, Tobias Burnus wrote:

Now that the stack handling has been changed for AMDGCN, this patch
enables reverse offload.
(cf. today's "[committed] amdgcn, libgomp: Manually allocated stacks"
patch email/commit
by Andrew).

Any comments, suggestions?

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
libgomp: enable reverse offload for AMDGCN

libgomp/ChangeLog:

	* libgomp.texi (5.0 Impl. Status, gcn specifics): Update for
	reverse offload.
	* plugin/plugin-gcn.c (GOMP_OFFLOAD_get_num_devices): Accept
	reverse-offload requirement.

 libgomp/libgomp.texi| 13 -
 libgomp/plugin/plugin-gcn.c |  3 ++-
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/libgomp/libgomp.texi b/libgomp/libgomp.texi
index 1f84b050eb2..698ae330942 100644
--- a/libgomp/libgomp.texi
+++ b/libgomp/libgomp.texi
@@ -227,8 +227,7 @@ The OpenMP 4.5 specification is fully supported.
 @item @code{allocate} directive @tab N @tab
 @item @code{allocate} clause @tab P @tab Initial support
 @item @code{use_device_addr} clause on @code{target data} @tab Y @tab
-@item @code{ancestor} modifier on @code{device} clause
-  @tab Y @tab Host fallback with GCN devices
+@item @code{ancestor} modifier on @code{device} clause @tab Y @tab
 @item Implicit declare target directive @tab Y @tab
 @item Discontiguous array section with @code{target update} construct
   @tab N @tab
@@ -4455,9 +4454,13 @@ The implementation remark:
 @item I/O within OpenMP target regions and OpenACC parallel/kernels is supported
   using the C library @code{printf} functions and the Fortran
   @code{print}/@code{write} statements.
-@item OpenMP code that has a requires directive with @code{unified_address},
-  @code{unified_shared_memory} or @code{reverse_offload} will remove
-  any GCN device from the list of available devices (``host fallback'').
+@item Reverse offload (i.e. @code{target} regions with
+  @code{device(ancestor:1)}) are processed serially per @code{target} region
+  such that the next reverse offload region is only executed after the previous
+  one returned.
+@item OpenMP code that has a requires directive with @code{unified_address} or
+  @code{unified_shared_memory} will remove any GCN device from the list of
+  available devices (``host fallback'').
 @end itemize
 
 
diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c
index a7b35059ab3..11ce6b0fa8d 100644
--- a/libgomp/plugin/plugin-gcn.c
+++ b/libgomp/plugin/plugin-gcn.c
@@ -3262,7 +3262,8 @@ GOMP_OFFLOAD_get_num_devices (unsigned int omp_requires_mask)
 return 0;
   /* Return -1 if no omp_requires_mask cannot be fulfilled but
  devices were present.  */
-  if (hsa_context.agent_count > 0 && omp_requires_mask != 0)
+  if (hsa_context.agent_count > 0
+  && (omp_requires_mask & ~GOMP_REQUIRES_REVERSE_OFFLOAD) != 0)
 return -1;
   return hsa_context.agent_count;
 }


Re: [PATCH] CPROP: Allow cprop optimization when the function has a single block

2023-02-02 Thread Kito Cheng via Gcc-patches
> > Thank you so much. Kito helped me fix it already.
> > RVV instruction patterns can have CSE optimizations now.
> What was the issue?

VL and VTYPE weren't listed as fixed registers, so CSE considered them
not cheap (see CHEAP_REGNO in cse.cc).
But it was actually a mistake to set the VL and VTYPE registers as
non-fixed registers:
they are entirely managed by the vsetvl insertion pass and are not
involved in the register allocation
process, so they should be set to 1 in FIXED_REGISTERS.

The CSE pass is then happy to CSE that after we fix that :)

More story behind that is we were trying to rely on RA to manage VL
and VTYPE before,
and then...we gave up and decided to manage that by ourselves.


Re: [aarch64] Use dup and zip1 for interleaving elements in initializing vector

2023-02-02 Thread Richard Sandiford via Gcc-patches
Prathamesh Kulkarni  writes:
>> >> > I have attached a patch that extends the transform if one half is dup
>> >> > and other is set of constants.
>> >> > For eg:
>> >> > int8x16_t f(int8_t x)
>> >> > {
>> >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4, x, 5, x, 6, x, 7, x, 8 };
>> >> > }
>> >> >
>> >> > code-gen trunk:
>> >> > f:
>> >> > adrpx1, .LC0
>> >> > ldr q0, [x1, #:lo12:.LC0]
>> >> > ins v0.b[0], w0
>> >> > ins v0.b[2], w0
>> >> > ins v0.b[4], w0
>> >> > ins v0.b[6], w0
>> >> > ins v0.b[8], w0
>> >> > ins v0.b[10], w0
>> >> > ins v0.b[12], w0
>> >> > ins v0.b[14], w0
>> >> > ret
>> >> >
>> >> > code-gen with patch:
>> >> > f:
>> >> > dup v0.16b, w0
>> >> > adrpx0, .LC0
>> >> > ldr q1, [x0, #:lo12:.LC0]
>> >> > zip1v0.16b, v0.16b, v1.16b
>> >> > ret
>> >> >
>> >> > Bootstrapped+tested on aarch64-linux-gnu.
>> >> > Does it look OK ?
>> >>
>> >> Looks like a nice improvement.  It'll need to wait for GCC 14 now though.
>> >>
>> >> However, rather than handle this case specially, I think we should instead
>> >> take a divide-and-conquer approach: split the initialiser into even and
>> >> odd elements, find the best way of loading each part, then compare the
>> >> cost of these sequences + ZIP with the cost of the fallback code (the code
>> >> later in aarch64_expand_vector_init).
>> >>
>> >> For example, doing that would allow:
>> >>
>> >>   { x, y, 0, y, 0, y, 0, y, 0, y }
>> >>
>> >> to be loaded more easily, even though the even elements aren't wholly
>> >> constant.
>> > Hi Richard,
>> > I have attached a prototype patch based on the above approach.
>> > It subsumes specializing for above {x, y, x, y, x, y, x, y} case by 
>> > generating
>> > same sequence, thus I removed that hunk, and improves the following cases:
>> >
>> > (a)
>> > int8x16_t f_s16(int8_t x)
>> > {
>> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4,
>> >  x, 5, x, 6, x, 7, x, 8 };
>> > }
>> >
>> > code-gen trunk:
>> > f_s16:
>> > adrpx1, .LC0
>> > ldr q0, [x1, #:lo12:.LC0]
>> > ins v0.b[0], w0
>> > ins v0.b[2], w0
>> > ins v0.b[4], w0
>> > ins v0.b[6], w0
>> > ins v0.b[8], w0
>> > ins v0.b[10], w0
>> > ins v0.b[12], w0
>> > ins v0.b[14], w0
>> > ret
>> >
>> > code-gen with patch:
>> > f_s16:
>> > dup v0.16b, w0
>> > adrpx0, .LC0
>> > ldr q1, [x0, #:lo12:.LC0]
>> > zip1v0.16b, v0.16b, v1.16b
>> > ret
>> >
>> > (b)
>> > int8x16_t f_s16(int8_t x, int8_t y)
>> > {
>> >   return (int8x16_t) { x, y, 1, y, 2, y, 3, y,
>> > 4, y, 5, y, 6, y, 7, y };
>> > }
>> >
>> > code-gen trunk:
>> > f_s16:
>> > adrpx2, .LC0
>> > ldr q0, [x2, #:lo12:.LC0]
>> > ins v0.b[0], w0
>> > ins v0.b[1], w1
>> > ins v0.b[3], w1
>> > ins v0.b[5], w1
>> > ins v0.b[7], w1
>> > ins v0.b[9], w1
>> > ins v0.b[11], w1
>> > ins v0.b[13], w1
>> > ins v0.b[15], w1
>> > ret
>> >
>> > code-gen patch:
>> > f_s16:
>> > adrpx2, .LC0
>> > dup v1.16b, w1
>> > ldr q0, [x2, #:lo12:.LC0]
>> > ins v0.b[0], w0
>> > zip1v0.16b, v0.16b, v1.16b
>> > ret
>>
>> Nice.
>>
>> > There are a couple of issues I have come across:
>> > (1) Choosing element to pad vector.
>> > For eg, if we are initializing a vector say { x, y, 0, y, 1, y, 2, y }
>> > with mode V8HI.
>> > We split it into { x, 0, 1, 2 } and { y, y, y, y}
>> > However since the mode is V8HI, we would need to pad the above split 
>> > vectors
>> > with 4 more elements to match up to vector length.
>> > For {x, 0, 1, 2} using any constant is the obvious choice while for {y, y, 
>> > y, y}
>> > using 'y' is the obvious choice thus making them:
>> > {x, 0, 1, 2, 0, 0, 0, 0} and {y, y, y, y, y, y, y, y}
>> > These would be then merged using zip1 which would discard the lower half
>> > of both vectors.
>> > Currently I encoded the above two heuristics in
>> > aarch64_expand_vector_init_get_padded_elem:
>> > (a) If split portion contains a constant, use the constant to pad the 
>> > vector.
>> > (b) If split portion only contains variables, then use the most
>> > frequently repeating variable
>> > to pad the vector.
>> > I suppose tho this could be improved ?
>>
>> I think we should just build two 64-bit vectors (V4HIs) and use a subreg
>> to fill the upper elements with undefined values.
>>
>> I suppose in principle we would have the same problem when splitting
>> a 64-bit vector into 2 32-bit vectors, but it's probably better to punt
>> on that for now.  Eventually it would be worth adding full support fo

Re: [Patch] libgomp: enable reverse offload for AMDGCN

2023-02-02 Thread Andrew Stubbs

On 02/02/2023 14:59, Tobias Burnus wrote:

Maybe it becomes better reviewable with an attached patch ...

On 02.02.23 15:31, Tobias Burnus wrote:

Now that the stack handling has been changed for AMDGCN, this patch
enables reverse offload.
(cf. today's "[committed] amdgcn, libgomp: Manually allocated stacks"
patch email/commit
by Andrew).

Any comments, suggestions?


LGTM.

Andrew


[PATCH] ipa: Avoid invalid gimple when IPA-CP and IPA-SRA disagree on types (108384)

2023-02-02 Thread Martin Jambor
Hi,

when the compiled program contains type mismatches between callers and
callees when it comes to a parameter, IPA-CP can try to propagate one
constant from callers while IPA-SRA may try to split a parameter
expecting a value of a different size on the same offset.  This then
currently leads to creation of a VIEW_CONVERT_EXPR with mismatching
type sizes of LHS and RHS which is correctly flagged by the GIMPLE
verifier as invalid.

It seems that the best course of action is to try and avoid the
situation altogether and so this patch adds a check to IPA-SRA that
peeks into the result of IPA-CP and when it sees a value on the same
offset but with a mismatching size, it just decides to leave that
particular parameter be.

Bootstrapped and tested on x86_64-linux, OK for master?

Thanks,

Martin


gcc/ChangeLog:

2023-02-02  Martin Jambor  

PR ipa/108384
* ipa-sra.cc (push_param_adjustments_for_index): Remove a size check
when comparing to an IPA-CP value.
(dump_list_of_param_indices): New function.
(adjust_parameter_descriptions): Check for mismatching IPA-CP values.
Dump removed candidates using dump_list_of_param_indices.
* ipa-param-manipulation.cc
(ipa_param_body_adjustments::modify_expression): Add assert checking
sizes of a VIEW_CONVERT_EXPR will match.
(ipa_param_body_adjustments::modify_assignment): Likewise.

gcc/testsuite/ChangeLog:

2023-02-02  Martin Jambor  

PR ipa/108384
* gcc.dg/ipa/pr108384.c: New test.
---
 gcc/ipa-param-manipulation.cc   |  4 ++
 gcc/ipa-sra.cc  | 66 -
 gcc/testsuite/gcc.dg/ipa/pr108384.c | 25 +++
 3 files changed, 76 insertions(+), 19 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr108384.c

diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
index 1de9ca2ceb8..42488ee09c3 100644
--- a/gcc/ipa-param-manipulation.cc
+++ b/gcc/ipa-param-manipulation.cc
@@ -1857,6 +1857,8 @@ ipa_param_body_adjustments::modify_expression (tree 
*expr_p, bool convert)
   if (convert && !useless_type_conversion_p (TREE_TYPE (expr),
 TREE_TYPE (repl)))
 {
+  gcc_checking_assert (tree_to_shwi (TYPE_SIZE (TREE_TYPE (expr)))
+  == tree_to_shwi (TYPE_SIZE (TREE_TYPE (repl;
   tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expr), repl);
   *expr_p = vce;
 }
@@ -1900,6 +1902,8 @@ ipa_param_body_adjustments::modify_assignment (gimple 
*stmt,
}
   else
{
+ gcc_checking_assert (tree_to_shwi (TYPE_SIZE (TREE_TYPE (*lhs_p)))
+ == tree_to_shwi (TYPE_SIZE (TREE_TYPE (*rhs_p;
  tree new_rhs = fold_build1_loc (gimple_location (stmt),
  VIEW_CONVERT_EXPR, TREE_TYPE (*lhs_p),
  *rhs_p);
diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
index 81b75910db1..7a2b4dc8608 100644
--- a/gcc/ipa-sra.cc
+++ b/gcc/ipa-sra.cc
@@ -3989,9 +3989,7 @@ push_param_adjustments_for_index (isra_func_summary *ifs, 
unsigned base_index,
{
  ipa_argagg_value_list avl (ipcp_ts);
  tree value = avl.get_value (base_index, pa->unit_offset);
- if (value
- && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value))) / BITS_PER_UNIT
- == pa->unit_size))
+ if (value)
{
  if (dump_file)
fprintf (dump_file, "- omitting component at byte "
@@ -4130,6 +4128,22 @@ process_isra_node_results (cgraph_node *node,
   callers.release ();
 }
 
+/* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
+   followed by the list of numbers in INDICES.  */
+
+static void
+dump_list_of_param_indices (const cgraph_node *node, const char* msg,
+   const vec &indices)
+{
+  if (indices.is_empty ())
+return;
+  fprintf (dump_file, "The following parameters of %s %s:", node->dump_name (),
+  msg);
+  for (unsigned i : indices)
+fprintf (dump_file, " %u", i);
+  fprintf (dump_file, "\n");
+}
+
 /* Check which parameters of NODE described by IFS have survived until IPA-SRA
and disable transformations for those which have not or which should not
transformed because the associated debug counter reached its limit.  Return
@@ -4153,6 +4167,7 @@ adjust_parameter_descriptions (cgraph_node *node, 
isra_func_summary *ifs)
   check_surviving = true;
   cinfo->param_adjustments->get_surviving_params (&surviving_params);
 }
+  ipcp_transformation *ipcp_ts = ipcp_get_transformation_summary (node);
   auto_vec  dump_dead_indices;
   auto_vec  dump_bad_cond_indices;
   for (unsigned i = 0; i < len; i++)
@@ -4202,27 +4217,40 @@ adjust_parameter_descriptions (cgraph_node *node, 
isra_func_summary *ifs)
  if (size_would_violate_limit_p (desc, desc->size_reached))
   

[committed] libstdc++: Use emplace in std::variant::operator=(T&&) as per LWG 3585

2023-02-02 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

This was approved at the October 2021 plenary.

libstdc++-v3/ChangeLog:

* include/std/variant (variant::operator=): Implement resolution
of LWG 3585.
* testsuite/20_util/variant/lwg3585.cc: New test.
---
 libstdc++-v3/include/std/variant |  4 +++-
 .../testsuite/20_util/variant/lwg3585.cc | 16 
 2 files changed, 19 insertions(+), 1 deletion(-)
 create mode 100644 libstdc++-v3/testsuite/20_util/variant/lwg3585.cc

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 35781495e31..5155124522f 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1481,7 +1481,9 @@ namespace __variant
|| !is_nothrow_move_constructible_v<_Tj>)
this->emplace<__index>(std::forward<_Tp>(__rhs));
  else
-   operator=(variant(std::forward<_Tp>(__rhs)));
+   // _GLIBCXX_RESOLVE_LIB_DEFECTS
+   // 3585. converting assignment with immovable alternative
+   this->emplace<__index>(_Tj(std::forward<_Tp>(__rhs)));
}
  return *this;
}
diff --git a/libstdc++-v3/testsuite/20_util/variant/lwg3585.cc 
b/libstdc++-v3/testsuite/20_util/variant/lwg3585.cc
new file mode 100644
index 000..0cbfc0db7f5
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/variant/lwg3585.cc
@@ -0,0 +1,16 @@
+// { dg-do compile { target c++17 } }
+
+// LWG 3585. Variant converting assignment with immovable alternative
+
+#include <variant>
+#include <string>
+
+struct A {
+  A() = default;
+  A(A&&) = delete;
+};
+
+int main() {
+  std::variant<A, std::string> v;
+  v = "hello";
+}
-- 
2.39.1



[committed] libstdc++: Fix std::filesystem errors with -fkeep-inline-functions [PR108636]

2023-02-02 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

The source code changes should be backported (the linker script changes
aren't needed because those symbols aren't present on the branches).

-- >8 --

With -fkeep-inline-functions there are linker errors when including
<filesystem>. This happens because there are some filesystem::path
constructors defined inline which call non-exported functions defined in
the library. That's usually not a problem, because those constructors
are only called by code that's also inside the library. But when the
header is compiled with -fkeep-inline-functions those inline functions
are emitted even though they aren't called. That then creates an
undefined reference to the other library internals. The fix is to just
move the private constructors into the library where they are called.
That way they are never even seen by users, and so not compiled even if
-fkeep-inline-functions is used.

On trunk there is a second problem, which is that the new equality
operators for comparing directory iterators with default_sentinel use
the shared_ptr::operator bool() conversion operator. The shared_ptr
specializations used by directory iterators are explicitly instantiated
in the library, but the bool conversion operators are not exported. This
causes linker errors at -O0 or with -fkeep-inline-functions. That just
requires the conversion operators to be exported.

libstdc++-v3/ChangeLog:

PR libstdc++/108636
* config/abi/pre/gnu.ver (GLIBCXX_3.4.31): Export shared_ptr
conversion operators for directory iterator comparisons with
std::default_sentinel_t.
* include/bits/fs_path.h (path::path(string_view, _Type))
(path::_Cmpt::_Cmpt(string_view, _Type, size_t)): Move inline
definitions to ...
* src/c++17/fs_path.cc: ... here.
* testsuite/27_io/filesystem/path/108636.cc: New test.
---
 libstdc++-v3/config/abi/pre/gnu.ver |  7 +++
 libstdc++-v3/include/bits/fs_path.h | 10 ++
 libstdc++-v3/src/c++17/fs_path.cc   | 13 +
 .../testsuite/27_io/filesystem/path/108636.cc   |  8 
 4 files changed, 30 insertions(+), 8 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc

diff --git a/libstdc++-v3/config/abi/pre/gnu.ver 
b/libstdc++-v3/config/abi/pre/gnu.ver
index 72716414ccb..34f23bcbce0 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -2504,6 +2504,13 @@ GLIBCXX_3.4.31 {
 _ZNSt6chrono9tzdb_list14const_iteratorppEi;
 _ZN9__gnu_cxx21zoneinfo_dir_overrideEv;
 
+# __shared_ptr::operator bool()
+
_ZNKSt12__shared_ptrINSt10filesystem4_DirELN9__gnu_cxx12_Lock_policyE[012]EEcvbEv;
+
_ZNKSt12__shared_ptrINSt10filesystem7__cxx114_DirELN9__gnu_cxx12_Lock_policyE[012]EEcvbEv;
+# __shared_ptr::operator bool()
+
_ZNKSt12__shared_ptrINSt10filesystem28recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE[012]EEcvbEv;
+
_ZNKSt12__shared_ptrINSt10filesystem7__cxx1128recursive_directory_iterator10_Dir_stackELN9__gnu_cxx12_Lock_policyE[012]EEcvbEv;
+
 } GLIBCXX_3.4.30;
 
 # Symbols in the support library (libsupc++) have their own tag.
diff --git a/libstdc++-v3/include/bits/fs_path.h 
b/libstdc++-v3/include/bits/fs_path.h
index 1cbfaaa5427..0d7bb10c1a0 100644
--- a/libstdc++-v3/include/bits/fs_path.h
+++ b/libstdc++-v3/include/bits/fs_path.h
@@ -596,12 +596,7 @@ namespace __detail
   _Multi = 0, _Root_name, _Root_dir, _Filename
 };
 
-path(basic_string_view __str, _Type __type)
-: _M_pathname(__str)
-{
-  __glibcxx_assert(__type != _Type::_Multi);
-  _M_cmpts.type(__type);
-}
+path(basic_string_view __str, _Type __type);
 
 enum class _Split { _Stem, _Extension };
 
@@ -851,8 +846,7 @@ namespace __detail
 
   struct path::_Cmpt : path
   {
-_Cmpt(basic_string_view __s, _Type __t, size_t __pos)
-  : path(__s, __t), _M_pos(__pos) { }
+_Cmpt(basic_string_view __s, _Type __t, size_t __pos);
 
 _Cmpt() : _M_pos(-1) { }
 
diff --git a/libstdc++-v3/src/c++17/fs_path.cc 
b/libstdc++-v3/src/c++17/fs_path.cc
index 93149c4b415..aaea7d2725d 100644
--- a/libstdc++-v3/src/c++17/fs_path.cc
+++ b/libstdc++-v3/src/c++17/fs_path.cc
@@ -187,6 +187,19 @@ struct path::_Parser
   { return origin + c.str.data() - input.data(); }
 };
 
+inline
+path::path(basic_string_view __str, _Type __type)
+: _M_pathname(__str)
+{
+  __glibcxx_assert(__type != _Type::_Multi);
+  _M_cmpts.type(__type);
+}
+
+inline
+path::_Cmpt::_Cmpt(basic_string_view __s, _Type __t, size_t __pos)
+: path(__s, __t), _M_pos(__pos)
+{ }
+
 struct path::_List::_Impl
 {
   using value_type = _Cmpt;
diff --git a/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc 
b/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc
new file mode 100644
index 000..d58de461090
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/filesystem/path/108636.cc
@@ -0,0 +1,8 @@
+//

Re: [PATCH 2/2] Documentation Update.

2023-02-02 Thread Kees Cook via Gcc-patches
On Thu, Feb 02, 2023 at 02:31:53PM +, Qing Zhao wrote:
> 
> > On Feb 2, 2023, at 3:33 AM, Richard Biener  wrote:
> > 
> > On Wed, 1 Feb 2023, Siddhesh Poyarekar wrote:
> > 
> >> On 2023-02-01 13:24, Qing Zhao wrote:
> >>> 
> >>> 
>  On Feb 1, 2023, at 11:55 AM, Siddhesh Poyarekar 
>  wrote:
>  
>  On 2023-01-31 09:11, Qing Zhao wrote:
> > Update documentation to clarify a GCC extension on structure with
> > flexible array member being nested in another structure.
> > gcc/ChangeLog:
> > * doc/extend.texi: Document GCC extension on a structure containing
> > a flexible array member to be a member of another structure.
>  
>  Should this resolve pr#77650 since the proposed action there appears to 
>  be
>  to document these semantics?
> >>> 
> >>> My understanding of pr77650 is specifically for documentation on the
> >>> following case:
> >>> 
> >>> The structure with a flexible array member is the middle field of another
> >>> structure.
> >>> 
> >>> Which I added in the documentation as the 2nd situation.
> >>> However, I am still not very comfortable on my current clarification on 
> >>> this
> >>> situation: how should we document on
> >>> the expected gcc behavior to handle such situation?
> >> 
> >> I reckon wording that dissuades programmers from using this might be
> >> appropriate, i.e. don't rely on this and if you already have such nested 
> >> flex
> >> arrays, change code to remove them.
> >> 
> > +In the above, @code{flex_data.data[]} is allowed to be extended 
> > flexibly
> > to
> > +the padding. E.g, up to 4 elements.
> >> 
> >> """
> >> ... Relying on space in struct padding is bad programming practice and any
> >> code relying on this behaviour should be modified to ensure that flexible
> >> array members only end up at the ends of arrays.  The `-pedantic` flag 
> >> should
> >> help identify such uses.
> >> """
> >> 
> >> Although -pedantic will also flag on flex arrays nested in structs even if
> >> they're at the end of the parent struct, so my suggestion on the warning is
> >> not really perfect.
> > 
> > Wow, so I checked and we indeed accept
> > 
> > struct X { int n; int data[]; };
> > struct Y { struct X x; int end; };
> > 
> > and -pedantic says
> > 
> > t.c:2:21: warning: invalid use of structure with flexible array member 
> > [-Wpedantic]
> >2 | struct Y { struct X x; int end; };
> >  |
> 
> Currently, -pedantic report the same message for flex arrays nested in 
> structs at the end of the parent struct AND in the middle of the parent 
> struct. 
> Shall we distinguish them and report different warning messages in order to 
> discourage the latter case? 
> 
> And at the same time, in the documentation, clarify these two situations, and 
> discourage the latter case at the same time as well?
> >   
> > 
> > and clang reports
> > 
> > t.c:2:21: warning: field 'x' with variable sized type 'struct X' not at 
> > the end of a struct or class is a GNU extension 
> > [-Wgnu-variable-sized-type-not-at-end]
> > struct Y { struct X x; int end; };
> >  
> >  ^
> 
> Clang’s warning message is clearer. 
> > 
> > looking at PR77650 what seems missing there is the semantics of this
> > extension as expected/required by the glibc use.  comment#5 seems
> > to suggest that for my example above its expected that
> > Y.x.data[0] aliases Y.end?!
> 
> Should we mention this alias relationship in the doc?
> 
> >  There must be a better way to write
> > the glibc code and IMHO it would be best to deprecate this extension.
> 
> Agreed. This is really a bad practice, should be deprecated. 
> We can give a warning first in this release, and then deprecate this extension 
> in a later release. 

Right -- this can lead (at least) to type confusion and other problems
too. We've been trying to remove all of these overlaps in the Linux
kernel. I mention it in the "Overlapping composite structure members"
section at https://people.kernel.org/kees/bounded-flexible-arrays-in-c

-- 
Kees Cook


[PATCH] Bump up precision size to 16 bits.

2023-02-02 Thread Michael Meissner via Gcc-patches
The new __dmr type that is being added as a possible future PowerPC instruction
set bumps into a structure field size issue.  The size of the __dmr type is 
1024 bits.
The precision field in tree_type_common is currently 10 bits, so if you store
1,024 into the field, you get a 0 back.  When you get 0 in the precision field,
the ccp pass passes this 0 to sext_hwi in hwint.h.  That function in turn
generates a shift count equal to the host wide int bit size, which is undefined
behavior in C/C++ and in practice gives machine-dependent results.

  int shift = HOST_BITS_PER_WIDE_INT - prec;
  return ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) src << shift)) >> shift;

It turns out the x86_64 where I first did my tests returns the original input
before the two shifts, while the PowerPC always returns 0.  In the ccp pass, the
original input is -1, and so it worked.  When I did the runs on the PowerPC, the
result was 0, which ultimately led to the failure.

In addition, once the precision field is larger, it will help PR C/102989 (C2x
_BitInt) as well as the implementation of the SET_TYPE_VECTOR_SUBPARTS macro.

I bootstrapped various PowerPC compilers (power10 LE, power9 LE, power8 BE)
along with an x86_64 build.  There were no regressions.  My proposed patches
for the __dmr type now run fine.  Can I install this into the master branch for
GCC 13?

2023-02-02   Richard Biener  
 Michael Meissner  

gcc/

PR middle-end/108623
* hwint.h (sext_hwi): Add assertion against precision 0.
* tree-core.h (tree_type_common): Bump up precision field to 16 bits.
Align bit fields > 1 bit to at least an 8-bit boundary.
---
 gcc/hwint.h |  1 +
 gcc/tree-core.h | 24 
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/gcc/hwint.h b/gcc/hwint.h
index e31aa006fa4..ba92efbfc25 100644
--- a/gcc/hwint.h
+++ b/gcc/hwint.h
@@ -277,6 +277,7 @@ ctz_or_zero (unsigned HOST_WIDE_INT x)
 static inline HOST_WIDE_INT
 sext_hwi (HOST_WIDE_INT src, unsigned int prec)
 {
+  gcc_checking_assert (prec != 0);
   if (prec == HOST_BITS_PER_WIDE_INT)
 return src;
   else
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 8124a1328d4..b71748c6c02 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1686,18 +1686,8 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
-  unsigned int precision : 10;
-  unsigned no_force_blk_flag : 1;
-  unsigned needs_constructing_flag : 1;
-  unsigned transparent_aggr_flag : 1;
-  unsigned restrict_flag : 1;
-  unsigned contains_placeholder_bits : 2;
-
+  unsigned int precision : 16;
   ENUM_BITFIELD(machine_mode) mode : 8;
-
-  /* TYPE_STRING_FLAG for INTEGER_TYPE and ARRAY_TYPE.
- TYPE_CXX_ODR_P for RECORD_TYPE and UNION_TYPE.  */
-  unsigned string_flag : 1;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1713,12 +1703,22 @@ struct GTY(()) tree_type_common {
  so we need to store the value 32 (not 31, as we need the zero
  as well), hence six bits.  */
   unsigned align : 6;
+  /* TYPE_STRING_FLAG for INTEGER_TYPE and ARRAY_TYPE.
+ TYPE_CXX_ODR_P for RECORD_TYPE and UNION_TYPE.  */
+  unsigned string_flag : 1;
+  unsigned no_force_blk_flag : 1;
+
   unsigned warn_if_not_align : 6;
+  unsigned needs_constructing_flag : 1;
+  unsigned transparent_aggr_flag : 1;
+
+  unsigned contains_placeholder_bits : 2;
+  unsigned restrict_flag : 1;
   unsigned typeless_storage : 1;
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 15;
+  unsigned spare : 9;
 
   alias_set_type alias_set;
   tree pointer_to;
-- 
2.39.1


-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[committed] libstdc++: Define std::basic_stringbuf::view() for old std::string ABI

2023-02-02 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Pushed to trunk.

This could be backported to gcc-11 and gcc-12 but I have no immediate
plans to do that (it's needed on trunk because of the new stuff in
<chrono> and that stuff isn't on the branches).

-- >8 --

Unlike the new str()&& members in <sstream>, there is no real difficulty
in supporting the new view() members for the old std::string ABI.
Enabling it fixes errors in <chrono> where std::ostringstream::view() is
used by ostream insertion operators for calendar types.

We just need to use [[gnu::always_inline]] on the view() members for the
old ABI, because the library doesn't contain instantiations of them for
the old ABI. Making them always inline avoids needing to add those
instantiations and export them.

libstdc++-v3/ChangeLog:

* include/std/sstream (basic_stringbuf::view): Define for old
std::string ABI.
(basic_istringstream::view, basic_ostringstream::view)
(basic_stringstream::view): Likewise.
* testsuite/27_io/basic_istringstream/view/char/1.cc: Remove
{ dg-require-effective-target cxx11_abi }.
* testsuite/27_io/basic_istringstream/view/wchar_t/1.cc:
Likewise.
* testsuite/27_io/basic_ostringstream/view/char/1.cc: Likewise.
* testsuite/27_io/basic_ostringstream/view/wchar_t/1.cc:
Likewise.
* testsuite/27_io/basic_stringbuf/view/char/1.cc: Likewise.
* testsuite/27_io/basic_stringbuf/view/wchar_t/1.cc: Likewise.
* testsuite/27_io/basic_stringstream/view/char/1.cc: Likewise.
* testsuite/27_io/basic_stringstream/view/wchar_t/1.cc:
Likewise.
---
 libstdc++-v3/include/std/sstream  | 32 +++
 .../27_io/basic_istringstream/view/char/1.cc  |  1 -
 .../basic_istringstream/view/wchar_t/1.cc |  1 -
 .../27_io/basic_ostringstream/view/char/1.cc  |  1 -
 .../basic_ostringstream/view/wchar_t/1.cc |  1 -
 .../27_io/basic_stringbuf/view/char/1.cc  |  1 -
 .../27_io/basic_stringbuf/view/wchar_t/1.cc   |  1 -
 .../27_io/basic_stringstream/view/char/1.cc   |  1 -
 .../basic_stringstream/view/wchar_t/1.cc  |  1 -
 9 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/libstdc++-v3/include/std/sstream b/libstdc++-v3/include/std/sstream
index 4f0c50fcc8a..6b3139714c7 100644
--- a/libstdc++-v3/include/std/sstream
+++ b/libstdc++-v3/include/std/sstream
@@ -43,10 +43,15 @@
 
 #if __cplusplus > 201703L && _GLIBCXX_USE_CXX11_ABI
 # define _GLIBCXX_LVAL_REF_QUAL &
+# define _GLIBCXX_SSTREAM_ALWAYS_INLINE
 #else
 # define _GLIBCXX_LVAL_REF_QUAL
+// For symbols that are not exported from libstdc++.so for the COW string ABI.
+# define _GLIBCXX_SSTREAM_ALWAYS_INLINE [[__gnu__::__always_inline__]]
 #endif
 
+
+
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -251,7 +256,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
return __ret;
   }
 
-#if __cplusplus > 201703L && _GLIBCXX_USE_CXX11_ABI
+#if __cplusplus > 201703L
+#if _GLIBCXX_USE_CXX11_ABI
 #if __cpp_concepts
   template<__allocator_like _SAlloc>
basic_string<_CharT, _Traits, _SAlloc>
@@ -275,7 +281,9 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
_M_sync(_M_string.data(), 0, 0);
return __str;
   }
+#endif // cxx11 ABI
 
+  _GLIBCXX_SSTREAM_ALWAYS_INLINE
   basic_string_view
   view() const noexcept
   {
@@ -698,7 +706,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   str() const _GLIBCXX_LVAL_REF_QUAL
   { return _M_stringbuf.str(); }
 
-#if __cplusplus > 201703L && _GLIBCXX_USE_CXX11_ABI
+#if __cplusplus > 201703L
+#if _GLIBCXX_USE_CXX11_ABI
 #if __cpp_concepts
   template<__allocator_like _SAlloc>
basic_string<_CharT, _Traits, _SAlloc>
@@ -709,11 +718,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   __string_type
   str() &&
   { return std::move(_M_stringbuf).str(); }
+#endif // cxx11 ABI
 
+  _GLIBCXX_SSTREAM_ALWAYS_INLINE
   basic_string_view
   view() const noexcept
   { return _M_stringbuf.view(); }
-#endif
+#endif // C++20
 
   /**
*  @brief  Setting a new buffer.
@@ -919,7 +930,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   str() const _GLIBCXX_LVAL_REF_QUAL
   { return _M_stringbuf.str(); }
 
-#if __cplusplus > 201703L && _GLIBCXX_USE_CXX11_ABI
+#if __cplusplus > 201703L
+#if _GLIBCXX_USE_CXX11_ABI
 #if __cpp_concepts
   template<__allocator_like _SAlloc>
basic_string<_CharT, _Traits, _SAlloc>
@@ -930,11 +942,13 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   __string_type
   str() &&
   { return std::move(_M_stringbuf).str(); }
+#endif // cxx11 ABI
 
+  _GLIBCXX_SSTREAM_ALWAYS_INLINE
   basic_string_view
   view() const noexcept
   { return _M_stringbuf.view(); }
-#endif
+#endif // C++20
 
   /**
*  @brief  Setting a new buffer.
@@ -1138,7 +1152,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   str() const _GLIBCXX_LVAL_REF_QUAL
   { return _M_stringbuf.str(); }
 
-#if __cplusplus > 201703L && _GLIBCXX_USE_CXX11_ABI
+#if __cpluspl

[committed] libstdc++: Use ENOSYS for unsupported filesystem ops on AVR

2023-02-02 Thread Jonathan Wakely via Gcc-patches
Tested x86_64-linux, built on avr. Pushed to trunk.

I might backport this to gcc-12 too, although realistically I doubt
anybody is going to try to use the filesystem library on avr anyway, so
it doesn't matter.

-- >8 --

Because avr-libc <errno.h> defines most error numbers with duplicate
values it's not sufficient to check #ifdef ENOTSUP when deciding which
std::errc constant to use for the filesystem library's __unsupported()
helper. Add a special case for AVR to always use the ENOSYS value.

libstdc++-v3/ChangeLog:

* src/filesystem/ops-common.h [AVR] (__unsupported): Always use
errc::function_not_supported instead of errc::not_supported.
---
 libstdc++-v3/src/filesystem/ops-common.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/src/filesystem/ops-common.h 
b/libstdc++-v3/src/filesystem/ops-common.h
index 02c75be09d2..abbfca43e5c 100644
--- a/libstdc++-v3/src/filesystem/ops-common.h
+++ b/libstdc++-v3/src/filesystem/ops-common.h
@@ -84,7 +84,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   inline error_code
   __unsupported() noexcept
   {
-#if defined ENOTSUP
+#if defined __AVR__
+// avr-libc defines ENOTSUP and EOPNOTSUPP but with nonsense values.
+// ENOSYS is defined though, so use an error_code corresponding to that.
+// This contradicts the comment above, but we don't have much choice.
+return std::make_error_code(std::errc::function_not_supported);
+#elif defined ENOTSUP
 return std::make_error_code(std::errc::not_supported);
 #elif defined EOPNOTSUPP
 // This is supposed to be for socket operations
-- 
2.39.1



[PATCH] c++: spurious ADDR_EXPR after overload set pruning [PR107461]

2023-02-02 Thread Patrick Palka via Gcc-patches
Here the ahead-of-time overload set pruning in finish_call_expr is
unintentionally returning a CALL_EXPR whose pruned callee is wrapped in
an ADDR_EXPR, despite the original callee not being wrapped in an
ADDR_EXPR.  This ends up causing a bogus declaration matching error in
the below testcase because the call to min in #1 is expressed as a
CALL_EXPR to ADDR_EXPR to FUNCTION_DECL, whereas the level-lowered call
to min in #2 is expressed instead as a CALL_EXPR to FUNCTION_DECL.

This patch fixes this by stripping this ADDR_EXPR appropriately.
Thus the first call to min now gets expressed as a CALL_EXPR to
FUNCTION_DECL, matching the form it had before r12-6075-g2decd2cabe5a4f.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk and 12?

PR c++/107461

gcc/cp/ChangeLog:

* semantics.cc (finish_call_expr): Strip ADDR_EXPR from
the selected callee during overload set pruning.

gcc/testsuite/ChangeLog:

* g++.dg/template/friend75.C: New test.
---
 gcc/cp/semantics.cc  | 15 +-
 gcc/testsuite/g++.dg/template/friend75.C | 26 
 2 files changed, 36 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/friend75.C

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index b3afea85196..fe9262a257f 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -2957,13 +2957,18 @@ finish_call_expr (tree fn, vec **args, 
bool disallow_virtual,
   if (TREE_CODE (result) == CALL_EXPR
  && really_overloaded_fn (orig_fn))
{
- orig_fn = CALL_EXPR_FN (result);
- if (TREE_CODE (orig_fn) == COMPONENT_REF)
+ tree sel_fn = CALL_EXPR_FN (result);
+ if (TREE_CODE (sel_fn) == COMPONENT_REF)
{
  /* The non-dependent result of build_new_method_call.  */
- orig_fn = TREE_OPERAND (orig_fn, 1);
- gcc_assert (BASELINK_P (orig_fn));
-   }
+ sel_fn = TREE_OPERAND (sel_fn, 1);
+ gcc_assert (BASELINK_P (sel_fn));
+   }
+ else if (TREE_CODE (sel_fn) == ADDR_EXPR)
+   /* Undo the ADDR_EXPR callee wrapping performed by build_over_call
+  since the original callee didn't have it.  */
+   sel_fn = TREE_OPERAND (sel_fn, 0);
+ orig_fn = sel_fn;
}
 
   result = build_call_vec (TREE_TYPE (result), orig_fn, orig_args);
diff --git a/gcc/testsuite/g++.dg/template/friend75.C 
b/gcc/testsuite/g++.dg/template/friend75.C
new file mode 100644
index 000..800d3043c8a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/friend75.C
@@ -0,0 +1,26 @@
+// PR c++/107461
+// { dg-do compile { target c++11 } }
+
+template
+constexpr T min(T t0, T t1) {
+  return t0 < t1 ? t0 : t1;
+}
+
+template
+struct Matrix;
+
+template
+Matrix
+operator+(Matrix const& lhs, Matrix const& rhs); // #1
+
+template
+struct Matrix {
+  template
+  friend Matrix
+  operator+(Matrix const& lhs, Matrix const& rhs); // #2
+};
+
+void f() {
+  Matrix<1> a;
+  a+a;
+}
-- 
2.39.1.388.g2fc9e9ca3c



[PATCH] libstdc++: Limit allocations in _Rb_tree

2023-02-02 Thread François Dumont via Gcc-patches

This is PR 96088 but this time for _Rb_tree based containers.

I guess it won't go in for the moment but I wanted to submit it already
because of the changes I had to make in stl_function.h. They look like
parts that were missing for C++11 move semantics. I still need to run all
tests to see if they can have side effects.


 libstdc++: [_Rb_tree] Limit allocation on iterator insertion [PR 
96088]


    Detect when invoking the comparer requires an allocation and in this
    case create a temporary instance that will be moved to the storage
    location if the insertion eventually takes place. Avoid allocating a
    node otherwise.

    libstdc++-v3/ChangeLog:

    PR libstdc++/96088
    * include/bits/stl_function.h
    (std::less<>::operator()): Add noexcept qualification.
    (std::greater::operator()): Likewise.
(std::_Identity<>::operator<_Tp2>(_Tp2&&)): New perfect forwarding operator.
(std::_Select1st<>::operator<_Pair2>(_Pair2&&)): New move operator.
    * include/bits/stl_tree.h 
(_Rb_tree<>::_ConvertToValueType<>): New helper type.

    (_Rb_tree<>::_M_get_insert_unique_pos_tr): New.
    (_Rb_tree<>::_S_forward_key): New.
    (_Rb_tree<>::_M_emplace_unique_kv): New.
    (_Rb_tree<>::_M_emplace_unique_aux): New, use latter.
    (_Rb_tree<>::_M_emplace_unique): New, use latter.
    * testsuite/23_containers/map/96088.cc: New test case.
    * testsuite/23_containers/multimap/96088.cc: New test case.
    * testsuite/23_containers/multiset/96088.cc: New test case.
    * testsuite/23_containers/set/96088.cc: New test case.

Ok to commit ?

François

diff --git a/libstdc++-v3/include/bits/stl_function.h b/libstdc++-v3/include/bits/stl_function.h
index fa03f32b1b8..5e04c82629b 100644
--- a/libstdc++-v3/include/bits/stl_function.h
+++ b/libstdc++-v3/include/bits/stl_function.h
@@ -395,6 +395,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _GLIBCXX14_CONSTEXPR
   bool
   operator()(const _Tp& __x, const _Tp& __y) const
+	_GLIBCXX_NOEXCEPT_IF( noexcept(__x > __y) )
   { return __x > __y; }
 };
 
@@ -405,6 +406,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   _GLIBCXX14_CONSTEXPR
   bool
   operator()(const _Tp& __x, const _Tp& __y) const
+	_GLIBCXX_NOEXCEPT_IF( noexcept(__x < __y) )
   { return __x < __y; }
 };
 
@@ -1165,6 +1167,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   const _Tp&
   operator()(const _Tp& __x) const
   { return __x; }
+
+#if __cplusplus >= 201103L
+template
+  _Tp2&&
+  operator()(_Tp2&& __x) const noexcept
+  { return std::forward<_Tp2>(__x); }
+#endif
 };
 
   // Partial specialization, avoids confusing errors in e.g. std::set.
@@ -1192,6 +1201,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	const typename _Pair2::first_type&
 	operator()(const _Pair2& __x) const
 	{ return __x.first; }
+
+  template
+	typename _Pair2::first_type&&
+	operator()(_Pair2&& __x) const
+	{ return std::move(__x.first); }
 #endif
 };
 
diff --git a/libstdc++-v3/include/bits/stl_tree.h b/libstdc++-v3/include/bits/stl_tree.h
index 3c331fbc952..8096ba97f18 100644
--- a/libstdc++-v3/include/bits/stl_tree.h
+++ b/libstdc++-v3/include/bits/stl_tree.h
@@ -534,6 +534,42 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_Rb_tree& _M_t;
   };
 
+#if __cplusplus >= 201103L
+  template
+	struct _ConvertToValueType;
+
+  template
+	struct _ConvertToValueType, _Value>
+	{
+	  template
+	constexpr _Kt&&
+	operator()(_Kt&& __k) const noexcept
+	{ return std::forward<_Kt>(__k); }
+	};
+
+  template
+	struct _ConvertToValueType, _Value>
+	{
+	  constexpr _Value&&
+	  operator()(_Value&& __x) const noexcept
+	  { return std::move(__x); }
+
+	  constexpr const _Value&
+	  operator()(const _Value& __x) const noexcept
+	  { return __x; }
+
+	  template
+	constexpr std::pair<_Kt, _Vt>&&
+	operator()(std::pair<_Kt, _Vt>&& __x) const noexcept
+	{ return std::move(__x); }
+
+	  template
+	constexpr const std::pair<_Kt, _Vt>&
+	operator()(const std::pair<_Kt, _Vt>& __x) const noexcept
+	{ return __x; }
+  };
+#endif // C++11
+
 public:
   typedef _Key key_type;
   typedef _Val value_type;
@@ -830,6 +866,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   pair<_Base_ptr, _Base_ptr>
   _M_get_insert_unique_pos(const key_type& __k);
 
+#if __cplusplus >= 201103L
+  template
+	pair<_Base_ptr, _Base_ptr>
+	_M_get_insert_unique_pos_tr(const _Kt& __k);
+#endif
+
   pair<_Base_ptr, _Base_ptr>
   _M_get_insert_equal_pos(const key_type& __k);
 
@@ -1075,6 +1117,45 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  return _M_insert_equal_(__pos, std::forward<_Arg>(__x), __an);
 	}
 
+  template
+	static __conditional_t<
+	__and_<__is_nothrow_invocable<_Compare&,
+  const key_type&, const key_type&>,
+	   __not_<__is_nothrow_invocable<_Compare&,
+	 _Kt, const key_type&>>>::value,
+	  key_type, _Kt&&>
+	_S_f

[committed] c: Update checks on constexpr floating-point initializers

2023-02-02 Thread Joseph Myers
WG14 has agreed some changes (detailed at the end of N3082) to the
rules on constexpr initializers for floating types.  Update GCC's
implementation to match: binary initializers are now allowed for
decimal types, and real initializers for complex types, but signaling
NaN initializers can't be used for a different type with the same
mode.

There are also changes to the constexpr rules for pointer types
(allowing null pointer address constants that aren't null pointer
constants), which I'll deal with separately.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/c/
* c-typeck.cc: Include "realmpfr.h".
(constexpr_init_fits_real_type): Do not allow signaling NaN
conversions to different types with the same mode.  Handle
conversions from binary to decimal types.
(check_constexpr_init): Do not disallow real initializers for
complex types.  Do not disallow binary initializers for decimal
floating types.

gcc/testsuite/
* gcc.dg/c2x-constexpr-1.c: Test constexpr initializers of complex
types with real initializers are allowed.
* gcc.dg/c2x-constexpr-3.c: Do not test for constexpr initializers
of complex types with real initializers being disallowed.
* gcc.dg/c2x-constexpr-8.c: Add tests of signaling NaN complex
initializers.
* gcc.dg/c2x-constexpr-9.c: Add more tests.
* gcc.dg/dfp/c2x-constexpr-dfp-1.c: Add tests of binary floating
initializers for decimal types.
* gcc.dg/dfp/c2x-constexpr-dfp-2.c: Change tests of binary
initializers for decimal types.  Add more tests of decimal
initializers for binary types.

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 2737b14ea18..9d65130154d 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -52,6 +52,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "stringpool.h"
 #include "attribs.h"
 #include "asan.h"
+#include "realmpfr.h"
 
 /* Possible cases of implicit conversions.  Used to select diagnostic messages
and control folding initializers in convert_for_assignment.  */
@@ -8121,8 +8122,9 @@ print_spelling (char *buffer)
 }
 
 /* Check whether INIT, a floating or integer constant, is
-   representable in TYPE, a real floating type with the same radix.
-   Return true if OK, false if not.  */
+   representable in TYPE, a real floating type with the same radix or
+   a decimal floating type initialized with a binary floating
+   constant.  Return true if OK, false if not.  */
 static bool
 constexpr_init_fits_real_type (tree type, tree init)
 {
@@ -8130,8 +8132,16 @@ constexpr_init_fits_real_type (tree type, tree init)
   gcc_assert (TREE_CODE (init) == INTEGER_CST || TREE_CODE (init) == REAL_CST);
   if (TREE_CODE (init) == REAL_CST
   && TYPE_MODE (TREE_TYPE (init)) == TYPE_MODE (type))
-/* Same mode, no conversion required.  */
-return true;
+{
+  /* Same mode, no conversion required except for the case of
+signaling NaNs if the types are incompatible (e.g. double and
+long double with the same mode).  */
+  if (REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (init))
+ && !comptypes (TYPE_MAIN_VARIANT (type),
+TYPE_MAIN_VARIANT (TREE_TYPE (init
+   return false;
+  return true;
+}
   if (TREE_CODE (init) == INTEGER_CST)
 {
   tree converted = build_real_from_int_cst (type, init);
@@ -8140,6 +8150,33 @@ constexpr_init_fits_real_type (tree type, tree init)
TYPE_PRECISION (TREE_TYPE (init)));
   return !fail && wi::eq_p (w, wi::to_wide (init));
 }
+  if (REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (init)))
+return false;
+  if ((REAL_VALUE_ISINF (TREE_REAL_CST (init))
+   && MODE_HAS_INFINITIES (TYPE_MODE (type)))
+  || (REAL_VALUE_ISNAN (TREE_REAL_CST (init))
+ && MODE_HAS_NANS (TYPE_MODE (type
+return true;
+  if (DECIMAL_FLOAT_TYPE_P (type)
+  && !DECIMAL_FLOAT_TYPE_P (TREE_TYPE (init)))
+{
+  /* This is valid if the real number represented by the
+initializer can be exactly represented in the decimal
+type.  Compare the values using MPFR.  */
+  REAL_VALUE_TYPE t;
+  real_convert (&t, TYPE_MODE (type), &TREE_REAL_CST (init));
+  mpfr_t bin_val, dec_val;
+  mpfr_init2 (bin_val, REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (init)))->p);
+  mpfr_init2 (dec_val, REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (init)))->p);
+  mpfr_from_real (bin_val, &TREE_REAL_CST (init), MPFR_RNDN);
+  char string[256];
+  real_to_decimal (string, &t, sizeof string, 0, 1);
+  bool res = (mpfr_strtofr (dec_val, string, NULL, 10, MPFR_RNDN) == 0
+ && mpfr_equal_p (bin_val, dec_val));
+  mpfr_clear (bin_val);
+  mpfr_clear (dec_val);
+  return res;
+}
   /* exact_real_truncate is not quite right here, since it doesn't
  allow even an exact conversion to s

Re: [PATCH v5 0/5] P1689R5 support

2023-02-02 Thread Harald Anlauf via Gcc-patches

Hi Ben,

Am 25.01.23 um 22:06 schrieb Ben Boeckel via Gcc-patches:

Hi,

This patch series adds initial support for ISO C++'s [P1689R5][], a
format for describing C++ module requirements and provisions based on
the source code. This is required because compiling C++ with modules is
not embarrassingly parallel and needs to be ordered to ensure that
`import some_module;` can be satisfied in time by making sure that any
TU with `export import some_module;` is compiled first.

[P1689R5]: https://isocpp.org/files/papers/P1689R5.html


while that paper mentions Fortran, the patch in its present version
does not seem to implement anything related to Fortran and does not
touch the gfortran frontend.  Or am I missing anything?  Otherwise,
could you give an example how it would be used with Fortran?

Thus I'd say that it is OK from the gfortran side.

Thanks,
Harald


I've also added patches to include imported module CMI files and the
module mapper file as dependencies of the compilation. I briefly looked
into adding dependencies on response files as well, but that appeared to
need some code contortions to have a `class mkdeps` available before
parsing the command line or to keep the information around until one was
made.

I'd like feedback on the approach taken here with respect to the
user-visible flags. I'll also note that header units are not supported
at this time because the current `-E` behavior with respect to `import
<some_header>;` is to search for an appropriate `.gcm` file which is not
something such a "scan" can support. A new mode will likely need to be
created (e.g., replacing `-E` with `-fc++-module-scanning` or something)
where headers are looked up "normally" and processed only as much as
scanning requires.

FWIW, Clang has taken an alternate approach with its `clang-scan-deps`
tool rather than using the compiler directly.

Thanks,

--Ben

---
v4 -> v5:

- add dependency tracking for imported modules to `-MF`
- add dependency tracking for static module mapper files given to
   `-fmodule-mapper=`

v3 -> v4:

- add missing spaces between function names and arguments

v2 -> v3:

- changelog entries moved to commit messages
- documentation updated/added in the UTF-8 routine editing

v1 -> v2:

- removal of the `deps_write(extra)` parameter to option-checking where
   needed
- default parameter of `cpp_finish(fdeps_stream = NULL)`
- unification of libcpp UTF-8 validity functions from v1
- test cases for flag parsing states (depflags-*) and p1689 output
   (p1689-*)

Ben Boeckel (5):
   libcpp: reject codepoints above 0x10FFFF
   libcpp: add a function to determine UTF-8 validity of a C string
   p1689r5: initial support
   c++modules: report imported CMI files as dependencies
   c++modules: report module mapper files as a dependency

  gcc/c-family/c-opts.cc|  40 +++-
  gcc/c-family/c.opt|  12 +
  gcc/cp/mapper-client.cc   |   4 +
  gcc/cp/mapper-client.h|   1 +
  gcc/cp/module.cc  |  23 +-
  gcc/doc/invoke.texi   |  15 ++
  gcc/testsuite/g++.dg/modules/depflags-f-MD.C  |   2 +
  gcc/testsuite/g++.dg/modules/depflags-f.C |   1 +
  gcc/testsuite/g++.dg/modules/depflags-fi.C|   3 +
  gcc/testsuite/g++.dg/modules/depflags-fj-MD.C |   3 +
  gcc/testsuite/g++.dg/modules/depflags-fj.C|   4 +
  .../g++.dg/modules/depflags-fjo-MD.C  |   4 +
  gcc/testsuite/g++.dg/modules/depflags-fjo.C   |   5 +
  gcc/testsuite/g++.dg/modules/depflags-fo-MD.C |   3 +
  gcc/testsuite/g++.dg/modules/depflags-fo.C|   4 +
  gcc/testsuite/g++.dg/modules/depflags-j-MD.C  |   2 +
  gcc/testsuite/g++.dg/modules/depflags-j.C |   3 +
  gcc/testsuite/g++.dg/modules/depflags-jo-MD.C |   3 +
  gcc/testsuite/g++.dg/modules/depflags-jo.C|   4 +
  gcc/testsuite/g++.dg/modules/depflags-o-MD.C  |   2 +
  gcc/testsuite/g++.dg/modules/depflags-o.C |   3 +
  gcc/testsuite/g++.dg/modules/modules.exp  |   1 +
  gcc/testsuite/g++.dg/modules/p1689-1.C|  18 ++
  gcc/testsuite/g++.dg/modules/p1689-1.exp.json |  27 +++
  gcc/testsuite/g++.dg/modules/p1689-2.C|  16 ++
  gcc/testsuite/g++.dg/modules/p1689-2.exp.json |  16 ++
  gcc/testsuite/g++.dg/modules/p1689-3.C|  14 ++
  gcc/testsuite/g++.dg/modules/p1689-3.exp.json |  16 ++
  gcc/testsuite/g++.dg/modules/p1689-4.C|  14 ++
  gcc/testsuite/g++.dg/modules/p1689-4.exp.json |  14 ++
  gcc/testsuite/g++.dg/modules/p1689-5.C|  14 ++
  gcc/testsuite/g++.dg/modules/p1689-5.exp.json |  14 ++
  gcc/testsuite/g++.dg/modules/test-p1689.py| 222 ++
  gcc/testsuite/lib/modules.exp |  71 ++
  libcpp/charset.cc |  28 ++-
  libcpp/include/cpplib.h   |  12 +-
  libcpp/include/mkdeps.h   |  17 +-
  libcpp/init.cc|  13 +-
  libcpp/internal.h |   2

[PATCH] testsuite: XFAIL g++.dg/pr71488.C and warn/Warray-bounds-16.C, PR107561

2023-02-02 Thread Hans-Peter Nilsson via Gcc-patches
Tested cris-elf, native x86_64-pc-linux-gnu.
Ok to commit?
--- 8< ---

These appear as regressions from a baseline before
r13-3761-ga239a63f868e29.  See the PR trail.

Note that the warning for g++.dg/pr71488.C is for a *header*
file, thus we can't match the line number (sanely).

gcc/testsuite:

PR tree-optimization/107561
* g++.dg/warn/Warray-bounds-16.C: XFAIL bogus "overflows destination"
warning.
* g++.dg/pr71488.C: Ditto, but just for ilp32 targets.
---
 gcc/testsuite/g++.dg/pr71488.C   | 1 +
 gcc/testsuite/g++.dg/warn/Warray-bounds-16.C | 1 +
 2 files changed, 2 insertions(+)

diff --git a/gcc/testsuite/g++.dg/pr71488.C b/gcc/testsuite/g++.dg/pr71488.C
index ffe30f1afeb1..a89e6465a945 100644
--- a/gcc/testsuite/g++.dg/pr71488.C
+++ b/gcc/testsuite/g++.dg/pr71488.C
@@ -3,6 +3,7 @@
 // { dg-options "-O3 -std=c++11" }
 // { dg-additional-options "-msse4" { target sse4_runtime } }
 // { dg-require-effective-target c++11 }
+// { dg-bogus "size 0 overflows" "pr107561" { xfail *-*-* } 0 }
 
 #include 
 
diff --git a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C 
b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
index 89cbadb91c7b..49d3437fa1b7 100644
--- a/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
+++ b/gcc/testsuite/g++.dg/warn/Warray-bounds-16.C
@@ -20,6 +20,7 @@ struct S
 
 for (int i = 0; i < m; i++)
   new (p + i) int (); /* { dg-bogus "bounds" "pr102690" { xfail *-*-* } } 
*/
+// { dg-bogus "size 0 overflows" "pr107561" { xfail ilp32 } .-1 }
   }
 };
 
-- 
2.30.2



GSoC project idea: Separate Host Process Offloading (was: Remove support for Intel MIC offloading)

2023-02-02 Thread Thomas Schwinge
Hi!

On 2023-02-01T16:12:07+0100, Martin Jambor  wrote:
> On Thu, Oct 20 2022, Richard Biener via Gcc-patches wrote:
>>> Am 20.10.2022 um 14:41 schrieb Jakub Jelinek via Gcc-patches 
>>> :
>>> On Thu, Oct 20, 2022 at 12:33:28PM +, Michael Matz wrote:
> On Thu, 20 Oct 2022, Thomas Schwinge wrote:
> This had been done in
> wwwdocs commit 5c7ecfb5627e412a3d142d8dc212f4cd39b3b73f
> "Document deprecation of OpenMP MIC offloading in GCC 12".
>
> I'm sad about this, because -- in theory -- such a plugin is very useful
> for offloading simulation/debugging (separate host/device memory spaces,
> allow sanitizers to run on offloaded code

 Yeah, I think that's a _very_ useful feature, but indeed ...

> (like LLVM a while ago
> implemented), and so on), but all that doesn't help -- in practice -- if
> nobody is maintaining that code.

 ... it should then be somewhat maintained properly.  Maybe the
 MIC-specifics could be removed from the code, and it could be transformed
 into a "null"-offload target, as example and testing vehicle (and implying
 that such new liboffloadmic^H^H^Hnull would have its upstream in the GCC
 repo).  Alas, if no one is going to do that work removing is the right
 choice.
>>>
>>> Yeah.  But we really shouldn't need a large MIC specific library for that,
>>> everything should be implementable with a simple portable plugin that just
>>> forks + execs the offloading ELF and transfers data to/out of it etc.
>>> And the config/i386/intelmic-mkoffload etc. stuff would need to be done
>>> somewhere in generic code, such that we can do it for all targets.
>>> Also ideally by using just the normal lto1 with some special option that
>>> it acts as an offloading compiler, so that we don't need to bother with
>>> building a separate offloading compiler for it.
>>> True, everything guarded with #ifdef ACCEL_COMPILER etc. would need to
>>> change into code guarded with some option.
>>
>> Might be a nice GSoC project …
>
> I really think it could be.

Agreed!  Something like: "Separate Host Process Offloading"!  (Back
in October, I actually had made a TODO note to put this one onto
, but so far...)

> Would any one of those involved in this
> thread be willing to mentor it?

I'd offer to co-mentor, but I'd rather not be the only one.


I'm now off for FOSDEM, but unless someone gets it done before, I'll cook
up a GSoC project idea text when I'm back, on Tuesday.


Grüße
 Thomas
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] c++: spurious ADDR_EXPR after overload set pruning [PR107461]

2023-02-02 Thread Jason Merrill via Gcc-patches

On 2/2/23 13:09, Patrick Palka wrote:

Here the ahead-of-time overload set pruning in finish_call_expr is
unintentionally returning a CALL_EXPR whose pruned callee is wrapped in
an ADDR_EXPR, despite the original callee not being wrapped in an
ADDR_EXPR.  This ends up causing a bogus declaration matching error in
the below testcase because the call to min in #1 is expressed as a
CALL_EXPR to ADDR_EXPR to FUNCTION_DECL, whereas the level-lowered call
to min in #2 is expressed instead as a CALL_EXPR to FUNCTION_DECL.

This patch fixes this by stripping this ADDR_EXPR appropriately.
Thus the first call to min now gets expressed as a CALL_EXPR to
FUNCTION_DECL, matching the form it had before r12-6075-g2decd2cabe5a4f.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for trunk and 12?


OK.  As a future direction, I'd think we want to do the same pruning for 
other forms of "FN".


Jason


PR c++/107461

gcc/cp/ChangeLog:

* semantics.cc (finish_call_expr): Strip ADDR_EXPR from
the selected callee during overload set pruning.

gcc/testsuite/ChangeLog:

* g++.dg/template/friend75.C: New test.
---
  gcc/cp/semantics.cc  | 15 +-
  gcc/testsuite/g++.dg/template/friend75.C | 26 
  2 files changed, 36 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/template/friend75.C

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index b3afea85196..fe9262a257f 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -2957,13 +2957,18 @@ finish_call_expr (tree fn, vec **args, 
bool disallow_virtual,
if (TREE_CODE (result) == CALL_EXPR
  && really_overloaded_fn (orig_fn))
{
- orig_fn = CALL_EXPR_FN (result);
- if (TREE_CODE (orig_fn) == COMPONENT_REF)
+ tree sel_fn = CALL_EXPR_FN (result);
+ if (TREE_CODE (sel_fn) == COMPONENT_REF)
{
  /* The non-dependent result of build_new_method_call.  */
- orig_fn = TREE_OPERAND (orig_fn, 1);
- gcc_assert (BASELINK_P (orig_fn));
-   }
+ sel_fn = TREE_OPERAND (sel_fn, 1);
+ gcc_assert (BASELINK_P (sel_fn));
+   }
+ else if (TREE_CODE (sel_fn) == ADDR_EXPR)
+   /* Undo the ADDR_EXPR callee wrapping performed by build_over_call
+  since the original callee didn't have it.  */
+   sel_fn = TREE_OPERAND (sel_fn, 0);
+ orig_fn = sel_fn;
}
  
result = build_call_vec (TREE_TYPE (result), orig_fn, orig_args);

diff --git a/gcc/testsuite/g++.dg/template/friend75.C 
b/gcc/testsuite/g++.dg/template/friend75.C
new file mode 100644
index 000..800d3043c8a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/friend75.C
@@ -0,0 +1,26 @@
+// PR c++/107461
+// { dg-do compile { target c++11 } }
+
+template
+constexpr T min(T t0, T t1) {
+  return t0 < t1 ? t0 : t1;
+}
+
+template
+struct Matrix;
+
+template
+Matrix
+operator+(Matrix const& lhs, Matrix const& rhs); // #1
+
+template
+struct Matrix {
+  template
+  friend Matrix
+  operator+(Matrix const& lhs, Matrix const& rhs); // #2
+};
+
+void f() {
+  Matrix<1> a;
+  a+a;
+}




[pushed] wwwdocs: gcc-11: Update arm "Straight-line Speculation vulnerability" link

2023-02-02 Thread Gerald Pfeifer
Pushed.

Gerald
---
 htdocs/gcc-11/changes.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/htdocs/gcc-11/changes.html b/htdocs/gcc-11/changes.html
index 4787fc26..cc0fdc19 100644
--- a/htdocs/gcc-11/changes.html
+++ b/htdocs/gcc-11/changes.html
@@ -807,7 +807,7 @@ You may also want to check out our
   The AArch64 Armv8-R architecture is now supported through the
   -march=armv8-r option.
   Mitigation against the
-  https://developer.arm.com/support/arm-security-updates/speculative-processor-vulnerability/downloads/straight-line-speculation";>Straight-line
 Speculation vulnerability
+  https://developer.arm.com/documentation/102587/0102/Straight-line-speculation-frequently-asked-questions";>Straight-line
 Speculation vulnerability
   is supported with the -mharden-sls= option.  Please refer to the
   documentation for usage instructions.
   The availability of Advanced SIMD intrinsics available through the
-- 
2.39.1


Re: [PATCH] c++: wrong error with constexpr array and value-init [PR108158]

2023-02-02 Thread Jason Merrill via Gcc-patches

On 1/30/23 21:35, Marek Polacek wrote:

In this test case, we find ourselves evaluating 't' which is
((const struct carray *) this)->data_[VIEW_CONVERT_EXPR(index)]
in cxx_eval_array_reference.  ctx->object is non-null, a RESULT_DECL, so
we replace it with 't':

   new_ctx.object = t; // result_decl replaced

and then we go to cxx_eval_constant_expression to evaluate an
AGGR_INIT_EXPR, where we end up evaluating an INIT_EXPR (which is in the
body of the constructor for seed_or_index):

   ((struct seed_or_index *) this)->value_ = NON_LVALUE_EXPR <0>

whereupon in cxx_eval_store_expression we go to the probe loop
where the 'this' is evaluated to

   ze_set.tables_.first_table_.data_[0]

so the 'object' is ze_set, but that isn't in ctx->global->get_value_ptr
so we fail with a bogus error.  ze_set is not there because it comes
from a different constexpr context (it's not in cv_cache either).

The problem started with r12-2304 where I added the new_ctx.object
replacement.  That was to prevent a type mismatch: the type of 't'
and ctx.object were different.

It seems clear that we shouldn't have replaced ctx.object here.
The cxx_eval_array_reference I mentioned earlier is called from
cxx_eval_store_expression:
  6257   init = cxx_eval_constant_expression (&new_ctx, init, vc_prvalue,
  6258non_constant_p, overflow_p);
which already created a new context, whose .object we should be
using unless, for instance, INIT contained a.b and we're evaluating
the 'a' part, which I think was the case for r12-2304; in that case
ctx.object has to be something different.

A relatively safe fix should be to check the types before replacing
ctx.object, as in the below.


Agreed.  I'm trying to understand when the replacement could ever make 
sense, since 't' is not the target, it's the initializer.  The 
replacement comes from Patrick's fix for 98295, but that testcase no 
longer hits that code (likely due to changes in empty class handling).


If you add a gcc_checking_assert (false) to the replacement, does 
anything trip it?



Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/108158

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_array_reference): Don't replace
new_ctx.object if its type is the same as elem_type.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1y/constexpr-108158.C: New test.
---
  gcc/cp/constexpr.cc   |  8 +++--
  gcc/testsuite/g++.dg/cpp1y/constexpr-108158.C | 32 +++
  2 files changed, 38 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp1y/constexpr-108158.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index be99bec17e7..00582cfffe2 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -4301,9 +4301,13 @@ cxx_eval_array_reference (const constexpr_ctx *ctx, tree 
t,
if (!SCALAR_TYPE_P (elem_type))
  {
new_ctx = *ctx;
-  if (ctx->object)
+  if (ctx->object
+ && !same_type_ignoring_top_level_qualifiers_p
+ (elem_type, TREE_TYPE (ctx->object)))
/* If there was no object, don't add one: it could confuse us
-  into thinking we're modifying a const object.  */
+  into thinking we're modifying a const object.  Similarly, if
+  the types are the same, replacing .object could lead to a
+  failure to evaluate it (c++/108158).  */
new_ctx.object = t;
new_ctx.ctor = build_constructor (elem_type, NULL);
ctx = &new_ctx;
diff --git a/gcc/testsuite/g++.dg/cpp1y/constexpr-108158.C 
b/gcc/testsuite/g++.dg/cpp1y/constexpr-108158.C
new file mode 100644
index 000..e5f5e9954e5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/constexpr-108158.C
@@ -0,0 +1,32 @@
+// PR c++/108158
+// { dg-do compile { target c++14 } }
+
+template  struct carray {
+  T data_[N]{};
+  constexpr T operator[](long index) const { return data_[index]; }
+};
+struct seed_or_index {
+private:
+  long value_ = 0;
+};
+template  struct pmh_tables {
+  carray first_table_;
+  template 
+  constexpr void lookup(KeyType, HasherType) const {
+first_table_[0];
+  }
+};
+template  struct unordered_set {
+  int equal_;
+  carray keys_;
+  pmh_tables tables_;
+  constexpr unordered_set() : equal_{} {}
+  template 
+  constexpr auto lookup(KeyType key, Hasher hash) const {
+tables_.lookup(key, hash);
+return keys_;
+  }
+};
+constexpr unordered_set<3> ze_set;
+constexpr auto nocount = ze_set.lookup(4, int());
+constexpr auto nocount2 = unordered_set<3>{}.lookup(4, int());

base-commit: 897a0502056e6cc6613f26e0b22d1c1e06b1490f




Re: [PATCH] c++: ICE on unviable/ambiguous constrained dtors [PR96745]

2023-02-02 Thread Jason Merrill via Gcc-patches

On 1/30/23 16:36, Patrick Palka wrote:

Here we're crashing from check_bases_and_members due to
CLASSTYPE_DESTRUCTOR being an OVERLOAD which, due to the pruning
performed by add_method, should only happen if there is no viable
destructor or the destructor is ambiguous.

This patch fixes this by making check_bases_and_members naturally handle
CLASSTYPE_DESTRUCTOR being an OVERLOAD.  It's then convenient to prune
the OVERLOAD after diagnosing the inevitable OR failure in check_methods.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk and perhaps 12?


OK for trunk, this doesn't seem important to backport.


PR c++/96745

gcc/cp/ChangeLog:

* class.cc (check_methods): Diagnose an unviable OVERLOAD
set for CLASSTYPE_DESTRUCTOR differently from an ambiguous one.
Then prune the OVERLOAD to a single function.
(check_bases_and_members): Handle CLASSTYPE_DESTRUCTOR being
an OVERLOAD when calling deduce_noexcept_on_destructor.
Document why it has to be called before check_methods.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-dtor1.C: New test.
---
  gcc/cp/class.cc | 22 ++---
  gcc/testsuite/g++.dg/cpp2a/concepts-dtor1.C | 18 +
  2 files changed, 37 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-dtor1.C

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index d3ce8532d56..c6878cba2ae 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -4808,9 +4808,23 @@ check_methods (tree t)
 in that class with an empty argument list to select the destructor
 for the class, also known as the selected destructor. The program
 is ill-formed if overload resolution fails. */
+ int viable = 0;
+ for (tree fn : ovl_range (dtor))
+   if (constraints_satisfied_p (fn))
+ ++viable;
+ gcc_checking_assert (viable != 1);
+
  auto_diagnostic_group d;
- error_at (location_of (t), "destructor for %qT is ambiguous", t);
+ if (viable == 0)
+   error_at (location_of (t), "no viable destructor for %qT", t);
+ else
+   error_at (location_of (t), "destructor for %qT is ambiguous", t);
  print_candidates (dtor);
+
+ /* Arbitrarily prune the overload set to a single function for
+sake of error recovery.  */
+ tree *slot = find_member_slot (t, dtor_identifier);
+ *slot = get_first_fn (dtor);
}
else if (user_provided_p (dtor))
TYPE_HAS_NONTRIVIAL_DESTRUCTOR (t) = true;
@@ -6048,10 +6062,12 @@ check_bases_and_members (tree t)
check_bases (t, &cant_have_const_ctor, &no_const_asn_ref);
  
/* Deduce noexcept on destructor.  This needs to happen after we've set

- triviality flags appropriately for our bases.  */
+ triviality flags appropriately for our bases, and before checking
+ overriden virtual functions via check_methods.  */
if (cxx_dialect >= cxx11)
  if (tree dtor = CLASSTYPE_DESTRUCTOR (t))
-  deduce_noexcept_on_destructor (dtor);
+  for (tree fn : ovl_range (dtor))
+   deduce_noexcept_on_destructor (fn);
  
/* Check all the method declarations.  */

check_methods (t);
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-dtor1.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-dtor1.C
new file mode 100644
index 000..b1f3b4e579f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-dtor1.C
@@ -0,0 +1,18 @@
+// PR c++/96745
+// { dg-do compile { target c++20 } }
+
+template
+struct A { // { dg-error "destructor for 'A' is ambiguous" }
+  ~A() requires true;
+  ~A() requires (!!true);
+};
+
+A a;
+
+template
+struct B { // { dg-error "no viable destructor for 'B'" }
+  ~B() requires false;
+  ~B() requires (!!false);
+};
+
+B b;




Re: [PATCH] c++: excessive satisfaction in check_methods [PR108579]

2023-02-02 Thread Jason Merrill via Gcc-patches

On 1/30/23 14:10, Patrick Palka wrote:

In check_methods we're unnecessarily checking satisfaction for all
constructors and assignment operators, even those that don't look like
copy/move special members.  In the testcase below this manifests as an
unstable satisfaction error because the satisfaction result is first
determined to be false during check_methods (since A is incomplete
at this point) and later true after completion of A.

This patch fixes this simply by swapping the order of the
constraints_satisfied_p and copy_fn_p / move_fn_p tests.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look
OK for trunk?  This doesn't fix the regression completely, since
we get a similar unstable satisfaction error if one of the constrained
members is actually a copy/move special member.  I suppose we need to
rearrange things in finish_struct_1 so that check_methods gets called in
a complete class context?


I think the way to make that work, if indeed that's desirable, would be 
to determine those properties lazily instead of at finish_struct time.


The patch is OK.


PR c++/108579

gcc/cp/ChangeLog:

* class.cc (check_methods): Test constraints_satisfied_p after
testing copy_fn_p / move_fn_p instead of beforehand.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/concepts-pr108579.C: New test.
---
  gcc/cp/class.cc| 16 
  gcc/testsuite/g++.dg/cpp2a/concepts-pr108579.C | 14 ++
  2 files changed, 22 insertions(+), 8 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-pr108579.C

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index 351de6c5419..d3ce8532d56 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -4822,11 +4822,11 @@ check_methods (tree t)
/* Might be trivial.  */;
else if (TREE_CODE (fn) == TEMPLATE_DECL)
/* Templates are never special members.  */;
-  else if (!constraints_satisfied_p (fn))
-   /* Not eligible.  */;
-  else if (copy_fn_p (fn))
+  else if (copy_fn_p (fn)
+  && constraints_satisfied_p (fn))
TYPE_HAS_COMPLEX_COPY_CTOR (t) = true;
-  else if (move_fn_p (fn))
+  else if (move_fn_p (fn)
+  && constraints_satisfied_p (fn))
TYPE_HAS_COMPLEX_MOVE_CTOR (t) = true;
  }
  
@@ -4836,11 +4836,11 @@ check_methods (tree t)

/* Might be trivial.  */;
else if (TREE_CODE (fn) == TEMPLATE_DECL)
/* Templates are never special members.  */;
-  else if (!constraints_satisfied_p (fn))
-   /* Not eligible.  */;
-  else if (copy_fn_p (fn))
+  else if (copy_fn_p (fn)
+  && constraints_satisfied_p (fn))
TYPE_HAS_COMPLEX_COPY_ASSIGN (t) = true;
-  else if (move_fn_p (fn))
+  else if (move_fn_p (fn)
+  && constraints_satisfied_p (fn))
TYPE_HAS_COMPLEX_MOVE_ASSIGN (t) = true;
  }
  }
diff --git a/gcc/testsuite/g++.dg/cpp2a/concepts-pr108579.C 
b/gcc/testsuite/g++.dg/cpp2a/concepts-pr108579.C
new file mode 100644
index 000..bc7d709f889
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/concepts-pr108579.C
@@ -0,0 +1,14 @@
+// PR c++/108579
+// { dg-do compile { target c++20 } }
+
+template
+struct A {
+  A(double, char);
+  A(int) requires requires { A(0.0, 'c'); };
+  A& operator=(int) requires requires { A(1.0, 'd'); };
+};
+
+int main() {
+  A x(3);
+  x = 5;
+}




[committed] c: Update nullptr_t comparison checks

2023-02-02 Thread Joseph Myers
WG14 has agreed to allow equality comparisons between pointers and
nullptr_t values that are not null pointer constants (this was
previously an exceptional case where such nullptr_t values were
handled differently from null pointer constants; other places in the
standard allowed nullptr_t values, whether or not those values are
null pointer constants, in the same contexts as null pointer
constants); see the wording at the end of N3077.  Update GCC's
implementation to match this change.

There are also changes to allow null pointer constants of integer or
pointer type to be converted to nullptr_t (by assignment, cast or
conversion as if by assignment), which I'll deal with separately.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

gcc/c/
* c-typeck.cc (build_binary_op): Allow comparisons between
pointers and nullptr_t values that are not null pointer constants.

gcc/testsuite/
* gcc.dg/c2x-constexpr-3.c: Do not expect comparison of nullptr_t
and pointer to be disallowed.
* gcc.dg/c2x-nullptr-1.c: Test comparisons of nullptr_t and
pointers are allowed.
* gcc.dg/c2x-nullptr-3.c: Do not test that comparisons of
nullptr_t and pointers are disallowed.

diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc
index 9d65130154d..224a9cbdc3d 100644
--- a/gcc/c/c-typeck.cc
+++ b/gcc/c/c-typeck.cc
@@ -12749,12 +12749,16 @@ build_binary_op (location_t location, enum tree_code 
code,
  && (code1 == INTEGER_TYPE || code1 == REAL_TYPE
  || code1 == FIXED_POINT_TYPE || code1 == COMPLEX_TYPE))
short_compare = 1;
-  else if (code0 == POINTER_TYPE && null_pointer_constant_p (orig_op1))
+  else if (code0 == POINTER_TYPE
+  && (code1 == NULLPTR_TYPE
+  || null_pointer_constant_p (orig_op1)))
{
  maybe_warn_for_null_address (location, op0, code);
  result_type = type0;
}
-  else if (code1 == POINTER_TYPE && null_pointer_constant_p (orig_op0))
+  else if (code1 == POINTER_TYPE
+  && (code0 == NULLPTR_TYPE
+  || null_pointer_constant_p (orig_op0)))
{
  maybe_warn_for_null_address (location, op1, code);
  result_type = type1;
diff --git a/gcc/testsuite/gcc.dg/c2x-constexpr-3.c 
b/gcc/testsuite/gcc.dg/c2x-constexpr-3.c
index 4f6b8ed6779..44a3ed358e1 100644
--- a/gcc/testsuite/gcc.dg/c2x-constexpr-3.c
+++ b/gcc/testsuite/gcc.dg/c2x-constexpr-3.c
@@ -219,7 +219,6 @@ f0 ()
   (constexpr signed char []) { u8"\xff" }; /* { dg-error "'constexpr' 
initializer not representable in type of object" } */
   constexpr typeof (nullptr) not_npc = nullptr;
   int *ptr = 0;
-  (void) (ptr == not_npc); /* { dg-error "invalid operands" } */
   /* auto may only be used with another storage class specifier, such as
  constexpr, if the type is inferred.  */
   auto constexpr int a_c_t = 1; /* { dg-error "'auto' used with 'constexpr'" } 
*/
diff --git a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c 
b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
index 9f2cb6c8256..04f9901bb12 100644
--- a/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
+++ b/gcc/testsuite/gcc.dg/c2x-nullptr-1.c
@@ -141,6 +141,23 @@ test2 (int *p)
   (void) (p != _Generic(0, int : nullptr));
   (void) (_Generic(0, int : nullptr) == p);
   (void) (_Generic(0, int : nullptr) != p);
+
+  /* "(nullptr_t)nullptr" has type nullptr_t but isn't an NPC; these
+ comparisons are valid after C2X CD comments GB-071 and FR-073 were
+ resolved by the wording in N3077.  */
+  (void) ((nullptr_t)nullptr == p);
+  (void) ((nullptr_t)nullptr != p);
+  (void) (p == (nullptr_t)nullptr);
+  (void) (p != (nullptr_t)nullptr);
+  (void) (cmp () == p);
+  (void) (cmp () != p);
+  (void) (p == cmp ());
+  (void) (p != cmp ());
+  /* "(void *)nullptr" is not an NPC, either.  */
+  (void) ((void *)nullptr == cmp ());
+  (void) ((void *)nullptr != cmp ());
+  (void) (cmp () == (void *)nullptr);
+  (void) (cmp () != (void *)nullptr);
 }
 
 /* Test ?:.  */
diff --git a/gcc/testsuite/gcc.dg/c2x-nullptr-3.c 
b/gcc/testsuite/gcc.dg/c2x-nullptr-3.c
index 34e3e03ba9d..591ab7e6158 100644
--- a/gcc/testsuite/gcc.dg/c2x-nullptr-3.c
+++ b/gcc/testsuite/gcc.dg/c2x-nullptr-3.c
@@ -19,21 +19,6 @@ test1 (int *p)
   (void) (nullptr != 1); /* { dg-error "invalid operands" } */
   (void) (1 != nullptr); /* { dg-error "invalid operands" } */
   (void) (1 > nullptr); /* { dg-error "invalid operands" } */
-
-  /* "(nullptr_t)nullptr" has type nullptr_t but isn't an NPC.  */
-  (void) ((nullptr_t)nullptr == p); /* { dg-error "invalid operands" } */
-  (void) ((nullptr_t)nullptr != p); /* { dg-error "invalid operands" } */
-  (void) (p == (nullptr_t)nullptr); /* { dg-error "invalid operands" } */
-  (void) (p != (nullptr_t)nullptr); /* { dg-error "invalid operands" } */
-  (void) (cmp () == p); /* { dg-error "invalid operands" } */
-  (void) (cmp () != p); /* { dg-error "invalid operands" } */
-  (void) (p == cmp ()); /* { dg-error 

[PATCH] c++: can't eval PTRMEM_CST in incomplete class [PR107574]

2023-02-02 Thread Marek Polacek via Gcc-patches
Here we're attempting to evaluate a PTRMEM_CST in a class that hasn't
been completed yet, but that doesn't work:

/* We can't lower this until the class is complete.  */
if (!COMPLETE_TYPE_P (DECL_CONTEXT (member)))
  return cst;

and then this unlowered PTRMEM_CST is used as EXPR in

tree op1 = build_nop (ptrdiff_type_node, expr);

and we crash in a subsequent cp_fold_convert which gets type=ptrdiff_type_node,
expr=PTRMEM_CST and does

  else if (TREE_CODE (expr) == PTRMEM_CST
   && same_type_p (TYPE_PTRMEM_CLASS_TYPE (type),
   PTRMEM_CST_CLASS (expr)))

where TYPE_PTRMEM_CLASS_TYPE (type) is going to crash since the type
is ptrdiff_type_node.  We could just add a TYPE_PTRMEM_P check before
accessing TYPE_PTRMEM_CLASS_TYPE but I think it's nicer to explain why
we couldn't evaluate the expression.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/107574

gcc/cp/ChangeLog:

* constexpr.cc (cxx_eval_constant_expression): Emit an error when
a PTRMEM_CST cannot be evaluated.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/ptrmem-cst1.C: New test.
---
 gcc/cp/constexpr.cc  |  9 +
 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst1.C | 11 +++
 2 files changed, 20 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/ptrmem-cst1.C

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index 5b31f9c27d1..2c03988b097 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -7691,6 +7691,15 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
if (!same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE 
(op))
&& !can_convert_qual (type, op))
  op = cplus_expand_constant (op);
+   if (TREE_CODE (op) == PTRMEM_CST && !TYPE_PTRMEM_P (type))
+ {
+   if (!ctx->quiet)
+ error_at (loc, "%qE is not a constant expression when the "
+   "class %qT is still incomplete", op,
+   PTRMEM_CST_CLASS (op));
+   *non_constant_p = true;
+   return t;
+ }
return cp_fold_convert (type, op);
  }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst1.C 
b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst1.C
new file mode 100644
index 000..0d6a6b6445d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/ptrmem-cst1.C
@@ -0,0 +1,11 @@
+// PR c++/107574
+// { dg-do compile { target c++11 } }
+
+struct A { int i; };
+struct B:A { int j; };
+struct C:B {
+  int k;
+  static_assert((int B::*) &C::k, ""); // { dg-error "non-constant|still 
incomplete" }
+};
+
+static_assert((int B::*) &C::k, "");

base-commit: 07c87fce63541846ca2951e22dac04fcaa66475f
-- 
2.39.1



Re: [PATCH] c++ modules: uninstantiated template friend class [PR104234]

2023-02-02 Thread Nathan Sidwell via Gcc-patches
That might be sufficient for this case, but temploid friends violate an 
assumption of the implementation -- namely that module A cannot create an entity 
that belongs in module B's symbol table.  This causes a bunch of excitement, 
particularly around handling (well-formed) duplicated instantiations.


I'm not sure of the way to handle that, but I suspect something along the lines 
of a flag on such decls and a new hash table to hold these exceptions.


nathan

On 1/25/23 15:16, Patrick Palka wrote:

Here we're not clearing DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P for
the instantiated/injected template friend class B, which confuses a
later call to get_originating_module_decl for B.  This patch fixes this
by clearing the flag in tsubst_friend_class (as is already done for
template friend functions by r11-5730-gf7aeb823d9b0de).

After fixing that, we still fail to compile the testcase, rejecting the
later definition of B with

   friend-6_a.C:10:26: error: cannot declare ‘struct B’ in a different module

ultimately because DECL_MODULE_ATTACH_P wasn't set on the original
(injected) declaration of B.  This patch fixes this by calling
set_originating_module in tsubst_friend_class, but for that to work it
seems we need to relax the assert in this latter function since
get_originating_module_decl when called on the TYPE_DECL for B returns
the corresponding TEMPLATE_DECL.

(Alternatively we can instead call set_originating_module on the
TYPE_DECL B as soon as it's created in lookup_template_class (which is
what pushtag does), which doesn't need this assert change because at
this point the TYPE_DECL doesn't have any TEMPLATE_INFO so
get_originating_module_decl becomes a no-op.  Would that be preferable?)

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

PR c++/104234

gcc/cp/ChangeLog:

* module.cc (set_originating_module): Document default argument.
Relax assert to look through DECL_TEMPLATE_RESULT in the result
of get_originating_module_decl.
* pt.cc (tsubst_friend_class): Clear
DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P and call
set_originating_module on the instantiated template friend class.

gcc/testsuite/ChangeLog:

* g++.dg/modules/friend-6_a.C: New test.
---
  gcc/cp/module.cc  |  8 ++--
  gcc/cp/pt.cc  |  3 +++
  gcc/testsuite/g++.dg/modules/friend-6_a.C | 10 ++
  3 files changed, 19 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/modules/friend-6_a.C

diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
index 7133009dba5..234ce43b70f 100644
--- a/gcc/cp/module.cc
+++ b/gcc/cp/module.cc
@@ -18843,14 +18843,18 @@ set_defining_module (tree decl)
  }
  
  void

-set_originating_module (tree decl, bool friend_p ATTRIBUTE_UNUSED)
+set_originating_module (tree decl, bool friend_p /* = false */)
  {
set_instantiating_module (decl);
  
if (!DECL_NAMESPACE_SCOPE_P (decl))

  return;
  
-  gcc_checking_assert (friend_p || decl == get_originating_module_decl (decl));

+  if (!friend_p)
+{
+  tree o = get_originating_module_decl (decl);
+  gcc_checking_assert (STRIP_TEMPLATE (o) == decl);
+}
  
if (module_attach_p ())

  {
diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index cbe5898b553..f2ee74025e7 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -11520,6 +11520,9 @@ tsubst_friend_class (tree friend_tmpl, tree args)
  CLASSTYPE_TI_ARGS (TREE_TYPE (tmpl))
= INNERMOST_TEMPLATE_ARGS (CLASSTYPE_TI_ARGS (TREE_TYPE (tmpl)));
  
+	  DECL_UNINSTANTIATED_TEMPLATE_FRIEND_P (tmpl) = false;

+ set_originating_module (DECL_TEMPLATE_RESULT (tmpl));
+
  /* Substitute into and set the constraints on the new declaration.  */
  if (tree ci = get_constraints (friend_tmpl))
{
diff --git a/gcc/testsuite/g++.dg/modules/friend-6_a.C 
b/gcc/testsuite/g++.dg/modules/friend-6_a.C
new file mode 100644
index 000..97017e4ee78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/modules/friend-6_a.C
@@ -0,0 +1,10 @@
+// PR c++/104234
+// { dg-additional-options "-fmodules-ts" }
+// { dg-module-cmi pr104234 }
+export module pr104234;
+
+template struct A {
+  template friend struct B;
+};
+A a;
+template struct B { };


--
Nathan Sidwell



Re: [aarch64] Use dup and zip1 for interleaving elements in initializing vector

2023-02-02 Thread Prathamesh Kulkarni via Gcc-patches
On Thu, 2 Feb 2023 at 20:50, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> >> >> > I have attached a patch that extends the transform if one half is dup
> >> >> > and other is set of constants.
> >> >> > For eg:
> >> >> > int8x16_t f(int8_t x)
> >> >> > {
> >> >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4, x, 5, x, 6, x, 7, x, 8 
> >> >> > };
> >> >> > }
> >> >> >
> >> >> > code-gen trunk:
> >> >> > f:
> >> >> > adrpx1, .LC0
> >> >> > ldr q0, [x1, #:lo12:.LC0]
> >> >> > ins v0.b[0], w0
> >> >> > ins v0.b[2], w0
> >> >> > ins v0.b[4], w0
> >> >> > ins v0.b[6], w0
> >> >> > ins v0.b[8], w0
> >> >> > ins v0.b[10], w0
> >> >> > ins v0.b[12], w0
> >> >> > ins v0.b[14], w0
> >> >> > ret
> >> >> >
> >> >> > code-gen with patch:
> >> >> > f:
> >> >> > dup v0.16b, w0
> >> >> > adrpx0, .LC0
> >> >> > ldr q1, [x0, #:lo12:.LC0]
> >> >> > zip1v0.16b, v0.16b, v1.16b
> >> >> > ret
> >> >> >
> >> >> > Bootstrapped+tested on aarch64-linux-gnu.
> >> >> > Does it look OK ?
> >> >>
> >> >> Looks like a nice improvement.  It'll need to wait for GCC 14 now 
> >> >> though.
> >> >>
> >> >> However, rather than handle this case specially, I think we should 
> >> >> instead
> >> >> take a divide-and-conquer approach: split the initialiser into even and
> >> >> odd elements, find the best way of loading each part, then compare the
> >> >> cost of these sequences + ZIP with the cost of the fallback code (the 
> >> >> code
> >> >> later in aarch64_expand_vector_init).
> >> >>
> >> >> For example, doing that would allow:
> >> >>
> >> >>   { x, y, 0, y, 0, y, 0, y, 0, y }
> >> >>
> >> >> to be loaded more easily, even though the even elements aren't wholly
> >> >> constant.
> >> > Hi Richard,
> >> > I have attached a prototype patch based on the above approach.
> >> > It subsumes specializing for above {x, y, x, y, x, y, x, y} case by 
> >> > generating
> >> > same sequence, thus I removed that hunk, and improves the following 
> >> > cases:
> >> >
> >> > (a)
> >> > int8x16_t f_s16(int8_t x)
> >> > {
> >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4,
> >> >  x, 5, x, 6, x, 7, x, 8 };
> >> > }
> >> >
> >> > code-gen trunk:
> >> > f_s16:
> >> > adrpx1, .LC0
> >> > ldr q0, [x1, #:lo12:.LC0]
> >> > ins v0.b[0], w0
> >> > ins v0.b[2], w0
> >> > ins v0.b[4], w0
> >> > ins v0.b[6], w0
> >> > ins v0.b[8], w0
> >> > ins v0.b[10], w0
> >> > ins v0.b[12], w0
> >> > ins v0.b[14], w0
> >> > ret
> >> >
> >> > code-gen with patch:
> >> > f_s16:
> >> > dup v0.16b, w0
> >> > adrpx0, .LC0
> >> > ldr q1, [x0, #:lo12:.LC0]
> >> > zip1v0.16b, v0.16b, v1.16b
> >> > ret
> >> >
> >> > (b)
> >> > int8x16_t f_s16(int8_t x, int8_t y)
> >> > {
> >> >   return (int8x16_t) { x, y, 1, y, 2, y, 3, y,
> >> > 4, y, 5, y, 6, y, 7, y };
> >> > }
> >> >
> >> > code-gen trunk:
> >> > f_s16:
> >> > adrpx2, .LC0
> >> > ldr q0, [x2, #:lo12:.LC0]
> >> > ins v0.b[0], w0
> >> > ins v0.b[1], w1
> >> > ins v0.b[3], w1
> >> > ins v0.b[5], w1
> >> > ins v0.b[7], w1
> >> > ins v0.b[9], w1
> >> > ins v0.b[11], w1
> >> > ins v0.b[13], w1
> >> > ins v0.b[15], w1
> >> > ret
> >> >
> >> > code-gen patch:
> >> > f_s16:
> >> > adrpx2, .LC0
> >> > dup v1.16b, w1
> >> > ldr q0, [x2, #:lo12:.LC0]
> >> > ins v0.b[0], w0
> >> > zip1v0.16b, v0.16b, v1.16b
> >> > ret
> >>
> >> Nice.
> >>
> >> > There are a couple of issues I have come across:
> >> > (1) Choosing element to pad vector.
> >> > For eg, if we are initializing a vector say { x, y, 0, y, 1, y, 2, y }
> >> > with mode V8HI.
> >> > We split it into { x, 0, 1, 2 } and { y, y, y, y}
> >> > However since the mode is V8HI, we would need to pad the above split 
> >> > vectors
> >> > with 4 more elements to match up to vector length.
> >> > For {x, 0, 1, 2} using any constant is the obvious choice while for {y, 
> >> > y, y, y}
> >> > using 'y' is the obvious choice thus making them:
> >> > {x, 0, 1, 2, 0, 0, 0, 0} and {y, y, y, y, y, y, y, y}
> >> > These would be then merged using zip1 which would discard the lower half
> >> > of both vectors.
> >> > Currently I encoded the above two heuristics in
> >> > aarch64_expand_vector_init_get_padded_elem:
> >> > (a) If split portion contains a constant, use the constant to pad the 
> >> > vector.
> >> > (b) If split portion only contains variables, then use the most
> >> > frequently repeating variable
> >> > to pad the vector.
> >> > I suppose tho 

Re: [aarch64] Use dup and zip1 for interleaving elements in initializing vector

2023-02-02 Thread Prathamesh Kulkarni via Gcc-patches
On Fri, 3 Feb 2023 at 07:10, Prathamesh Kulkarni
 wrote:
>
> On Thu, 2 Feb 2023 at 20:50, Richard Sandiford
>  wrote:
> >
> > Prathamesh Kulkarni  writes:
> > >> >> > I have attached a patch that extends the transform if one half is 
> > >> >> > dup
> > >> >> > and other is set of constants.
> > >> >> > For eg:
> > >> >> > int8x16_t f(int8_t x)
> > >> >> > {
> > >> >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4, x, 5, x, 6, x, 7, x, 
> > >> >> > 8 };
> > >> >> > }
> > >> >> >
> > >> >> > code-gen trunk:
> > >> >> > f:
> > >> >> > adrpx1, .LC0
> > >> >> > ldr q0, [x1, #:lo12:.LC0]
> > >> >> > ins v0.b[0], w0
> > >> >> > ins v0.b[2], w0
> > >> >> > ins v0.b[4], w0
> > >> >> > ins v0.b[6], w0
> > >> >> > ins v0.b[8], w0
> > >> >> > ins v0.b[10], w0
> > >> >> > ins v0.b[12], w0
> > >> >> > ins v0.b[14], w0
> > >> >> > ret
> > >> >> >
> > >> >> > code-gen with patch:
> > >> >> > f:
> > >> >> > dup v0.16b, w0
> > >> >> > adrpx0, .LC0
> > >> >> > ldr q1, [x0, #:lo12:.LC0]
> > >> >> > zip1v0.16b, v0.16b, v1.16b
> > >> >> > ret
> > >> >> >
> > >> >> > Bootstrapped+tested on aarch64-linux-gnu.
> > >> >> > Does it look OK ?
> > >> >>
> > >> >> Looks like a nice improvement.  It'll need to wait for GCC 14 now 
> > >> >> though.
> > >> >>
> > >> >> However, rather than handle this case specially, I think we should 
> > >> >> instead
> > >> >> take a divide-and-conquer approach: split the initialiser into even 
> > >> >> and
> > >> >> odd elements, find the best way of loading each part, then compare the
> > >> >> cost of these sequences + ZIP with the cost of the fallback code (the 
> > >> >> code
> > >> >> later in aarch64_expand_vector_init).
> > >> >>
> > >> >> For example, doing that would allow:
> > >> >>
> > >> >>   { x, y, 0, y, 0, y, 0, y, 0, y }
> > >> >>
> > >> >> to be loaded more easily, even though the even elements aren't wholly
> > >> >> constant.
> > >> > Hi Richard,
> > >> > I have attached a prototype patch based on the above approach.
> > >> > It subsumes specializing for above {x, y, x, y, x, y, x, y} case by 
> > >> > generating
> > >> > same sequence, thus I removed that hunk, and improves the following 
> > >> > cases:
> > >> >
> > >> > (a)
> > >> > int8x16_t f_s16(int8_t x)
> > >> > {
> > >> >   return (int8x16_t) { x, 1, x, 2, x, 3, x, 4,
> > >> >  x, 5, x, 6, x, 7, x, 8 };
> > >> > }
> > >> >
> > >> > code-gen trunk:
> > >> > f_s16:
> > >> > adrpx1, .LC0
> > >> > ldr q0, [x1, #:lo12:.LC0]
> > >> > ins v0.b[0], w0
> > >> > ins v0.b[2], w0
> > >> > ins v0.b[4], w0
> > >> > ins v0.b[6], w0
> > >> > ins v0.b[8], w0
> > >> > ins v0.b[10], w0
> > >> > ins v0.b[12], w0
> > >> > ins v0.b[14], w0
> > >> > ret
> > >> >
> > >> > code-gen with patch:
> > >> > f_s16:
> > >> > dup v0.16b, w0
> > >> > adrpx0, .LC0
> > >> > ldr q1, [x0, #:lo12:.LC0]
> > >> > zip1v0.16b, v0.16b, v1.16b
> > >> > ret
> > >> >
> > >> > (b)
> > >> > int8x16_t f_s16(int8_t x, int8_t y)
> > >> > {
> > >> >   return (int8x16_t) { x, y, 1, y, 2, y, 3, y,
> > >> > 4, y, 5, y, 6, y, 7, y };
> > >> > }
> > >> >
> > >> > code-gen trunk:
> > >> > f_s16:
> > >> > adrpx2, .LC0
> > >> > ldr q0, [x2, #:lo12:.LC0]
> > >> > ins v0.b[0], w0
> > >> > ins v0.b[1], w1
> > >> > ins v0.b[3], w1
> > >> > ins v0.b[5], w1
> > >> > ins v0.b[7], w1
> > >> > ins v0.b[9], w1
> > >> > ins v0.b[11], w1
> > >> > ins v0.b[13], w1
> > >> > ins v0.b[15], w1
> > >> > ret
> > >> >
> > >> > code-gen patch:
> > >> > f_s16:
> > >> > adrpx2, .LC0
> > >> > dup v1.16b, w1
> > >> > ldr q0, [x2, #:lo12:.LC0]
> > >> > ins v0.b[0], w0
> > >> > zip1v0.16b, v0.16b, v1.16b
> > >> > ret
> > >>
> > >> Nice.
> > >>
> > >> > There are a couple of issues I have come across:
> > >> > (1) Choosing element to pad vector.
> > >> > For eg, if we are initializing a vector say { x, y, 0, y, 1, y, 2, y }
> > >> > with mode V8HI.
> > >> > We split it into { x, 0, 1, 2 } and { y, y, y, y}
> > >> > However since the mode is V8HI, we would need to pad the above split 
> > >> > vectors
> > >> > with 4 more elements to match up to vector length.
> > >> > For {x, 0, 1, 2} using any constant is the obvious choice while for 
> > >> > {y, y, y, y}
> > >> > using 'y' is the obvious choice thus making them:
> > >> > {x, 0, 1, 2, 0, 0, 0, 0} and {y, y, y, y, y, y, y, y}
> > >> > These would be then merged using zip1 which would discard the lower 
> > >> > half
> > >> > of

Re: [PATCH v5 0/5] P1689R5 support

2023-02-02 Thread Ben Boeckel via Gcc-patches
On Thu, Feb 02, 2023 at 21:24:12 +0100, Harald Anlauf wrote:
> Am 25.01.23 um 22:06 schrieb Ben Boeckel via Gcc-patches:
> > Hi,
> >
> > This patch series adds initial support for ISO C++'s [P1689R5][], a
> > format for describing C++ module requirements and provisions based on
> > the source code. This is required because compiling C++ with modules is
> > not embarrassingly parallel and need to be ordered to ensure that
> > `import some_module;` can be satisfied in time by making sure that any
> > TU with `export import some_module;` is compiled first.
> >
> > [P1689R5]: https://isocpp.org/files/papers/P1689R5.html
> 
> while that paper mentions Fortran, the patch in its present version
> does not seem to implement anything related to Fortran and does not
> touch the gfortran frontend.  Or am I missing anything?  Otherwise,
> could you give an example how it would be used with Fortran?

Correct. Still trying to put the walls back together after modules
KoolAid Man'd their way into the build graph structure :) . Being able
to drop our Fortran parser (well, we'd have to drop support for Fortran
compilers that exist today…so maybe in 2075 or something) and rely on
compilers to tell us the information would be amazing though :) .

FWIW, the initial revision of the patchset did touch the gfortran
frontend, but the new parameter is now defaulted and therefore the
callsite doesn't need an update anymore. I still thought it worthwhile
to keep the Fortran side aware of what is going on in the space.

The link to Fortran comes up because the build graph problem is
isomorphic (Fortran supports exporting multiple modules from a single
TU, but it's not relevant at the graph level; it's the zero -> any case
that is hard), CMake "solved" it already, and C++ is going to have a
*lot* more "I want to consume $other_project's modules using my favorite
compiler/flags" than seems to happen in Fortran. If you're interested,
this is the paper showing how we do it:

https://mathstuf.fedorapeople.org/fortran-modules/fortran-modules.html

> Thus I'd say that it is OK from the gfortran side.

Eventually we'd like to get gfortran supporting this type of scanning,
but…as above.

Thanks,

--Ben


Re: [PATCH v5 0/5] P1689R5 support

2023-02-02 Thread Andrew Pinski via Gcc-patches
On Wed, Jan 25, 2023 at 1:07 PM Ben Boeckel via Fortran
 wrote:
>
> Hi,
>
> This patch series adds initial support for ISO C++'s [P1689R5][], a
> format for describing C++ module requirements and provisions based on
> the source code. This is required because compiling C++ with modules is
> not embarrassingly parallel and need to be ordered to ensure that
> `import some_module;` can be satisfied in time by making sure that any
> TU with `export import some_module;` is compiled first.


I like how folks are complaining that GCC outputs POSIX makefile
syntax from GCC's dependency files which are supposed to be in POSIX
Makefile syntax.
It seems rather that the build tools people like to use no longer
understand POSIX makefile syntax.
Also I am not a fan of json, it is too verbose for no use. Maybe it is
time to go back to standardizing a new POSIX makefile syntax rather
than changing C++ here.

Thanks,
Andrew Pinski

>
> [P1689R5]: https://isocpp.org/files/papers/P1689R5.html
>
> I've also added patches to include imported module CMI files and the
> module mapper file as dependencies of the compilation. I briefly looked
> into adding dependencies on response files as well, but that appeared to
> need some code contortions to have a `class mkdeps` available before
> parsing the command line or to keep the information around until one was
> made.
>
> I'd like feedback on the approach taken here with respect to the
> user-visible flags. I'll also note that header units are not supported
> at this time because the current `-E` behavior with respect to `import
> <header>;` is to search for an appropriate `.gcm` file which is not
> something such a "scan" can support. A new mode will likely need to be
> created (e.g., replacing `-E` with `-fc++-module-scanning` or something)
> where headers are looked up "normally" and processed only as much as
> scanning requires.
>
> FWIW, Clang has taken an alternate approach with its `clang-scan-deps`
> tool rather than using the compiler directly.
>
> Thanks,
>
> --Ben
>
> ---
> v4 -> v5:
>
> - add dependency tracking for imported modules to `-MF`
> - add dependency tracking for static module mapper files given to
>   `-fmodule-mapper=`
>
> v3 -> v4:
>
> - add missing spaces between function names and arguments
>
> v2 -> v3:
>
> - changelog entries moved to commit messages
> - documentation updated/added in the UTF-8 routine editing
>
> v1 -> v2:
>
> - removal of the `deps_write(extra)` parameter to option-checking where
>   needed
> - default parameter of `cpp_finish(fdeps_stream = NULL)`
> - unification of libcpp UTF-8 validity functions from v1
> - test cases for flag parsing states (depflags-*) and p1689 output
>   (p1689-*)
>
> Ben Boeckel (5):
>   libcpp: reject codepoints above 0x10FFFF
>   libcpp: add a function to determine UTF-8 validity of a C string
>   p1689r5: initial support
>   c++modules: report imported CMI files as dependencies
>   c++modules: report module mapper files as a dependency
>
>  gcc/c-family/c-opts.cc|  40 +++-
>  gcc/c-family/c.opt|  12 +
>  gcc/cp/mapper-client.cc   |   4 +
>  gcc/cp/mapper-client.h|   1 +
>  gcc/cp/module.cc  |  23 +-
>  gcc/doc/invoke.texi   |  15 ++
>  gcc/testsuite/g++.dg/modules/depflags-f-MD.C  |   2 +
>  gcc/testsuite/g++.dg/modules/depflags-f.C |   1 +
>  gcc/testsuite/g++.dg/modules/depflags-fi.C|   3 +
>  gcc/testsuite/g++.dg/modules/depflags-fj-MD.C |   3 +
>  gcc/testsuite/g++.dg/modules/depflags-fj.C|   4 +
>  .../g++.dg/modules/depflags-fjo-MD.C  |   4 +
>  gcc/testsuite/g++.dg/modules/depflags-fjo.C   |   5 +
>  gcc/testsuite/g++.dg/modules/depflags-fo-MD.C |   3 +
>  gcc/testsuite/g++.dg/modules/depflags-fo.C|   4 +
>  gcc/testsuite/g++.dg/modules/depflags-j-MD.C  |   2 +
>  gcc/testsuite/g++.dg/modules/depflags-j.C |   3 +
>  gcc/testsuite/g++.dg/modules/depflags-jo-MD.C |   3 +
>  gcc/testsuite/g++.dg/modules/depflags-jo.C|   4 +
>  gcc/testsuite/g++.dg/modules/depflags-o-MD.C  |   2 +
>  gcc/testsuite/g++.dg/modules/depflags-o.C |   3 +
>  gcc/testsuite/g++.dg/modules/modules.exp  |   1 +
>  gcc/testsuite/g++.dg/modules/p1689-1.C|  18 ++
>  gcc/testsuite/g++.dg/modules/p1689-1.exp.json |  27 +++
>  gcc/testsuite/g++.dg/modules/p1689-2.C|  16 ++
>  gcc/testsuite/g++.dg/modules/p1689-2.exp.json |  16 ++
>  gcc/testsuite/g++.dg/modules/p1689-3.C|  14 ++
>  gcc/testsuite/g++.dg/modules/p1689-3.exp.json |  16 ++
>  gcc/testsuite/g++.dg/modules/p1689-4.C|  14 ++
>  gcc/testsuite/g++.dg/modules/p1689-4.exp.json |  14 ++
>  gcc/testsuite/g++.dg/modules/p1689-5.C|  14 ++
>  gcc/testsuite/g++.dg/modules/p1689-5.exp.json |  14 ++
>  gcc/testsuite/g++.dg/modules/test-p1689.py| 222 ++
>  gcc/testsuite/lib/modules.exp |  71 ++
>  libcpp/charset.c

Re: [PATCH 2/2] Documentation Update.

2023-02-02 Thread Siddhesh Poyarekar

On 2023-02-02 03:33, Richard Biener wrote:

looking at PR77650 what seems missing there is the semantics of this
extension as expected/required by the glibc use.  comment#5 seems
to suggest that for my example above it's expected that
Y.x.data[0] aliases Y.end?!  There must be a better way to write
the glibc code and IMHO it would be best to deprecate this extension.
Definitely the middle-end wouldn't consider this aliasing for
my example - maybe it "works" when wrapped inside a union but
then for sure only when the union is visible in all accesses ...

typedef union
{
   struct __gconv_info __cd;
   struct
   {
 struct __gconv_info __cd;
 struct __gconv_step_data __data;
   } __combined;
} _G_iconv_t;

could be written as

typedef union
{
   struct __gconv_info __cd;
   char __dummy[sizeof(struct __gconv_info) + sizeof(struct
__gconv_step_data)];
} _G_iconv_t;

in case the intent is to provide a complete type with space for
a single __gconv_step_data.


I dug into this on the glibc end and it looks like this commit:

commit 63fb8f9aa9d19f85599afe4b849b567aefd70a36
Author: Zack Weinberg 
Date:   Mon Feb 5 14:13:41 2018 -0500

Post-cleanup 2: minimize _G_config.h.

ripped all of that gunk out.  AFAICT there's no use of struct 
__gconv_info anywhere else in the code.


I reckon it is safe to say now that glibc no longer needs this misfeature.

Sid


[PATCH] RISC-V: Remove unnecessary register class.

2023-02-02 Thread Monk Chiang
Prevent VL_REGS and VTYPE_REGS from participating in register allocation.

gcc/ChangeLog:

* config/riscv/riscv.h: Remove VL_REGS, VTYPE_REGS class.
* config/riscv/riscv.cc: Ditto.
---
 gcc/config/riscv/riscv.cc | 8 +---
 gcc/config/riscv/riscv.h  | 6 --
 2 files changed, 1 insertion(+), 13 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 209d9a53e7b..3b7804b7501 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -293,7 +293,7 @@ const enum reg_class 
riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
   FP_REGS, FP_REGS,FP_REGS,FP_REGS,
   FP_REGS, FP_REGS,FP_REGS,FP_REGS,
   FP_REGS, FP_REGS,FP_REGS,FP_REGS,
-  FRAME_REGS,  FRAME_REGS, VL_REGS,VTYPE_REGS,
+  FRAME_REGS,  FRAME_REGS, NO_REGS,NO_REGS,
   NO_REGS, NO_REGS,NO_REGS,NO_REGS,
   NO_REGS, NO_REGS,NO_REGS,NO_REGS,
   NO_REGS, NO_REGS,NO_REGS,NO_REGS,
@@ -5831,12 +5831,6 @@ riscv_class_max_nregs (reg_class_t rclass, machine_mode 
mode)
   if (reg_class_subset_p (rclass, V_REGS))
 return riscv_hard_regno_nregs (V_REG_FIRST, mode);
 
-  if (reg_class_subset_p (rclass, VL_REGS))
-return 1;
-
-  if (reg_class_subset_p (rclass, VTYPE_REGS))
-return 1;
-
   return 0;
 }
 
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 0ab739bd6eb..02e1224c3cd 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -462,8 +462,6 @@ enum reg_class
   GR_REGS, /* integer registers */
   FP_REGS, /* floating-point registers */
   FRAME_REGS,  /* arg pointer and frame pointer */
-  VL_REGS, /* vl register */
-  VTYPE_REGS,  /* vtype register */
   VM_REGS, /* v0.t registers */
   VD_REGS, /* vector registers except v0.t */
   V_REGS,  /* vector registers */
@@ -487,8 +485,6 @@ enum reg_class
   "GR_REGS",   \
   "FP_REGS",   \
   "FRAME_REGS",
\
-  "VL_REGS",   \
-  "VTYPE_REGS",
\
   "VM_REGS",   \
   "VD_REGS",   \
   "V_REGS",\
@@ -514,8 +510,6 @@ enum reg_class
   { 0x, 0x, 0x, 0x },  /* GR_REGS */   
\
   { 0x, 0x, 0x, 0x },  /* FP_REGS */   
\
   { 0x, 0x, 0x0003, 0x },  /* FRAME_REGS */
\
-  { 0x, 0x, 0x0004, 0x },  /* VL_REGS */   
\
-  { 0x, 0x, 0x0008, 0x },  /* VTYPE_REGS */
\
   { 0x, 0x, 0x, 0x0001 },  /* V0_REGS */   
\
   { 0x, 0x, 0x, 0xfffe },  /* VNoV0_REGS */
\
   { 0x, 0x, 0x, 0x },  /* V_REGS */
\
-- 
2.37.2



[PATCH 0/2] Repost of patches for solving the build on Fedora 36 problem

2023-02-02 Thread Michael Meissner via Gcc-patches
I'm reposting these two patches that allow GCC to build on Fedora 36 just to be
clear which patches I'm talking about.  The issue is that if GCC is configured
with long double using the IEEE 128-bit representation, it currently cannot
build _mulkc3 and _divkc3 in libgcc.

Note, these patches do not solve the underlying problem of mixing _Float128 and
long double types and using built-in functions (i.e. calling a _Float128
built-in function with long double arguments when long double is IEEE 128-bit,
or vice versa calling a long double built-in function with _Float128
arguments).  But they do allow the compiler to build.

Note, it is the morning of February 3rd, and I will be off on vacation from
February 7th through February 14th.

The first patch changes libgcc so that it uses either _Float128 or long double
as the base IEEE 128-bit type, depending on whether long double uses the IBM
double-double representation, or the IEEE 128-bit representation.  And for the
complex type it uses _Complex _Float128 or _Complex long double.  The _mulkc3
and _divkc3 functions are adjusted to use the f128 built-in functions or the
long double built-in functions, based on the long double type.

The second patch improves how the compiler generates the call to _mulkc3 and
_divkc3.  I've discovered as I have tried to fix the underlying problem with the
IEEE 128-bit floating point types, it breaks the calls for IEEE 128-bit complex
multiply and divide.  This patch uses a cleaner approach to generate these
calls, and it will work with the current setup, and with the various fixes that
I've attempted to do to fix the underlying problem.

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


[PATCH 1/2] PR target/107299: Fix build issue when long double is IEEE 128-bit

2023-02-02 Thread Michael Meissner via Gcc-patches
This patch is a repost of a patch:

| Date: Thu, 19 Jan 2023 11:37:27 -0500
| Subject: [PATCH] PR target/107299: Fix build issue when long double is IEEE 
128-bit
| Message-ID: 

This patch updates the IEEE 128-bit types used in libgcc.

At the moment, we cannot build GCC when the target uses IEEE 128-bit long
doubles, such as building the compiler for a native Fedora 36 system.  The
build dies when it is trying to build the _mulkc3.c and _divkc3 modules.

This patch changes libgcc to use long double for the IEEE 128-bit base type if
long double is IEEE 128-bit, and it uses _Float128 otherwise.  The built-in
functions are adjusted to be the correct version based on the IEEE 128-bit base
type used.

While it is desirable to ultimately have __float128 and _Float128 use the same
internal type and mode within GCC, at present if you use the option
-mabi=ieeelongdouble, the __float128 type will use the long double type and not
the _Float128 type.  We get an internal compiler error if we combine the
signbitf128 built-in with a long double type.

I've gone through several iterations of trying to fix this within GCC, and
there are various problems that have come up.  I developed this alternative
patch that changes libgcc so that it does not tickle the issue.  I hope we can
fix the compiler at some point, but right now, this is preventing people on
Fedora 36 systems from building compilers where the default long double is IEEE
128-bit.

I have built a GCC compiler tool chain on the following platforms and there
were no regressions caused by these patches.

*   Power10 little endian, IBM long double, --with-cpu=power10

*   Power9 little endian, IBM long double, --with-cpu=power9

*   Power8 big endian, IBM long double, --with-cpu=power8, both
32-bit/64-bit tests.

In addition, I have built a GCC compiler tool chain on the following systems
with IEEE 128-bit long double as the default.  Comparing the test suite runs to
the runs for the toolchain with IBM long double as the default, I only get the
expected differences (C++ modules test fail on IEEE long double, 3 Fortran
tests pass on IEEE long double that fail on IBM long double, C test pr105334.c
fails, and C test fp128_conversions.c fails on power10):

*   Power10 little endian, IEEE long double, --with-cpu=power10

*   Power9 little endian, IEEE long double, --with-cpu=power9

Note, it is Friday February 3rd, and I will be on vacation from Tuesday
February 7th through Tuesday February 14th.

Can I check this change into the master branch?

2023-02-02   Michael Meissner  

PR target/107299
* config/rs6000/_divkc3.c (COPYSIGN): Use the correct built-in based on
whether long double is IBM or IEEE.
(INFINITY): Likewise.
(FABS): Likewise.
* config/rs6000/_mulkc3.c (COPYSIGN): Likewise.
(INFINITY): Likewise.
* config/rs6000/quad-float128.h (TF): Remove definition.
(TFtype): Define to be long double or _Float128.
(TCtype): Define to be _Complex long double or _Complex _Float128.
* libgcc2.h (TFtype): Allow machine config files to override this.
(TCtype): Likewise.
* soft-fp/quad.h (TFtype): Likewise.
---
 libgcc/config/rs6000/_divkc3.c   |  8 
 libgcc/config/rs6000/_mulkc3.c   |  7 +++
 libgcc/config/rs6000/quad-float128.h | 19 ++-
 libgcc/libgcc2.h |  4 
 libgcc/soft-fp/quad.h|  2 ++
 5 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/libgcc/config/rs6000/_divkc3.c b/libgcc/config/rs6000/_divkc3.c
index 9f52428cfa0..e3bb97c9cb7 100644
--- a/libgcc/config/rs6000/_divkc3.c
+++ b/libgcc/config/rs6000/_divkc3.c
@@ -26,9 +26,17 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #include "soft-fp.h"
 #include "quad-float128.h"
 
+#ifndef __LONG_DOUBLE_IEEE128__
 #define COPYSIGN(x,y) __builtin_copysignf128 (x, y)
 #define INFINITY __builtin_inff128 ()
 #define FABS __builtin_fabsf128
+
+#else
+#define COPYSIGN(x,y) __builtin_copysignl (x, y)
+#define INFINITY __builtin_infl ()
+#define FABS __builtin_fabsl
+#endif
+
 #define isnan __builtin_isnan
 #define isinf __builtin_isinf
 #define isfinite __builtin_isfinite
diff --git a/libgcc/config/rs6000/_mulkc3.c b/libgcc/config/rs6000/_mulkc3.c
index 299d8d147b0..3d98436d1d4 100644
--- a/libgcc/config/rs6000/_mulkc3.c
+++ b/libgcc/config/rs6000/_mulkc3.c
@@ -26,8 +26,15 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 #include "soft-fp.h"
 #include "quad-float128.h"
 
+#ifndef __LONG_DOUBLE_IEEE128__
 #define COPYSIGN(x,y) __builtin_copysignf128 (x, y)
 #define INFINITY __builtin_inff128 ()
+
+#else
+#define COPYSIGN(x,y) __builtin_copysignl (x, y)
+#define INFINITY __builtin_infl ()
+#endif
+
 #define isnan __builtin_isnan
 #define isinf __builtin_isinf
 
diff --git a/libgcc/config/rs6000/quad-float128.h 
b/libgcc/config/rs6000/quad-float128.h
index 68bd9b97f23..3

[PATCH 2/2] Rework 128-bit complex multiply and divide.

2023-02-02 Thread Michael Meissner via Gcc-patches
This patch reworks how the complex multiply and divide built-in functions are
done.  Previously we created built-in declarations for doing long double complex
multiply and divide when long double is IEEE 128-bit.  The old code also did not
support __ibm128 complex multiply and divide if long double is IEEE 128-bit.

This patch was originally posted on December 13th, 2022:

| Date: Tue, 13 Dec 2022 01:21:06 -0500
| Subject: [PATCH V2] Rework 128-bit complex multiply and divide, PR 
target/107299
| Message-ID: 

In terms of history, I wrote the original code just as I was starting to test
GCC on systems where IEEE 128-bit long double was the default.  At the time, we
had not yet started mangling the built-in function names as a way to bridge
going from a system with 128-bit IBM long double to 128-bit IEEE long double.

The original code depends on there only being two 128-bit types involved.  With
the next patch in this series, this assumption will no longer be true.  When
long double is IEEE 128-bit, there will be 2 IEEE 128-bit types (one for the
explicit __float128/_Float128 type and one for long double).

The problem is we cannot create two separate built-in functions that resolve to
the same name.  This is a requirement of add_builtin_function and the C front
end.  That means for the 3 possible modes (IFmode, KFmode, and TFmode), you can
only use 2 of them.

This code does not create the built-in declaration with the changed name.
Instead, it uses the TARGET_MANGLE_DECL_ASSEMBLER_NAME hook to change the name
before it is written out to the assembler file like it now does for all of the
other long double built-in functions.

When I wrote these patches, I discovered that __ibm128 complex multiply and
divide had originally not been supported if long double is IEEE 128-bit as it
would generate calls to __mulic3 and __divic3.  I added tests in the testsuite
to verify that the correct name (i.e. __multc3 and __divtc3) is used in this
case.

I had previously sent this patch out on November 1st.  Compared to that version,
this version no longer disables the special mapping when you are building
libgcc, as it turns out we don't need it.

I tested all 3 patches for PR target/107299 on:

1)  LE Power10 using --with-cpu=power10 --with-long-double-format=ieee
2)  LE Power10 using --with-cpu=power10 --with-long-double-format=ibm
3)  LE Power9  using --with-cpu=power9  --with-long-double-format=ibm
4)  BE Power8  using --with-cpu=power8  --with-long-double-format=ibm

Once all 3 patches have been applied, we can once again build GCC when long
double is IEEE 128-bit.  There were no other regressions with these patches.
Can I check these patches into the trunk?

Note, it is Friday February 3rd, 2023.  I will be on vacation Tuesday February
7th through February 14th.

2023-02-02   Michael Meissner  

gcc/

PR target/107299
* config/rs6000/rs6000.cc (create_complex_muldiv): Delete.
(init_float128_ieee): Delete code to switch complex multiply and divide
for long double.
(complex_multiply_builtin_code): New helper function.
(complex_divide_builtin_code): Likewise.
(rs6000_mangle_decl_assembler_name): Add support for mangling the name
of complex 128-bit multiply and divide built-in functions.

gcc/testsuite/

PR target/107299
* gcc.target/powerpc/divic3-1.c: New test.
* gcc.target/powerpc/divic3-2.c: Likewise.
* gcc.target/powerpc/mulic3-1.c: Likewise.
* gcc.target/powerpc/mulic3-2.c: Likewise.
---
 gcc/config/rs6000/rs6000.cc | 109 +++-
 gcc/testsuite/gcc.target/powerpc/divic3-1.c |  18 
 gcc/testsuite/gcc.target/powerpc/divic3-2.c |  17 +++
 gcc/testsuite/gcc.target/powerpc/mulic3-1.c |  18 
 gcc/testsuite/gcc.target/powerpc/mulic3-2.c |  17 +++
 5 files changed, 132 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/divic3-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/divic3-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mulic3-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/mulic3-2.c

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 16ca3a31757..7e76c37fdab 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -11151,26 +11151,6 @@ init_float128_ibm (machine_mode mode)
 }
 }
 
-/* Create a decl for either complex long double multiply or complex long double
-   divide when long double is IEEE 128-bit floating point.  We can't use
-   __multc3 and __divtc3 because the original long double using IBM extended
-   double used those names.  The complex multiply/divide functions are encoded
-   as builtin functions with a complex result and 4 scalar inputs.  */
-
-static void
-create_complex_muldiv (const char *name, built_in_function fncode, tree fntype)
-{
-  tree fndecl = add_builtin_function (name, fntype, fncode, BUILT_IN_NORMAL,
-  

[PATCH] RISC-V: Add binary vx C/C++ support

2023-02-02 Thread juzhe . zhong
From: Ju-Zhe Zhong 

gcc/ChangeLog:

* config/riscv/constraints.md (Wdm): Adjust constraint.
(Wbr): New constraint.
* config/riscv/predicates.md (reg_or_int_operand): New predicate.
* config/riscv/riscv-protos.h (emit_pred_op): Remove function.
(emit_vlmax_op): New function.
(emit_nonvlmax_op): Ditto.
(simm32_p): Ditto.
(neg_simm5_p): Ditto.
(has_vi_variant_p): Ditto.
* config/riscv/riscv-v.cc (emit_pred_op): Adjust function.
(emit_vlmax_op): New function.
(emit_nonvlmax_op): Ditto.
(expand_const_vector): Adjust function.
(legitimize_move): Ditto.
(simm32_p): New function.
(simm5_p): Ditto.
(neg_simm5_p): Ditto.
(has_vi_variant_p): Ditto.
* config/riscv/riscv-vector-builtins-bases.cc (class vrsub): New class.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def (vmin): Remove 
unsigned cases.
(vmax): Ditto.
(vminu): Remove signed cases.
(vmaxu): Ditto.
(vdiv): Remove unsigned cases.
(vrem): Ditto.
(vdivu): Remove signed cases.
(vremu): Ditto.
(vadd): Adjust.
(vsub): Ditto.
(vrsub): New class.
(vand): Adjust.
(vor): Ditto.
(vxor): Ditto.
(vmul): Ditto.
* config/riscv/riscv-vector-builtins.cc (DEF_RVV_U_OPS): New macro.
* config/riscv/riscv.h: change VL/VTYPE as fixed reg.
* config/riscv/vector-iterators.md: New iterators.
* config/riscv/vector.md (@pred_broadcast): Adjust pattern for vx 
support.
(@pred__scalar): New pattern.
(@pred_sub_reverse_scalar): Ditto.
(*pred__scalar): Ditto.
(*pred__extended_scalar): Ditto.
(*pred_sub_reverse_scalar): Ditto.
(*pred_sub_extended_reverse_scalar): Ditto.

---
 gcc/config/riscv/constraints.md   |  17 +-
 gcc/config/riscv/predicates.md|  16 +-
 gcc/config/riscv/riscv-protos.h   |   8 +-
 gcc/config/riscv/riscv-v.cc   |  90 -
 .../riscv/riscv-vector-builtins-bases.cc  |  16 +-
 .../riscv/riscv-vector-builtins-bases.h   |   1 +
 .../riscv/riscv-vector-builtins-functions.def |  31 +-
 gcc/config/riscv/riscv-vector-builtins.cc |  51 +++
 gcc/config/riscv/riscv.h  |   2 +-
 gcc/config/riscv/vector-iterators.md  |  37 +-
 gcc/config/riscv/vector.md| 366 +-
 11 files changed, 591 insertions(+), 44 deletions(-)

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 3637380ee47..b646ad4853c 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -162,7 +162,20 @@
  (and (match_code "const_vector")
   (match_test "op == CONSTM1_RTX (GET_MODE (op))")))
 
-(define_constraint "Wdm"
+(define_memory_constraint "Wdm"
   "Vector duplicate memory operand"
-  (and (match_operand 0 "memory_operand")
+  (and (match_code "mem")
(match_code "reg" "0")))
+
+;; (vec_duplicate:V (const_int 2863311530 [0x])) of pred_broadcast
+;; is CSEed into (const_vector:V (const_int 2863311530 [0x])) here
+;; which is not the pattern matching we want since we can't generate
+;; instruction directly for it when SEW = 64 and !TARGET_64BIT. We should
+;; not allow RA (register allocation) allocate a DImode register in
+;; pred_broadcast pattern.
+(define_constraint "Wbr"
+  "@internal
+   Broadcast register operand"
+  (and (match_code "reg")
+   (match_test "REGNO (op) <= GP_REG_LAST
+   && direct_broadcast_operand (op, GET_MODE (op))")))
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 895831443e1..8d2ccb0f7a2 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -272,9 +272,16 @@
   (ior (match_operand 0 "register_operand")
(match_operand 0 "memory_operand")))
 
+(define_predicate "reg_or_int_operand"
+  (ior (match_operand 0 "register_operand")
+   (match_operand 0 "const_int_operand")))
+
 (define_predicate "vector_move_operand"
   (ior (match_operand 0 "nonimmediate_operand")
-   (match_code "const_vector")))
+   (and (match_code "const_vector")
+(match_test "reload_completed
+   || satisfies_constraint_vi (op)
+   || satisfies_constraint_Wc0 (op)"
 
 (define_predicate "vector_mask_operand"
   (ior (match_operand 0 "register_operand")
@@ -315,8 +322,11 @@
 
 ;; The scalar operand can be directly broadcast by RVV instructions.
 (define_predicate "direct_broadcast_operand"
-  (ior (match_operand 0 "register_operand")
-   (match_test "satisfies_constraint_Wdm (op)")))
+  (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
+   && register_operand (op, GET_MODE (op))
+   && maybe_gt (GET_MO

Re: [PATCH] RISC-V: Remove unnecessary register class.

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, also updated mask for ALL_REGS, thanks.

On Fri, Feb 3, 2023 at 12:59 PM Monk Chiang  wrote:
>
> Keep VL_REGS and VTYPE_REGS out of register allocation.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.h: Remove VL_REGS, VTYPE_REGS class.
> * config/riscv/riscv.cc: Ditto.
> ---
>  gcc/config/riscv/riscv.cc | 8 +---
>  gcc/config/riscv/riscv.h  | 6 --
>  2 files changed, 1 insertion(+), 13 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index 209d9a53e7b..3b7804b7501 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -293,7 +293,7 @@ const enum reg_class 
> riscv_regno_to_class[FIRST_PSEUDO_REGISTER] = {
>FP_REGS, FP_REGS,FP_REGS,FP_REGS,
>FP_REGS, FP_REGS,FP_REGS,FP_REGS,
>FP_REGS, FP_REGS,FP_REGS,FP_REGS,
> -  FRAME_REGS,  FRAME_REGS, VL_REGS,VTYPE_REGS,
> +  FRAME_REGS,  FRAME_REGS, NO_REGS,NO_REGS,
>NO_REGS, NO_REGS,NO_REGS,NO_REGS,
>NO_REGS, NO_REGS,NO_REGS,NO_REGS,
>NO_REGS, NO_REGS,NO_REGS,NO_REGS,
> @@ -5831,12 +5831,6 @@ riscv_class_max_nregs (reg_class_t rclass, 
> machine_mode mode)
>if (reg_class_subset_p (rclass, V_REGS))
>  return riscv_hard_regno_nregs (V_REG_FIRST, mode);
>
> -  if (reg_class_subset_p (rclass, VL_REGS))
> -return 1;
> -
> -  if (reg_class_subset_p (rclass, VTYPE_REGS))
> -return 1;
> -
>return 0;
>  }
>
> diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
> index 0ab739bd6eb..02e1224c3cd 100644
> --- a/gcc/config/riscv/riscv.h
> +++ b/gcc/config/riscv/riscv.h
> @@ -462,8 +462,6 @@ enum reg_class
>GR_REGS, /* integer registers */
>FP_REGS, /* floating-point registers */
>FRAME_REGS,  /* arg pointer and frame pointer */
> -  VL_REGS, /* vl register */
> -  VTYPE_REGS,  /* vtype register */
>VM_REGS, /* v0.t registers */
>VD_REGS, /* vector registers except v0.t */
>V_REGS,  /* vector registers */
> @@ -487,8 +485,6 @@ enum reg_class
>"GR_REGS",   \
>"FP_REGS",   \
>"FRAME_REGS",  
>   \
> -  "VL_REGS",   \
> -  "VTYPE_REGS",  
>   \
>"VM_REGS",   \
>"VD_REGS",   \
>"V_REGS",\
> @@ -514,8 +510,6 @@ enum reg_class
>{ 0x, 0x, 0x, 0x },  /* GR_REGS */ 
>   \
>{ 0x, 0x, 0x, 0x },  /* FP_REGS */ 
>   \
>{ 0x, 0x, 0x0003, 0x },  /* FRAME_REGS */  
>   \
> -  { 0x, 0x, 0x0004, 0x },  /* VL_REGS */ 
>   \
> -  { 0x, 0x, 0x0008, 0x },  /* VTYPE_REGS */  
>   \
>{ 0x, 0x, 0x, 0x0001 },  /* V0_REGS */ 
>   \
>{ 0x, 0x, 0x, 0xfffe },  /* VNoV0_REGS */  
>   \
>{ 0x, 0x, 0x, 0x },  /* V_REGS */  
>   \
> --
> 2.37.2
>


Re: [PATCH] RISC-V: Add vsra.vx C API tests

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:11 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vsra_vx-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx-3.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_mu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_mu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_mu-3.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tu-3.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tum-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tum-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tum-3.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tumu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tumu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsra_vx_tumu-3.c: New test.
>
> ---
>  .../gcc.target/riscv/rvv/base/vsra_vx-1.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx-2.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx-3.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_m-1.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_m-2.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_m-3.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_mu-1.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_mu-2.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_mu-3.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tu-1.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tu-2.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tu-3.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tum-1.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tum-2.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsra_vx_tum-3.c | 160 ++
>  .../riscv/rvv/base/vsra_vx_tumu-1.c   | 160 ++
>  .../riscv/rvv/base/vsra_vx_tumu-2.c   | 160 ++
>  .../riscv/rvv/base/vsra_vx_tumu-3.c   | 160 ++
>  18 files changed, 2880 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_m-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_m-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_m-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_mu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_mu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_mu-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tu-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tum-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tum-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tum-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tumu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tumu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx_tumu-3.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-1.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-1.c
> new file mode 100644
> index 000..7a7a5a1a933
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vsra_vx-1.c
> @@ -0,0 +1,160 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +vint8mf8_t test___riscv_vsra_vx_i8mf8(vint8mf8_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra_vx_i8mf8(op1,shift,vl);
> +}
> +
> +
> +vint8mf4_t test___riscv_vsra_vx_i8mf4(vint8mf4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra_vx_i8mf4(op1,shift,vl);
> +}
> +
> +
> +vint8mf2_t test___riscv_vsra_vx_i8mf2(vint8mf2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra_vx_i8mf2(op1,shift,vl);
> +}
> +
> +
> +vint8m1_t test___riscv_vsra_vx_i8m1(vint8m1_t op1,size_t shift,size_t vl)
> +{

Re: [PATCH] RISC-V: Add vsrl.vx C API tests

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:10 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/vsrl_vx-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx-3.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_m-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_m-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_m-3.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_mu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_mu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_mu-3.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tu-3.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tum-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tum-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tum-3.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tumu-1.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tumu-2.c: New test.
> * gcc.target/riscv/rvv/base/vsrl_vx_tumu-3.c: New test.
>
> ---
>  .../gcc.target/riscv/rvv/base/vsrl_vx-1.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx-2.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx-3.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_m-1.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_m-2.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_m-3.c   | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_mu-1.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_mu-2.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_mu-3.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tu-1.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tu-2.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tu-3.c  | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tum-1.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tum-2.c | 160 ++
>  .../gcc.target/riscv/rvv/base/vsrl_vx_tum-3.c | 160 ++
>  .../riscv/rvv/base/vsrl_vx_tumu-1.c   | 160 ++
>  .../riscv/rvv/base/vsrl_vx_tumu-2.c   | 160 ++
>  .../riscv/rvv/base/vsrl_vx_tumu-3.c   | 160 ++
>  18 files changed, 2880 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_m-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_m-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_m-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_mu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_mu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_mu-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tu-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tum-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tum-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tum-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tumu-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tumu-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx_tumu-3.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-1.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-1.c
> new file mode 100644
> index 000..284289a59f4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vsrl_vx-1.c
> @@ -0,0 +1,160 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8mf8_t test___riscv_vsrl_vx_u8mf8(vuint8mf8_t op1,size_t shift,size_t 
> vl)
> +{
> +return __riscv_vsrl_vx_u8mf8(op1,shift,vl);
> +}
> +
> +
> +vuint8mf4_t test___riscv_vsrl_vx_u8mf4(vuint8mf4_t op1,size_t shift,size_t 
> vl)
> +{
> +return __riscv_vsrl_vx_u8mf4(op1,shift,vl);
> +}
> +
> +
> +vuint8mf2_t test___riscv_vsrl_vx_u8mf2(vuint8mf2_t op1,size_t shift,size_t 
> vl)
> +{
> +return __riscv_vsrl_vx_u8mf2(op1,shift,vl);
> +}
> +
> +
> +vuint8m1_t test___riscv_vsrl_vx_u8m1(vuint8m1_t op1,size_t shift

Re: [PATCH] RISC-V: Add vsrl.vx C++ API tests

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:18 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/riscv/rvv/base/vsrl_vx-1.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx-2.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx-3.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_mu-1.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_mu-2.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_mu-3.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tu-1.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tu-2.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tu-3.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tum-1.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tum-2.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tum-3.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tumu-1.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tumu-2.C: New test.
> * g++.target/riscv/rvv/base/vsrl_vx_tumu-3.C: New test.
>
> ---
>  .../g++.target/riscv/rvv/base/vsrl_vx-1.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsrl_vx-2.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsrl_vx-3.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsrl_vx_mu-1.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_mu-2.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_mu-3.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tu-1.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tu-2.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tu-3.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tum-1.C | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tum-2.C | 160 +
>  .../g++.target/riscv/rvv/base/vsrl_vx_tum-3.C | 160 +
>  .../riscv/rvv/base/vsrl_vx_tumu-1.C   | 160 +
>  .../riscv/rvv/base/vsrl_vx_tumu-2.C   | 160 +
>  .../riscv/rvv/base/vsrl_vx_tumu-3.C   | 160 +
>  15 files changed, 2862 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_mu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_mu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_mu-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tu-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tum-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tum-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tum-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tumu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tumu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx_tumu-3.C
>
> diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-1.C 
> b/gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-1.C
> new file mode 100644
> index 000..2c4a990fa28
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/riscv/rvv/base/vsrl_vx-1.C
> @@ -0,0 +1,314 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +vuint8mf8_t test___riscv_vsrl(vuint8mf8_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8mf4_t test___riscv_vsrl(vuint8mf4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8mf2_t test___riscv_vsrl(vuint8mf2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8m1_t test___riscv_vsrl(vuint8m1_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8m2_t test___riscv_vsrl(vuint8m2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8m4_t test___riscv_vsrl(vuint8m4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint8m8_t test___riscv_vsrl(vuint8m8_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint16mf4_t test___riscv_vsrl(vuint16mf4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint16mf2_t test___riscv_vsrl(vuint16mf2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsrl(op1,shift,vl);
> +}
> +
> +
> +vuint16m1_t test___riscv_vsrl(vuint16m1_t op1,size_t shift,size_t vl)
> +{
> +  

Re: [PATCH] RISC-V: Add vsra.vx C++ API tests

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:20 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/testsuite/ChangeLog:
>
> * g++.target/riscv/rvv/base/vsra_vx-1.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx-2.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx-3.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_mu-1.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_mu-2.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_mu-3.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tu-1.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tu-2.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tu-3.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tum-1.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tum-2.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tum-3.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tumu-1.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tumu-2.C: New test.
> * g++.target/riscv/rvv/base/vsra_vx_tumu-3.C: New test.
>
> ---
>  .../g++.target/riscv/rvv/base/vsra_vx-1.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsra_vx-2.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsra_vx-3.C | 314 ++
>  .../g++.target/riscv/rvv/base/vsra_vx_mu-1.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_mu-2.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_mu-3.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tu-1.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tu-2.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tu-3.C  | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tum-1.C | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tum-2.C | 160 +
>  .../g++.target/riscv/rvv/base/vsra_vx_tum-3.C | 160 +
>  .../riscv/rvv/base/vsra_vx_tumu-1.C   | 160 +
>  .../riscv/rvv/base/vsra_vx_tumu-2.C   | 160 +
>  .../riscv/rvv/base/vsra_vx_tumu-3.C   | 160 +
>  15 files changed, 2862 insertions(+)
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_mu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_mu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_mu-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tu-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tum-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tum-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tum-3.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tumu-1.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tumu-2.C
>  create mode 100644 gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx_tumu-3.C
>
> diff --git a/gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-1.C 
> b/gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-1.C
> new file mode 100644
> index 000..e3c152f7f4e
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/riscv/rvv/base/vsra_vx-1.C
> @@ -0,0 +1,314 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -fno-schedule-insns 
> -fno-schedule-insns2" } */
> +
> +#include "riscv_vector.h"
> +
> +vint8mf8_t test___riscv_vsra(vint8mf8_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8mf4_t test___riscv_vsra(vint8mf4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8mf2_t test___riscv_vsra(vint8mf2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8m1_t test___riscv_vsra(vint8m1_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8m2_t test___riscv_vsra(vint8m2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8m4_t test___riscv_vsra(vint8m4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint8m8_t test___riscv_vsra(vint8m8_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint16mf4_t test___riscv_vsra(vint16mf4_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint16mf2_t test___riscv_vsra(vint16mf2_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsra(op1,shift,vl);
> +}
> +
> +
> +vint16m1_t test___riscv_vsra(vint16m1_t op1,size_t shift,size_t vl)
> +{
> +return __riscv_vsr

Re: [PATCH] RISC-V: Fix constraint bug for binary operation

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 9:48 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> Current constraint configuration will generate:
> vadd.vv v0,v24,v25,v0.t
> vsll.vx v0,v24,a5,v0.t
>
> They are incorrect according to the RVV ISA: the destination of a masked
> instruction must not overlap the mask register v0.
> This patch fixes this obvious issue.
>
> gcc/ChangeLog:
>
> * config/riscv/vector-iterators.md (sll.vi): Fix constraint bug.
> (sll.vv): Ditto.
> (%3,%4): Ditto.
> (%3,%v4): Ditto.
> * config/riscv/vector.md: Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/binop_vv_constraint-1.c:
> * gcc.target/riscv/rvv/base/shift_vx_constraint-1.c:
>
> ---
>  gcc/config/riscv/vector-iterators.md  | 86 +--
>  gcc/config/riscv/vector.md| 41 +
>  .../riscv/rvv/base/binop_vv_constraint-1.c|  8 +-
>  .../riscv/rvv/base/shift_vx_constraint-1.c|  9 +-
>  4 files changed, 75 insertions(+), 69 deletions(-)
>
> diff --git a/gcc/config/riscv/vector-iterators.md 
> b/gcc/config/riscv/vector-iterators.md
> index a2f192d6ba0..4f9799ade05 100644
> --- a/gcc/config/riscv/vector-iterators.md
> +++ b/gcc/config/riscv/vector-iterators.md
> @@ -229,42 +229,42 @@
> (umod "register_operand")])
>
>  (define_code_attr binop_rhs1_constraint [
> -   (plus "vr,vr,vr")
> -   (minus "vr,vr,vi")
> -   (ior "vr,vr,vr")
> -   (xor "vr,vr,vr")
> -   (and "vr,vr,vr")
> -   (ashift "vr,vr,vr")
> -   (ashiftrt "vr,vr,vr")
> -   (lshiftrt "vr,vr,vr")
> -   (smin "vr,vr,vr")
> -   (smax "vr,vr,vr")
> -   (umin "vr,vr,vr")
> -   (umax "vr,vr,vr")
> -   (mult "vr,vr,vr")
> -   (div "vr,vr,vr")
> -   (mod "vr,vr,vr")
> -   (udiv "vr,vr,vr")
> -   (umod "vr,vr,vr")])
> +   (plus "vr,vr,vr,vr,vr,vr")
> +   (minus "vr,vr,vr,vr,vi,vi")
> +   (ior "vr,vr,vr,vr,vr,vr")
> +   (xor "vr,vr,vr,vr,vr,vr")
> +   (and "vr,vr,vr,vr,vr,vr")
> +   (ashift "vr,vr,vr,vr,vr,vr")
> +   (ashiftrt "vr,vr,vr,vr,vr,vr")
> +   (lshiftrt "vr,vr,vr,vr,vr,vr")
> +   (smin "vr,vr,vr,vr,vr,vr")
> +   (smax "vr,vr,vr,vr,vr,vr")
> +   (umin "vr,vr,vr,vr,vr,vr")
> +   (umax "vr,vr,vr,vr,vr,vr")
> +   (mult "vr,vr,vr,vr,vr,vr")
> +   (div "vr,vr,vr,vr,vr,vr")
> +   (mod "vr,vr,vr,vr,vr,vr")
> +   (udiv "vr,vr,vr,vr,vr,vr")
> +   (umod "vr,vr,vr,vr,vr,vr")])
>
>  (define_code_attr binop_rhs2_constraint [
> -   (plus "vr,vi,vr")
> -   (minus "vr,vj,vr")
> -   (ior "vr,vi,vr")
> -   (xor "vr,vi,vr")
> -   (and "vr,vi,vr")
> -   (ashift "vr,vk,vr")
> -   (ashiftrt "vr,vk,vr")
> -   (lshiftrt "vr,vk,vr")
> -   (smin "vr,vr,vr")
> -   (smax "vr,vr,vr")
> -   (umin "vr,vr,vr")
> -   (umax "vr,vr,vr")
> -   (mult "vr,vr,vr")
> -   (div "vr,vr,vr")
> -   (mod "vr,vr,vr")
> -   (udiv "vr,vr,vr")
> -   (umod "vr,vr,vr")])
> +   (plus "vr,vr,vi,vi,vr,vr")
> +   (minus "vr,vr,vj,vj,vr,vr")
> +   (ior "vr,vr,vi,vi,vr,vr")
> +   (xor "vr,vr,vi,vi,vr,vr")
> +   (and "vr,vr,vi,vi,vr,vr")
> +   (ashift "vr,vr,vk,vk,vr,vr")
> +   (ashiftrt "vr,vr,vk,vk,vr,vr")
> +   (lshiftrt "vr,vr,vk,vk,vr,vr")
> +   (smin "vr,vr,vr,vr,vr,vr")
> +   (smax "vr,vr,vr,vr,vr,vr")
> +   (umin "vr,vr,vr,vr,vr,vr")
> +   (umax "vr,vr,vr,vr,vr,vr")
> +   (mult "vr,vr,vr,vr,vr,vr")
> +   (div "vr,vr,vr,vr,vr,vr")
> +   (mod "vr,vr,vr,vr,vr,vr")
> +   (udiv "vr,vr,vr,vr,vr,vr")
> +   (umod "vr,vr,vr,vr,vr,vr")])
>
>  (define_code_attr int_binop_insn_type [
> (plus "vialu")
> @@ -285,9 +285,9 @@
> (udiv "vidiv")
> (umod "vidiv")])
>
> -;;  expands to the insn name of binop matching constraint 
> alternative = 1.
> +;;  expands to the insn name of bin

Re: [PATCH] RISC-V: Add RVV shift.vx C/C++ API support

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:08 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/ChangeLog:
>
> * config/riscv/predicates.md (pmode_reg_or_uimm5_operand): New 
> predicate.
> * config/riscv/riscv-vector-builtins-bases.cc: New class.
> * config/riscv/riscv-vector-builtins-functions.def (vsll): Ditto.
> (vsra): Ditto.
> (vsrl): Ditto.
> * config/riscv/riscv-vector-builtins.cc: Ditto.
> * config/riscv/vector.md (@pred__scalar): New pattern.
>
> ---
>  gcc/config/riscv/predicates.md|  8 ++
>  .../riscv/riscv-vector-builtins-bases.cc  | 10 ++-
>  .../riscv/riscv-vector-builtins-functions.def |  3 +++
>  gcc/config/riscv/riscv-vector-builtins.cc | 13 ++
>  gcc/config/riscv/vector.md| 26 +++
>  5 files changed, 59 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
> index 57f7ddfbd7d..895831443e1 100644
> --- a/gcc/config/riscv/predicates.md
> +++ b/gcc/config/riscv/predicates.md
> @@ -301,6 +301,14 @@
> (and (match_code "const_vector")
>  (match_test "riscv_vector::const_vec_all_same_in_range_p (op, 0, 
> 31)"
>
> +;; pmode_reg_or_uimm5_operand can be used by vsll.vx/vsrl.vx/vsra.vx 
> instructions.
> +;; Since it has the same predicate as vector_length_operand which allows 
> register
> +;; or immediate (0 ~ 31), we define this predicate same as 
> vector_length_operand here.
> +;; We don't use vector_length_operand directly to predicate 
> vsll.vx/vsrl.vx/vsra.vx
> +;; since it may be confusing.
> +(define_special_predicate "pmode_reg_or_uimm5_operand"
> +  (match_operand 0 "vector_length_operand"))
> +
>  (define_special_predicate "pmode_reg_or_0_operand"
>(ior (match_operand 0 "const_0_operand")
> (match_operand 0 "pmode_register_operand")))
> diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
> b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> index f4256fedc5b..00d357a0d36 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
> @@ -163,7 +163,15 @@ class binop : public function_base
>  public:
>rtx expand (function_expander &e) const override
>{
> -return e.use_exact_insn (code_for_pred (CODE, e.vector_mode ()));
> +switch (e.op_info->op)
> +  {
> +  case OP_TYPE_vx:
> +   return e.use_exact_insn (code_for_pred_scalar (CODE, e.vector_mode 
> ()));
> +  case OP_TYPE_vv:
> +   return e.use_exact_insn (code_for_pred (CODE, e.vector_mode ()));
> +  default:
> +   gcc_unreachable ();
> +  }
>}
>  };
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
> b/gcc/config/riscv/riscv-vector-builtins-functions.def
> index 9f9678ab6dd..b543946c72e 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-functions.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
> @@ -80,5 +80,8 @@ DEF_RVV_FUNCTION (vdiv, binop, full_preds, iu_vvv_ops)
>  DEF_RVV_FUNCTION (vrem, binop, full_preds, iu_vvv_ops)
>  DEF_RVV_FUNCTION (vdivu, binop, full_preds, iu_vvv_ops)
>  DEF_RVV_FUNCTION (vremu, binop, full_preds, iu_vvv_ops)
> +DEF_RVV_FUNCTION (vsll, binop, full_preds, iu_shift_vvx_ops)
> +DEF_RVV_FUNCTION (vsra, binop, full_preds, iu_shift_vvx_ops)
> +DEF_RVV_FUNCTION (vsrl, binop, full_preds, iu_shift_vvx_ops)
>
>  #undef DEF_RVV_FUNCTION
> diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
> b/gcc/config/riscv/riscv-vector-builtins.cc
> index 3a6c2c7c6f2..12fea2b3594 100644
> --- a/gcc/config/riscv/riscv-vector-builtins.cc
> +++ b/gcc/config/riscv/riscv-vector-builtins.cc
> @@ -249,6 +249,11 @@ static CONSTEXPR const rvv_arg_type_info shift_vv_args[]
>= {rvv_arg_type_info (RVV_BASE_vector),
>   rvv_arg_type_info (RVV_BASE_shift_vector), rvv_arg_type_info_end};
>
> +/* A list of args for vector_type func (vector_type, size) function.  */
> +static CONSTEXPR const rvv_arg_type_info vector_size_args[]
> +  = {rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info (RVV_BASE_size),
> + rvv_arg_type_info_end};
> +
>  /* A list of none preds that will be registered for intrinsic functions.  */
>  static CONSTEXPR const predication_type_index none_preds[]
>= {PRED_TYPE_none, NUM_PRED_TYPES};
> @@ -405,6 +410,14 @@ static CONSTEXPR const rvv_op_info iu_shift_vvv_ops
>   rvv_arg_type_info (RVV_BASE_vector), /* Return type */
>   shift_vv_args /* Args */};
>
> +/* A static operand information for vector_type func (vector_type, size_t)
> + * function registration. */
> +static CONSTEXPR const rvv_op_info iu_shift_vvx_ops
> +  = {iu_ops, /* Types */
> + OP_TYPE_vx, /* Suffix */
> + rvv_arg_type_info (RVV_BASE_vector), /* Return type */
> + vector_size_args /* Args */};
> +
>  /* A list of all RVV intrinsic functions.  */
>  static function_group_info function_gr

Re: [PATCH] RISC-V: Add shift constraint tests

2023-02-02 Thread Kito Cheng via Gcc-patches
committed, thanks!

On Wed, Feb 1, 2023 at 6:15 AM  wrote:
>
> From: Ju-Zhe Zhong 
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/base/shift_vx_constraint-1.c: New test.
>
> ---
>  .../riscv/rvv/base/shift_vx_constraint-1.c| 133 ++
>  1 file changed, 133 insertions(+)
>  create mode 100644 
> gcc/testsuite/gcc.target/riscv/rvv/base/shift_vx_constraint-1.c
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/shift_vx_constraint-1.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/shift_vx_constraint-1.c
> new file mode 100644
> index 000..ae3883c5af9
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/shift_vx_constraint-1.c
> @@ -0,0 +1,133 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +#include "riscv_vector.h"
> +
> +/*
> +** f1:
> +** vsetivli\tzero,4,e32,m1,tu,ma
> +** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,31
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,31
> +** vse32\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f1 (void * in, void *out)
> +{
> +vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> +vint32m1_t v2 = __riscv_vle32_v_i32m1_tu (v, in, 4);
> +vint32m1_t v3 = __riscv_vsll_vx_i32m1 (v2, 31, 4);
> +vint32m1_t v4 = __riscv_vsll_vx_i32m1_tu (v3, v2, 31, 4);
> +__riscv_vse32_v_i32m1 (out, v4, 4);
> +}
> +
> +/*
> +** f2:
> +** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
> +** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ...
> +** vsetivli\tzero,4,e32,m1,ta,ma
> +** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f2 (void * in, void *out)
> +{
> +vbool32_t mask = *(vbool32_t*)in;
> +asm volatile ("":::"memory");
> +vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> +vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4);
> +vint32m1_t v3 = __riscv_vsll_vx_i32m1 (v2, 32, 4);
> +vint32m1_t v4 = __riscv_vsll_vx_i32m1_m (mask, v3, 32, 4);
> +__riscv_vse32_v_i32m1 (out, v4, 4);
> +}
> +
> +/*
> +** f3:
> +** vsetvli\t[a-x0-9]+,zero,e8,mf4,ta,ma
> +** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vsetivli\tzero,4,e32,m1,tu,mu
> +** vle32\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vle32.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,\s*17
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,\s*17,\s*v0.t
> +** vse32.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f3 (void * in, void *out)
> +{
> +vbool32_t mask = *(vbool32_t*)in;
> +asm volatile ("":::"memory");
> +vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4);
> +vint32m1_t v2 = __riscv_vle32_v_i32m1_tumu (mask, v, in, 4);
> +vint32m1_t v3 = __riscv_vsll_vx_i32m1 (v2, 17, 4);
> +vint32m1_t v4 = __riscv_vsll_vx_i32m1_tumu (mask, v3, v2, 17, 4);
> +__riscv_vse32_v_i32m1 (out, v4, 4);
> +}
> +
> +/*
> +** f4:
> +** vsetivli\tzero,4,e8,mf8,tu,ma
> +** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
> +** vse8\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f4 (void * in, void *out, size_t x)
> +{
> +vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
> +vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in, 4);
> +vint8mf8_t v3 = __riscv_vsll_vx_i8mf8 (v2, x, 4);
> +vint8mf8_t v4 = __riscv_vsll_vx_i8mf8_tu (v3, v2, x, 4);
> +__riscv_vse8_v_i8mf8 (out, v4, 4);
> +}
> +
> +/*
> +** f5:
> +** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
> +** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vsetivli\tzero,4,e8,mf8,ta,ma
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,\s*5
> +** vsll\.vi\tv[0-9]+,\s*v[0-9]+,\s*5,\s*v0.t
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f5 (void * in, void *out)
> +{
> +vbool64_t mask = *(vbool64_t*)in;
> +asm volatile ("":::"memory");
> +vint8mf8_t v = __riscv_vle8_v_i8mf8 (in, 4);
> +vint8mf8_t v2 = __riscv_vle8_v_i8mf8_m (mask, in, 4);
> +vint8mf8_t v3 = __riscv_vsll_vx_i8mf8 (v2, 5, 4);
> +vint8mf8_t v4 = __riscv_vsll_vx_i8mf8_m (mask, v3, 5, 4);
> +__riscv_vse8_v_i8mf8 (out, v4, 4);
> +}
> +
> +/*
> +** f6:
> +** vsetvli\t[a-x0-9]+,zero,e8,mf8,ta,ma
> +** vlm.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vsetivli\tzero,4,e8,mf8,tu,mu
> +** vle8\.v\tv[0-9]+,0\([a-x0-9]+\)
> +** vle8.v\tv[0-9]+,0\([a-x0-9]+\),v0.t
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+
> +** vsll\.vx\tv[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t
> +** vse8.v\tv[0-9]+,0\([a-x0-9]+\)
> +** ret
> +*/
> +void f6 (void * in, void *out, size_t x)
> +{
> +vbool64_t mask = *(vbool64_t*)in;
> +asm volatile

[aarch64] Code-gen for vector initialization involving constants

2023-02-02 Thread Prathamesh Kulkarni via Gcc-patches
Hi Richard,
While digging thru aarch64_expand_vector_init, I noticed it gives
priority to loading a constant first:
 /* Initialise a vector which is part-variable.  We want to first try
 to build those lanes which are constant in the most efficient way we
 can.  */

which results in suboptimal code-gen for following case:
int16x8_t f_s16(int16_t x)
{
  return (int16x8_t) { x, x, x, x, x, x, x, 1 };
}

code-gen trunk:
f_s16:
movi v0.8h, 0x1
ins v0.h[0], w0
ins v0.h[1], w0
ins v0.h[2], w0
ins v0.h[3], w0
ins v0.h[4], w0
ins v0.h[5], w0
ins v0.h[6], w0
ret

The attached patch tweaks the following condition:
if (n_var == n_elts && n_elts <= 16)
  {
...
  }

to pass if maxv >= 80% of n_elts, with 80% being an
arbitrary "high enough" threshold. The intent is to dup
the most frequently repeated variable if its repetition count
is "high enough" and insert constants, which should be "better" than
loading the constant first and inserting variables as in the above case.

Alternatively, I suppose we can remove threshold and for constants,
generate both sequences and check which one is more
efficient ?

code-gen with patch:
f_s16:
dup v0.8h, w0
movi v1.4h, 0x1
ins v0.h[7], v1.h[0]
ret

The patch is lightly tested to verify that vec[t]-init-*.c tests pass
with bootstrap+test
in progress.
Does this look OK ?

Thanks,
Prathamesh
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index acc0cfe5f94..df33509c6e4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22079,30 +22079,36 @@ aarch64_expand_vector_init (rtx target, rtx vals)
  and matches[X][1] with the count of duplicate elements (if X is the
  earliest element which has duplicates).  */
 
-  if (n_var == n_elts && n_elts <= 16)
+  int matches[16][2] = {0};
+  for (int i = 0; i < n_elts; i++)
 {
-  int matches[16][2] = {0};
-  for (int i = 0; i < n_elts; i++)
+  for (int j = 0; j <= i; j++)
{
- for (int j = 0; j <= i; j++)
+ if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
{
- if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
-   {
- matches[i][0] = j;
- matches[j][1]++;
- break;
-   }
+ matches[i][0] = j;
+ matches[j][1]++;
+ break;
}
}
-  int maxelement = 0;
-  int maxv = 0;
-  for (int i = 0; i < n_elts; i++)
-   if (matches[i][1] > maxv)
- {
-   maxelement = i;
-   maxv = matches[i][1];
- }
+}
 
+  int maxelement = 0;
+  int maxv = 0;
+  for (int i = 0; i < n_elts; i++)
+if (matches[i][1] > maxv)
+  {
+   maxelement = i;
+   maxv = matches[i][1];
+  }
+
+  rtx max_elem = XVECEXP (vals, 0, maxelement); 
+  if (n_elts <= 16
+  && ((n_var == n_elts)
+  || (maxv >= (int)(0.8 * n_elts)
+  && !CONST_INT_P (max_elem)
+  && !CONST_DOUBLE_P (max_elem))))
+{
   /* Create a duplicate of the most common element, unless all elements
 are equally useless to us, in which case just immediately set the
 vector register using the first element.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-18.c 
b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
new file mode 100644
index 000..e20b813559e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** f1_s16:
+** ...
+** dup v[0-9]+\.8h, w[0-9]+
+** movi v[0-9]+\.4h, 0x1
+** ins v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+** ...
+** ret
+*/
+
+int16x8_t f1_s16(int16_t x)
+{
+  return (int16x8_t) {x, x, x, x, x, x, x, 1};
+}
+
+/*
+** f2_s16:
+** ...
+** dup v[0-9]+\.8h, w[0-9]+
+** movi v[0-9]+\.4h, 0x1
+** movi v[0-9]+\.4h, 0x2
+** ins v[0-9]+\.h\[6\], v[0-9]+\.h\[0\]
+** ins v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+** ...
+** ret
+*/
+
+int16x8_t f2_s16(int16_t x)
+{
+  return (int16x8_t) { x, x, x, x, x, x, 1, 2 };
+}
+
+/*
+** f3_s16:
+** ...
+** movi v[0-9]+\.8h, 0x1
+** ins v[0-9]+\.h\[0\], w0
+** ins v[0-9]+\.h\[1\], w0
+** ins v[0-9]+\.h\[2\], w0
+** ...
+** ret
+*/
+
+int16x8_t f3_s16(int16_t x)
+{
+  return (int16x8_t) {x, x, x, 1, 1, 1, 1, 1};
+}


Re: [PATCH] ipa: Avoid invalid gimple when IPA-CP and IPA-SRA disagree on types (108384)

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, Feb 2, 2023 at 5:20 PM Martin Jambor  wrote:
>
> Hi,
>
> when the compiled program contains type mismatches between callers and
> callees when it comes to a parameter, IPA-CP can try to propagate one
> constant from callers while IPA-SRA may try to split a parameter
> expecting a value of a different size on the same offset.  This then
> currently leads to creation of a VIEW_CONVERT_EXPR with mismatching
> type sizes of LHS and RHS which is correctly flagged by the GIMPLE
> verifier as invalid.
>
> It seems that the best course of action is to try and avoid the
> situation altogether and so this patch adds a check to IPA-SRA that
> peeks into the result of IPA-CP and when it sees a value on the same
> offset but with a mismatching size, it just decides to leave that
> particular parameter be.
>
> Bootstrapped and tested on x86_64-linux, OK for master?

OK.  I suppose there are guards elsewhere that never let a
non-UHWI size type (like variable size or poly-int-size) through
any of the SRA or CP lattices?

Thanks,
Richard.

> Thanks,
>
> Martin
>
>
> gcc/ChangeLog:
>
> 2023-02-02  Martin Jambor  
>
> PR ipa/108384
> * ipa-sra.cc (push_param_adjustments_for_index): Remove a size check
> when comparing to an IPA-CP value.
> (dump_list_of_param_indices): New function.
> (adjust_parameter_descriptions): Check for mismatching IPA-CP values.
> Dump removed candidates using dump_list_of_param_indices.
> * ipa-param-manipulation.cc
> (ipa_param_body_adjustments::modify_expression): Add assert checking
> sizes of a VIEW_CONVERT_EXPR will match.
> (ipa_param_body_adjustments::modify_assignment): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2023-02-02  Martin Jambor  
>
> PR ipa/108384
> * gcc.dg/ipa/pr108384.c: New test.
> ---
>  gcc/ipa-param-manipulation.cc   |  4 ++
>  gcc/ipa-sra.cc  | 66 -
>  gcc/testsuite/gcc.dg/ipa/pr108384.c | 25 +++
>  3 files changed, 76 insertions(+), 19 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/pr108384.c
>
> diff --git a/gcc/ipa-param-manipulation.cc b/gcc/ipa-param-manipulation.cc
> index 1de9ca2ceb8..42488ee09c3 100644
> --- a/gcc/ipa-param-manipulation.cc
> +++ b/gcc/ipa-param-manipulation.cc
> @@ -1857,6 +1857,8 @@ ipa_param_body_adjustments::modify_expression (tree 
> *expr_p, bool convert)
>if (convert && !useless_type_conversion_p (TREE_TYPE (expr),
>  TREE_TYPE (repl)))
>  {
> +  gcc_checking_assert (tree_to_shwi (TYPE_SIZE (TREE_TYPE (expr)))
> +  == tree_to_shwi (TYPE_SIZE (TREE_TYPE (repl))));
>tree vce = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (expr), repl);
>*expr_p = vce;
>  }
> @@ -1900,6 +1902,8 @@ ipa_param_body_adjustments::modify_assignment (gimple 
> *stmt,
> }
>else
> {
> + gcc_checking_assert (tree_to_shwi (TYPE_SIZE (TREE_TYPE (*lhs_p)))
> + == tree_to_shwi (TYPE_SIZE (TREE_TYPE 
> (*rhs_p))));
>   tree new_rhs = fold_build1_loc (gimple_location (stmt),
>   VIEW_CONVERT_EXPR, TREE_TYPE 
> (*lhs_p),
>   *rhs_p);
> diff --git a/gcc/ipa-sra.cc b/gcc/ipa-sra.cc
> index 81b75910db1..7a2b4dc8608 100644
> --- a/gcc/ipa-sra.cc
> +++ b/gcc/ipa-sra.cc
> @@ -3989,9 +3989,7 @@ push_param_adjustments_for_index (isra_func_summary 
> *ifs, unsigned base_index,
> {
>   ipa_argagg_value_list avl (ipcp_ts);
>   tree value = avl.get_value (base_index, pa->unit_offset);
> - if (value
> - && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (value))) / BITS_PER_UNIT
> - == pa->unit_size))
> + if (value)
> {
>   if (dump_file)
> fprintf (dump_file, "- omitting component at byte "
> @@ -4130,6 +4128,22 @@ process_isra_node_results (cgraph_node *node,
>callers.release ();
>  }
>
> +/* If INDICES is not empty, dump a combination of NODE's dump_name and MSG
> +   followed by the list of numbers in INDICES.  */
> +
> +static void
> +dump_list_of_param_indices (const cgraph_node *node, const char* msg,
> +   const vec<unsigned> &indices)
> +{
> +  if (indices.is_empty ())
> +return;
> +  fprintf (dump_file, "The following parameters of %s %s:", node->dump_name 
> (),
> +  msg);
> +  for (unsigned i : indices)
> +fprintf (dump_file, " %u", i);
> +  fprintf (dump_file, "\n");
> +}
> +
>  /* Check which parameters of NODE described by IFS have survived until 
> IPA-SRA
> and disable transformations for those which have not or which should not
> transformed because the associated debug counter reached its limit.  
> Return
> @@ -4153,6 +4167,7 @@ adjust_parameter_descriptions (cgraph_node *node, 
> isra_func_summary *ifs)
>check_surv

Re: [PATCH] Bump up precision size to 16 bits.

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, Feb 2, 2023 at 6:39 PM Michael Meissner via Gcc-patches
 wrote:
>
> The new __dmr type that is being added as a possible future PowerPC 
> instruction

"is being added" means this feature is already in GCC 13?

> set bumps into a structure field size issue.  The size of the __dmr type is 
> 1024 bits.
> The precision field in tree_type_common is currently 10 bits, so if you store
> 1,024 into field, you get a 0 back.  When you get 0 in the precision field, 
> the
> ccp pass passes this 0 to sext_hwi in hwint.h.  That function in turn 
> generates
> a shift that is equal to the host wide int bit size, which is undefined as
> machine dependent for shifting in C/C++.
>
>   int shift = HOST_BITS_PER_WIDE_INT - prec;
>   return ((HOST_WIDE_INT) ((unsigned HOST_WIDE_INT) src << shift)) >> 
> shift;
>
> It turns out the x86_64 where I first did my tests returns the original input
> before the two shifts, while the PowerPC always returns 0.  In the ccp pass, 
> the
> original input is -1, and so it worked.  When I did the runs on the PowerPC, 
> the
> result was 0, which ultimately led to the failure.
>
> In addition, once the precision field is larger, it will help PR C/102989 (C2x
> _BigInt) as well as the implementation of the SET_TYPE_VECTOR_SUBPARTS macro.
>
> I bootstrapped various PowerPC compilers (power10 LE, power9 LE, power8 BE)
> along with an x86_64 build.  There were no regressions.  My proposed patches
> for the __dmr type now run fine.  Can I install this into the master branch 
> for
> GCC 13?

... because since we're in stage4 this should fix a regression or at least a
bug that's like ice-on-valid or wrong-code?

Definitely OK for stage1.

Thanks,
Richard.

> 2023-02-02   Richard Biener  
>  Michael Meissner  
>
> gcc/
>
> PR middle-end/108623
> * hwint.h (sext_hwi): Add assertion against precision 0.
> * tree-core.h (tree_type_common): Bump up precision field to 16 bits.
> Align bit fields > 1 bit to at least an 8-bit boundary.
> ---
>  gcc/hwint.h |  1 +
>  gcc/tree-core.h | 24 
>  2 files changed, 13 insertions(+), 12 deletions(-)
>
> diff --git a/gcc/hwint.h b/gcc/hwint.h
> index e31aa006fa4..ba92efbfc25 100644
> --- a/gcc/hwint.h
> +++ b/gcc/hwint.h
> @@ -277,6 +277,7 @@ ctz_or_zero (unsigned HOST_WIDE_INT x)
>  static inline HOST_WIDE_INT
>  sext_hwi (HOST_WIDE_INT src, unsigned int prec)
>  {
> +  gcc_checking_assert (prec != 0);
>if (prec == HOST_BITS_PER_WIDE_INT)
>  return src;
>else
> diff --git a/gcc/tree-core.h b/gcc/tree-core.h
> index 8124a1328d4..b71748c6c02 100644
> --- a/gcc/tree-core.h
> +++ b/gcc/tree-core.h
> @@ -1686,18 +1686,8 @@ struct GTY(()) tree_type_common {
>tree attributes;
>unsigned int uid;
>
> -  unsigned int precision : 10;
> -  unsigned no_force_blk_flag : 1;
> -  unsigned needs_constructing_flag : 1;
> -  unsigned transparent_aggr_flag : 1;
> -  unsigned restrict_flag : 1;
> -  unsigned contains_placeholder_bits : 2;
> -
> +  unsigned int precision : 16;
>ENUM_BITFIELD(machine_mode) mode : 8;
> -
> -  /* TYPE_STRING_FLAG for INTEGER_TYPE and ARRAY_TYPE.
> - TYPE_CXX_ODR_P for RECORD_TYPE and UNION_TYPE.  */
> -  unsigned string_flag : 1;
>unsigned lang_flag_0 : 1;
>unsigned lang_flag_1 : 1;
>unsigned lang_flag_2 : 1;
> @@ -1713,12 +1703,22 @@ struct GTY(()) tree_type_common {
>   so we need to store the value 32 (not 31, as we need the zero
>   as well), hence six bits.  */
>unsigned align : 6;
> +  /* TYPE_STRING_FLAG for INTEGER_TYPE and ARRAY_TYPE.
> + TYPE_CXX_ODR_P for RECORD_TYPE and UNION_TYPE.  */
> +  unsigned string_flag : 1;
> +  unsigned no_force_blk_flag : 1;
> +
>unsigned warn_if_not_align : 6;
> +  unsigned needs_constructing_flag : 1;
> +  unsigned transparent_aggr_flag : 1;
> +
> +  unsigned contains_placeholder_bits : 2;
> +  unsigned restrict_flag : 1;
>unsigned typeless_storage : 1;
>unsigned empty_flag : 1;
>unsigned indivisible_p : 1;
>unsigned no_named_args_stdarg_p : 1;
> -  unsigned spare : 15;
> +  unsigned spare : 9;
>
>alias_set_type alias_set;
>tree pointer_to;
> --
> 2.39.1
>
>
> --
> Michael Meissner, IBM
> PO Box 98, Ayer, Massachusetts, USA, 01432
> email: meiss...@linux.ibm.com


Re: [PATCH 0/2] Repost of patches for solving the build on Fedora 36 problem

2023-02-02 Thread Richard Biener via Gcc-patches
On Fri, Feb 3, 2023 at 6:44 AM Michael Meissner via Gcc-patches
 wrote:
>
> I'm reposting these two patches that allow GCC to build on Fedora 36 just to 
> be
> clear which patches I'm talking about.  The issue is that if GCC is configured
> with long double using the IEEE 128-bit representation, it currently cannot
> build _mulkc3 and _divkc3 in libgcc.

It's interesting that we do not see this with openSUSE where I configure with

--with-cpu=power8 --with-tune=power9 --with-long-double-format=ieee
--with-long-double-128

note this is ppc64le, we leave ppc64 and ppc with their default.

> Note, these patches do not solve the underlying problem of mixing _Float128 
> and
> long double types and using built-in functions (i.e. calling a _Float128
> built-in function with long double arguments when long double is IEEE 128-bit,
> or vice versa calling a long double built-in function with _Float128
> arguments).  But they do allow the compiler to build.
>
> Note, it is the morning of February 3rd, and I will be off on vacation from
> February 7th through February 14th.
>
> The first patch changes libgcc so that it uses either _Float128 or long double
> as the base IEEE 128-bit type, depending on whether long double uses the IBM
> double-double representation, or the IEEE 128-bit representation.  And for the
> complex type it uses _Complex _Float128 or _Complex long double.  The _mulkc3
> and _divkc3 functions are adjusted to use the f128 built-in functions or the
> long double built-in functions, based on the long double type.
>
> The second patch improves how the compiler generates the call to _mulkc3 and
> _divkc3.  I've discovered as I have tried to fix underlying problem with the
> IEEE 128-bit floating point types, it breaks the calls for IEEE 128-bit 
> complex
> multiply and divide.  This patch uses a cleaner approach to generate these
> calls, and it will work with the current setup, and with the various fixes 
> that
> I've attempted to do to fix the underlying problem.
>
> --
> Michael Meissner, IBM
> PO Box 98, Ayer, Massachusetts, USA, 01432
> email: meiss...@linux.ibm.com


Re: [PATCH 1/2] Handle component_ref to a structre/union field including flexible array member [PR101832]

2023-02-02 Thread Richard Biener via Gcc-patches
On Thu, 2 Feb 2023, Qing Zhao wrote:

> 
> 
> > On Feb 2, 2023, at 8:54 AM, Richard Biener  wrote:
> > 
> > On Thu, 2 Feb 2023, Qing Zhao wrote:
> > 
> >> 
> >> 

[...]

> >> +  return flexible_size_type_p (TREE_TYPE (last));
> > 
> > For types with many members this can become quite slow (IIRC we had
> > bugs about similar walks of all fields in types), and this function
> > looks like it's invoked multiple times on the same type per TU.
> > 
> > In principle the property is fixed at the time we lay out a record
> > type, so we might want to compute it at that time and record the
> > result.
>  
>  You mean in FE? 
> >>> 
> >>> Yes, either in the frontend or in the middle-ends layout_type.
> >>> 
>  Yes, that's better and cleaner.
>  
>  I will add one more field in the TYPE structure to record this 
>  information and check this field during middle end.
>  
>  I had the same thought in the beginning, but not sure whether adding a 
>  new field in IR is necessary or not, other places in middle end might 
>  not use this new field.
> >>> 
> >>> It might be interesting to search for other code walking all fields of
> >>> a type to determine this or similar info.
> >> 
> >> There is one which is defined in tree.cc but only is referenced in 
> >> c/c-decl.cc:
> >> 
> >> /* Determine whether TYPE is a structure with a flexible array member,
> >>   or a union containing such a structure (possibly recursively).  */
> >> flexible_array_type_p
> >> 
> >> However, this routine is a little different than the one I tried to add:
> >> 
> >> In the current routine "flexible_array_type_p", only one level of nesting in
> >> the structure is accepted, multiple nesting in structure is not permitted.
> >> 
> >> So, my question is:  shall we accept multiple nesting in structure? i.e.
> > 
> > If we don't reject the testcase with an error, then yes.
> 
> Gcc currently accepts the multiple nesting in structure without error.  
> So, we will continue to accept such extension as long as the flex array 
> is at the end of the structure. At the same time, for the case the flex 
> array is in the middle of the structure, issue additional warnings now 
> to discourage such usage, and deprecate this case in a future release.
> 
> Does this sound reasonable? 

Please don't mix several issues - I think the flex array in the
middle of a structure is separate and we shouldn't report that
as flexible_array_type_p or flexible_size_type_p since the size
of the containing structure is not variable.

For diagnostic purposes the intended use case is to treat
a pointer to a structure that appears to have a fixed size
but has (recursively) a member with a flexible array at the end
as having variable size.  Just the same as array_at_struct_end_p
treats this for the case of accesses involving such a type.

For the middle position case that's not the case.

Richard.

> Qing
> > 
> >> struct A {
> >>  int n;
> >>  char data[];/* Content following header */
> >> };
> >> 
> >> struct B {
> >>  int m;
> >>  struct A a;
> >> };
> >> 
> >> struct C {
> >>  int q;
> >>  struct B b;
> >> };
> >> 
> >> Qing
> >>> 
>  thanks.
>  
>  Qing
>  
> > 
> >> +  return false;
> >> +case UNION_TYPE:
> >> +  for (x = TYPE_FIELDS (type); x != NULL_TREE; x = DECL_CHAIN (x))
> >> +  {
> >> +if (TREE_CODE (x) == FIELD_DECL
> >> +&& flexible_array_type_p (TREE_TYPE (x)))
> >> +  return true;
> >> +  }
> >> +  return false;
> >> +default:
> >> +  return false;
> >> +  }
> >> +}
> >> +
> >> /* Compute __builtin_object_size for PTR, which is a ADDR_EXPR.
> >>  OBJECT_SIZE_TYPE is the second argument from __builtin_object_size.
> >>  If unknown, return size_unknown (object_size_type).  */
> >> @@ -633,45 +669,68 @@ addr_object_size (struct object_size_info *osi, 
> >> const_tree ptr,
> >>v = NULL_TREE;
> >>break;
> >>  case COMPONENT_REF:
> >> -  if (TREE_CODE (TREE_TYPE (v)) != ARRAY_TYPE)
> >> +  /* When the ref is not to an array, a record or a 
> >> union, it
> >> + will not have flexible size, compute the object 
> >> size
> >> + directly.  */
> >> +  if ((TREE_CODE (TREE_TYPE (v)) != ARRAY_TYPE)
> >> +  && (TREE_CODE (TREE_TYPE (v)) != RECORD_TYPE)
> >> +  && (TREE_CODE (TREE_TYPE (v)) != UNION_TYPE))
> >>  {
> >>v = NULL_TREE;
> >>break;
> >>  }
> >> -  is_flexible_array_mem_ref = 
> >> array_ref_flexible_size_p (v);
> >> -  while (v != pt_var && TREE_CODE (v) == 
> >>