date:20230331

Re: [PATCH] tree-optimization/109304 - properly handle instrumented aliases

2023-03-31 Thread Richard Biener via Gcc-patches

On Tue, 28 Mar 2023, Richard Biener wrote:

> When adjusting calls to reflect instrumentation we failed to handle
> calls to aliases since they appear to have no body.  Instead resort
> to symtab node availability.  The patch also avoids touching
> internal function calls in a more obvious way (builtins might
> have a body available).
> 
> profiledbootstrap & regtest running on x86_64-unknown-linux-gnu.
> 
> Honza - does this look OK?

Honza - ping!

>   PR tree-optimization/109304
>   * tree-profile.cc (tree_profiling): Use symtab node
>   availability to decide whether to skip adjusting calls.
>   Do not adjust calls to internal functions.
> 
>   * gcc.dg/pr109304.c: New testcase.
> ---
>  gcc/testsuite/gcc.dg/pr109304.c | 12 
>  gcc/tree-profile.cc |  9 ++---
>  2 files changed, 18 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr109304.c
> 
> diff --git a/gcc/testsuite/gcc.dg/pr109304.c b/gcc/testsuite/gcc.dg/pr109304.c
> new file mode 100644
> index 000..d435b04d4d5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr109304.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-profiling "-fprofile-generate" } */
> +/* { dg-require-effective-target fpic } */
> +/* { dg-options "-O3 -fprofile-generate -fPIC -fno-semantic-interposition" } 
> */
> +
> +int PyUnicode_FindChar_i;
> +int PyUnicode_FindChar()
> +{
> +  while (PyUnicode_FindChar_i)
> +if (PyUnicode_FindChar())
> +  break;
> +}
> diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc
> index 6f9a43e4bd5..da300d5f9e8 100644
> --- a/gcc/tree-profile.cc
> +++ b/gcc/tree-profile.cc
> @@ -808,7 +808,7 @@ tree_profiling (void)
>{
>   if (!gimple_has_body_p (node->decl)
>   || !(!node->clone_of
> - || node->decl != node->clone_of->decl))
> +  || node->decl != node->clone_of->decl))
> continue;
>  
>   /* Don't profile functions produced for builtin stuff.  */
> @@ -842,12 +842,15 @@ tree_profiling (void)
>   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> {
>   gcall *call = dyn_cast  (gsi_stmt (gsi));
> - if (!call)
> + if (!call || gimple_call_internal_p (call))
> continue;
>  
>   /* We do not clear pure/const on decls without body.  */
>   tree fndecl = gimple_call_fndecl (call);
> - if (fndecl && !gimple_has_body_p (fndecl))
> + cgraph_node *callee;
> + if (fndecl
> + && (callee = cgraph_node::get (fndecl))
> + && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
> continue;
>  
>   /* Drop the const attribute from the call type (the pure
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

[committed] RISC-V: Fix missing file dependency in RISC-V back-end [PR109328]

2023-03-31 Thread Kito Cheng via Gcc-patches

gcc/ChangeLog:

PR target/109328
* config/riscv/t-riscv: Add missing dependencies.

Co-authored-by: Andrew Pinski 
---
 gcc/config/riscv/t-riscv | 43 
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv
index 01f30a853e3..6e326fc7e02 100644
--- a/gcc/config/riscv/t-riscv
+++ b/gcc/config/riscv/t-riscv
@@ -1,6 +1,13 @@
+RISCV_BUILTINS_H = $(srcdir)/config/riscv/riscv-vector-builtins.h \
+  $(srcdir)/config/riscv/riscv-vector-builtins.def \
+  $(srcdir)/config/riscv/riscv-vector-builtins-functions.def \
+  riscv-vector-type-indexer.gen.def
+
 riscv-builtins.o: $(srcdir)/config/riscv/riscv-builtins.cc $(CONFIG_H) \
   $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \
-  $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/riscv/riscv-ftypes.def \
+  $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(RISCV_BUILTINS_H) \
+  $(srcdir)/config/riscv/riscv-ftypes.def \
+  $(srcdir)/config/riscv/riscv-vector-builtins-types.def \
   $(srcdir)/config/riscv/riscv-modes.def
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-builtins.cc
@@ -11,12 +18,10 @@ riscv-vector-builtins.o: 
$(srcdir)/config/riscv/riscv-vector-builtins.cc \
   $(FUNCTION_H) fold-const.h gimplify.h explow.h stor-layout.h $(REGS_H) \
   alias.h langhooks.h attribs.h stringpool.h emit-rtl.h basic-block.h \
   gimple.h gimple-iterator.h \
-  $(srcdir)/config/riscv/riscv-vector-builtins.h \
   $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \
   $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \
-  $(srcdir)/config/riscv/riscv-vector-builtins.def \
   $(srcdir)/config/riscv/riscv-vector-builtins-types.def \
-  $(srcdir)/config/riscv/riscv-vector-builtins-functions.def
+  $(RISCV_BUILTINS_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vector-builtins.cc
 
@@ -24,8 +29,9 @@ riscv-vector-builtins-shapes.o: \
   $(srcdir)/config/riscv/riscv-vector-builtins-shapes.cc \
   $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
   $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) \
-  $(srcdir)/config/riscv/riscv-vector-builtins.h \
-  $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h
+  $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \
+  $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \
+  $(RISCV_BUILTINS_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vector-builtins-shapes.cc
 
@@ -36,9 +42,9 @@ riscv-vector-builtins-bases.o: \
   $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \
   gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \
   rtx-vector-builder.h \
-  $(srcdir)/config/riscv/riscv-vector-builtins.h \
   $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \
-  $(srcdir)/config/riscv/riscv-vector-builtins-bases.h
+  $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \
+  $(RISCV_BUILTINS_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vector-builtins-bases.cc
 
@@ -60,23 +66,33 @@ riscv-vsetvl.o: $(srcdir)/config/riscv/riscv-vsetvl.cc \
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/riscv/riscv-vsetvl.cc
 
-riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc
+riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H)
$(COMPILE) $<
$(POSTCOMPILE)
 
-riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc
+riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H)
$(COMPILE) $<
$(POSTCOMPILE)
 
-riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc
+riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) output.h \
+  $(C_COMMON_H) $(TARGET_H) $(OPTABS_H) $(EXPR_H) $(INSN_ATTR_H) $(EMIT_RTL_H)
$(COMPILE) $<
$(POSTCOMPILE)
 
-riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc
+riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \
+  $(TM_P_H) $(TARGET_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \
+  $(EXPR_H) $(INSN_ATTR_H) explow.h $(EMIT_RTL_H) tree-vector-builder.h \
+  rtx-vector-builder.h
$(COMPILE) $<
$(POSTCOMPILE)
 
-thead.o: $(srcdir)/config/riscv/thead.cc
+thead.o: $(srcdir)/config/riscv/thead.cc \
+  $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) backend.h $(RTL_H) \
+  memmodel.h $(EMIT_RTL_H) poly-int.h output.h
$(COMPILE) $<
$(POSTCOMPILE)
 
@@ -94,6 +110,7 @@ build/genrvv-type-indexer$(build_exeext): 
build

[PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)

2023-03-31 Thread Martin Jambor

Hi,

we are in the process of changing data structures holding information
about constants passed by reference and in aggregates to use unsigned
int offsets rather than HOST_WIDE_INT (which was selected simply
because that is what fell out of get_ref_base_and_extent at that time)
in order to conserve memory, especially at WPA time.

PR 109303 testcase discovers that we do not properly check that we
only create jump functions with offsets (plus sizes) that fit into the
smaller type.  This patch adds the necessary check.

Bootstrapped and tested on x86_64-linux.  OK for master?

Thanks,

Martin


gcc/ChangeLog:

2023-03-30  Martin Jambor  

PR ipa/109303
* ipa-prop.cc (determine_known_aggregate_parts): Check that the
offset + size will be representable in unsigned int.

gcc/testsuite/ChangeLog:

2023-03-30  Jakub Jelinek  
Martin Jambor  

PR ipa/109303
* gcc.dg/pr109303.c: New test.
---
 gcc/ipa-prop.cc |  4 +++-
 gcc/testsuite/gcc.dg/pr109303.c | 24 
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr109303.c

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index de45dbccf16..9ffd49b590c 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct 
ipa_func_body_info *fbi,
 whether its value is clobbered any other dominating one.  */
  if ((content->value.pass_through.formal_id >= 0
   || content->value.pass_through.operand)
- && !clobber_by_agg_contents_list_p (all_list, content))
+ && !clobber_by_agg_contents_list_p (all_list, content)
+ && (content->offset + content->size - arg_offset
+ <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT))
{
  struct ipa_known_agg_contents_list *copy
= XALLOCA (struct ipa_known_agg_contents_list);
diff --git a/gcc/testsuite/gcc.dg/pr109303.c b/gcc/testsuite/gcc.dg/pr109303.c
new file mode 100644
index 000..f91535991c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr109303.c
@@ -0,0 +1,24 @@
+/* PR ipa/109303 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2" } */
+
+struct __attribute__((packed)) A { char c1; short a1[__INT_MAX__]; };
+struct __attribute__((packed)) B { char c2; short a2[100]; };
+struct S { struct A p1; struct B p2[4]; };
+void bar (short int);
+
+static void
+foo (struct S *q)
+{
+  for (int i = 0; i < q->p1.c1; i++)
+for (int j = 0; j < q->p2[i].c2; j++)
+  bar (q->p2[i].a2[j]);
+}
+
+int
+main ()
+{
+  struct S q = {};
+  q.p2[0].c2 = q.p2[1].c2 = 3;
+  foo (&q);
+}
-- 
2.40.0

Re: [PATCH] Document signbitm2.

2023-03-31 Thread Richard Biener via Gcc-patches

On Fri, Mar 31, 2023 at 8:59 AM liuhongt via Gcc-patches
 wrote:
>
> Look through all backends which defined signbitm2.
> 1. When m is a scalar mode, the dest is SImode.
> 2. When m is a vector mode, the dest mode is the vector integer
> mode has the same size and elements number as m.
>
> Ok for trunk?

OK.

> gcc/ChangeLog:
>
> * doc/md.texi: Document signbitm2.
> ---
>  gcc/doc/md.texi | 11 +++
>  1 file changed, 11 insertions(+)
>
> diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
> index 8e3113599fd..edfa51e867a 100644
> --- a/gcc/doc/md.texi
> +++ b/gcc/doc/md.texi
> @@ -6030,6 +6030,17 @@ floating-point mode.
>
>  This pattern is not allowed to @code{FAIL}.
>
> +@cindex @code{signbit@var{m}2} instruction pattern
> +@item @samp{signbit@var{m}2}
> +Store the sign bit of floating-point operand 1 in operand 0.
> +@var{m} is either a scalar or vector mode.  When it is a scalar,
> +operand 1 has mode @var{m} but operand 0 must have mode @code{SImode}.
> +When @var{m} is a vector, operand 1 has the mode @var{m}.
> +operand 0's mode should be an vector integer mode which has
> +the same number of elements and the same size as mode @var{m}.
> +
> +This pattern is not allowed to @code{FAIL}.
> +
>  @cindex @code{significand@var{m}2} instruction pattern
>  @item @samp{significand@var{m}2}
>  Store the significand of floating-point operand 1 in operand 0.
> --
> 2.39.1.388.g2fc9e9ca3c
>

Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)

2023-03-31 Thread Richard Biener via Gcc-patches

On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor  wrote:
>
> Hi,
>
> we are in the process of changing data structures holding information
> about constants passed by reference and in aggregates to use unsigned
> int offsets rather than HOST_WIDE_INT (which was selected simply
> because that is what fell out of get_ref_base_and_extent at that time)
> in order to conserve memory, especially at WPA time.
>
> PR 109303 testcase discovers that we do not properly check that we
> only create jump functions with offsets (plus sizes) that fit into the
> smaller type.  This patch adds the necessary check.
>
> Bootstrapped and tested on x86_64-linux.  OK for master?
>
> Thanks,
>
> Martin
>
>
> gcc/ChangeLog:
>
> 2023-03-30  Martin Jambor  
>
> PR ipa/109303
> * ipa-prop.cc (determine_known_aggregate_parts): Check that the
> offset + size will be representable in unsigned int.
>
> gcc/testsuite/ChangeLog:
>
> 2023-03-30  Jakub Jelinek  
> Martin Jambor  
>
> PR ipa/109303
> * gcc.dg/pr109303.c: New test.
> ---
>  gcc/ipa-prop.cc |  4 +++-
>  gcc/testsuite/gcc.dg/pr109303.c | 24 
>  2 files changed, 27 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr109303.c
>
> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
> index de45dbccf16..9ffd49b590c 100644
> --- a/gcc/ipa-prop.cc
> +++ b/gcc/ipa-prop.cc
> @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct 
> ipa_func_body_info *fbi,
>  whether its value is clobbered any other dominating one.  */
>   if ((content->value.pass_through.formal_id >= 0
>|| content->value.pass_through.operand)
> - && !clobber_by_agg_contents_list_p (all_list, content))
> + && !clobber_by_agg_contents_list_p (all_list, content)
> + && (content->offset + content->size - arg_offset
> + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT))
> {

it does seem a bit misplaced since after the if we add the same
'content' to another
list anyway.  Wouldn't a more obvious place be where we end up truncating
this sum?

>   struct ipa_known_agg_contents_list *copy
> = XALLOCA (struct ipa_known_agg_contents_list);
> diff --git a/gcc/testsuite/gcc.dg/pr109303.c b/gcc/testsuite/gcc.dg/pr109303.c
> new file mode 100644
> index 000..f91535991c7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr109303.c
> @@ -0,0 +1,24 @@
> +/* PR ipa/109303 */
> +/* { dg-do compile { target lp64 } } */
> +/* { dg-options "-O2" } */
> +
> +struct __attribute__((packed)) A { char c1; short a1[__INT_MAX__]; };
> +struct __attribute__((packed)) B { char c2; short a2[100]; };
> +struct S { struct A p1; struct B p2[4]; };
> +void bar (short int);
> +
> +static void
> +foo (struct S *q)
> +{
> +  for (int i = 0; i < q->p1.c1; i++)
> +for (int j = 0; j < q->p2[i].c2; j++)
> +  bar (q->p2[i].a2[j]);
> +}
> +
> +int
> +main ()
> +{
> +  struct S q = {};
> +  q.p2[0].c2 = q.p2[1].c2 = 3;
> +  foo (&q);
> +}
> --
> 2.40.0
>

Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)

2023-03-31 Thread Martin Jambor

Hi,

On Fri, Mar 31 2023, Richard Biener wrote:
> On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor  wrote:
>>
>> Hi,
>>
>> we are in the process of changing data structures holding information
>> about constants passed by reference and in aggregates to use unsigned
>> int offsets rather than HOST_WIDE_INT (which was selected simply
>> because that is what fell out of get_ref_base_and_extent at that time)
>> in order to conserve memory, especially at WPA time.
>>
>> PR 109303 testcase discovers that we do not properly check that we
>> only create jump functions with offsets (plus sizes) that fit into the
>> smaller type.  This patch adds the necessary check.
>>
>> Bootstrapped and tested on x86_64-linux.  OK for master?
>>
>> Thanks,
>>
>> Martin
>>
>>
>> gcc/ChangeLog:
>>
>> 2023-03-30  Martin Jambor  
>>
>> PR ipa/109303
>> * ipa-prop.cc (determine_known_aggregate_parts): Check that the
>> offset + size will be representable in unsigned int.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2023-03-30  Jakub Jelinek  
>> Martin Jambor  
>>
>> PR ipa/109303
>> * gcc.dg/pr109303.c: New test.
>> ---
>>  gcc/ipa-prop.cc |  4 +++-
>>  gcc/testsuite/gcc.dg/pr109303.c | 24 
>>  2 files changed, 27 insertions(+), 1 deletion(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/pr109303.c
>>
>> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
>> index de45dbccf16..9ffd49b590c 100644
>> --- a/gcc/ipa-prop.cc
>> +++ b/gcc/ipa-prop.cc
>> @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct 
>> ipa_func_body_info *fbi,
>>  whether its value is clobbered any other dominating one.  */
>>   if ((content->value.pass_through.formal_id >= 0
>>|| content->value.pass_through.operand)
>> - && !clobber_by_agg_contents_list_p (all_list, content))
>> + && !clobber_by_agg_contents_list_p (all_list, content)
>> + && (content->offset + content->size - arg_offset
>> + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT))
>> {
>
> it does seem a bit misplaced since after the if we add the same
> 'content' to another
> list anyway. 

The other list is a clobber list, as we crawl backwards from the call
statement searching for stores, we also look whether we have already
encountered a store of something else to an overlapping area.  In theory
we could have a store to a smaller data type, where the offset + size
would still fit unsigned int, be followed by a larger store, which would
not.  We want the large store to end up in the clobber list so that the
smaller one does not.

This is the place where we also calculate the size of the final
heap-allocated vector, so that is why eventually I put it there.

> Wouldn't a more obvious place be where we end up truncating this sum?

My reasoning was that since we know we would not be able to use it, it
makes sense to discard the data before we stream it from compilation to
WPA.

Also, when we shorten the offset type also in ipa_agg_jf_item (which is
what I want to do next), this is where the check eventually needs to
be.

Thanks,

Martin

Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)

2023-03-31 Thread Richard Biener via Gcc-patches

On Fri, Mar 31, 2023 at 11:18 AM Martin Jambor  wrote:
>
> Hi,
>
> On Fri, Mar 31 2023, Richard Biener wrote:
> > On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor  wrote:
> >>
> >> Hi,
> >>
> >> we are in the process of changing data structures holding information
> >> about constants passed by reference and in aggregates to use unsigned
> >> int offsets rather than HOST_WIDE_INT (which was selected simply
> >> because that is what fell out of get_ref_base_and_extent at that time)
> >> in order to conserve memory, especially at WPA time.
> >>
> >> PR 109303 testcase discovers that we do not properly check that we
> >> only create jump functions with offsets (plus sizes) that fit into the
> >> smaller type.  This patch adds the necessary check.
> >>
> >> Bootstrapped and tested on x86_64-linux.  OK for master?
> >>
> >> Thanks,
> >>
> >> Martin
> >>
> >>
> >> gcc/ChangeLog:
> >>
> >> 2023-03-30  Martin Jambor  
> >>
> >> PR ipa/109303
> >> * ipa-prop.cc (determine_known_aggregate_parts): Check that the
> >> offset + size will be representable in unsigned int.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >> 2023-03-30  Jakub Jelinek  
> >> Martin Jambor  
> >>
> >> PR ipa/109303
> >> * gcc.dg/pr109303.c: New test.
> >> ---
> >>  gcc/ipa-prop.cc |  4 +++-
> >>  gcc/testsuite/gcc.dg/pr109303.c | 24 
> >>  2 files changed, 27 insertions(+), 1 deletion(-)
> >>  create mode 100644 gcc/testsuite/gcc.dg/pr109303.c
> >>
> >> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
> >> index de45dbccf16..9ffd49b590c 100644
> >> --- a/gcc/ipa-prop.cc
> >> +++ b/gcc/ipa-prop.cc
> >> @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct 
> >> ipa_func_body_info *fbi,
> >>  whether its value is clobbered any other dominating one.  */
> >>   if ((content->value.pass_through.formal_id >= 0
> >>|| content->value.pass_through.operand)
> >> - && !clobber_by_agg_contents_list_p (all_list, content))
> >> + && !clobber_by_agg_contents_list_p (all_list, content)
> >> + && (content->offset + content->size - arg_offset
> >> + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT))
> >> {
> >
> > it does seem a bit misplaced since after the if we add the same
> > 'content' to another
> > list anyway.
>
> The other list is a clobber list, as we crawl backwards from the call
> statement searching for stores, we also look whether we have already
> encountered a store of something else to an overlapping area.  In theory
> we could have a store to a smaller data type, where the offset + size
> would still fit unsigned int, be followed by a larger store, which would
> not.  We want the large store to end up in the clobber list so that the
> smaller one does not.
>
> This is the place where we also calculate the size of the final
> heap-allocated vector, so that is why eventually I put it there.
>
> > Wouldn't a more obvious place be where we end up truncating this sum?
>
> My reasoning was that since we know we would not be able to use it, it
> makes sense to discard the data before we stream it from compilation to
> WPA.
>
> Also, when we shorten the offset type also in ipa_agg_jf_item (which is
> what I want to do next), this is where the check eventually needs to
> be.

OK, maybe add the above as comment then.  OK with that change.

Richard.

> Thanks,
>
> Martin

Re: [PATCH v3] RISC-V: Add Z*inx imcompatible check in gcc

2023-03-31 Thread Andreas Schwab

../../gcc/common/config/riscv/riscv-common.cc: In static member function 
'static riscv_subset_list* riscv_subset_list::parse(const char*, location_t)':
../../gcc/common/config/riscv/riscv-common.cc:1158:48: error: unquoted keyword 
'float' in format [-Werror=format-diag]
 1158 | "%<-march=%s%>: z*inx is conflict with float extensions",
  |^

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."

Re: [PATCH] vect: Verify that GET_MODE_NUNITS is greater than one.

2023-03-31 Thread Richard Sandiford via Gcc-patches

Michael Collison  writes:
> While working on autovectorizing for the RISCV port I encountered an issue
> where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is a
> evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode),
> where GET_MODE_NUNITS is equal to one.
>
> Tested on RISCV and x86_64-linux-gnu. Okay?
>
> 2023-03-09  Michael Collison  
>
>   * tree-vect-slp.cc (can_duplicate_and_interleave_p):
>   Check that GET_MODE_NUNITS is greater than one.
> ---
>  gcc/tree-vect-slp.cc | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 9a4e000925e..add58113fa8 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -426,7 +426,8 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned 
> int count,
> if (vector_type
> && VECTOR_MODE_P (TYPE_MODE (vector_type))
> && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
> -GET_MODE_SIZE (base_vector_mode)))
> +GET_MODE_SIZE (base_vector_mode))
> +   && known_gt (GET_MODE_NUNITS (TYPE_MODE (vector_type)), 1))
>   {
> /* Try fusing consecutive sequences of COUNT / NVECTORS elements
>together into elements of type INT_TYPE and using the result

FWIW, I think it'd better to remove:

  poly_int64 half_nelts = exact_div (nelts, 2);

declare:

  poly_uint64 half_nelts;

before the if condition, and use:

&& multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
   2, &half_nelts)

instead of the known_gt.  In other words, now that we can't assert
the exact_div, we should check it (using multiple_p) instead.

Thanks,
Richard

[PATCH] range-op-float, value-range: Fix up handling of UN{LT,LE,GT,GE,EQ}_EXPR and handle comparisons in get_tree_range [PR91645]

2023-03-31 Thread Jakub Jelinek via Gcc-patches

Hi!

When looking into PR91645, I've noticed we handle UN{LT,LE,GT,GE,EQ}_EXPR
comparisons incorrectly.
All those are unordered or ..., we correctly return [1, 1] if one or
both operands are known NANs, and correctly ask the non-UN prefixed
op to fold_range if neither operand may be NAN.
But for the case where one or both operands may be NAN, we always
return [0, 1].  The UN* fold_range tries to handle it by asking
the non-UN prefixed fold_range and if it returns [1, 1] return that,
if it returns [0, 0] or [0, 1] return [0, 1], which makes sense,
because the maybe NAN means that it is the non-UN prefixed fold_range
unioned with [1, 1] in case the maybe NAN is actually NAN at runtime.
The problem is that the non-UN prefixed fold_range always returns [0, 1]
because those fold_range implementations are like:
  if (op1.known_isnan () || op2.known_isnan ())
r = range_false (type);
  else if (!maybe_isnan (op1, op2))
{
...
}
  else
r = range_true_and_false (type);
and so if maybe_isnan, they always return [0, 1].  Now, thinking about it,
this is unnecessary pessimization, for the case where the ... block
returns range_false (type) we actually could do it also if maybe_isnan (op1,
op2), because if one or both operands are NAN, the comparison will be false,
and if neither is NAN, the comparison will be also false.  Will fix
incrementally today.
Anyway, the following patch fixes it by asking the non-UN prefixed
fold_range on ranges with NAN cleared, which I think does the right
thing in all cases.

Another change in the patch is that range_query::get_tree_range
always returned VARYING for comparisons, this patch allows to ask about
those as well (they are very much like binary ops, except they take
the important type from the types of the operands rather than result).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2023-03-31  Jakub Jelinek  

PR tree-optimization/91645
* range-op-float.cc (foperator_unordered_lt::fold_range,
foperator_unordered_le::fold_range,
foperator_unordered_gt::fold_range,
foperator_unordered_ge::fold_range,
foperator_unordered_equal::fold_range): Call the ordered
fold_range on ranges with cleared NaNs.
* value-query.cc (range_query::get_tree_range): Handle also
COMPARISON_CLASS_P trees.

--- gcc/range-op-float.cc.jj2023-03-28 11:00:29.205986288 +0200
+++ gcc/range-op-float.cc   2023-03-30 15:14:12.219183066 +0200
@@ -1587,7 +1587,13 @@ public:
r = range_true (type);
return true;
   }
-if (!fop_lt.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_lt.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
   return false;
 // The result is the same as the ordered version when the
 // comparison is true or when the operands cannot be NANs.
@@ -1692,7 +1698,13 @@ public:
r = range_true (type);
return true;
   }
-if (!fop_le.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_le.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
   return false;
 // The result is the same as the ordered version when the
 // comparison is true or when the operands cannot be NANs.
@@ -1793,7 +1805,13 @@ public:
r = range_true (type);
return true;
   }
-if (!fop_gt.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_gt.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
   return false;
 // The result is the same as the ordered version when the
 // comparison is true or when the operands cannot be NANs.
@@ -1898,7 +1916,13 @@ public:
r = range_true (type);
return true;
   }
-if (!fop_ge.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_ge.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
   return false;
 // The result is the same as the ordered version when the
 // comparison is true or when the operands cannot be NANs.
@@ -2002,7 +2026,13 @@ public:
r = range_true (type);
return true;
   }
-if (!fop_equal.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+

[RFC PATCH] Use ranger in the cdce pass [PR91645]

2023-03-31 Thread Jakub Jelinek via Gcc-patches

Hi!

The cdce pass among other things replaces calls like sqrt with code
like
  if (condition(s))
ret = .IFN_SQRT (x);
  else
ret = sqrt (x);
so that in the common case when we know the argument doesn't trigger
any range/domain errors we use the hardware instruction and defer to
a library call just to handle the erroneous conditions.
Now, on pr103559-3.c we already fold that condition during vrp2 or
so, but for pr103559-{2,4}.c we don't.
The following patch is an attempt to ask the ranger already during the
cdce pass.
Unfortunately, bootstrap/regtest of this patch found one regression:
FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors)
Excess errors:
during GIMPLE pass: cdce
/home/jakub/src/gcc/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc:26:
 internal compiler error: in operator[], at vec.h:890
0x94b822 vec::operator[](unsigned int)
../../gcc/vec.h:890
0x94ba2a vec::operator[](unsigned int)
../../gcc/value-relation.cc:705
0x94ba2a vec::operator[](unsigned int)
../../gcc/vec.h:1505
0x94ba2a equiv_oracle::add_equiv_to_block(basic_block_def*, bitmap_head*)
../../gcc/value-relation.cc:690
The problem is that if the pass asks ranger about something, then splits
some basic blocks to add the above if (condition(s)) stuff and then
asks the ranger again, the ranger caches stuff based on basic blocks
but the pass adds more basic blocks.
So, if I wanted to use ranger in the pass, is it true I'd need to
do all the ranger queries in some analysis phase before changes are
made to the cfg?

Now, I've just tried and apparently already the patch I've just posted
about foperator_un*::fold_range seems to fix those testcases (during vrp2
or when), so maybe we don't need to bother with cdce here and can just
commit the testcases.  Are they ok for trunk as incremental change to
the previous patch?

2023-03-31  Jakub Jelinek  

PR tree-optimization/91645
* tree-call-cdce.cc: Include gimple-range.h.
(gen_one_condition): Add STMT argument.  Ask ranger if the
comparison is known to be always false or always true and use
false or true instead of the actual comparison in that case.
(gen_conditions_for_domain, gen_conditions_for_pow_cst_base,
gen_conditions_for_pow_int_base): Add STMT argument, pass it
through.
(gen_conditions_for_pow): Pass pow_call as STMT to
gen_conditions_for_pow_cst_base and gen_conditions_for_pow_int_base.
(gen_shrink_wrap_conditions): Pass bi_call as STMT to
gen_conditions_for_domain.
(pass_call_cdce::execute): Disable ranger if it has been enabled.

* gcc.target/i386/pr103559-1.c: New test.
* gcc.target/i386/pr103559-2.c: New test.
* gcc.target/i386/pr103559-3.c: New test.
* gcc.target/i386/pr103559-4.c: New test.

--- gcc/tree-call-cdce.cc.jj2023-01-02 09:32:45.940944935 +0100
+++ gcc/tree-call-cdce.cc   2023-03-30 14:54:25.248544702 +0200
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.
 #include "builtins.h"
 #include "internal-fn.h"
 #include "tree-dfa.h"
+#include "gimple-range.h"
 
 
 /* This pass serves two closely-related purposes:
@@ -425,12 +426,9 @@ comparison_code_if_no_nans (tree_code co
null tree.  */
 
 static void
-gen_one_condition (tree arg, int lbub,
-   enum tree_code tcode,
-   const char *temp_name1,
-  const char *temp_name2,
-  vec conds,
-   unsigned *nconds)
+gen_one_condition (tree arg, int lbub, enum tree_code tcode,
+  const char *temp_name1, const char *temp_name2,
+  vec conds, unsigned *nconds, gimple *stmt)
 {
   if (!HONOR_NANS (arg))
 tcode = comparison_code_if_no_nans (tcode);
@@ -451,10 +449,24 @@ gen_one_condition (tree arg, int lbub,
   gimple_assign_set_lhs (stmt1, tempn);
 
   tempc = create_tmp_var (boolean_type_node, temp_name2);
-  stmt2 = gimple_build_assign (tempc,
-   fold_build2 (tcode,
-   boolean_type_node,
-   tempn, lbub_real_cst));
+  tree tcond = build2 (tcode, boolean_type_node, arg, lbub_real_cst);
+  int_range_max r;
+  range_query *q = get_range_query (cfun);
+  if (q == get_global_range_query ())
+q = enable_ranger (cfun);
+  /* Ask the ranger whether it knows the condition will be always false or
+ always true.  */
+  if (!q->range_of_expr (r, tcond, stmt) || r.undefined_p ())
+tcond = NULL_TREE;
+  else if (r.upper_bound () == 0)
+tcond = boolean_false_node;
+  else if (r.lower_bound () == 1)
+tcond = boolean_true_node;
+  else
+tcond = NULL_TREE;
+  if (!tcond)
+tcond = fold_build2 (tcode, boolean_type_node, tempn, lbub_real_cst);
+  stmt2 = gimple_build_assign (tempc, tcond);
   tempcn = make_ssa_name (tempc, stmt2);
   gimple_assign_set_lhs (stmt2, te

[PATCH] range-op-float: Further comparison fixes

2023-03-31 Thread Jakub Jelinek via Gcc-patches

On Fri, Mar 31, 2023 at 09:57:54AM +0200, Jakub Jelinek via Gcc-patches wrote:
> and so if maybe_isnan, they always return [0, 1].  Now, thinking about it,
> this is unnecessary pessimization, for the case where the ... block
> returns range_false (type) we actually could do it also if maybe_isnan (op1,
> op2), because if one or both operands are NAN, the comparison will be false,
> and if neither is NAN, the comparison will be also false.  Will fix
> incrementally today.

Here it is.
1) the foperator_{,not_}equal::fold_range cases
   - we have correctly first a case handling known_isnan on either operand,
 return there range_false (type) - EQ, resp. range_true (type) - NE
   - next we handle the singleton cases, maybe_isnan + singleton isn't
 singleton, so these are ok
   - there is a missing case (not handled in this patch) where both operands
 are known to be zeros, but not singleton zeros
   - there is some !maybe_isnan (op1, op2) handling which tries to prove
 when the operands are certainly not equal and results in
 range_false (type) - EQ, resp. range_true (type) - NE
   - otherwise range_true_and_false (type)
   Now, I think (and this patch implements it) that the !maybe_isnan (op1, op2)
   check is unnecessary.  If we prove that when ignoring maybe NANs, the
   ranges don't intersect and so the comparison is always false for EQ or
   always true for NE, if NANs can appear, it doesn't change anything on
   that either, for EQ if NAN appears, the result is false like what we
   proved for the finite ranges, for NE if NAN appears, the result is true
   like what we proved for the finite ranges
2) foperator_{lt,le,gt,ge}::fold_range cases
   - these have correctly known_isnan on either operand first and return
 there range_false (type)
   - then !maybe_isnan (op1, op2) condition guarded checks
   - finally range_true_and_false (type) - so do that for
 maybe_isnan (op1, op2)
   Here in the !maybe_isnan (op1, op2) guarded code we have some condition
   which results in range_true (type), another condition which results in
   range_false (type) and otherwise range_true_and_false (type).
   Now, the condition which results in range_false (type) can be IMHO done
   also for the maybe_isnan (op1, op2) cases, because it is [0, 0]
   if both operands are finite or [0, 0] if either operand is NAN.
3) finally, LTGT_EXPR wasn't handled at all.  LTGT_EXPR is the inverse
   comparision to UNEQ_EXPR, I believe both raise exceptions only if
   either operand is sNaN, UNEQ_EXPR is true if both operands are
   either equal or either of the operands is NAN, while LTGT_EXPR is
   true if the operands compare either smaller or larger and is false if
   either of the operands is NAN.

Ok for trunk if this passes bootstrap/regtest?

2023-03-31  Jakub Jelinek  

* range-op-float.cc (foperator_equal::fold_range): Perform the
non-singleton handling regardless of maybe_isnan (op1, op2).
(foperator_not_equal::fold_range): Likewise.
(foperator_lt::fold_range, foperator_le::fold_range,
foperator_gt::fold_range, foperator_ge::fold_range): Perform the
real_* comparison check which results in range_false (type)
even if maybe_isnan (op1, op2).  Simplify.
(foperator_ltgt): New class.
(fop_ltgt): New variable.
(floating_op_table::floating_op_table): Handle LTGT_EXPR using
fop_ltgt.

--- gcc/range-op-float.cc.jj2023-03-31 09:30:11.245296618 +0200
+++ gcc/range-op-float.cc   2023-03-31 11:23:04.817876083 +0200
@@ -616,7 +616,7 @@ foperator_equal::fold_range (irange &r,
   else
r = range_false (type);
 }
-  else if (!maybe_isnan (op1, op2))
+  else
 {
   // If ranges do not intersect, we know the range is not equal,
   // otherwise we don't know anything for sure.
@@ -638,8 +638,6 @@ foperator_equal::fold_range (irange &r,
   else
r = range_true_and_false (type);
 }
-  else
-r = range_true_and_false (type);
   return true;
 }
 
@@ -734,7 +732,7 @@ foperator_not_equal::fold_range (irange
   else
r = range_true (type);
 }
-  else if (!maybe_isnan (op1, op2))
+  else
 {
   // If ranges do not intersect, we know the range is not equal,
   // otherwise we don't know anything for sure.
@@ -756,8 +754,6 @@ foperator_not_equal::fold_range (irange
   else
r = range_true_and_false (type);
 }
-  else
-r = range_true_and_false (type);
   return true;
 }
 
@@ -839,17 +835,13 @@ foperator_lt::fold_range (irange &r, tre
   if (frelop_early_resolve (r, type, op1, op2, rel, VREL_LT))
 return true;
 
-  if (op1.known_isnan () || op2.known_isnan ())
+  if (op1.known_isnan ()
+  || op2.known_isnan ()
+  || !real_less (&op1.lower_bound (), &op2.upper_bound ()))
 r = range_false (type);
-  else if (!maybe_isnan (op1, op2))
-{
-  if (real_less (&op1.upper_bound (), &op2.lower_bound ()))
-   r = range_true (type);
-

[PATCH] range-op-float: Further foperator_{,not_}equal::fold_range fix

2023-03-31 Thread Jakub Jelinek via Gcc-patches

On Fri, Mar 31, 2023 at 12:45:10PM +0200, Jakub Jelinek via Gcc-patches wrote:
>- there is a missing case (not handled in this patch) where both operands
>  are known to be zeros, but not singleton zeros

This patch adds those cases.

Ok for trunk if it passes bootstrap/regtest?

2023-03-31  Jakub Jelinek  

* range-op-float.cc (foperator_equal::fold_range): If at least
one of the op ranges is not singleton and neither is NaN and all
4 bounds are zero, return [1, 1].
(foperator_not_equal::fold_range): In the same case return [0, 0].

--- gcc/range-op-float.cc.jj2023-03-31 11:23:04.817876083 +0200
+++ gcc/range-op-float.cc   2023-03-31 12:51:34.757480162 +0200
@@ -616,6 +616,13 @@ foperator_equal::fold_range (irange &r,
   else
r = range_false (type);
 }
+  else if (real_iszero (&op1.lower_bound ())
+  && real_iszero (&op1.upper_bound ())
+  && real_iszero (&op2.lower_bound ())
+  && real_iszero (&op2.upper_bound ())
+  && !maybe_isnan (op1, op2))
+// [-0.0, 0.0] == [-0.0, 0.0] or similar.
+r = range_true (type);
   else
 {
   // If ranges do not intersect, we know the range is not equal,
@@ -732,6 +739,13 @@ foperator_not_equal::fold_range (irange
   else
r = range_true (type);
 }
+  else if (real_iszero (&op1.lower_bound ())
+  && real_iszero (&op1.upper_bound ())
+  && real_iszero (&op2.lower_bound ())
+  && real_iszero (&op2.upper_bound ())
+  && !maybe_isnan (op1, op2))
+// [-0.0, 0.0] != [-0.0, 0.0] or similar.
+r = range_false (type);
   else
 {
   // If ranges do not intersect, we know the range is not equal,


Jakub

Re: [committed] libstdc++: Fix constexpr functions in

2023-03-31 Thread Jonathan Wakely via Gcc-patches

On Thu, 30 Mar 2023 at 17:01, Daniel Krügler wrote:
>
> Am Do., 30. März 2023 um 18:00 Uhr schrieb Jonathan Wakely via
> Libstdc++ :
> >
> [..]
> >
> > In fact, thinking about P2641 some more, I might revert this change.
> > Instead of adding an extra bool member to support constexpr, I think
> > I'll just remove the 'constexpr' keywords from basic_endpoint for now,
> > and implement it in terms of just inspecting the sa_family_t member of
> > the union members. And then later, once we have something like P2641,
> > we can re-add the constexpr keywords and use is_within_lifetime during
> > constant evaluation. That way we don't add a bool then need to take it
> > away again, changing the ABI each time.
>
> I was just going to make the same suggestion.

Actually, as pointed out in Barry's P2641R1 paper, we can just use
GCC's __builtin_constant_p:

+  constexpr bool
+  _M_is_v6() const noexcept
+  {
+   // For constexpr eval we can just detect which union member is active.
+   // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)).
+   if (std::__is_constant_evaluated())
+ return __builtin_constant_p(_M_data._M_v6.sin6_family);
+   return _M_data._M_v6.sin6_family == AF_INET6;
+  }

Re: [PATCH v2 1/2] combine: Split code out of make_compound_operation_int

2023-03-31 Thread Segher Boessenkool

Hi!

On Thu, Mar 09, 2023 at 12:09:59PM +, Richard Sandiford wrote:
> This patch just splits some code out of make_compound_operation_int
> into a new function called make_compound_operation_and.  It is a
> prerequisite for the fix for PR106594.
> 
> It might (or might not) make sense to put more of the existing
> "and" handling into the new function, so that the subreg+lshiftrt
> case can be handled through recursion rather than duplication.
> But that's certainly not necessary to fix the bug, so is at
> best stage 1 material.
> 
> No behavioural change intended.

That doesn't sound as if you are very sure about things.  I'll just
pretend it says "no functional changes" :-)

(*Is* this a pure refactoring?)


Segher

Re: [PATCH] range-op-float, value-range: Fix up handling of UN{LT,LE,GT,GE,EQ}_EXPR and handle comparisons in get_tree_range [PR91645]

2023-03-31 Thread Aldy Hernandez via Gcc-patches





On 3/31/23 09:57, Jakub Jelinek wrote:

Hi!

When looking into PR91645, I've noticed we handle UN{LT,LE,GT,GE,EQ}_EXPR
comparisons incorrectly.
All those are unordered or ..., we correctly return [1, 1] if one or
both operands are known NANs, and correctly ask the non-UN prefixed
op to fold_range if neither operand may be NAN.
But for the case where one or both operands may be NAN, we always
return [0, 1].  The UN* fold_range tries to handle it by asking
the non-UN prefixed fold_range and if it returns [1, 1] return that,
if it returns [0, 0] or [0, 1] return [0, 1], which makes sense,
because the maybe NAN means that it is the non-UN prefixed fold_range
unioned with [1, 1] in case the maybe NAN is actually NAN at runtime.
The problem is that the non-UN prefixed fold_range always returns [0, 1]
because those fold_range implementations are like:
   if (op1.known_isnan () || op2.known_isnan ())
 r = range_false (type);
   else if (!maybe_isnan (op1, op2))
 {
...
 }
   else
 r = range_true_and_false (type);
and so if maybe_isnan, they always return [0, 1].  Now, thinking about it,
this is unnecessary pessimization, for the case where the ... block
returns range_false (type) we actually could do it also if maybe_isnan (op1,
op2), because if one or both operands are NAN, the comparison will be false,
and if neither is NAN, the comparison will be also false.  Will fix
incrementally today.
Anyway, the following patch fixes it by asking the non-UN prefixed
fold_range on ranges with NAN cleared, which I think does the right
thing in all cases.

Another change in the patch is that range_query::get_tree_range
always returned VARYING for comparisons, this patch allows to ask about
those as well (they are very much like binary ops, except they take
the important type from the types of the operands rather than result).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?


LGTM
Aldy


2023-03-31  Jakub Jelinek  

PR tree-optimization/91645
* range-op-float.cc (foperator_unordered_lt::fold_range,
foperator_unordered_le::fold_range,
foperator_unordered_gt::fold_range,
foperator_unordered_ge::fold_range,
foperator_unordered_equal::fold_range): Call the ordered
fold_range on ranges with cleared NaNs.
* value-query.cc (range_query::get_tree_range): Handle also
COMPARISON_CLASS_P trees.

--- gcc/range-op-float.cc.jj2023-03-28 11:00:29.205986288 +0200
+++ gcc/range-op-float.cc   2023-03-30 15:14:12.219183066 +0200
@@ -1587,7 +1587,13 @@ public:
r = range_true (type);
return true;
}
-if (!fop_lt.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_lt.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
return false;
  // The result is the same as the ordered version when the
  // comparison is true or when the operands cannot be NANs.
@@ -1692,7 +1698,13 @@ public:
r = range_true (type);
return true;
}
-if (!fop_le.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_le.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
return false;
  // The result is the same as the ordered version when the
  // comparison is true or when the operands cannot be NANs.
@@ -1793,7 +1805,13 @@ public:
r = range_true (type);
return true;
}
-if (!fop_gt.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_gt.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
return false;
  // The result is the same as the ordered version when the
  // comparison is true or when the operands cannot be NANs.
@@ -1898,7 +1916,13 @@ public:
r = range_true (type);
return true;
}
-if (!fop_ge.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_nan.clear_nan ();
+if (op2.maybe_isnan ())
+  op2_no_nan.clear_nan ();
+if (!fop_ge.fold_range (r, type, op1_no_nan, op2_no_nan, rel))
return false;
  // The result is the same as the ordered version when the
  // comparison is true or when the operands cannot be NANs.
@@ -2002,7 +2026,13 @@ public:
r = range_true (type);
return true;
}
-if (!fop_equal.fold_range (r, type, op1, op2, rel))
+frange op1_no_nan = op1;
+frange op2_no_nan = op2;
+if (op1.maybe_isnan ())
+  op1_no_

Re: [PATCH] range-op-float: Further comparison fixes

2023-03-31 Thread Aldy Hernandez via Gcc-patches





On 3/31/23 12:45, Jakub Jelinek wrote:

On Fri, Mar 31, 2023 at 09:57:54AM +0200, Jakub Jelinek via Gcc-patches wrote:

and so if maybe_isnan, they always return [0, 1].  Now, thinking about it,
this is unnecessary pessimization, for the case where the ... block
returns range_false (type) we actually could do it also if maybe_isnan (op1,
op2), because if one or both operands are NAN, the comparison will be false,
and if neither is NAN, the comparison will be also false.  Will fix
incrementally today.


Here it is.
1) the foperator_{,not_}equal::fold_range cases
- we have correctly first a case handling known_isnan on either operand,
  return there range_false (type) - EQ, resp. range_true (type) - NE
- next we handle the singleton cases, maybe_isnan + singleton isn't
  singleton, so these are ok
- there is a missing case (not handled in this patch) where both operands
  are known to be zeros, but not singleton zeros
- there is some !maybe_isnan (op1, op2) handling which tries to prove
  when the operands are certainly not equal and results in
  range_false (type) - EQ, resp. range_true (type) - NE
- otherwise range_true_and_false (type)
Now, I think (and this patch implements it) that the !maybe_isnan (op1, op2)
check is unnecessary.  If we prove that when ignoring maybe NANs, the
ranges don't intersect and so the comparison is always false for EQ or
always true for NE, if NANs can appear, it doesn't change anything on
that either, for EQ if NAN appears, the result is false like what we
proved for the finite ranges, for NE if NAN appears, the result is true
like what we proved for the finite ranges
2) foperator_{lt,le,gt,ge}::fold_range cases
- these have correctly known_isnan on either operand first and return
  there range_false (type)
- then !maybe_isnan (op1, op2) condition guarded checks
- finally range_true_and_false (type) - so do that for
  maybe_isnan (op1, op2)
Here in the !maybe_isnan (op1, op2) guarded code we have some condition
which results in range_true (type), another condition which results in
range_false (type) and otherwise range_true_and_false (type).
Now, the condition which results in range_false (type) can be IMHO done
also for the maybe_isnan (op1, op2) cases, because it is [0, 0]
if both operands are finite or [0, 0] if either operand is NAN.
3) finally, LTGT_EXPR wasn't handled at all.  LTGT_EXPR is the inverse
comparision to UNEQ_EXPR, I believe both raise exceptions only if
either operand is sNaN, UNEQ_EXPR is true if both operands are
either equal or either of the operands is NAN, while LTGT_EXPR is
true if the operands compare either smaller or larger and is false if
either of the operands is NAN.

Ok for trunk if this passes bootstrap/regtest?


LGTM
Aldy



2023-03-31  Jakub Jelinek  

* range-op-float.cc (foperator_equal::fold_range): Perform the
non-singleton handling regardless of maybe_isnan (op1, op2).
(foperator_not_equal::fold_range): Likewise.
(foperator_lt::fold_range, foperator_le::fold_range,
foperator_gt::fold_range, foperator_ge::fold_range): Perform the
real_* comparison check which results in range_false (type)
even if maybe_isnan (op1, op2).  Simplify.
(foperator_ltgt): New class.
(fop_ltgt): New variable.
(floating_op_table::floating_op_table): Handle LTGT_EXPR using
fop_ltgt.

--- gcc/range-op-float.cc.jj2023-03-31 09:30:11.245296618 +0200
+++ gcc/range-op-float.cc   2023-03-31 11:23:04.817876083 +0200
@@ -616,7 +616,7 @@ foperator_equal::fold_range (irange &r,
else
r = range_false (type);
  }
-  else if (!maybe_isnan (op1, op2))
+  else
  {
// If ranges do not intersect, we know the range is not equal,
// otherwise we don't know anything for sure.
@@ -638,8 +638,6 @@ foperator_equal::fold_range (irange &r,
else
r = range_true_and_false (type);
  }
-  else
-r = range_true_and_false (type);
return true;
  }
  
@@ -734,7 +732,7 @@ foperator_not_equal::fold_range (irange

else
r = range_true (type);
  }
-  else if (!maybe_isnan (op1, op2))
+  else
  {
// If ranges do not intersect, we know the range is not equal,
// otherwise we don't know anything for sure.
@@ -756,8 +754,6 @@ foperator_not_equal::fold_range (irange
else
r = range_true_and_false (type);
  }
-  else
-r = range_true_and_false (type);
return true;
  }
  
@@ -839,17 +835,13 @@ foperator_lt::fold_range (irange &r, tre

if (frelop_early_resolve (r, type, op1, op2, rel, VREL_LT))
  return true;
  
-  if (op1.known_isnan () || op2.known_isnan ())

+  if (op1.known_isnan ()
+  || op2.known_isnan ()
+  || !real_less (&op1.lower_bound (), &op2.upper_bound ()))
  r = range_false (type);
-  else if (!maybe_isnan (op1, op2))

Re: [PATCH] range-op-float: Further foperator_{,not_}equal::fold_range fix

2023-03-31 Thread Aldy Hernandez via Gcc-patches





On 3/31/23 12:57, Jakub Jelinek wrote:

On Fri, Mar 31, 2023 at 12:45:10PM +0200, Jakub Jelinek via Gcc-patches wrote:

- there is a missing case (not handled in this patch) where both operands
  are known to be zeros, but not singleton zeros


This patch adds those cases.

Ok for trunk if it passes bootstrap/regtest?


LGTM.

Thanks so much for taking care of all this.
Aldy



2023-03-31  Jakub Jelinek  

* range-op-float.cc (foperator_equal::fold_range): If at least
one of the op ranges is not singleton and neither is NaN and all
4 bounds are zero, return [1, 1].
(foperator_not_equal::fold_range): In the same case return [0, 0].

--- gcc/range-op-float.cc.jj2023-03-31 11:23:04.817876083 +0200
+++ gcc/range-op-float.cc   2023-03-31 12:51:34.757480162 +0200
@@ -616,6 +616,13 @@ foperator_equal::fold_range (irange &r,
else
r = range_false (type);
  }
+  else if (real_iszero (&op1.lower_bound ())
+  && real_iszero (&op1.upper_bound ())
+  && real_iszero (&op2.lower_bound ())
+  && real_iszero (&op2.upper_bound ())
+  && !maybe_isnan (op1, op2))
+// [-0.0, 0.0] == [-0.0, 0.0] or similar.
+r = range_true (type);
else
  {
// If ranges do not intersect, we know the range is not equal,
@@ -732,6 +739,13 @@ foperator_not_equal::fold_range (irange
else
r = range_true (type);
  }
+  else if (real_iszero (&op1.lower_bound ())
+  && real_iszero (&op1.upper_bound ())
+  && real_iszero (&op2.lower_bound ())
+  && real_iszero (&op2.upper_bound ())
+  && !maybe_isnan (op1, op2))
+// [-0.0, 0.0] != [-0.0, 0.0] or similar.
+r = range_false (type);
else
  {
// If ranges do not intersect, we know the range is not equal,


Jakub

Re: [RFC PATCH] Use ranger in the cdce pass [PR91645]

2023-03-31 Thread Aldy Hernandez via Gcc-patches





On 3/31/23 10:16, Jakub Jelinek wrote:

Hi!

The cdce pass among other things replaces calls like sqrt with code
like
   if (condition(s))
 ret = .IFN_SQRT (x);
   else
 ret = sqrt (x);
so that in the common case when we know the argument doesn't trigger
any range/domain errors we use the hardware instruction and defer to
a library call just to handle the erroneous conditions.
Now, on pr103559-3.c we already fold that condition during vrp2 or
so, but for pr103559-{2,4}.c we don't.
The following patch is an attempt to ask the ranger already during the
cdce pass.
Unfortunately, bootstrap/regtest of this patch found one regression:
FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors)
Excess errors:
during GIMPLE pass: cdce
/home/jakub/src/gcc/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc:26:
 internal compiler error: in operator[], at vec.h:890
0x94b822 vec::operator[](unsigned int)
 ../../gcc/vec.h:890
0x94ba2a vec::operator[](unsigned int)
 ../../gcc/value-relation.cc:705
0x94ba2a vec::operator[](unsigned int)
 ../../gcc/vec.h:1505
0x94ba2a equiv_oracle::add_equiv_to_block(basic_block_def*, bitmap_head*)
 ../../gcc/value-relation.cc:690
The problem is that if the pass asks ranger about something, then splits
some basic blocks to add the above if (condition(s)) stuff and then
asks the ranger again, the ranger caches stuff based on basic blocks
but the pass adds more basic blocks.
So, if I wanted to use ranger in the pass, is it true I'd need to
do all the ranger queries in some analysis phase before changes are
made to the cfg?


Most likely, but Andrew had some fixes to make sure we didn't tooo bad 
when simple changes to the IL happened.  Perhaps he has some thoughts here.


Aldy



Now, I've just tried and apparently already the patch I've just posted
about foperator_un*::fold_range seems to fix those testcases (during vrp2
or when), so maybe we don't need to bother with cdce here and can just
commit the testcases.  Are they ok for trunk as incremental change to
the previous patch?

2023-03-31  Jakub Jelinek  

PR tree-optimization/91645
* tree-call-cdce.cc: Include gimple-range.h.
(gen_one_condition): Add STMT argument.  Ask ranger if the
comparison is known to be always false or always true and use
false or true instead of the actual comparison in that case.
(gen_conditions_for_domain, gen_conditions_for_pow_cst_base,
gen_conditions_for_pow_int_base): Add STMT argument, pass it
through.
(gen_conditions_for_pow): Pass pow_call as STMT to
gen_conditions_for_pow_cst_base and gen_conditions_for_pow_int_base.
(gen_shrink_wrap_conditions): Pass bi_call as STMT to
gen_conditions_for_domain.
(pass_call_cdce::execute): Disable ranger if it has been enabled.

* gcc.target/i386/pr103559-1.c: New test.
* gcc.target/i386/pr103559-2.c: New test.
* gcc.target/i386/pr103559-3.c: New test.
* gcc.target/i386/pr103559-4.c: New test.

--- gcc/tree-call-cdce.cc.jj2023-01-02 09:32:45.940944935 +0100
+++ gcc/tree-call-cdce.cc   2023-03-30 14:54:25.248544702 +0200
@@ -36,6 +36,7 @@ along with GCC; see the file COPYING3.
  #include "builtins.h"
  #include "internal-fn.h"
  #include "tree-dfa.h"
+#include "gimple-range.h"
  
  
  /* This pass serves two closely-related purposes:

@@ -425,12 +426,9 @@ comparison_code_if_no_nans (tree_code co
 null tree.  */
  
  static void

-gen_one_condition (tree arg, int lbub,
-   enum tree_code tcode,
-   const char *temp_name1,
-  const char *temp_name2,
-  vec conds,
-   unsigned *nconds)
+gen_one_condition (tree arg, int lbub, enum tree_code tcode,
+  const char *temp_name1, const char *temp_name2,
+  vec conds, unsigned *nconds, gimple *stmt)
  {
if (!HONOR_NANS (arg))
  tcode = comparison_code_if_no_nans (tcode);
@@ -451,10 +449,24 @@ gen_one_condition (tree arg, int lbub,
gimple_assign_set_lhs (stmt1, tempn);
  
tempc = create_tmp_var (boolean_type_node, temp_name2);

-  stmt2 = gimple_build_assign (tempc,
-   fold_build2 (tcode,
-   boolean_type_node,
-   tempn, lbub_real_cst));
+  tree tcond = build2 (tcode, boolean_type_node, arg, lbub_real_cst);
+  int_range_max r;
+  range_query *q = get_range_query (cfun);
+  if (q == get_global_range_query ())
+q = enable_ranger (cfun);
+  /* Ask the ranger whether it knows the condition will be always false or
+ always true.  */
+  if (!q->range_of_expr (r, tcond, stmt) || r.undefined_p ())
+tcond = NULL_TREE;
+  else if (r.upper_bound () == 0)
+tcond = boolean_false_node;
+  else if (r.lower_bound () == 1)
+tcond = boolean_true_node;
+  else
+tcon

Re: [PATCH v2 1/2] combine: Split code out of make_compound_operation_int

2023-03-31 Thread Richard Sandiford via Gcc-patches

Segher Boessenkool  writes:
> Hi!
>
> On Thu, Mar 09, 2023 at 12:09:59PM +, Richard Sandiford wrote:
>> This patch just splits some code out of make_compound_operation_int
>> into a new function called make_compound_operation_and.  It is a
>> prerequisite for the fix for PR106594.
>> 
>> It might (or might not) make sense to put more of the existing
>> "and" handling into the new function, so that the subreg+lshiftrt
>> case can be handled through recursion rather than duplication.
>> But that's certainly not necessary to fix the bug, so is at
>> best stage 1 material.
>> 
>> No behavioural change intended.
>
> That doesn't sound as if you are very sure about things.  I'll just
> pretend it says "no functional changes" :-)

Heh.  Stock phrase borrowed from LLVM.  I'm not going to sign off with
"this patch contains no bugs". :-)

> (*Is* this a pure refactoring?)

Yeah, this patch is a pure refactoring.  It's the follow-on 2/2 part
that does something useful.

Thanks,
Richard

Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]

2023-03-31 Thread Segher Boessenkool

On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote:
> g:c23a9c87cc62bd177fd0d4db6ad34b34e1b9a31f uses nonzero_bits
> information to convert sign_extends into zero_extends.
> That change is semantically correct in itself, but for the
> testcase in the PR, it leads to a series of unfortunate events,
> as described below.

That patch also does other much more obviously beneficial transforms.
These things should not have been mashed together.  Smaller more atomic
patches are much nicer to handle :-/

> We try to combine:
> 
> Trying 24 -> 25:
>24: r116:DI=sign_extend(r115:SI)
>   REG_DEAD r115:SI
>25: r117:SI=[r116:DI*0x4+r118:DI]
>   REG_DEAD r116:DI
>   REG_EQUAL [r116:DI*0x4+`constellation_64qam']
> 
> which previously succeeded, giving:
> 
> (set (reg:SI 117 [ constellation_64qam[_5] ])
> (mem/u:SI (plus:DI (mult:DI (sign_extend:DI (reg:SI 115))
> (const_int 4 [0x4]))
> (reg/f:DI 118)) [1 constellation_64qam[_5]+0 S4 A32]))
> 
> However, nonzero_bits can tell that only the low 6 bits of r115
> can be nonzero.  The commit above therefore converts the sign_extend
> to a zero_extend.  Using the same nonzero_bits information, we then
> "expand" the zero_extend to:
> 
>   (and:DI (subreg:DI (reg:SI r115) 0)
>   (const_int 63))

This is more expensive already?!  An "and" usually costs a real machine
instruction, while subregs are usually free (just notational).  The
"and" can not easily be optimised away either (after combine we only
have less accurate nonzero_bits, to start with).

> Substituting into the mult gives the unsimplified expression:
> 
>   (mult:DI (and:DI (subreg:DI (reg:SI r115) 0)
>(const_int 63))
>(const_int 4))
> 
> The simplification rules for mult convert this to an ashift by 2.
> Then, this rule in simplify_shift_const_1:
> 
> /* If we have (shift (logical)), move the logical to the outside
>to allow it to possibly combine with another logical and the
>shift to combine with another shift.  This also canonicalizes to
>what a ZERO_EXTRACT looks like.  Also, some machines have
>(and (shift)) insns.  */
> 
> moves the shift inside the "and", so that the expression becomes:
> 
>   (and:DI (ashift:DI (subreg:DI (reg:SI r115) 0)
>  (const_int 2))
>   (const_int 252))
> 
> We later recanonicalise to a mult (since this is an address):
> 
>   (and:DI (mult:DI (subreg:DI (reg:SI r115) 0)
>(const_int 4))
>   (const_int 252))
> 
> But we fail to transform this back to the natural substitution:
> 
>   (mult:DI (zero_extend:DI (reg:SI r115))
>(const_int 4))
> 
> There are several other cases in which make_compound_operation
> needs to look more than one level down in order to complete a
> compound operation.  For example:
> 
> (a) the ashiftrt handling uses extract_left_shift to look through
> things like logic ops in order to find a partnering ashift
> operation
> 
> (b) the "and" handling looks through subregs, xors and iors
> to find a partnerning lshiftrt
> 
> This patch takes the same approach for mult.
> 
> gcc/
>   PR rtl-optimization/106594
>   * combine.cc (make_compound_operation_and): Look through
>   multiplications by a power of two.
> 
> gcc/testsuite/
>   * gcc.target/aarch64/pr106594.c: New test.
> ---
>  gcc/combine.cc  | 17 +
>  gcc/testsuite/gcc.target/aarch64/pr106594.c | 20 
>  2 files changed, 37 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106594.c
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 7d446d02cb4..36d04ad6703 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -7996,6 +7996,23 @@ make_compound_operation_and (scalar_int_mode mode, rtx 
> x,
>   break;
>}
>  
> +case MULT:
> +  /* Recurse through a power of 2 multiplication (as can be found
> +  in an address), using the relationship:
> +
> +  (and (mult X 2**N1) N2) == (mult (and X (lshifrt N2 N1)) 2**N1).  */
> +  if (CONST_INT_P (XEXP (x, 1))
> +   && pow2p_hwi (INTVAL (XEXP (x, 1
> + {
> +   int shift = exact_log2 (INTVAL (XEXP (x, 1)));
> +   rtx sub = make_compound_operation_and (mode, XEXP (x, 0),
> +  mask >> shift, in_code,
> +  next_code);
> +   if (sub)
> + return gen_rtx_MULT (mode, sub, XEXP (x, 1));
> + }
> +  break;
> +
>  default:
>break;
>  }
> diff --git a/gcc/testsuite/gcc.target/aarch64/pr106594.c 
> b/gcc/testsuite/gcc.target/aarch64/pr106594.c
> new file mode 100644
> index 000..beda8e050a5
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/pr106594.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +
> +extern const int constellation_64qam[64];
> +
> +void

Re: [PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/30/23 18:01, Hans-Peter Nilsson wrote:

On Fri, 24 Mar 2023, Peter Bergner via Gcc-patches wrote:


On 3/23/23 6:12 PM, Jeff Law via Gcc-patches wrote:

Is there a reason why REE cannot see that our (reg:QI 4) is a param register
and thus due to our ABI, already correctly sign/zero extended?


I don't think REE has ever considered exploiting ABI constraints. Handling
that might be a notable improvement on various targets.  It'd be a great
place to do some experimentation.


Ok, so sounds like a good follow-on project after this patch is reviewed
and committed (stage1).  Thanks for your input!


Agreed.  I suspect that risc-v will benefit from such work as well.
With that in mind, if y'all start poking at this, please loop in Raphael
(on cc) who's expressed an interest in this space.


Will do.  I suspect that it'll be best to come up with some generic interface
using target hooks like "param regs are sign/zero extended" or "call return
values are sign/zero extended", etc. that targets can conditionally opt into
depending on their ABI that is in effect.


Pardon the arm-chair development mode but it sounds like
re-inventing the TARGET_PROMOTE_* hooks...

Maybe just hook up TARGET_PROMOTE_FUNCTION_MODE to ree.c (as
"you" already already define it for "rs6000")?

That's what we touched on up-thread.  Ideally we want to wire up REE to 
utilize the existing mechanisms to specify ABI constraints so that we 
don't have to write all those macros again.


jeff

Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]

2023-03-31 Thread Richard Sandiford via Gcc-patches

Segher Boessenkool  writes:
> On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote:
>> g:c23a9c87cc62bd177fd0d4db6ad34b34e1b9a31f uses nonzero_bits
>> information to convert sign_extends into zero_extends.
>> That change is semantically correct in itself, but for the
>> testcase in the PR, it leads to a series of unfortunate events,
>> as described below.
>
> That patch also does other much more obviously beneficial transforms.
> These things should not have been mashed together.  Smaller more atomic
> patches are much nicer to handle :-/
>
>> We try to combine:
>> 
>> Trying 24 -> 25:
>>24: r116:DI=sign_extend(r115:SI)
>>   REG_DEAD r115:SI
>>25: r117:SI=[r116:DI*0x4+r118:DI]
>>   REG_DEAD r116:DI
>>   REG_EQUAL [r116:DI*0x4+`constellation_64qam']
>> 
>> which previously succeeded, giving:
>> 
>> (set (reg:SI 117 [ constellation_64qam[_5] ])
>> (mem/u:SI (plus:DI (mult:DI (sign_extend:DI (reg:SI 115))
>> (const_int 4 [0x4]))
>> (reg/f:DI 118)) [1 constellation_64qam[_5]+0 S4 A32]))
>> 
>> However, nonzero_bits can tell that only the low 6 bits of r115
>> can be nonzero.  The commit above therefore converts the sign_extend
>> to a zero_extend.  Using the same nonzero_bits information, we then
>> "expand" the zero_extend to:
>> 
>>   (and:DI (subreg:DI (reg:SI r115) 0)
>>   (const_int 63))
>
> This is more expensive already?!  An "and" usually costs a real machine
> instruction, while subregs are usually free (just notational).  The
> "and" can not easily be optimised away either (after combine we only
> have less accurate nonzero_bits, to start with).

Well, it's an "and" in place of a zero_extend rather than an "and"
in place of a subreg.  So it is one real instruction for another
real instruction.  But yeah, it's not exactly a simplification.

The "we" here is expand_compound_operation, via simplify_and_const_int.
So it's the usual thing: expand_compound_operation creates something
that is perhaps more expensive, then after simplification,
make_compound_operation is supposed to collapse these "expanded"
perhaps-more-expensive forms back.  And it's the last bit that
goes wrong.

>> Substituting into the mult gives the unsimplified expression:
>> 
>>   (mult:DI (and:DI (subreg:DI (reg:SI r115) 0)
>>(const_int 63))
>>(const_int 4))
>> 
>> The simplification rules for mult convert this to an ashift by 2.
>> Then, this rule in simplify_shift_const_1:
>> 
>>/* If we have (shift (logical)), move the logical to the outside
>>   to allow it to possibly combine with another logical and the
>>   shift to combine with another shift.  This also canonicalizes to
>>   what a ZERO_EXTRACT looks like.  Also, some machines have
>>   (and (shift)) insns.  */
>> 
>> moves the shift inside the "and", so that the expression becomes:
>> 
>>   (and:DI (ashift:DI (subreg:DI (reg:SI r115) 0)
>>  (const_int 2))
>>   (const_int 252))
>> 
>> We later recanonicalise to a mult (since this is an address):
>> 
>>   (and:DI (mult:DI (subreg:DI (reg:SI r115) 0)
>>(const_int 4))
>>   (const_int 252))
>> 
>> But we fail to transform this back to the natural substitution:
>> 
>>   (mult:DI (zero_extend:DI (reg:SI r115))
>>(const_int 4))
>> 
>> There are several other cases in which make_compound_operation
>> needs to look more than one level down in order to complete a
>> compound operation.  For example:
>> 
>> (a) the ashiftrt handling uses extract_left_shift to look through
>> things like logic ops in order to find a partnering ashift
>> operation
>> 
>> (b) the "and" handling looks through subregs, xors and iors
>> to find a partnerning lshiftrt
>> 
>> This patch takes the same approach for mult.
>> 
>> gcc/
>>  PR rtl-optimization/106594
>>  * combine.cc (make_compound_operation_and): Look through
>>  multiplications by a power of two.
>> 
>> gcc/testsuite/
>>  * gcc.target/aarch64/pr106594.c: New test.
>> ---
>>  gcc/combine.cc  | 17 +
>>  gcc/testsuite/gcc.target/aarch64/pr106594.c | 20 
>>  2 files changed, 37 insertions(+)
>>  create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106594.c
>> 
>> diff --git a/gcc/combine.cc b/gcc/combine.cc
>> index 7d446d02cb4..36d04ad6703 100644
>> --- a/gcc/combine.cc
>> +++ b/gcc/combine.cc
>> @@ -7996,6 +7996,23 @@ make_compound_operation_and (scalar_int_mode mode, 
>> rtx x,
>>  break;
>>}
>>  
>> +case MULT:
>> +  /* Recurse through a power of 2 multiplication (as can be found
>> + in an address), using the relationship:
>> +
>> + (and (mult X 2**N1) N2) == (mult (and X (lshifrt N2 N1)) 2**N1).  */
>> +  if (CONST_INT_P (XEXP (x, 1))
>> +  && pow2p_hwi (INTVAL (XEXP (x, 1
>> +{
>> +  int shift = exact_log2 (INTVAL (XEXP (x, 1)));
>> +  rtx sub

Re: [PATCH] tree-optimization/107561 - reduce -Wstringop-overflow false positives

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/29/23 06:11, Richard Biener via Gcc-patches wrote:

The following tells pointer-query to prefer a zero size when we
are querying for the size range for a write into an object we've
determined is of zero size.  That avoids diagnostics about really
varying size arguments that just get a meaningful range for example
because they are multiplied by an element size.

I've adjusted only one call to get_size_range since that's what
I have a testcase for.  I think this is the most sensible
"workaround" for some of the false positives we see.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

OK if it succeeds?

Thanks,
Richard.

PR tree-optimization/107561
* gimple-ssa-warn-access.cc (get_size_range): Add flags
argument and pass it on.
(check_access): When querying for the size range pass
SR_ALLOW_ZERO when the known destination size is zero.

* g++.dg/pr71488.C: Remove XFAILed bogus diagnostic again.
* g++.dg/warn/Warray-bounds-16.C: Likewise.

OK.
jeff

Patch ping: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]

2023-03-31 Thread Jakub Jelinek via Gcc-patches

Hi!

On Fri, Mar 24, 2023 at 10:59:36PM +0100, Jakub Jelinek via Gcc-patches wrote:
> 2023-03-24  Jakub Jelinek  
> 
>   PR target/109254
>   * builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode
>   returns VOIDmode, handle it like if the register isn't used for
>   passing arguments at all.
>   (apply_result_size): If targetm.calls.get_raw_result_mode returns
>   VOIDmode, handle it like if the register isn't used for returning
>   results at all.
>   * target.def (get_raw_result_mode, get_raw_arg_mode): Document what it
>   means to return VOIDmode.
>   * doc/tm.texi: Regenerated.
>   * config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return
>   TARGET_SVE for P0_REGNUM.
>   (aarch64_function_arg_regno_p): Also return true for p0-p3.
>   (aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs.

I'd like to ping this patch.
https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614594.html

Thanks

Jakub

Re: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/24/23 15:59, Jakub Jelinek via Gcc-patches wrote:

Hi!

The testcase in the PR (which unfortunately because of my lack of experience
with SVE I'm not able to turn into a runtime testcase that verifies it)
is miscompiled on aarch64-linux in the regname pass, because while the
function takes arguments in the p0 register, FUNCTION_ARG_REGNO_P doesn't
reflect that, so DF doesn't know the register is used in register passing.
It sees 2 chains with p1 register and wants to replace the second one and
as DF doesn't know p0 is live at the start of the function, it will happily
use p0 register even when it is used in subsequent instructions.

The following patch fixes that.  FUNCTION_ARG_REGNO_P returns non-zero
for p0-p3 (unconditionally, seems for the floating/vector registers it
doesn't conditionalize them on TARGET_FLOAT either, but if you want,
I can conditionalize p0-p3 on TARGET_SVE), similarly
targetm.calls.function_value_regno_p returns true for p0 register
if TARGET_SVE (again for consistency, that function conditionalizes
the float/vector on TARGET_FLOAT); looking at the AAPCS, seems p1-p3
could be also used to return values in case of homogenous aggregates,
but it doesn't seem GCC supports putting svbool_t as a member of a
structure.

Now, that change broke bootstrap in libobjc and some
__builtin_apply_args/__builtin_apply/__builtin_return tests.  The
aarch64_get_reg_raw_mode hook already documents that SVE scalable arg/return
passing is fundamentally incompatible with those builtins, but unlike
the floating/vector regs where it forces a fixed vector mode, I think
there is no fixed mode which could be used for p0-p3.  So, I have tweaked
the generic code so that it uses VOIDmode return from that hook to signal
that a register shouldn't be touched by
__builtin_apply_args/__builtin_apply/__builtin_return
despite being mentioned in FUNCTION_ARG_REGNO_P or
targetm.calls.function_value_regno_p.

Bootstrapped/regtested on aarch64-linux, ok for trunk?

Could somebody please turn the testcase from the PR into something that
can be included into the testsuite?

2023-03-24  Jakub Jelinek  

PR target/109254
* builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode
returns VOIDmode, handle it like if the register isn't used for
passing arguments at all.
(apply_result_size): If targetm.calls.get_raw_result_mode returns
VOIDmode, handle it like if the register isn't used for returning
results at all.
* target.def (get_raw_result_mode, get_raw_arg_mode): Document what it
means to return VOIDmode.
* doc/tm.texi: Regenerated.
* config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return
TARGET_SVE for P0_REGNUM.
(aarch64_function_arg_regno_p): Also return true for p0-p3.
(aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs.
Generic bits are OK by me.  The aarch64 bits looks sensible, but I'd 
like to give the aarch folks one more chance to chime in.


So OK for the trunk Monday if you haven't heard otherwise.

jeff

Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]

2023-03-31 Thread Segher Boessenkool

On Fri, Mar 31, 2023 at 03:06:41PM +0100, Richard Sandiford wrote:
> Segher Boessenkool  writes:
> > On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote:
> >>   (and:DI (subreg:DI (reg:SI r115) 0)
> >>   (const_int 63))
> >
> > This is more expensive already?!  An "and" usually costs a real machine
> > instruction, while subregs are usually free (just notational).  The
> > "and" can not easily be optimised away either (after combine we only
> > have less accurate nonzero_bits, to start with).
> 
> Well, it's an "and" in place of a zero_extend rather than an "and"
> in place of a subreg.  So it is one real instruction for another
> real instruction.  But yeah, it's not exactly a simplification.

We should be able to optimise away the zero_extend in many cases, and we
cannot do that to an "and" usually.

> The "we" here is expand_compound_operation, via simplify_and_const_int.
> So it's the usual thing: expand_compound_operation creates something
> that is perhaps more expensive, then after simplification,
> make_compound_operation is supposed to collapse these "expanded"
> perhaps-more-expensive forms back.  And it's the last bit that
> goes wrong.

Yes.  And that is even unavoidable.  The whole concept is a bad idea.
We should not create worse things hoping that we can make it better
things later again; we usually cannot make it as good anymore, or even
reasonably good :-(

Totally skipping this dance of course results in worse code as well, so
more work is needed.

> > This looks pretty good (thanks!), but as always it needs testing on more
> > architectures, showing it doesn't hurt things.  It should be beneficial,
> > but it is not unlikely to hurt other existing backends, and we are not
> > in stage 1 (we are in stage 4 even!)
> 
> Yeah, but it's fixing a GCC 13 regression (and an important one on aarch64).

That is not an excuse for (potentially) causing hundreds of new
regressions, some maybe even worse.

> > Do you have any such proof / indication / anything?  I can start
> > some run but it takes a day (or two), and I cannot start it until next
> > week.
> 
> This is an alternative presentation of the change that we discussed
> a few weeks ago, and that you already tested:
> 
>   https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613486.html
> 
> The results seem to indicate that the patch had no effect on targets
> other than aarch64.  [A good thing, IMO. :-)  The purpose of the
> patch is to fix the aarch64 regression in a minimally invasive way.]

If it is the same (I don't agree at all fwiw) it has no effect on
aarch64 either, other than on your single testcase.  So that is a good
reason to NAK this patch outside of stage 1 already.

> I tried building an aarch64 linux kernel locally with -Os
> -fno-schedule-insns{,2}.

Completely unrealistic.  No one builds anything they care for speed fo
with -Os (and if people care for size, you still get better results
with -O2 + some other tunings), and disabling scheduling is disastrous.

> I saw code size improvements in 182 .os and a
> regression in only one .o.  (I was comparing individual .os because it
> makes isolation easier.)


> The one regression was because the optimised version had fewer pseudos,
> and so something that was previously allocated to x3 was allocated to x2.
> This pseudo was initialised to 0, and a preceding stack protect
> instruction had the known side effect (both before and after the patch)
> of setting x3 to 0.  So with the x3 allocation, postreload was able to
> remove the separate initialisation of x3 with 0, but with the x2
> allocation it couldn't.
> 
> So for my kernel build it was a 182:1 split in favour of improvements,
> with the one regression being a quirk rather than a true regression.

For a default configuration it gave 100.033% size, a non-trivial
regression.  But again, I don't think this code has the same effect.

> > Do you have plans to make combine not do this insane "and" thing at all?
> > Or to attack the compound operation stuff head on?
> 
> I don't have any plans to do that myself, but I agree it would be
> better to get rid of the compound operation stuff if we can.

Getting rid of it is easy enough, but that also loses wanted
functionality (even for archs that do not have any extract
functionality; these functions do more than that :-( ).

> I can see why the expand/simplify/remake process seemed like
> a nice idea, but in practice, there's just too much that can
> go wrong.

Yup.

> And removing the compound stuff would go a long way
> to making combine simplification more like fwprop simpliification
> (unlike now, where we effectively have two separate simplification
> processes for things involving *_extends and *_extracts).

This has nothing to do with simplification?  We just use
simplify-rtx (which started out as part of combine exclusively), just
like everything else.

All the other transforms that are combine-only are very useful to have
as well, as shown a year or so

[committed] libstdc++: Revert addition of boolean flag to net::ip::basic_endpoint

2023-03-31 Thread Jonathan Wakely via Gcc-patches

Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

As pointed out in P2641R1, we can use GCC's __builtin_constant_p to
emulate the proposed std::is_active_member. This allows us to detect
which of the union members is active during constant evaluation, so we
don't need the extra bool data member. We still can't support constexpr
until C++20 though, as we need to change the active member during
constant evaluation.

libstdc++-v3/ChangeLog:

* include/experimental/internet (ip::basic_endpoint::_M_if_v6):
Revert change from member function to data member. Fix for
constant evaluation by detecting which union member is active.
(ip::basic_endpoint::resize): Revert changes to update _M_is_v6
flag.
---
 libstdc++-v3/include/experimental/internet | 32 ++
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/libstdc++-v3/include/experimental/internet 
b/libstdc++-v3/include/experimental/internet
index dff81b456ab..eb23ae21cdc 100644
--- a/libstdc++-v3/include/experimental/internet
+++ b/libstdc++-v3/include/experimental/internet
@@ -1531,7 +1531,6 @@ namespace ip
std::_Construct(&_M_data._M_v6);
_M_data._M_v6.sin6_family = __proto.family();
_M_data._M_v6.sin6_port = address_v4::_S_hton_16(__port_num);
-   _M_is_v6 = true;
  }
else
  {
@@ -1560,7 +1559,6 @@ namespace ip
for (int __i = 0; __i < 16; ++__i)
  __s6a[__i] = __addr._M_v6._M_bytes[__i];
_M_data._M_v6.sin6_scope_id = __addr._M_v6._M_scope_id;
-   _M_is_v6 = true;
  }
   }
 
@@ -1568,13 +1566,13 @@ namespace ip
 
   constexpr protocol_type protocol() const noexcept
   {
-   return _M_is_v6 ? protocol_type::v6() : protocol_type::v4();
+   return _M_is_v6() ? protocol_type::v6() : protocol_type::v4();
   }
 
   constexpr ip::address
   address() const noexcept
   {
-   if (_M_is_v6)
+   if (_M_is_v6())
  {
address_v6 __v6;
const uint8_t* __s6a = _M_data._M_v6.sin6_addr.s6_addr;
@@ -1601,14 +1599,12 @@ namespace ip
__builtin_memcpy(_M_data._M_v6.sin6_addr.s6_addr,
 __addr._M_v6._M_bytes.data(), 16);
_M_data._M_v6.sin6_scope_id = __addr._M_v6._M_scope_id;
-   _M_is_v6 = true;
  }
else
  {
std::_Construct(&_M_data._M_v4);
_M_data._M_v4.sin_family = protocol_type::v4().family();
_M_data._M_v4.sin_addr.s_addr = __addr._M_v4._M_addr;
-   _M_is_v6 = false;
  }
   }
 
@@ -1616,7 +1612,7 @@ namespace ip
   port() const noexcept
   {
port_type __p = 0;
-   if (_M_is_v6)
+   if (_M_is_v6())
  __p = _M_data._M_v6.sin6_port;
else
  __p = _M_data._M_v4.sin_port;
@@ -1627,7 +1623,7 @@ namespace ip
   port(port_type __port_num) noexcept
   {
__port_num = address_v4::_S_hton_16(__port_num);
-   if (_M_is_v6)
+   if (_M_is_v6())
  _M_data._M_v6.sin6_port = __port_num;
else
  _M_data._M_v4.sin_port = __port_num;
@@ -1639,19 +1635,11 @@ namespace ip
 
   constexpr size_t
   size() const noexcept
-  { return _M_is_v6 ? sizeof(sockaddr_in6) : sizeof(sockaddr_in); }
+  { return _M_is_v6() ? sizeof(sockaddr_in6) : sizeof(sockaddr_in); }
 
   void
   resize(size_t __s)
   {
-   __glibcxx_assert(__s >= 0);
-   static_assert(offsetof(sockaddr_in6, sin6_family)
-   == offsetof(sockaddr_in, sin_family),
- "sockaddr_in::sin_family and sockaddr_in6::sin6_family "
- "must be at the same offset");
-   const sa_family_t __in6 = AF_INET6;
-   const auto* __ptr = (char*)&_M_data + offsetof(sockaddr_in, sin_family);
-   _M_is_v6 = __builtin_memcmp(&__in6, __ptr, sizeof(__in6)) == 0;
if (__s != size())
  __throw_length_error("net::ip::basic_endpoint::resize");
   }
@@ -1665,7 +1653,15 @@ namespace ip
sockaddr_in6_M_v6;
   } _M_data;
 
-  bool _M_is_v6 = false;
+  constexpr bool
+  _M_is_v6() const noexcept
+  {
+   // For constexpr eval we can just detect which union member is active.
+   // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)).
+   if (std::__is_constant_evaluated())
+ return __builtin_constant_p(_M_data._M_v6.sin6_family);
+   return _M_data._M_v6.sin6_family == AF_INET6;
+  }
 };
 
   /** basic_endpoint comparisons
-- 
2.39.2

Re: [committed] libstdc++: Fix constexpr functions in

2023-03-31 Thread Jonathan Wakely via Gcc-patches

On Fri, 31 Mar 2023 at 12:06, Jonathan Wakely wrote:
>
> On Thu, 30 Mar 2023 at 17:01, Daniel Krügler wrote:
> >
> > Am Do., 30. März 2023 um 18:00 Uhr schrieb Jonathan Wakely via
> > Libstdc++ :
> > >
> > [..]
> > >
> > > In fact, thinking about P2641 some more, I might revert this change.
> > > Instead of adding an extra bool member to support constexpr, I think
> > > I'll just remove the 'constexpr' keywords from basic_endpoint for now,
> > > and implement it in terms of just inspecting the sa_family_t member of
> > > the union members. And then later, once we have something like P2641,
> > > we can re-add the constexpr keywords and use is_within_lifetime during
> > > constant evaluation. That way we don't add a bool then need to take it
> > > away again, changing the ABI each time.
> >
> > I was just going to make the same suggestion.
>
> Actually, as pointed out in Barry's P2641R1 paper, we can just use
> GCC's __builtin_constant_p:
>
> +  constexpr bool
> +  _M_is_v6() const noexcept
> +  {
> +   // For constexpr eval we can just detect which union member is active.
> +   // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)).
> +   if (std::__is_constant_evaluated())
> + return __builtin_constant_p(_M_data._M_v6.sin6_family);
> +   return _M_data._M_v6.sin6_family == AF_INET6;
> +  }

Done in commit 10e573e86c6a1ed11df529288ed8fba97f876032

[committed] libstdc++: Avoid -Wmaybe-uninitialized warning in std::stop_source [PR109339]

2023-03-31 Thread Jonathan Wakely via Gcc-patches

Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

We pass a const-reference to *this before it's constructed, and GCC
assumes that all const-references are accessed. Add the access attribute
to say it's not accessed.

libstdc++-v3/ChangeLog:

PR libstdc++/109339
* include/std/stop_token (_Stop_state_ptr(const stop_source&)):
Add attribute access with access-mode 'none'.
* testsuite/30_threads/stop_token/stop_source/109339.cc: New test.
---
 libstdc++-v3/include/std/stop_token|  1 +
 .../30_threads/stop_token/stop_source/109339.cc| 10 ++
 2 files changed, 11 insertions(+)
 create mode 100644 
libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc

diff --git a/libstdc++-v3/include/std/stop_token 
b/libstdc++-v3/include/std/stop_token
index 76aef78a7ef..c90fc786b63 100644
--- a/libstdc++-v3/include/std/stop_token
+++ b/libstdc++-v3/include/std/stop_token
@@ -395,6 +395,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 {
   _Stop_state_ref() = default;
 
+  [[__gnu__::__access__(__none__, 2)]]
   explicit
   _Stop_state_ref(const stop_source&)
   : _M_ptr(new _Stop_state_t())
diff --git a/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc 
b/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc
new file mode 100644
index 000..eea614eef80
--- /dev/null
+++ b/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc
@@ -0,0 +1,10 @@
+// { dg-options "-Wmaybe-uninitialized -Og -std=gnu++20" }
+// { dg-do compile { target c++20 } }
+
+#include 
+
+int main()
+{
+  std::stop_source ss;
+ // { dg-bogus "uninitialized" "PR 109339" { target *-*-* } 0 }
+}
-- 
2.39.2

Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]

2023-03-31 Thread Richard Sandiford via Gcc-patches

Segher Boessenkool  writes:
> On Fri, Mar 31, 2023 at 03:06:41PM +0100, Richard Sandiford wrote:
>> This is an alternative presentation of the change that we discussed
>> a few weeks ago, and that you already tested:
>> 
>>   https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613486.html
>> 
>> The results seem to indicate that the patch had no effect on targets
>> other than aarch64.  [A good thing, IMO. :-)  The purpose of the
>> patch is to fix the aarch64 regression in a minimally invasive way.]
>
> If it is the same (I don't agree at all fwiw)

Why don't you agree that it's behaviourally the same?  It's really just
a refactoring of my earlier patch.

> it has no effect on aarch64 either, other than on your single
> testcase.  So that is a good reason to NAK this patch outside of stage
> 1 already.

But if it only affected this single testcase, I wouldn't see
improvements in 182 kernel objects :-)

>> I tried building an aarch64 linux kernel locally with -Os
>> -fno-schedule-insns{,2}.
>
> Completely unrealistic.  No one builds anything they care for speed fo
> with -Os (and if people care for size, you still get better results
> with -O2 + some other tunings), and disabling scheduling is disastrous.

As discussed before, the point is to use these results to get an
idea of the scope of the change.  And if we're using size to measure
the scope, it makes sense to align the compiler's goals with that,
since we then need to spend less time filtering the results for
oddities like different RA leading to different code layout, etc.

If we want to measure speed, we should do that.  But I don't expect
a significant speed impact on the kernel either way (whether the
patch is good or bad).

Thanks,
Richard

[pushed][PR109052] LRA: Implement commutative operands exchange for combining secondary memory reload and original insn

2023-03-31 Thread Vladimir Makarov via Gcc-patches


This is one more patch for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109052

The patch adds trying commutative operands exchange for recently 
implemented combining secondary memory reload and original insn:


The patch was successfully bootstrapped and tested on x86_64.

commit 378d19cfebfa2bc4f693dfc9e6f0dd993e7c45f7
Author: Vladimir N. Makarov 
Date:   Fri Mar 31 11:04:44 2023 -0400

LRA: Implement commutative operands exchange for combining secondary memory reload and original insn

The patch implements trying commutative operands exchange for
combining secondary memory reload and original insn.

PR rtl-optimization/109052

gcc/ChangeLog:

* lra-constraints.cc: (combine_reload_insn): New function.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr109052-2.c: New.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 405b8b92f5e..ff4e8f06063 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -5061,7 +5061,23 @@ combine_reload_insn (rtx_insn *from, rtx_insn *to)
   curr_insn = to;
   curr_id = lra_get_insn_recog_data (curr_insn);
   curr_static_id = curr_id->insn_static_data;
-  ok_p = !curr_insn_transform (true);
+  for (bool swapped_p = false;;)
+	{
+	  ok_p = !curr_insn_transform (true);
+	  if (ok_p || curr_static_id->commutative < 0)
+	break;
+	  swap_operands (curr_static_id->commutative);
+	  if (lra_dump_file != NULL)
+	{
+	  fprintf (lra_dump_file,
+		   "Swapping %scombined insn operands:\n",
+		   swapped_p ? "back " : "");
+	  dump_insn_slim (lra_dump_file, to);
+	}
+	  if (swapped_p)
+	break;
+	  swapped_p = true;
+	}
   curr_insn = saved_insn;
   curr_id = lra_get_insn_recog_data (curr_insn);
   curr_static_id = curr_id->insn_static_data;
diff --git a/gcc/testsuite/gcc.target/i386/pr109052-2.c b/gcc/testsuite/gcc.target/i386/pr109052-2.c
new file mode 100644
index 000..337d1f49c2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr109052-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mfpmath=both -mavx -fno-math-errno" } */
+
+double foo (double a, double b)
+{
+  double z = __builtin_fmod (a, 3.14);
+  return z * b;
+}
+
+/* { dg-final { scan-assembler-not "vmulsd\[ \t]\+%xmm\[0-9]\+, %xmm\[0-9]\+, %xmm\[0-9]\+"} } */

Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]

2023-03-31 Thread Patrick Palka via Gcc-patches

On Thu, 30 Mar 2023, Jason Merrill wrote:

> On 3/30/23 14:53, Patrick Palka wrote:
> > On Wed, 29 Mar 2023, Jason Merrill wrote:
> > 
> > > On 3/27/23 09:30, Patrick Palka wrote:
> > > > On Thu, 23 Mar 2023, Patrick Palka wrote:
> > > > 
> > > > > r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL
> > > > > template arguments not always getting marked as odr-used by
> > > > > redundantly
> > > > > calling mark_used on the substituted ADDR_EXPR callee of a CALL_EXPR.
> > > > > This is just a narrow workaround however, since using a FUNCTION_DECL
> > > > > as
> > > > > a template argument alone should constitutes an odr-use; we shouldn't
> > > > > need to subsequently e.g. call the function or take its address.
> > > 
> > > Agreed.  But why didn't we already wrap it in an ADDR_EXPR?  Even for
> > > reference tparms convert_nontype_argument should do that.
> > 
> > Indeed we do, the commit message was just rather sloppy/inaccurate...
> > I'll try to correct it.
> > 
> > > 
> > > > > This patch fixes this in a more general way at template specialization
> > > > > time by walking the template arguments of the specialization and
> > > > > calling
> > > > > mark_used on all entities used within.  As before, the call to
> > > > > mark_used
> > > > > as it worst a no-op, but it compensates for the situation where we end
> > > > > up
> > > > > forming a specialization from a template context in which mark_used is
> > > > > inhibited.  Another approach would be to call mark_used whenever we
> > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many more
> > > > > redundant calls to mark_used compared to this approach.
> > > > > 
> > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
> > > > > for
> > > > > trunk?
> > > > > 
> > > > >   PR c++/53164
> > > > >   PR c++/105848
> > > > > 
> > > > > gcc/cp/ChangeLog:
> > > > > 
> > > > >   * pt.cc (instantiate_class_template): Call
> > > > >   mark_template_arguments_used.
> > > > >   (tsubst_copy_and_build) : Revert r13-995
> > > > > change.
> > > > >   (mark_template_arguments_used): Define.
> > > > >   (instantiate_template): Call mark_template_arguments_used.
> > > > > 
> > > > > gcc/testsuite/ChangeLog:
> > > > > 
> > > > >   * g++.dg/template/fn-ptr3a.C: New test.
> > > > >   * g++.dg/template/fn-ptr4.C: New test.
> > > > > ---
> > > > >gcc/cp/pt.cc | 51
> > > > > 
> > > > >gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 
> > > > >gcc/testsuite/g++.dg/template/fn-ptr4.C  | 14 +++
> > > > >3 files changed, 74 insertions(+), 16 deletions(-)
> > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C
> > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C
> > > > > 
> > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > > > > index 7e4a8de0c8b..9b3cc1c 100644
> > > > > --- a/gcc/cp/pt.cc
> > > > > +++ b/gcc/cp/pt.cc
> > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree);
> > > > >static tree enclosing_instantiation_of (tree tctx);
> > > > >static void instantiate_body (tree pattern, tree args, tree d, bool
> > > > > nested);
> > > > >static tree maybe_dependent_member_ref (tree, tree, tsubst_flags_t,
> > > > > tree);
> > > > > +static void mark_template_arguments_used (tree);
> > > > >  /* Make the current scope suitable for access checking when we
> > > > > are
> > > > >   processing T.  T can be FUNCTION_DECL for instantiated function
> > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type)
> > > > >  cp_unevaluated_operand = 0;
> > > > >  c_inhibit_evaluation_warnings = 0;
> > > > >}
> > > > > +
> > > > > +  mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args));
> > > > > +
> > > > >  /* Use #pragma pack from the template context.  */
> > > > >  saved_maximum_field_alignment = maximum_field_alignment;
> > > > >  maximum_field_alignment = TYPE_PRECISION (pattern);
> > > > > @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t,
> > > > > }
> > > > >   /* Remember that there was a reference to this entity.  */
> > > > > - if (function != NULL_TREE)
> > > > > -   {
> > > > > - tree inner = function;
> > > > > - if (TREE_CODE (inner) == ADDR_EXPR
> > > > > - && TREE_CODE (TREE_OPERAND (inner, 0)) ==
> > > > > FUNCTION_DECL)
> > > > > -   /* We should already have called mark_used when taking
> > > > > the
> > > > > -  address of this function, but do so again anyway to
> > > > > make
> > > > > -  sure it's odr-used: at worst this is a no-op, but if
> > > > > we
> > > > > -  obtained this FUNCTION_DECL as part of ahead-of-time
> > > > > overload
> > > > > -  resolution then that call to mark_used wouldn't have
> > > > > marked
> > > > > it
> > > > > -  odr-used

Re: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]

2023-03-31 Thread Richard Sandiford via Gcc-patches

Thanks for the patch and sorry for the slow reply.

Jakub Jelinek  writes:
> Hi!
>
> The testcase in the PR (which unfortunately because of my lack of experience
> with SVE I'm not able to turn into a runtime testcase that verifies it)
> is miscompiled on aarch64-linux in the regname pass, because while the
> function takes arguments in the p0 register, FUNCTION_ARG_REGNO_P doesn't
> reflect that, so DF doesn't know the register is used in register passing.
> It sees 2 chains with p1 register and wants to replace the second one and
> as DF doesn't know p0 is live at the start of the function, it will happily
> use p0 register even when it is used in subsequent instructions.
>
> The following patch fixes that.  FUNCTION_ARG_REGNO_P returns non-zero
> for p0-p3 (unconditionally, seems for the floating/vector registers it
> doesn't conditionalize them on TARGET_FLOAT either, but if you want,
> I can conditionalize p0-p3 on TARGET_SVE),

I agree doing it unconditionally makes sense.

> similarly targetm.calls.function_value_regno_p returns true for p0
> register if TARGET_SVE (again for consistency, that function
> conditionalizes the float/vector on TARGET_FLOAT); looking at the
> AAPCS, seems p1-p3 could be also used to return values in case of
> homogenous aggregates, but it doesn't seem GCC supports putting
> svbool_t as a member of a structure.

One testcase that uses p1-p3 for return values is:

typedef __SVBool_t fixed_bool_t __attribute__((arm_sve_vector_bits(256)));
struct s { fixed_bool_t x[4]; };
struct s f (struct s *ptr) { return *ptr; }

compiled with -msve-vector-bits=256.

> Now, that change broke bootstrap in libobjc and some
> __builtin_apply_args/__builtin_apply/__builtin_return tests.  The
> aarch64_get_reg_raw_mode hook already documents that SVE scalable arg/return
> passing is fundamentally incompatible with those builtins, but unlike
> the floating/vector regs where it forces a fixed vector mode, I think
> there is no fixed mode which could be used for p0-p3.  So, I have tweaked
> the generic code so that it uses VOIDmode return from that hook to signal
> that a register shouldn't be touched by
> __builtin_apply_args/__builtin_apply/__builtin_return
> despite being mentioned in FUNCTION_ARG_REGNO_P or
> targetm.calls.function_value_regno_p.
>
> Bootstrapped/regtested on aarch64-linux, ok for trunk?
>
> Could somebody please turn the testcase from the PR into something that
> can be included into the testsuite?

This seems to work:

/* { dg-do run { target aarch64_sve_hw } } */
/* { dg-options "-O2 -funroll-loops" } */

#include 
#include 

svfloat32_t __attribute__((noipa))
func_demo(svfloat32_t x, svfloat32_t y, svbool_t pg)
{
  svfloat32_t z = svadd_f32_x(pg, x, svdup_f32(0x1.800fep19f));
  svbool_t cmp = svcmplt_f32(pg, z, svdup_f32(0.0f));
  svfloat32_t zM1 = svsub_f32_x(pg, z, svdup_n_f32(1.0f));
  z = svsel_f32(cmp, zM1, z);
  svfloat32_t sum = svadd_f32_x(pg, z, y);
  return sum;
}

int
main()
{
  float arr[2];
  svfloat32_t x = svinsr_n_f32(svdup_f32(-0x1.880fep19f), 2.0f);
  svfloat32_t res = func_demo(x, svdup_f32(0.5f), svptrue_b32());
  svst1_f32(svptrue_pat_b32(SV_VL2), arr, res);
  if (arr[0] != 786561.50 || arr[1] != -16384.50)
__builtin_abort ();
  return 0;
}

> 2023-03-24  Jakub Jelinek  
>
>   PR target/109254
>   * builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode
>   returns VOIDmode, handle it like if the register isn't used for
>   passing arguments at all.
>   (apply_result_size): If targetm.calls.get_raw_result_mode returns
>   VOIDmode, handle it like if the register isn't used for returning
>   results at all.
>   * target.def (get_raw_result_mode, get_raw_arg_mode): Document what it
>   means to return VOIDmode.
>   * doc/tm.texi: Regenerated.
>   * config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return
>   TARGET_SVE for P0_REGNUM.
>   (aarch64_function_arg_regno_p): Also return true for p0-p3.
>   (aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs.
>
> @@ -7995,6 +7999,10 @@ aarch64_get_reg_raw_mode (int regno)
> for SVE types are fundamentally incompatible with the
> __builtin_return/__builtin_apply interface.  */
>  return as_a  (V16QImode);
> +  if (PR_REGNUM_P (regno))
> +/* For SVE PR regs, indicate that they should be ignored for
> +   __builtin_apply/__builtin_return.  */
> +return as_a  (VOIDmode);   

Reached me with trailing whitespace, but maybe that's a mailerism.

It's an interesting philosophical question whether VOIDmode is a
fixed-size mode, given that VOIDmode doesn't really have a size. :-)
But I'm sure in practice we already treat it like that elsewhere.
It's just the first time I remember it being so explicit.

OK for the aarch64 bits with aarch64_function_value_regno_p
changed to use the same range as aarch64_function_arg_regno_p.

Thanks,
Richard

Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]

2023-03-31 Thread Patrick Palka via Gcc-patches

On Fri, 31 Mar 2023, Patrick Palka wrote:

> On Thu, 30 Mar 2023, Jason Merrill wrote:
> 
> > On 3/30/23 14:53, Patrick Palka wrote:
> > > On Wed, 29 Mar 2023, Jason Merrill wrote:
> > > 
> > > > On 3/27/23 09:30, Patrick Palka wrote:
> > > > > On Thu, 23 Mar 2023, Patrick Palka wrote:
> > > > > 
> > > > > > r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL
> > > > > > template arguments not always getting marked as odr-used by
> > > > > > redundantly
> > > > > > calling mark_used on the substituted ADDR_EXPR callee of a 
> > > > > > CALL_EXPR.
> > > > > > This is just a narrow workaround however, since using a 
> > > > > > FUNCTION_DECL
> > > > > > as
> > > > > > a template argument alone should constitutes an odr-use; we 
> > > > > > shouldn't
> > > > > > need to subsequently e.g. call the function or take its address.
> > > > 
> > > > Agreed.  But why didn't we already wrap it in an ADDR_EXPR?  Even for
> > > > reference tparms convert_nontype_argument should do that.
> > > 
> > > Indeed we do, the commit message was just rather sloppy/inaccurate...
> > > I'll try to correct it.
> > > 
> > > > 
> > > > > > This patch fixes this in a more general way at template 
> > > > > > specialization
> > > > > > time by walking the template arguments of the specialization and
> > > > > > calling
> > > > > > mark_used on all entities used within.  As before, the call to
> > > > > > mark_used
> > > > > > as it worst a no-op, but it compensates for the situation where we 
> > > > > > end
> > > > > > up
> > > > > > forming a specialization from a template context in which mark_used 
> > > > > > is
> > > > > > inhibited.  Another approach would be to call mark_used whenever we
> > > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many more
> > > > > > redundant calls to mark_used compared to this approach.
> > > > > > 
> > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
> > > > > > for
> > > > > > trunk?
> > > > > > 
> > > > > > PR c++/53164
> > > > > > PR c++/105848
> > > > > > 
> > > > > > gcc/cp/ChangeLog:
> > > > > > 
> > > > > > * pt.cc (instantiate_class_template): Call
> > > > > > mark_template_arguments_used.
> > > > > > (tsubst_copy_and_build) : Revert r13-995
> > > > > > change.
> > > > > > (mark_template_arguments_used): Define.
> > > > > > (instantiate_template): Call mark_template_arguments_used.
> > > > > > 
> > > > > > gcc/testsuite/ChangeLog:
> > > > > > 
> > > > > > * g++.dg/template/fn-ptr3a.C: New test.
> > > > > > * g++.dg/template/fn-ptr4.C: New test.
> > > > > > ---
> > > > > >gcc/cp/pt.cc | 51
> > > > > > 
> > > > > >gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 
> > > > > >gcc/testsuite/g++.dg/template/fn-ptr4.C  | 14 +++
> > > > > >3 files changed, 74 insertions(+), 16 deletions(-)
> > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C
> > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C
> > > > > > 
> > > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > > > > > index 7e4a8de0c8b..9b3cc1c 100644
> > > > > > --- a/gcc/cp/pt.cc
> > > > > > +++ b/gcc/cp/pt.cc
> > > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree);
> > > > > >static tree enclosing_instantiation_of (tree tctx);
> > > > > >static void instantiate_body (tree pattern, tree args, tree d, 
> > > > > > bool
> > > > > > nested);
> > > > > >static tree maybe_dependent_member_ref (tree, tree, 
> > > > > > tsubst_flags_t,
> > > > > > tree);
> > > > > > +static void mark_template_arguments_used (tree);
> > > > > >  /* Make the current scope suitable for access checking when we
> > > > > > are
> > > > > >   processing T.  T can be FUNCTION_DECL for instantiated 
> > > > > > function
> > > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type)
> > > > > >  cp_unevaluated_operand = 0;
> > > > > >  c_inhibit_evaluation_warnings = 0;
> > > > > >}
> > > > > > +
> > > > > > +  mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args));
> > > > > > +
> > > > > >  /* Use #pragma pack from the template context.  */
> > > > > >  saved_maximum_field_alignment = maximum_field_alignment;
> > > > > >  maximum_field_alignment = TYPE_PRECISION (pattern);
> > > > > > @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t,
> > > > > >   }
> > > > > > /* Remember that there was a reference to this entity.  
> > > > > > */
> > > > > > -   if (function != NULL_TREE)
> > > > > > - {
> > > > > > -   tree inner = function;
> > > > > > -   if (TREE_CODE (inner) == ADDR_EXPR
> > > > > > -   && TREE_CODE (TREE_OPERAND (inner, 0)) ==
> > > > > > FUNCTION_DECL)
> > > > > > - /* We should already have called mark_used when taking
> > > > > > the
> > > > > > -address of this function, but do so again anyway

Regression with "recomputation and PR 109154"

2023-03-31 Thread Hans-Peter Nilsson via Gcc-patches

> Attached. I also removed the bogus warning in Walloc-13.c that no longer 
> happens

> Add recursive GORI recompuations with a depth limit.
> 
> PR tree-optimization/109154
> gcc/
> * gimple-range-gori.cc (gori_compute::may_recompute_p): Add depth 
> limit.
> * gimple-range-gori.h (may_recompute_p): Add depth param.
> * params.opt (ranger-recompute-depth): New param.
> 
> gcc/testsuite/
> * gcc.dg/Walloca-13.c: Remove bogus warning that is now fixed.

This patch, commit r13-6945-g429a7a88438cc8, caused a
test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c
scan-tree-dump-not recip "reciptmp"' for cris-elf that I
logged at PR109363.

Perhaps it's somewhat of a similar nature as Walloca-13.c
but then again it's not an bogus warning that's gone.

Is it expected and I should just blankly xfail it or is it
worth more attention?  I'm a bit surprised that it hasn't
shown up for other targets though.

brgds, H-P

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/31/23 10:12, Hans-Peter Nilsson via Gcc-patches wrote:

Attached. I also removed the bogus warning in Walloc-13.c that no longer
happens



 Add recursive GORI recompuations with a depth limit.
 
 PR tree-optimization/109154

 gcc/
 * gimple-range-gori.cc (gori_compute::may_recompute_p): Add depth 
limit.
 * gimple-range-gori.h (may_recompute_p): Add depth param.
 * params.opt (ranger-recompute-depth): New param.
 
 gcc/testsuite/

 * gcc.dg/Walloca-13.c: Remove bogus warning that is now fixed.


This patch, commit r13-6945-g429a7a88438cc8, caused a
test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c
scan-tree-dump-not recip "reciptmp"' for cris-elf that I
logged at PR109363.

Perhaps it's somewhat of a similar nature as Walloca-13.c
but then again it's not an bogus warning that's gone.

Is it expected and I should just blankly xfail it or is it
worth more attention?  I'm a bit surprised that it hasn't
shown up for other targets though.
I already let Andrew know -- it's affecting a ton of targets.  It may be 
the case that we just need to adjust the param for that test.


jeff

Re: [PATCH] rtl-optimization/109237 - quadraticness in delete_trivially_dead_insns

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/22/23 04:01, Richard Biener via Gcc-patches wrote:

The following addresses quadraticness in processing debug insns
in delete_trivially_dead_insns and insn_live_p by using TREE_VISITED
on the INSN_VAR_LOCATION_DECL to indicate a later debug bind
with the same decl and no intervening real insn or debug marker.
That gets rid of the NEXT_INSN walk in insn_live_p in favor of
first clearing TREE_VISITED in the first loop over insn and
the book-keeping of decls we set the bit since we need to clear
them when visiting a real or debug marker insn.

That improves the time spent in delete_trivially_dead_insns from
10.6s to 2.2s for the testcase.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR rtl-optimization/109237
* cse.cc (insn_live_p): Remove NEXT_INSN walk, instead check
TREE_VISITED on INSN_VAR_LOCATION_DECL.
(delete_trivially_dead_insns): Maintain TREE_VISITED on
active debug bind INSN_VAR_LOCATION_DECL.

OK
jeff

Re: Should -ffp-contract=off the default on GCC?

2023-03-31 Thread Qing Zhao via Gcc-patches



> On Mar 24, 2023, at 3:42 PM, Andrew Pinski  wrote:
> 
> On Fri, Mar 24, 2023 at 1:14 AM Fangrui Song via Gcc-patches
>  wrote:
>> 
>> On Wed, Mar 22, 2023 at 8:52 AM Qing Zhao via Gcc-patches
>>  wrote:
>>> 
>>> 
>>> 
 On Mar 22, 2023, at 9:57 AM, Richard Biener via Gcc-patches 
  wrote:
 
 On Wed, Mar 22, 2023 at 1:26 PM Alexander Monakov  
 wrote:
> 
> 
> On Wed, 22 Mar 2023, Richard Biener wrote:
> 
>> I think it's even less realistic to expect users to know the details of
>> floating-point math.  So I doubt any such sentence will be helpful
>> besides spreading some FUD?
> 
> I think it's closer to "fundamental notions" rather than "details". For
> users who bother to read the GCC manual there's a decent chance it 
> wouldn't
> be for naught.
> 
> For documentation, I was thinking
> 
> Together with -fexcess-precision=standard, -ffp-contract=off
> is necessary to ensure that rounding of intermediate results to precision
> implied by the source code and the FLT_EVAL_METHOD macro is not
> omitted by the compiler.
 
 that sounds good to me
>>> 
>>> Shall we add such clarification to our Gcc13 doc? That should be helpful if 
>>> we keep the currently default.
>>> 
>>> Qing
 
> Alexander
>>> 
>> 
>> While updating the documentation, consider adding information that
>> #pragma STDC FP_CONTRACT OFF is ignored with -ffp-contract=fast.
>> 
>> This surprising behavior motivated Clang to add
>> -Xclang=-ffp-contract=fast-honor-pragmas
>> (https://discourse.llvm.org/t/fp-contract-fast-and-pragmas/58529).
> 
> `#pragma STDC FP_CONTRACT OFF` is not even implemented yet in GCC.
> Rather we should document that :).

Do we have any plan to implement this pragma?
Also, do we have any plan to implement -ffp-contract=on? 

I am very curious on why -ffp-contract=on has not been implemented for so long 
time?

There are some documentation on Floating Point implementation of GCC on the 
pragma STDC FP_CONTRACT
https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Floating-point-implementation.html#Floating-point-implementation

"
• Whether and how floating expressions are contracted when not 
disallowed by the FP_CONTRACT pragma (C99 and C11 6.5).
Expressions are currently only contracted if -ffp-contract=fast, 
-funsafe-math-optimizations or -ffast-math are used. This is subject to change.

• The default state for the FP_CONTRACT pragma (C99 and C11 7.12.2).
This pragma is not implemented. Expressions are currently only contracted if 
-ffp-contract=fast, -funsafe-math-optimizations or -ffast-math are used. This 
is subject to change.
“

So, looks like that we have documented this. But When the user read the 
documentation for —ffp-contract option, there is no such information. We might 
add a link in the documentation of -ffp-contract option to here to make it 
clear.

Qing




> It does not matter what clang does here really since GCC does not even
> implement the pragma.
> 
> Thanks,
> Andrew Pinski
> 
> 
>> 
>> 
>> 
>> --
>> 宋方睿

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Andrew MacLeod via Gcc-patches




On 3/31/23 12:20, Jeff Law wrote:



On 3/31/23 10:12, Hans-Peter Nilsson via Gcc-patches wrote:
Attached. I also removed the bogus warning in Walloc-13.c that no 
longer

happens



 Add recursive GORI recompuations with a depth limit.
  PR tree-optimization/109154
 gcc/
 * gimple-range-gori.cc (gori_compute::may_recompute_p): 
Add depth limit.

 * gimple-range-gori.h (may_recompute_p): Add depth param.
 * params.opt (ranger-recompute-depth): New param.
  gcc/testsuite/
 * gcc.dg/Walloca-13.c: Remove bogus warning that is now 
fixed.


This patch, commit r13-6945-g429a7a88438cc8, caused a
test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c
scan-tree-dump-not recip "reciptmp"' for cris-elf that I
logged at PR109363.

Perhaps it's somewhat of a similar nature as Walloca-13.c
but then again it's not an bogus warning that's gone.

Is it expected and I should just blankly xfail it or is it
worth more attention?  I'm a bit surprised that it hasn't
shown up for other targets though.
I already let Andrew know -- it's affecting a ton of targets.  It may 
be the case that we just need to adjust the param for that test.



So the test fails in the testuite because a recip has been put into the 
code, but does it execute OK?  or maybe thats not testable?


with the recompuation, we make some signficant changes:

  e.0_1 = e;
  d_12 = e.0_1 * 2.0e+0;
  E_13 = 1.0e+0 - d_12;
  goto ; [INV]

   :
  if (e.0_1 > 
5.00010408340855860842566471546888351440429688e-3)

    goto ; [INV]
  else
    goto ; [INV]

   :
  if (E_13 > 1.0e+0)
    goto ; [INV]
  else
    goto ; [INV]

In EVRP we  now determine that the branch in BB4 is never true and we 
reduce the testcase significantly


We end up now with:

  [local count: 357878152]:
  e.0_1 = e;
  if (e.0_1 > 
5.00010408340855860842566471546888351440429688e-3)

    goto ; [50.00%]
  else
    goto ; [50.00%]

   [local count: 178939076]:
  d_9 = e.0_1 * 2.0e+0;
  E_10 = 1.0e+0 - d_9;
  _16 = E_10 - 1.0e+0;
  reciptmp.5_21 = 1.0e+0 / d_9;

And with
d_9  : [frange] double 
[1.0001942890293094023945741355419158935546875e-2 
(0x0.a3d70a3d70a3ep-6), +Inf]


I guess it figures the recip is safe to put in, there will not be a 
divide by zero.


I guess the test is no longer testing what it should be?

And yes, we could set he param back to 1 for the test...
add   --param=ranger-recompute-depth=1   makes the "issue" go away :-)  
for now.


Andrew

Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]

2023-03-31 Thread Jason Merrill via Gcc-patches


On 3/31/23 11:55, Patrick Palka wrote:

On Fri, 31 Mar 2023, Patrick Palka wrote:


On Thu, 30 Mar 2023, Jason Merrill wrote:


On 3/30/23 14:53, Patrick Palka wrote:

On Wed, 29 Mar 2023, Jason Merrill wrote:


On 3/27/23 09:30, Patrick Palka wrote:

On Thu, 23 Mar 2023, Patrick Palka wrote:


r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL
template arguments not always getting marked as odr-used by
redundantly
calling mark_used on the substituted ADDR_EXPR callee of a CALL_EXPR.
This is just a narrow workaround however, since using a FUNCTION_DECL
as
a template argument alone should constitutes an odr-use; we shouldn't
need to subsequently e.g. call the function or take its address.


Agreed.  But why didn't we already wrap it in an ADDR_EXPR?  Even for
reference tparms convert_nontype_argument should do that.


Indeed we do, the commit message was just rather sloppy/inaccurate...
I'll try to correct it.




This patch fixes this in a more general way at template specialization
time by walking the template arguments of the specialization and
calling
mark_used on all entities used within.  As before, the call to
mark_used
as it worst a no-op, but it compensates for the situation where we end
up
forming a specialization from a template context in which mark_used is
inhibited.  Another approach would be to call mark_used whenever we
substitute a TEMPLATE_PARM_INDEX, but that would result in many more
redundant calls to mark_used compared to this approach.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK
for
trunk?

PR c++/53164
PR c++/105848

gcc/cp/ChangeLog:

* pt.cc (instantiate_class_template): Call
mark_template_arguments_used.
(tsubst_copy_and_build) : Revert r13-995
change.
(mark_template_arguments_used): Define.
(instantiate_template): Call mark_template_arguments_used.

gcc/testsuite/ChangeLog:

* g++.dg/template/fn-ptr3a.C: New test.
* g++.dg/template/fn-ptr4.C: New test.
---
gcc/cp/pt.cc | 51

gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 
gcc/testsuite/g++.dg/template/fn-ptr4.C  | 14 +++
3 files changed, 74 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C
create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 7e4a8de0c8b..9b3cc1c 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -220,6 +220,7 @@ static tree make_argument_pack (tree);
static tree enclosing_instantiation_of (tree tctx);
static void instantiate_body (tree pattern, tree args, tree d, bool
nested);
static tree maybe_dependent_member_ref (tree, tree, tsubst_flags_t,
tree);
+static void mark_template_arguments_used (tree);
  /* Make the current scope suitable for access checking when we
are
   processing T.  T can be FUNCTION_DECL for instantiated function
@@ -12142,6 +12143,9 @@ instantiate_class_template (tree type)
  cp_unevaluated_operand = 0;
  c_inhibit_evaluation_warnings = 0;
}
+
+  mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args));
+
  /* Use #pragma pack from the template context.  */
  saved_maximum_field_alignment = maximum_field_alignment;
  maximum_field_alignment = TYPE_PRECISION (pattern);
@@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t,
  }
/* Remember that there was a reference to this entity.  */
-   if (function != NULL_TREE)
- {
-   tree inner = function;
-   if (TREE_CODE (inner) == ADDR_EXPR
-   && TREE_CODE (TREE_OPERAND (inner, 0)) ==
FUNCTION_DECL)
- /* We should already have called mark_used when taking
the
-address of this function, but do so again anyway to
make
-sure it's odr-used: at worst this is a no-op, but if
we
-obtained this FUNCTION_DECL as part of ahead-of-time
overload
-resolution then that call to mark_used wouldn't have
marked
it
-odr-used yet (53164).  */
- inner = TREE_OPERAND (inner, 0);
-   if (DECL_P (inner)
-   && !mark_used (inner, complain) && !(complain &
tf_error))
- RETURN (error_mark_node);
- }
+   if (function != NULL_TREE
+   && DECL_P (function)
+   && !mark_used (function, complain) && !(complain &
tf_error))
+ RETURN (error_mark_node);
if (!maybe_fold_fn_template_args (function, complain))
  return error_mark_node;
@@ -21883,6 +21875,31 @@ check_instantiated_args (tree tmpl, tree
args,
tsubst_flags_t complain)
  return result;
}
+/* Call mark_used on each entity within the template arguments
ARGS of
some
+   template specialization, to ensure that each such entity is
considered
+   odr-used regardless of whether the specialization was first formed
in
a
+

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Jakub Jelinek via Gcc-patches

On Fri, Mar 31, 2023 at 01:02:18PM -0400, Andrew MacLeod wrote:
> I guess it figures the recip is safe to put in, there will not be a divide
> by zero.

I think the problem was that 1/d was hoisted before the loop; as long as
it is guarded with the d > 0.01 or e > 0.005 condition, it is fine.
The test probably should have been a runtime test, doing the main stuff
in some other noipa function and doing fetestexcept after it or something
similar.

> I guess the test is no longer testing what it should be?
> 
> And yes, we could set he param back to 1 for the test...
> add   --param=ranger-recompute-depth=1   makes the "issue" go away :-)  for
> now.

That looks reasonable unless we rewrite the test into runtime one (but we'd
then need to double check that it was really miscompiled and would fail back
then in 4.0).

Jakub

Re: [PATCH v4 2/2] gcc: Drop obsolete INCLUDE_PTHREAD_H (ping)

2023-03-31 Thread Sam James via Gcc-patches


Kito Cheng  writes:

> It's not the RISC-V part, so this requires a global maintainer there I think?
>

Someone able to look at the system.h bit? It should be trivial, there's
no uses left and it was added purely for a bug like this in the past
(see commit message).

> On Tue, Mar 14, 2023 at 8:28 AM juzhe.zh...@rivai.ai
>  wrote:
>>
>> Thank you for fixing this. I am not familiar with this.
>> This generator code (genrvv-type-indexer.cc) is written by @kito.
>>
>> Kito ? Can you take a look at this?
>>
>>
>> juzhe.zh...@rivai.ai
>>
>> From: Sam James
>> Date: 2023-03-14 08:23
>> To: gcc-patches
>> CC: Kito Cheng; Palmer Dabbelt; Andrew Waterman; Jim Wilson; Ju-Zhe Zhong; 
>> Sam James
>> Subject: [PATCH v4 2/2] gcc: Drop obsolete INCLUDE_PTHREAD_H
>> This is no longer used since 0a62889c7a155f8ed971860d68870dc9c46bb004, so
>> let's clean it up.
>>
>> gcc/ChangeLog:
>> * system.h: Drop unused INCLUDE_PTHREAD_H.
>>
>> Signed-off-by: Sam James 
>> ---
>> gcc/system.h | 4 
>> 1 file changed, 4 deletions(-)
>>
>> diff --git a/gcc/system.h b/gcc/system.h
>> index cf45db3f97e..3fdad7abf1e 100644
>> --- a/gcc/system.h
>> +++ b/gcc/system.h
>> @@ -761,10 +761,6 @@ extern int vsnprintf (char *, size_t, const char *, 
>> va_list);
>> #endif
>> #endif
>> -#ifdef INCLUDE_PTHREAD_H
>> -#include 
>> -#endif
>> -
>> #ifdef INCLUDE_ISL
>> #ifdef HAVE_isl
>> #include 
>> --
>> 2.40.0
>>
>>



signature.asc
Description: PGP signature

[pushed] libiberty: Remove a reference to the Glibc manual

2023-03-31 Thread Gerald Pfeifer

With this we have a sole 404 on all of gcc.gnu.org (at least with
documentation from trunk included). :-)

Gerald


---

longjmp is not specific to Glibc, and GCC supports lots of systems
that do not use Glibc. Plus this link has been broken in the web
version for ages without a good way to fix.

libiberty/ChangeLog:

* obstacks.texi (Preparing for Obstacks): Remove a (broken)
reference to the Glibc manual.
---
 libiberty/obstacks.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/libiberty/obstacks.texi b/libiberty/obstacks.texi
index b2d2403210b..37d26c90f1b 100644
--- a/libiberty/obstacks.texi
+++ b/libiberty/obstacks.texi
@@ -172,8 +172,8 @@ The value of this variable is a pointer to a function that
 @code{obstack} uses when @code{obstack_chunk_alloc} fails to allocate
 memory.  The default action is to print a message and abort.
 You should supply a function that either calls @code{exit}
-(@pxref{Program Termination, , , libc, The GNU C Library Reference Manual}) or 
@code{longjmp} (@pxref{Non-Local
-Exits, , , libc, The GNU C Library Reference Manual}) and doesn't return.
+(@pxref{Program Termination, , , libc, The GNU C Library Reference Manual}) 
+or @code{longjmp} and doesn't return.
 
 @smallexample
 void my_obstack_alloc_failed (void)
-- 
2.39.2

Re: 'g++.dg/modules/modules.exp': don't leak local 'unsupported' proc [PR108899]

2023-03-31 Thread Jason Merrill via Gcc-patches


On 3/30/23 09:51, Alexandre Oliva wrote:

On Mar 30, 2023, Alexandre Oliva  wrote:


If we're dropping the renaming, I suppose we could also revert Jakub's
change.  I suppose this patch will take care of it, pending testing...


Regstrapped on x86_64-linux-gnu and also tested on arm-vx7r2 (with
gcc-12), where I used to get fails after an unsupported modules.exp
test, but there are no curly braces in the log files after the patch.
Ok to install?


[PR108899] testsuite: fix proc unsupported overriding in modules.exp


The [PR] tag should go at the end of the subject line, not the start. 
OK with that change.



The overrider of proc unsupported in modules.exp had two problems
reported by Thomas Schwinge, even after Jakub Jelínek's fix:

- it remained in effect while running other dejagnu testsets

- it didn't quote correctly the argument list passed to it, which
   caused test names to be surrounded by curly braces, as in:

UNSUPPORTED: {...}

This patch fixes both issues, obsoleting and reverting Jakub's change,
by dropping the overrider and renaming the saved proc back, and by
using uplevel's argument list splicing.


for  gcc/testsuite/ChangeLog

PR testsuite/108899
* g++.dg/modules/modules.exp (unsupported): Drop renaming.
Fix quoting.
---
  gcc/testsuite/g++.dg/modules/modules.exp |   20 +++-
  1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/g++.dg/modules/modules.exp 
b/gcc/testsuite/g++.dg/modules/modules.exp
index 80aa392bc7f3b..dc302d3d0af48 100644
--- a/gcc/testsuite/g++.dg/modules/modules.exp
+++ b/gcc/testsuite/g++.dg/modules/modules.exp
@@ -319,15 +319,11 @@ cleanup_module_files [find $DEFAULT_REPO *.gcm]
  # so that, after an unsupported result in dg-test, we can skip rather
  # than fail subsequent related tests.
  set module_do {"compile" "P"}
-if { [info procs unsupported] != [list] \
-  && [info procs saved-unsupported] == [list] } {
-rename unsupported saved-unsupported
-
-proc unsupported { args } {
-   global module_do
-   lset module_do 1 "N"
-   return [saved-unsupported $args]
-}
+rename unsupported modules-saved-unsupported
+proc unsupported { args } {
+global module_do
+lset module_do 1 "N"
+return [uplevel 1 modules-saved-unsupported $args]
  }
  
  # not grouped tests, sadly tcl doesn't have negated glob

@@ -412,4 +408,10 @@ foreach src [lsort [find $srcdir/$subdir {*_a.[CHX}]] {
  }
  }
  
+# Restore the original unsupported proc, lest it will affect

+# subsequent test runs, or even fail renaming if we run modules.exp
+# for multiple targets/multilibs/options.
+rename unsupported {}
+rename modules-saved-unsupported unsupported
+
  dg-finish

Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]

2023-03-31 Thread Patrick Palka via Gcc-patches

On Fri, 31 Mar 2023, Jason Merrill wrote:

> On 3/31/23 11:55, Patrick Palka wrote:
> > On Fri, 31 Mar 2023, Patrick Palka wrote:
> > 
> > > On Thu, 30 Mar 2023, Jason Merrill wrote:
> > > 
> > > > On 3/30/23 14:53, Patrick Palka wrote:
> > > > > On Wed, 29 Mar 2023, Jason Merrill wrote:
> > > > > 
> > > > > > On 3/27/23 09:30, Patrick Palka wrote:
> > > > > > > On Thu, 23 Mar 2023, Patrick Palka wrote:
> > > > > > > 
> > > > > > > > r13-995-g733a792a2b2e16 worked around the problem of
> > > > > > > > FUNCTION_DECL
> > > > > > > > template arguments not always getting marked as odr-used by
> > > > > > > > redundantly
> > > > > > > > calling mark_used on the substituted ADDR_EXPR callee of a
> > > > > > > > CALL_EXPR.
> > > > > > > > This is just a narrow workaround however, since using a
> > > > > > > > FUNCTION_DECL
> > > > > > > > as
> > > > > > > > a template argument alone should constitutes an odr-use; we
> > > > > > > > shouldn't
> > > > > > > > need to subsequently e.g. call the function or take its address.
> > > > > > 
> > > > > > Agreed.  But why didn't we already wrap it in an ADDR_EXPR?  Even
> > > > > > for
> > > > > > reference tparms convert_nontype_argument should do that.
> > > > > 
> > > > > Indeed we do, the commit message was just rather sloppy/inaccurate...
> > > > > I'll try to correct it.
> > > > > 
> > > > > > 
> > > > > > > > This patch fixes this in a more general way at template
> > > > > > > > specialization
> > > > > > > > time by walking the template arguments of the specialization and
> > > > > > > > calling
> > > > > > > > mark_used on all entities used within.  As before, the call to
> > > > > > > > mark_used
> > > > > > > > as it worst a no-op, but it compensates for the situation where
> > > > > > > > we end
> > > > > > > > up
> > > > > > > > forming a specialization from a template context in which
> > > > > > > > mark_used is
> > > > > > > > inhibited.  Another approach would be to call mark_used whenever
> > > > > > > > we
> > > > > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many
> > > > > > > > more
> > > > > > > > redundant calls to mark_used compared to this approach.
> > > > > > > > 
> > > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this
> > > > > > > > look OK
> > > > > > > > for
> > > > > > > > trunk?
> > > > > > > > 
> > > > > > > > PR c++/53164
> > > > > > > > PR c++/105848
> > > > > > > > 
> > > > > > > > gcc/cp/ChangeLog:
> > > > > > > > 
> > > > > > > > * pt.cc (instantiate_class_template): Call
> > > > > > > > mark_template_arguments_used.
> > > > > > > > (tsubst_copy_and_build) : Revert r13-995
> > > > > > > > change.
> > > > > > > > (mark_template_arguments_used): Define.
> > > > > > > > (instantiate_template): Call 
> > > > > > > > mark_template_arguments_used.
> > > > > > > > 
> > > > > > > > gcc/testsuite/ChangeLog:
> > > > > > > > 
> > > > > > > > * g++.dg/template/fn-ptr3a.C: New test.
> > > > > > > > * g++.dg/template/fn-ptr4.C: New test.
> > > > > > > > ---
> > > > > > > > gcc/cp/pt.cc | 51
> > > > > > > > 
> > > > > > > > gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 
> > > > > > > > gcc/testsuite/g++.dg/template/fn-ptr4.C  | 14 +++
> > > > > > > > 3 files changed, 74 insertions(+), 16 deletions(-)
> > > > > > > > create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C
> > > > > > > > create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C
> > > > > > > > 
> > > > > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > > > > > > > index 7e4a8de0c8b..9b3cc1c 100644
> > > > > > > > --- a/gcc/cp/pt.cc
> > > > > > > > +++ b/gcc/cp/pt.cc
> > > > > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree);
> > > > > > > > static tree enclosing_instantiation_of (tree tctx);
> > > > > > > > static void instantiate_body (tree pattern, tree args, tree
> > > > > > > > d, bool
> > > > > > > > nested);
> > > > > > > > static tree maybe_dependent_member_ref (tree, tree,
> > > > > > > > tsubst_flags_t,
> > > > > > > > tree);
> > > > > > > > +static void mark_template_arguments_used (tree);
> > > > > > > >   /* Make the current scope suitable for access checking
> > > > > > > > when we
> > > > > > > > are
> > > > > > > >processing T.  T can be FUNCTION_DECL for instantiated
> > > > > > > > function
> > > > > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type)
> > > > > > > >   cp_unevaluated_operand = 0;
> > > > > > > >   c_inhibit_evaluation_warnings = 0;
> > > > > > > > }
> > > > > > > > +
> > > > > > > > +  mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS
> > > > > > > > (args));
> > > > > > > > +
> > > > > > > >   /* Use #pragma pack from the template context.  */
> > > > > > > >   saved_maximum_field_alignment = maximum_field_alignment;
> > > > > > > >   maxim

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Andrew MacLeod via Gcc-patches

should we do something like this to tweak the testcases?   or does 
someone have something else in mind?


Richi opened a PR for the STL failure (109350)

Andrew





On 3/31/23 13:37, Jakub Jelinek wrote:

On Fri, Mar 31, 2023 at 01:02:18PM -0400, Andrew MacLeod wrote:

I guess it figures the recip is safe to put in, there will not be a divide
by zero.

I think the problem was that 1/d was hoisted before the loop; as long as
it is guarded with the d > 0.01 or e > 0.005 condition, it is fine.
The test probably should have been a runtime test, doing the main stuff
in some other noipa function and doing fetestexcept after it or something
similar.


I guess the test is no longer testing what it should be?

And yes, we could set he param back to 1 for the test...
add   --param=ranger-recompute-depth=1   makes the "issue" go away :-)  for
now.

That looks reasonable unless we rewrite the test into runtime one (but we'd
then need to double check that it was really miscompiled and would fail back
then in 4.0).

Jakub
commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8
Author: Andrew MacLeod 
Date:   Fri Mar 31 15:42:43 2023 -0400

Adjust testcases to not produce errors..

tree-optimization/109363
gcc/testsuite/
* g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message.
* gcc.dg/tree-ssa/pr23109.c: Disable better recomputations.

diff --git a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
index 35fb59e0232..faad5bed074 100644
--- a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
+++ b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C
@@ -141,7 +141,7 @@ void test_strcpy_new_int16_t (size_t n, const size_t vals[])
 
   int r_imin_imax = SR (INT_MIN, INT_MAX);
   T (S (1), new int16_t[r_imin_imax]);
-  T (S (2), new int16_t[r_imin_imax + 1]); // { dg-bogus "into a region of size" "pr106120" { xfail { ilp32 && c++98_only } } }
+  T (S (2), new int16_t[r_imin_imax + 1]); // { dg-bogus "into a region of size" "pr106120" { xfail { c++98_only } } }
   T (S (9), new int16_t[r_imin_imax * 2 + 1]);
 
   int r_0_imax = SR (0, INT_MAX);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c
index 7cdf1d05ee7..059f658ea20 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -funsafe-math-optimizations -ftrapping-math -fdump-tree-recip -fdump-tree-lim2" } */
+/* { dg-options "-O2 -funsafe-math-optimizations -ftrapping-math -fdump-tree-recip -fdump-tree-lim2 --param=ranger-recompute-depth=1" } */
 /* { dg-warning "'-fassociative-math' disabled" "" { target *-*-* } 0 } */
+/* ranger-recompute-depth prevents the optimizers from being too smart.  */
 
 double F[2] = { 0., 0. }, e = 0.;

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/31/23 13:48, Andrew MacLeod wrote:
should we do something like this to tweak the testcases?   or does 
someone have something else in mind?
Go ahead and tweak the testcase.  Unless you want to revamp it per 
Jakub's suggestions.


jeff

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Andrew MacLeod via Gcc-patches




On 3/31/23 15:59, Jeff Law wrote:



On 3/31/23 13:48, Andrew MacLeod wrote:
should we do something like this to tweak the testcases?   or does 
someone have something else in mind?
Go ahead and tweak the testcase.  Unless you want to revamp it per 
Jakub's suggestions.


not particularly  :-)

pushed.

hopefully this smooths things a little...  probably causes them to fail 
elsewhere now. ha!   Im out for most of the rest of the weekend.. any 
other tweaks someone else should do...   I will check in once in a  while.


Andrew

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Jeff Law via Gcc-patches





On 3/31/23 14:16, Andrew MacLeod wrote:


On 3/31/23 15:59, Jeff Law wrote:



On 3/31/23 13:48, Andrew MacLeod wrote:
should we do something like this to tweak the testcases?   or does 
someone have something else in mind?
Go ahead and tweak the testcase.  Unless you want to revamp it per 
Jakub's suggestions.


not particularly  :-)

pushed.

hopefully this smooths things a little...  probably causes them to fail 
elsewhere now. ha!   Im out for most of the rest of the weekend.. any 
other tweaks someone else should do...   I will check in once in a  while.

If that causes the test to fail elsewhere, we'll know by Monday.

jeff

Re: [PATCH] testsuite, analyzer: Fix up pipe-glibc.c testcase [PR107396]

2023-03-31 Thread David Malcolm via Gcc-patches

On Thu, 2023-03-30 at 10:05 +0200, Jakub Jelinek wrote:
> Hi!
> 
> The gcc.dg/analyzer/pipe-glibc.c test FAILs when using recent glibc
> headers
> and succeeds with older headers.
> The important change is that
> https://sourceware.org/git/?p=glibc.git;a=commit;h=c1760eaf3b575ad174fd88b252fd16bd525fa818
> in 2021 added __attribute__ ((__malloc__ (fclose, 1))) attribute to
> fdopen,
> so in write_to_pipe there is an excess warning:
> .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c: In function
> 'write_to_pipe':
> .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:28:3: warning: use of
> possibly-NULL 'stream' where non-null expected [CWE-690] [-Wanalyzer-
> possible-null-argument]
> .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:27:12: note: (1) this
> call could return NULL
> .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:28:3: note: (2)
> argument 4 ('stream') from (1) could be NULL where non-null expected
> : note: argument 4 of '__builtin_fwrite' must be non-null
> Strangely, nothing is reported on the read_from_pipe function, seems
> fwrite/fprintf/fputc etc. are builtins in GCC and we mark the FILE *
> arguments as nonnull there on the builtin declarations, while
> fgetc/fread
> etc. aren't builtins and glibc doesn't mark any of those using
> nonnull.
> Shall we change that on the glibc side?
> 
> Anyway, because this differs based on glibc version and I think the
> above warning is not the primary intention of the test, I think it is
> best to tweak it so that this warning isn't reported.

[...snip...]

Thanks: the patch LGTM.

Dave

Re: [PATCH] Implement std::advance for istreambuf_iterator using pubseekoff

2023-03-31 Thread Jonathan Wakely via Gcc-patches

On Tue, 15 Oct 2019 at 21:20, François Dumont wrote:
>
> Here is an update to set _M_sbuf to null if the user advance too much.
>
> Note that in this case the streambuf remains un-modified which is
> different from the current implementation. I think it is another
> enhancement.
>
> I also change the Debug assertion message for something more dedicated
> to std::advance algo.
>
> François
>
> On 10/14/19 10:12 PM, François Dumont wrote:
> > The same way I proposed to review std::copy overload for
> > istreambuf_iterator we can implement std::advance using pubseekoff.
> >
> > It is both a cleaner implementation and avoids yet another friend
> > declaration.

Looks like I never sent my review of this one, it's been sitting in my
draft mails for years, sorry.

It looks like this will fail if the streambuf doesn't support seeking.
The default behaviour for seekoff is to return -1, in which case
you'll get -1 for both calls to pubseekoff, and new_pos - cur_pos will
be zero, which is not equal to n, so you set the istreambuf_iterator
to end-of-stream. That seems wrong, we could still advance using the
old code (or just call ++ in a loop!)

> >
> > * include/std/streambuf
> > (advance(istreambuf_iterator<>&, _Distance)): Remove friend
> > declaration.
> > * include/bits/streambuf_iterator.h (__copy_move_a2): Re-implement
> > using
> > streambuf pubseekoff.
> >
> > Tested under Linux x86_64.
> >
> > Ok to commit ?
> >
> > François
> >
>

[committed] libstdc++: Teach optimizer that empty COW strings are empty [PR107087]

2023-03-31 Thread Jonathan Wakely via Gcc-patches

Tested powerpc64le-linux. Pushed to trunk.

-- >8 --

The compiler doesn't know about the invariant that the _S_empty_rep()
object is immutable and so _M_length and _M_refcount are always zero.
This means that we get warnings about writing possibly-non-zero length
strings into buffers that can't hold them. If we teach the compiler that
the empty rep is always zero length, it knows it can be copied into any
buffer.

For Stage 1 we might want to also consider adding this to capacity():

if (_S_empty_rep()._M_capacity != 0)
  __builtin_unreachable();

And this to _Rep::_M_is_leaked() and _Rep::_M_is_shared():

  if (_S_empty_rep()._M_refcount != 0)
__builtin_unreachable();

libstdc++-v3/ChangeLog:

PR tree-optimization/107087
* include/bits/cow_string.h (basic_string::size()): Add
optimizer hint that _S_empty_rep()._M_length is always zero.
(basic_string::length()): Call size().
---
 libstdc++-v3/include/bits/cow_string.h | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/cow_string.h 
b/libstdc++-v3/include/bits/cow_string.h
index 1ee84e60678..b6024365d4f 100644
--- a/libstdc++-v3/include/bits/cow_string.h
+++ b/libstdc++-v3/include/bits/cow_string.h
@@ -907,17 +907,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 public:
   // Capacity:
+
   ///  Returns the number of characters in the string, not including any
   ///  null-termination.
   size_type
   size() const _GLIBCXX_NOEXCEPT
-  { return _M_rep()->_M_length; }
+  {
+#if _GLIBCXX_FULLY_DYNAMIC_STRING == 0 && __OPTIMIZE__
+   if (_S_empty_rep()._M_length != 0)
+ __builtin_unreachable();
+#endif
+   return _M_rep()->_M_length;
+  }
 
   ///  Returns the number of characters in the string, not including any
   ///  null-termination.
   size_type
   length() const _GLIBCXX_NOEXCEPT
-  { return _M_rep()->_M_length; }
+  { return size(); }
 
   ///  Returns the size() of the largest possible %string.
   size_type
-- 
2.39.2

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Hans-Peter Nilsson via Gcc-patches

> Date: Fri, 31 Mar 2023 15:48:22 -0400
> From: Andrew MacLeod via Gcc-patches 
> Reply-To: Andrew MacLeod 

> commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8
> Author: Andrew MacLeod 
> Date:   Fri Mar 31 15:42:43 2023 -0400
> 
> Adjust testcases to not produce errors..
> 
> tree-optimization/109363
> gcc/testsuite/
> * g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message.
> * gcc.dg/tree-ssa/pr23109.c: Disable better recomputations.


(Needs to be spelled "PR tree-optimization/109363" for the
bugzilla-marker hook to react.  I'll mark manually though.)

Thanks!  I was about to push the --param adjustment as
obvious, seeing consensus equivalent of approval in this
mail thread.

brgds, H-P

Re: Regression with "recomputation and PR 109154"

2023-03-31 Thread Andrew MacLeod via Gcc-patches

On 3/31/23 19:31, Hans-Peter Nilsson wrote:

Date: Fri, 31 Mar 2023 15:48:22 -0400
From: Andrew MacLeod via Gcc-patches 
Reply-To: Andrew MacLeod 
commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8
Author: Andrew MacLeod 
Date:   Fri Mar 31 15:42:43 2023 -0400

 Adjust testcases to not produce errors..

 tree-optimization/109363

 gcc/testsuite/
 * g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message.
 * gcc.dg/tree-ssa/pr23109.c: Disable better recomputations.

(Needs to be spelled "PR tree-optimization/109363" for the
bugzilla-marker hook to react.  I'll mark manually though.)

Oops!  thanks :-)

Andrew

Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]

2023-03-31 Thread Jason Merrill via Gcc-patches


On 3/31/23 15:06, Patrick Palka wrote:

On Fri, 31 Mar 2023, Jason Merrill wrote:


On 3/31/23 11:55, Patrick Palka wrote:

On Fri, 31 Mar 2023, Patrick Palka wrote:


On Thu, 30 Mar 2023, Jason Merrill wrote:


On 3/30/23 14:53, Patrick Palka wrote:

On Wed, 29 Mar 2023, Jason Merrill wrote:


On 3/27/23 09:30, Patrick Palka wrote:

On Thu, 23 Mar 2023, Patrick Palka wrote:


r13-995-g733a792a2b2e16 worked around the problem of
FUNCTION_DECL
template arguments not always getting marked as odr-used by
redundantly
calling mark_used on the substituted ADDR_EXPR callee of a
CALL_EXPR.
This is just a narrow workaround however, since using a
FUNCTION_DECL
as
a template argument alone should constitutes an odr-use; we
shouldn't
need to subsequently e.g. call the function or take its address.


Agreed.  But why didn't we already wrap it in an ADDR_EXPR?  Even
for
reference tparms convert_nontype_argument should do that.


Indeed we do, the commit message was just rather sloppy/inaccurate...
I'll try to correct it.




This patch fixes this in a more general way at template
specialization
time by walking the template arguments of the specialization and
calling
mark_used on all entities used within.  As before, the call to
mark_used
as it worst a no-op, but it compensates for the situation where
we end
up
forming a specialization from a template context in which
mark_used is
inhibited.  Another approach would be to call mark_used whenever
we
substitute a TEMPLATE_PARM_INDEX, but that would result in many
more
redundant calls to mark_used compared to this approach.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this
look OK
for
trunk?

PR c++/53164
PR c++/105848

gcc/cp/ChangeLog:

* pt.cc (instantiate_class_template): Call
mark_template_arguments_used.
(tsubst_copy_and_build) : Revert r13-995
change.
(mark_template_arguments_used): Define.
(instantiate_template): Call mark_template_arguments_used.

gcc/testsuite/ChangeLog:

* g++.dg/template/fn-ptr3a.C: New test.
* g++.dg/template/fn-ptr4.C: New test.
---
 gcc/cp/pt.cc | 51

 gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 
 gcc/testsuite/g++.dg/template/fn-ptr4.C  | 14 +++
 3 files changed, 74 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C
 create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C

diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
index 7e4a8de0c8b..9b3cc1c 100644
--- a/gcc/cp/pt.cc
+++ b/gcc/cp/pt.cc
@@ -220,6 +220,7 @@ static tree make_argument_pack (tree);
 static tree enclosing_instantiation_of (tree tctx);
 static void instantiate_body (tree pattern, tree args, tree
d, bool
nested);
 static tree maybe_dependent_member_ref (tree, tree,
tsubst_flags_t,
tree);
+static void mark_template_arguments_used (tree);
   /* Make the current scope suitable for access checking
when we
are
processing T.  T can be FUNCTION_DECL for instantiated
function
@@ -12142,6 +12143,9 @@ instantiate_class_template (tree type)
   cp_unevaluated_operand = 0;
   c_inhibit_evaluation_warnings = 0;
 }
+
+  mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS
(args));
+
   /* Use #pragma pack from the template context.  */
   saved_maximum_field_alignment = maximum_field_alignment;
   maximum_field_alignment = TYPE_PRECISION (pattern);
@@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t,
  }
/* Remember that there was a reference to this entity.
*/
-   if (function != NULL_TREE)
- {
-   tree inner = function;
-   if (TREE_CODE (inner) == ADDR_EXPR
-   && TREE_CODE (TREE_OPERAND (inner, 0)) ==
FUNCTION_DECL)
- /* We should already have called mark_used when taking
the
-address of this function, but do so again anyway to
make
-sure it's odr-used: at worst this is a no-op, but if
we
-obtained this FUNCTION_DECL as part of ahead-of-time
overload
-resolution then that call to mark_used wouldn't have
marked
it
-odr-used yet (53164).  */
- inner = TREE_OPERAND (inner, 0);
-   if (DECL_P (inner)
-   && !mark_used (inner, complain) && !(complain &
tf_error))
- RETURN (error_mark_node);
- }
+   if (function != NULL_TREE
+   && DECL_P (function)
+   && !mark_used (function, complain) && !(complain &
tf_error))
+ RETURN (error_mark_node);
if (!maybe_fold_fn_template_args (function, complain))
  return error_mark_node;
@@ -21883,6 +21875,31 @@ check_instantiated_args (tree tmpl,
tree
args,
tsubst_flags_t complain)
   return result;
 }
 +/* Call mark_used on each entity within the template
arguments
ARGS of
some
+   template specialization, to ensure that each suc

56 matches

Mail list logo