Re: [PATCH] tree-optimization/109304 - properly handle instrumented aliases
On Tue, 28 Mar 2023, Richard Biener wrote: > When adjusting calls to reflect instrumentation we failed to handle > calls to aliases since they appear to have no body. Instead resort > to symtab node availability. The patch also avoids touching > internal function calls in a more obvious way (builtins might > have a body available). > > profiledbootstrap & regtest running on x86_64-unknown-linux-gnu. > > Honza - does this look OK? Honza - ping! > PR tree-optimization/109304 > * tree-profile.cc (tree_profiling): Use symtab node > availability to decide whether to skip adjusting calls. > Do not adjust calls to internal functions. > > * gcc.dg/pr109304.c: New testcase. > --- > gcc/testsuite/gcc.dg/pr109304.c | 12 > gcc/tree-profile.cc | 9 ++--- > 2 files changed, 18 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/pr109304.c > > diff --git a/gcc/testsuite/gcc.dg/pr109304.c b/gcc/testsuite/gcc.dg/pr109304.c > new file mode 100644 > index 000..d435b04d4d5 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr109304.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-require-profiling "-fprofile-generate" } */ > +/* { dg-require-effective-target fpic } */ > +/* { dg-options "-O3 -fprofile-generate -fPIC -fno-semantic-interposition" } > */ > + > +int PyUnicode_FindChar_i; > +int PyUnicode_FindChar() > +{ > + while (PyUnicode_FindChar_i) > +if (PyUnicode_FindChar()) > + break; > +} > diff --git a/gcc/tree-profile.cc b/gcc/tree-profile.cc > index 6f9a43e4bd5..da300d5f9e8 100644 > --- a/gcc/tree-profile.cc > +++ b/gcc/tree-profile.cc > @@ -808,7 +808,7 @@ tree_profiling (void) >{ > if (!gimple_has_body_p (node->decl) > || !(!node->clone_of > - || node->decl != node->clone_of->decl)) > + || node->decl != node->clone_of->decl)) > continue; > > /* Don't profile functions produced for builtin stuff. */ > @@ -842,12 +842,15 @@ tree_profiling (void) > for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) > { > gcall *call = dyn_cast (gsi_stmt (gsi)); > - if (!call) > + if (!call || gimple_call_internal_p (call)) > continue; > > /* We do not clear pure/const on decls without body. */ > tree fndecl = gimple_call_fndecl (call); > - if (fndecl && !gimple_has_body_p (fndecl)) > + cgraph_node *callee; > + if (fndecl > + && (callee = cgraph_node::get (fndecl)) > + && callee->get_availability (node) == AVAIL_NOT_AVAILABLE) > continue; > > /* Drop the const attribute from the call type (the pure > -- Richard Biener SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg, Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman; HRB 36809 (AG Nuernberg)
[committed] RISC-V: Fix missing file dependency in RISC-V back-end [PR109328]
gcc/ChangeLog: PR target/109328 * config/riscv/t-riscv: Add missing dependencies. Co-authored-by: Andrew Pinski --- gcc/config/riscv/t-riscv | 43 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 01f30a853e3..6e326fc7e02 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -1,6 +1,13 @@ +RISCV_BUILTINS_H = $(srcdir)/config/riscv/riscv-vector-builtins.h \ + $(srcdir)/config/riscv/riscv-vector-builtins.def \ + $(srcdir)/config/riscv/riscv-vector-builtins-functions.def \ + riscv-vector-type-indexer.gen.def + riscv-builtins.o: $(srcdir)/config/riscv/riscv-builtins.cc $(CONFIG_H) \ $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) $(RECOG_H) langhooks.h \ - $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(srcdir)/config/riscv/riscv-ftypes.def \ + $(DIAGNOSTIC_CORE_H) $(OPTABS_H) $(RISCV_BUILTINS_H) \ + $(srcdir)/config/riscv/riscv-ftypes.def \ + $(srcdir)/config/riscv/riscv-vector-builtins-types.def \ $(srcdir)/config/riscv/riscv-modes.def $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-builtins.cc @@ -11,12 +18,10 @@ riscv-vector-builtins.o: $(srcdir)/config/riscv/riscv-vector-builtins.cc \ $(FUNCTION_H) fold-const.h gimplify.h explow.h stor-layout.h $(REGS_H) \ alias.h langhooks.h attribs.h stringpool.h emit-rtl.h basic-block.h \ gimple.h gimple-iterator.h \ - $(srcdir)/config/riscv/riscv-vector-builtins.h \ $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \ $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \ - $(srcdir)/config/riscv/riscv-vector-builtins.def \ $(srcdir)/config/riscv/riscv-vector-builtins-types.def \ - $(srcdir)/config/riscv/riscv-vector-builtins-functions.def + $(RISCV_BUILTINS_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-builtins.cc @@ -24,8 +29,9 @@ riscv-vector-builtins-shapes.o: \ $(srcdir)/config/riscv/riscv-vector-builtins-shapes.cc \ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ $(TM_P_H) memmodel.h insn-codes.h $(OPTABS_H) \ - $(srcdir)/config/riscv/riscv-vector-builtins.h \ - $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h + $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \ + $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \ + $(RISCV_BUILTINS_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-builtins-shapes.cc @@ -36,9 +42,9 @@ riscv-vector-builtins-bases.o: \ $(EXPR_H) $(BASIC_BLOCK_H) $(FUNCTION_H) fold-const.h $(GIMPLE_H) \ gimple-iterator.h gimplify.h explow.h $(EMIT_RTL_H) tree-vector-builder.h \ rtx-vector-builder.h \ - $(srcdir)/config/riscv/riscv-vector-builtins.h \ $(srcdir)/config/riscv/riscv-vector-builtins-shapes.h \ - $(srcdir)/config/riscv/riscv-vector-builtins-bases.h + $(srcdir)/config/riscv/riscv-vector-builtins-bases.h \ + $(RISCV_BUILTINS_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vector-builtins-bases.cc @@ -60,23 +66,33 @@ riscv-vsetvl.o: $(srcdir)/config/riscv/riscv-vsetvl.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-vsetvl.cc -riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc +riscv-d.o: $(srcdir)/config/riscv/riscv-d.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(COMPILE) $< $(POSTCOMPILE) -riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc +riscv-shorten-memrefs.o: $(srcdir)/config/riscv/riscv-shorten-memrefs.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TARGET_H) $(COMPILE) $< $(POSTCOMPILE) -riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc +riscv-selftests.o: $(srcdir)/config/riscv/riscv-selftests.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) $(TREE_H) output.h \ + $(C_COMMON_H) $(TARGET_H) $(OPTABS_H) $(EXPR_H) $(INSN_ATTR_H) $(EMIT_RTL_H) $(COMPILE) $< $(POSTCOMPILE) -riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc +riscv-v.o: $(srcdir)/config/riscv/riscv-v.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(RTL_H) \ + $(TM_P_H) $(TARGET_H) memmodel.h insn-codes.h $(OPTABS_H) $(RECOG_H) \ + $(EXPR_H) $(INSN_ATTR_H) explow.h $(EMIT_RTL_H) tree-vector-builder.h \ + rtx-vector-builder.h $(COMPILE) $< $(POSTCOMPILE) -thead.o: $(srcdir)/config/riscv/thead.cc +thead.o: $(srcdir)/config/riscv/thead.cc \ + $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TARGET_H) backend.h $(RTL_H) \ + memmodel.h $(EMIT_RTL_H) poly-int.h output.h $(COMPILE) $< $(POSTCOMPILE) @@ -94,6 +110,7 @@ build/genrvv-type-indexer$(build_exeext): build
[PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)
Hi, we are in the process of changing data structures holding information about constants passed by reference and in aggregates to use unsigned int offsets rather than HOST_WIDE_INT (which was selected simply because that is what fell out of get_ref_base_and_extent at that time) in order to conserve memory, especially at WPA time. PR 109303 testcase discovers that we do not properly check that we only create jump functions with offsets (plus sizes) that fit into the smaller type. This patch adds the necessary check. Bootstrapped and tested on x86_64-linux. OK for master? Thanks, Martin gcc/ChangeLog: 2023-03-30 Martin Jambor PR ipa/109303 * ipa-prop.cc (determine_known_aggregate_parts): Check that the offset + size will be representable in unsigned int. gcc/testsuite/ChangeLog: 2023-03-30 Jakub Jelinek Martin Jambor PR ipa/109303 * gcc.dg/pr109303.c: New test. --- gcc/ipa-prop.cc | 4 +++- gcc/testsuite/gcc.dg/pr109303.c | 24 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/pr109303.c diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc index de45dbccf16..9ffd49b590c 100644 --- a/gcc/ipa-prop.cc +++ b/gcc/ipa-prop.cc @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct ipa_func_body_info *fbi, whether its value is clobbered any other dominating one. */ if ((content->value.pass_through.formal_id >= 0 || content->value.pass_through.operand) - && !clobber_by_agg_contents_list_p (all_list, content)) + && !clobber_by_agg_contents_list_p (all_list, content) + && (content->offset + content->size - arg_offset + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)) { struct ipa_known_agg_contents_list *copy = XALLOCA (struct ipa_known_agg_contents_list); diff --git a/gcc/testsuite/gcc.dg/pr109303.c b/gcc/testsuite/gcc.dg/pr109303.c new file mode 100644 index 000..f91535991c7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr109303.c @@ -0,0 +1,24 @@ +/* PR ipa/109303 */ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2" } */ + +struct __attribute__((packed)) A { char c1; short a1[__INT_MAX__]; }; +struct __attribute__((packed)) B { char c2; short a2[100]; }; +struct S { struct A p1; struct B p2[4]; }; +void bar (short int); + +static void +foo (struct S *q) +{ + for (int i = 0; i < q->p1.c1; i++) +for (int j = 0; j < q->p2[i].c2; j++) + bar (q->p2[i].a2[j]); +} + +int +main () +{ + struct S q = {}; + q.p2[0].c2 = q.p2[1].c2 = 3; + foo (&q); +} -- 2.40.0
Re: [PATCH] Document signbitm2.
On Fri, Mar 31, 2023 at 8:59 AM liuhongt via Gcc-patches wrote: > > Look through all backends which defined signbitm2. > 1. When m is a scalar mode, the dest is SImode. > 2. When m is a vector mode, the dest mode is the vector integer > mode has the same size and elements number as m. > > Ok for trunk? OK. > gcc/ChangeLog: > > * doc/md.texi: Document signbitm2. > --- > gcc/doc/md.texi | 11 +++ > 1 file changed, 11 insertions(+) > > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi > index 8e3113599fd..edfa51e867a 100644 > --- a/gcc/doc/md.texi > +++ b/gcc/doc/md.texi > @@ -6030,6 +6030,17 @@ floating-point mode. > > This pattern is not allowed to @code{FAIL}. > > +@cindex @code{signbit@var{m}2} instruction pattern > +@item @samp{signbit@var{m}2} > +Store the sign bit of floating-point operand 1 in operand 0. > +@var{m} is either a scalar or vector mode. When it is a scalar, > +operand 1 has mode @var{m} but operand 0 must have mode @code{SImode}. > +When @var{m} is a vector, operand 1 has the mode @var{m}. > +operand 0's mode should be an vector integer mode which has > +the same number of elements and the same size as mode @var{m}. > + > +This pattern is not allowed to @code{FAIL}. > + > @cindex @code{significand@var{m}2} instruction pattern > @item @samp{significand@var{m}2} > Store the significand of floating-point operand 1 in operand 0. > -- > 2.39.1.388.g2fc9e9ca3c >
Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)
On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor wrote: > > Hi, > > we are in the process of changing data structures holding information > about constants passed by reference and in aggregates to use unsigned > int offsets rather than HOST_WIDE_INT (which was selected simply > because that is what fell out of get_ref_base_and_extent at that time) > in order to conserve memory, especially at WPA time. > > PR 109303 testcase discovers that we do not properly check that we > only create jump functions with offsets (plus sizes) that fit into the > smaller type. This patch adds the necessary check. > > Bootstrapped and tested on x86_64-linux. OK for master? > > Thanks, > > Martin > > > gcc/ChangeLog: > > 2023-03-30 Martin Jambor > > PR ipa/109303 > * ipa-prop.cc (determine_known_aggregate_parts): Check that the > offset + size will be representable in unsigned int. > > gcc/testsuite/ChangeLog: > > 2023-03-30 Jakub Jelinek > Martin Jambor > > PR ipa/109303 > * gcc.dg/pr109303.c: New test. > --- > gcc/ipa-prop.cc | 4 +++- > gcc/testsuite/gcc.dg/pr109303.c | 24 > 2 files changed, 27 insertions(+), 1 deletion(-) > create mode 100644 gcc/testsuite/gcc.dg/pr109303.c > > diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc > index de45dbccf16..9ffd49b590c 100644 > --- a/gcc/ipa-prop.cc > +++ b/gcc/ipa-prop.cc > @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct > ipa_func_body_info *fbi, > whether its value is clobbered any other dominating one. */ > if ((content->value.pass_through.formal_id >= 0 >|| content->value.pass_through.operand) > - && !clobber_by_agg_contents_list_p (all_list, content)) > + && !clobber_by_agg_contents_list_p (all_list, content) > + && (content->offset + content->size - arg_offset > + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)) > { it does seem a bit misplaced since after the if we add the same 'content' to another list anyway. Wouldn't a more obvious place be where we end up truncating this sum? > struct ipa_known_agg_contents_list *copy > = XALLOCA (struct ipa_known_agg_contents_list); > diff --git a/gcc/testsuite/gcc.dg/pr109303.c b/gcc/testsuite/gcc.dg/pr109303.c > new file mode 100644 > index 000..f91535991c7 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr109303.c > @@ -0,0 +1,24 @@ > +/* PR ipa/109303 */ > +/* { dg-do compile { target lp64 } } */ > +/* { dg-options "-O2" } */ > + > +struct __attribute__((packed)) A { char c1; short a1[__INT_MAX__]; }; > +struct __attribute__((packed)) B { char c2; short a2[100]; }; > +struct S { struct A p1; struct B p2[4]; }; > +void bar (short int); > + > +static void > +foo (struct S *q) > +{ > + for (int i = 0; i < q->p1.c1; i++) > +for (int j = 0; j < q->p2[i].c2; j++) > + bar (q->p2[i].a2[j]); > +} > + > +int > +main () > +{ > + struct S q = {}; > + q.p2[0].c2 = q.p2[1].c2 = 3; > + foo (&q); > +} > -- > 2.40.0 >
Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)
Hi, On Fri, Mar 31 2023, Richard Biener wrote: > On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor wrote: >> >> Hi, >> >> we are in the process of changing data structures holding information >> about constants passed by reference and in aggregates to use unsigned >> int offsets rather than HOST_WIDE_INT (which was selected simply >> because that is what fell out of get_ref_base_and_extent at that time) >> in order to conserve memory, especially at WPA time. >> >> PR 109303 testcase discovers that we do not properly check that we >> only create jump functions with offsets (plus sizes) that fit into the >> smaller type. This patch adds the necessary check. >> >> Bootstrapped and tested on x86_64-linux. OK for master? >> >> Thanks, >> >> Martin >> >> >> gcc/ChangeLog: >> >> 2023-03-30 Martin Jambor >> >> PR ipa/109303 >> * ipa-prop.cc (determine_known_aggregate_parts): Check that the >> offset + size will be representable in unsigned int. >> >> gcc/testsuite/ChangeLog: >> >> 2023-03-30 Jakub Jelinek >> Martin Jambor >> >> PR ipa/109303 >> * gcc.dg/pr109303.c: New test. >> --- >> gcc/ipa-prop.cc | 4 +++- >> gcc/testsuite/gcc.dg/pr109303.c | 24 >> 2 files changed, 27 insertions(+), 1 deletion(-) >> create mode 100644 gcc/testsuite/gcc.dg/pr109303.c >> >> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc >> index de45dbccf16..9ffd49b590c 100644 >> --- a/gcc/ipa-prop.cc >> +++ b/gcc/ipa-prop.cc >> @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct >> ipa_func_body_info *fbi, >> whether its value is clobbered any other dominating one. */ >> if ((content->value.pass_through.formal_id >= 0 >>|| content->value.pass_through.operand) >> - && !clobber_by_agg_contents_list_p (all_list, content)) >> + && !clobber_by_agg_contents_list_p (all_list, content) >> + && (content->offset + content->size - arg_offset >> + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)) >> { > > it does seem a bit misplaced since after the if we add the same > 'content' to another > list anyway. The other list is a clobber list, as we crawl backwards from the call statement searching for stores, we also look whether we have already encountered a store of something else to an overlapping area. In theory we could have a store to a smaller data type, where the offset + size would still fit unsigned int, be followed by a larger store, which would not. We want the large store to end up in the clobber list so that the smaller one does not. This is the place where we also calculate the size of the final heap-allocated vector, so that is why eventually I put it there. > Wouldn't a more obvious place be where we end up truncating this sum? My reasoning was that since we know we would not be able to use it, it makes sense to discard the data before we stream it from compilation to WPA. Also, when we shorten the offset type also in ipa_agg_jf_item (which is what I want to do next), this is where the check eventually needs to be. Thanks, Martin
Re: [PATCH] ipa: Avoid constructing aggregate jump functions with huge offsets (PR 109303)
On Fri, Mar 31, 2023 at 11:18 AM Martin Jambor wrote: > > Hi, > > On Fri, Mar 31 2023, Richard Biener wrote: > > On Fri, Mar 31, 2023 at 10:46 AM Martin Jambor wrote: > >> > >> Hi, > >> > >> we are in the process of changing data structures holding information > >> about constants passed by reference and in aggregates to use unsigned > >> int offsets rather than HOST_WIDE_INT (which was selected simply > >> because that is what fell out of get_ref_base_and_extent at that time) > >> in order to conserve memory, especially at WPA time. > >> > >> PR 109303 testcase discovers that we do not properly check that we > >> only create jump functions with offsets (plus sizes) that fit into the > >> smaller type. This patch adds the necessary check. > >> > >> Bootstrapped and tested on x86_64-linux. OK for master? > >> > >> Thanks, > >> > >> Martin > >> > >> > >> gcc/ChangeLog: > >> > >> 2023-03-30 Martin Jambor > >> > >> PR ipa/109303 > >> * ipa-prop.cc (determine_known_aggregate_parts): Check that the > >> offset + size will be representable in unsigned int. > >> > >> gcc/testsuite/ChangeLog: > >> > >> 2023-03-30 Jakub Jelinek > >> Martin Jambor > >> > >> PR ipa/109303 > >> * gcc.dg/pr109303.c: New test. > >> --- > >> gcc/ipa-prop.cc | 4 +++- > >> gcc/testsuite/gcc.dg/pr109303.c | 24 > >> 2 files changed, 27 insertions(+), 1 deletion(-) > >> create mode 100644 gcc/testsuite/gcc.dg/pr109303.c > >> > >> diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc > >> index de45dbccf16..9ffd49b590c 100644 > >> --- a/gcc/ipa-prop.cc > >> +++ b/gcc/ipa-prop.cc > >> @@ -2086,7 +2086,9 @@ determine_known_aggregate_parts (struct > >> ipa_func_body_info *fbi, > >> whether its value is clobbered any other dominating one. */ > >> if ((content->value.pass_through.formal_id >= 0 > >>|| content->value.pass_through.operand) > >> - && !clobber_by_agg_contents_list_p (all_list, content)) > >> + && !clobber_by_agg_contents_list_p (all_list, content) > >> + && (content->offset + content->size - arg_offset > >> + <= (HOST_WIDE_INT) UINT_MAX * BITS_PER_UNIT)) > >> { > > > > it does seem a bit misplaced since after the if we add the same > > 'content' to another > > list anyway. > > The other list is a clobber list, as we crawl backwards from the call > statement searching for stores, we also look whether we have already > encountered a store of something else to an overlapping area. In theory > we could have a store to a smaller data type, where the offset + size > would still fit unsigned int, be followed by a larger store, which would > not. We want the large store to end up in the clobber list so that the > smaller one does not. > > This is the place where we also calculate the size of the final > heap-allocated vector, so that is why eventually I put it there. > > > Wouldn't a more obvious place be where we end up truncating this sum? > > My reasoning was that since we know we would not be able to use it, it > makes sense to discard the data before we stream it from compilation to > WPA. > > Also, when we shorten the offset type also in ipa_agg_jf_item (which is > what I want to do next), this is where the check eventually needs to > be. OK, maybe add the above as comment then. OK with that change. Richard. > Thanks, > > Martin
Re: [PATCH v3] RISC-V: Add Z*inx imcompatible check in gcc
../../gcc/common/config/riscv/riscv-common.cc: In static member function 'static riscv_subset_list* riscv_subset_list::parse(const char*, location_t)': ../../gcc/common/config/riscv/riscv-common.cc:1158:48: error: unquoted keyword 'float' in format [-Werror=format-diag] 1158 | "%<-march=%s%>: z*inx is conflict with float extensions", |^ -- Andreas Schwab, sch...@linux-m68k.org GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510 2552 DF73 E780 A9DA AEC1 "And now for something completely different."
Re: [PATCH] vect: Verify that GET_MODE_NUNITS is greater than one.
Michael Collison writes: > While working on autovectorizing for the RISCV port I encountered an issue > where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is a > evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode), > where GET_MODE_NUNITS is equal to one. > > Tested on RISCV and x86_64-linux-gnu. Okay? > > 2023-03-09 Michael Collison > > * tree-vect-slp.cc (can_duplicate_and_interleave_p): > Check that GET_MODE_NUNITS is greater than one. > --- > gcc/tree-vect-slp.cc | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc > index 9a4e000925e..add58113fa8 100644 > --- a/gcc/tree-vect-slp.cc > +++ b/gcc/tree-vect-slp.cc > @@ -426,7 +426,8 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned > int count, > if (vector_type > && VECTOR_MODE_P (TYPE_MODE (vector_type)) > && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), > -GET_MODE_SIZE (base_vector_mode))) > +GET_MODE_SIZE (base_vector_mode)) > + && known_gt (GET_MODE_NUNITS (TYPE_MODE (vector_type)), 1)) > { > /* Try fusing consecutive sequences of COUNT / NVECTORS elements >together into elements of type INT_TYPE and using the result FWIW, I think it'd better to remove: poly_int64 half_nelts = exact_div (nelts, 2); declare: poly_uint64 half_nelts; before the if condition, and use: && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)), 2, &half_nelts) instead of the known_gt. In other words, now that we can't assert the exact_div, we should check it (using multiple_p) instead. Thanks, Richard
[PATCH] range-op-float, value-range: Fix up handling of UN{LT,LE,GT,GE,EQ}_EXPR and handle comparisons in get_tree_range [PR91645]
Hi! When looking into PR91645, I've noticed we handle UN{LT,LE,GT,GE,EQ}_EXPR comparisons incorrectly. All those are unordered or ..., we correctly return [1, 1] if one or both operands are known NANs, and correctly ask the non-UN prefixed op to fold_range if neither operand may be NAN. But for the case where one or both operands may be NAN, we always return [0, 1]. The UN* fold_range tries to handle it by asking the non-UN prefixed fold_range and if it returns [1, 1] return that, if it returns [0, 0] or [0, 1] return [0, 1], which makes sense, because the maybe NAN means that it is the non-UN prefixed fold_range unioned with [1, 1] in case the maybe NAN is actually NAN at runtime. The problem is that the non-UN prefixed fold_range always returns [0, 1] because those fold_range implementations are like: if (op1.known_isnan () || op2.known_isnan ()) r = range_false (type); else if (!maybe_isnan (op1, op2)) { ... } else r = range_true_and_false (type); and so if maybe_isnan, they always return [0, 1]. Now, thinking about it, this is unnecessary pessimization, for the case where the ... block returns range_false (type) we actually could do it also if maybe_isnan (op1, op2), because if one or both operands are NAN, the comparison will be false, and if neither is NAN, the comparison will be also false. Will fix incrementally today. Anyway, the following patch fixes it by asking the non-UN prefixed fold_range on ranges with NAN cleared, which I think does the right thing in all cases. Another change in the patch is that range_query::get_tree_range always returned VARYING for comparisons, this patch allows to ask about those as well (they are very much like binary ops, except they take the important type from the types of the operands rather than result). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2023-03-31 Jakub Jelinek PR tree-optimization/91645 * range-op-float.cc (foperator_unordered_lt::fold_range, foperator_unordered_le::fold_range, foperator_unordered_gt::fold_range, foperator_unordered_ge::fold_range, foperator_unordered_equal::fold_range): Call the ordered fold_range on ranges with cleared NaNs. * value-query.cc (range_query::get_tree_range): Handle also COMPARISON_CLASS_P trees. --- gcc/range-op-float.cc.jj2023-03-28 11:00:29.205986288 +0200 +++ gcc/range-op-float.cc 2023-03-30 15:14:12.219183066 +0200 @@ -1587,7 +1587,13 @@ public: r = range_true (type); return true; } -if (!fop_lt.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_lt.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1692,7 +1698,13 @@ public: r = range_true (type); return true; } -if (!fop_le.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_le.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1793,7 +1805,13 @@ public: r = range_true (type); return true; } -if (!fop_gt.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_gt.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1898,7 +1916,13 @@ public: r = range_true (type); return true; } -if (!fop_ge.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_ge.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -2002,7 +2026,13 @@ public: r = range_true (type); return true; } -if (!fop_equal.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +
[RFC PATCH] Use ranger in the cdce pass [PR91645]
Hi! The cdce pass among other things replaces calls like sqrt with code like if (condition(s)) ret = .IFN_SQRT (x); else ret = sqrt (x); so that in the common case when we know the argument doesn't trigger any range/domain errors we use the hardware instruction and defer to a library call just to handle the erroneous conditions. Now, on pr103559-3.c we already fold that condition during vrp2 or so, but for pr103559-{2,4}.c we don't. The following patch is an attempt to ask the ranger already during the cdce pass. Unfortunately, bootstrap/regtest of this patch found one regression: FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors) Excess errors: during GIMPLE pass: cdce /home/jakub/src/gcc/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc:26: internal compiler error: in operator[], at vec.h:890 0x94b822 vec::operator[](unsigned int) ../../gcc/vec.h:890 0x94ba2a vec::operator[](unsigned int) ../../gcc/value-relation.cc:705 0x94ba2a vec::operator[](unsigned int) ../../gcc/vec.h:1505 0x94ba2a equiv_oracle::add_equiv_to_block(basic_block_def*, bitmap_head*) ../../gcc/value-relation.cc:690 The problem is that if the pass asks ranger about something, then splits some basic blocks to add the above if (condition(s)) stuff and then asks the ranger again, the ranger caches stuff based on basic blocks but the pass adds more basic blocks. So, if I wanted to use ranger in the pass, is it true I'd need to do all the ranger queries in some analysis phase before changes are made to the cfg? Now, I've just tried and apparently already the patch I've just posted about foperator_un*::fold_range seems to fix those testcases (during vrp2 or when), so maybe we don't need to bother with cdce here and can just commit the testcases. Are they ok for trunk as incremental change to the previous patch? 2023-03-31 Jakub Jelinek PR tree-optimization/91645 * tree-call-cdce.cc: Include gimple-range.h. (gen_one_condition): Add STMT argument. Ask ranger if the comparison is known to be always false or always true and use false or true instead of the actual comparison in that case. (gen_conditions_for_domain, gen_conditions_for_pow_cst_base, gen_conditions_for_pow_int_base): Add STMT argument, pass it through. (gen_conditions_for_pow): Pass pow_call as STMT to gen_conditions_for_pow_cst_base and gen_conditions_for_pow_int_base. (gen_shrink_wrap_conditions): Pass bi_call as STMT to gen_conditions_for_domain. (pass_call_cdce::execute): Disable ranger if it has been enabled. * gcc.target/i386/pr103559-1.c: New test. * gcc.target/i386/pr103559-2.c: New test. * gcc.target/i386/pr103559-3.c: New test. * gcc.target/i386/pr103559-4.c: New test. --- gcc/tree-call-cdce.cc.jj2023-01-02 09:32:45.940944935 +0100 +++ gcc/tree-call-cdce.cc 2023-03-30 14:54:25.248544702 +0200 @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. #include "builtins.h" #include "internal-fn.h" #include "tree-dfa.h" +#include "gimple-range.h" /* This pass serves two closely-related purposes: @@ -425,12 +426,9 @@ comparison_code_if_no_nans (tree_code co null tree. */ static void -gen_one_condition (tree arg, int lbub, - enum tree_code tcode, - const char *temp_name1, - const char *temp_name2, - vec conds, - unsigned *nconds) +gen_one_condition (tree arg, int lbub, enum tree_code tcode, + const char *temp_name1, const char *temp_name2, + vec conds, unsigned *nconds, gimple *stmt) { if (!HONOR_NANS (arg)) tcode = comparison_code_if_no_nans (tcode); @@ -451,10 +449,24 @@ gen_one_condition (tree arg, int lbub, gimple_assign_set_lhs (stmt1, tempn); tempc = create_tmp_var (boolean_type_node, temp_name2); - stmt2 = gimple_build_assign (tempc, - fold_build2 (tcode, - boolean_type_node, - tempn, lbub_real_cst)); + tree tcond = build2 (tcode, boolean_type_node, arg, lbub_real_cst); + int_range_max r; + range_query *q = get_range_query (cfun); + if (q == get_global_range_query ()) +q = enable_ranger (cfun); + /* Ask the ranger whether it knows the condition will be always false or + always true. */ + if (!q->range_of_expr (r, tcond, stmt) || r.undefined_p ()) +tcond = NULL_TREE; + else if (r.upper_bound () == 0) +tcond = boolean_false_node; + else if (r.lower_bound () == 1) +tcond = boolean_true_node; + else +tcond = NULL_TREE; + if (!tcond) +tcond = fold_build2 (tcode, boolean_type_node, tempn, lbub_real_cst); + stmt2 = gimple_build_assign (tempc, tcond); tempcn = make_ssa_name (tempc, stmt2); gimple_assign_set_lhs (stmt2, te
[PATCH] range-op-float: Further comparison fixes
On Fri, Mar 31, 2023 at 09:57:54AM +0200, Jakub Jelinek via Gcc-patches wrote: > and so if maybe_isnan, they always return [0, 1]. Now, thinking about it, > this is unnecessary pessimization, for the case where the ... block > returns range_false (type) we actually could do it also if maybe_isnan (op1, > op2), because if one or both operands are NAN, the comparison will be false, > and if neither is NAN, the comparison will be also false. Will fix > incrementally today. Here it is. 1) the foperator_{,not_}equal::fold_range cases - we have correctly first a case handling known_isnan on either operand, return there range_false (type) - EQ, resp. range_true (type) - NE - next we handle the singleton cases, maybe_isnan + singleton isn't singleton, so these are ok - there is a missing case (not handled in this patch) where both operands are known to be zeros, but not singleton zeros - there is some !maybe_isnan (op1, op2) handling which tries to prove when the operands are certainly not equal and results in range_false (type) - EQ, resp. range_true (type) - NE - otherwise range_true_and_false (type) Now, I think (and this patch implements it) that the !maybe_isnan (op1, op2) check is unnecessary. If we prove that when ignoring maybe NANs, the ranges don't intersect and so the comparison is always false for EQ or always true for NE, if NANs can appear, it doesn't change anything on that either, for EQ if NAN appears, the result is false like what we proved for the finite ranges, for NE if NAN appears, the result is true like what we proved for the finite ranges 2) foperator_{lt,le,gt,ge}::fold_range cases - these have correctly known_isnan on either operand first and return there range_false (type) - then !maybe_isnan (op1, op2) condition guarded checks - finally range_true_and_false (type) - so do that for maybe_isnan (op1, op2) Here in the !maybe_isnan (op1, op2) guarded code we have some condition which results in range_true (type), another condition which results in range_false (type) and otherwise range_true_and_false (type). Now, the condition which results in range_false (type) can be IMHO done also for the maybe_isnan (op1, op2) cases, because it is [0, 0] if both operands are finite or [0, 0] if either operand is NAN. 3) finally, LTGT_EXPR wasn't handled at all. LTGT_EXPR is the inverse comparision to UNEQ_EXPR, I believe both raise exceptions only if either operand is sNaN, UNEQ_EXPR is true if both operands are either equal or either of the operands is NAN, while LTGT_EXPR is true if the operands compare either smaller or larger and is false if either of the operands is NAN. Ok for trunk if this passes bootstrap/regtest? 2023-03-31 Jakub Jelinek * range-op-float.cc (foperator_equal::fold_range): Perform the non-singleton handling regardless of maybe_isnan (op1, op2). (foperator_not_equal::fold_range): Likewise. (foperator_lt::fold_range, foperator_le::fold_range, foperator_gt::fold_range, foperator_ge::fold_range): Perform the real_* comparison check which results in range_false (type) even if maybe_isnan (op1, op2). Simplify. (foperator_ltgt): New class. (fop_ltgt): New variable. (floating_op_table::floating_op_table): Handle LTGT_EXPR using fop_ltgt. --- gcc/range-op-float.cc.jj2023-03-31 09:30:11.245296618 +0200 +++ gcc/range-op-float.cc 2023-03-31 11:23:04.817876083 +0200 @@ -616,7 +616,7 @@ foperator_equal::fold_range (irange &r, else r = range_false (type); } - else if (!maybe_isnan (op1, op2)) + else { // If ranges do not intersect, we know the range is not equal, // otherwise we don't know anything for sure. @@ -638,8 +638,6 @@ foperator_equal::fold_range (irange &r, else r = range_true_and_false (type); } - else -r = range_true_and_false (type); return true; } @@ -734,7 +732,7 @@ foperator_not_equal::fold_range (irange else r = range_true (type); } - else if (!maybe_isnan (op1, op2)) + else { // If ranges do not intersect, we know the range is not equal, // otherwise we don't know anything for sure. @@ -756,8 +754,6 @@ foperator_not_equal::fold_range (irange else r = range_true_and_false (type); } - else -r = range_true_and_false (type); return true; } @@ -839,17 +835,13 @@ foperator_lt::fold_range (irange &r, tre if (frelop_early_resolve (r, type, op1, op2, rel, VREL_LT)) return true; - if (op1.known_isnan () || op2.known_isnan ()) + if (op1.known_isnan () + || op2.known_isnan () + || !real_less (&op1.lower_bound (), &op2.upper_bound ())) r = range_false (type); - else if (!maybe_isnan (op1, op2)) -{ - if (real_less (&op1.upper_bound (), &op2.lower_bound ())) - r = range_true (type); -
[PATCH] range-op-float: Further foperator_{,not_}equal::fold_range fix
On Fri, Mar 31, 2023 at 12:45:10PM +0200, Jakub Jelinek via Gcc-patches wrote: >- there is a missing case (not handled in this patch) where both operands > are known to be zeros, but not singleton zeros This patch adds those cases. Ok for trunk if it passes bootstrap/regtest? 2023-03-31 Jakub Jelinek * range-op-float.cc (foperator_equal::fold_range): If at least one of the op ranges is not singleton and neither is NaN and all 4 bounds are zero, return [1, 1]. (foperator_not_equal::fold_range): In the same case return [0, 0]. --- gcc/range-op-float.cc.jj2023-03-31 11:23:04.817876083 +0200 +++ gcc/range-op-float.cc 2023-03-31 12:51:34.757480162 +0200 @@ -616,6 +616,13 @@ foperator_equal::fold_range (irange &r, else r = range_false (type); } + else if (real_iszero (&op1.lower_bound ()) + && real_iszero (&op1.upper_bound ()) + && real_iszero (&op2.lower_bound ()) + && real_iszero (&op2.upper_bound ()) + && !maybe_isnan (op1, op2)) +// [-0.0, 0.0] == [-0.0, 0.0] or similar. +r = range_true (type); else { // If ranges do not intersect, we know the range is not equal, @@ -732,6 +739,13 @@ foperator_not_equal::fold_range (irange else r = range_true (type); } + else if (real_iszero (&op1.lower_bound ()) + && real_iszero (&op1.upper_bound ()) + && real_iszero (&op2.lower_bound ()) + && real_iszero (&op2.upper_bound ()) + && !maybe_isnan (op1, op2)) +// [-0.0, 0.0] != [-0.0, 0.0] or similar. +r = range_false (type); else { // If ranges do not intersect, we know the range is not equal, Jakub
Re: [committed] libstdc++: Fix constexpr functions in
On Thu, 30 Mar 2023 at 17:01, Daniel Krügler wrote: > > Am Do., 30. März 2023 um 18:00 Uhr schrieb Jonathan Wakely via > Libstdc++ : > > > [..] > > > > In fact, thinking about P2641 some more, I might revert this change. > > Instead of adding an extra bool member to support constexpr, I think > > I'll just remove the 'constexpr' keywords from basic_endpoint for now, > > and implement it in terms of just inspecting the sa_family_t member of > > the union members. And then later, once we have something like P2641, > > we can re-add the constexpr keywords and use is_within_lifetime during > > constant evaluation. That way we don't add a bool then need to take it > > away again, changing the ABI each time. > > I was just going to make the same suggestion. Actually, as pointed out in Barry's P2641R1 paper, we can just use GCC's __builtin_constant_p: + constexpr bool + _M_is_v6() const noexcept + { + // For constexpr eval we can just detect which union member is active. + // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)). + if (std::__is_constant_evaluated()) + return __builtin_constant_p(_M_data._M_v6.sin6_family); + return _M_data._M_v6.sin6_family == AF_INET6; + }
Re: [PATCH v2 1/2] combine: Split code out of make_compound_operation_int
Hi! On Thu, Mar 09, 2023 at 12:09:59PM +, Richard Sandiford wrote: > This patch just splits some code out of make_compound_operation_int > into a new function called make_compound_operation_and. It is a > prerequisite for the fix for PR106594. > > It might (or might not) make sense to put more of the existing > "and" handling into the new function, so that the subreg+lshiftrt > case can be handled through recursion rather than duplication. > But that's certainly not necessary to fix the bug, so is at > best stage 1 material. > > No behavioural change intended. That doesn't sound as if you are very sure about things. I'll just pretend it says "no functional changes" :-) (*Is* this a pure refactoring?) Segher
Re: [PATCH] range-op-float, value-range: Fix up handling of UN{LT,LE,GT,GE,EQ}_EXPR and handle comparisons in get_tree_range [PR91645]
On 3/31/23 09:57, Jakub Jelinek wrote: Hi! When looking into PR91645, I've noticed we handle UN{LT,LE,GT,GE,EQ}_EXPR comparisons incorrectly. All those are unordered or ..., we correctly return [1, 1] if one or both operands are known NANs, and correctly ask the non-UN prefixed op to fold_range if neither operand may be NAN. But for the case where one or both operands may be NAN, we always return [0, 1]. The UN* fold_range tries to handle it by asking the non-UN prefixed fold_range and if it returns [1, 1] return that, if it returns [0, 0] or [0, 1] return [0, 1], which makes sense, because the maybe NAN means that it is the non-UN prefixed fold_range unioned with [1, 1] in case the maybe NAN is actually NAN at runtime. The problem is that the non-UN prefixed fold_range always returns [0, 1] because those fold_range implementations are like: if (op1.known_isnan () || op2.known_isnan ()) r = range_false (type); else if (!maybe_isnan (op1, op2)) { ... } else r = range_true_and_false (type); and so if maybe_isnan, they always return [0, 1]. Now, thinking about it, this is unnecessary pessimization, for the case where the ... block returns range_false (type) we actually could do it also if maybe_isnan (op1, op2), because if one or both operands are NAN, the comparison will be false, and if neither is NAN, the comparison will be also false. Will fix incrementally today. Anyway, the following patch fixes it by asking the non-UN prefixed fold_range on ranges with NAN cleared, which I think does the right thing in all cases. Another change in the patch is that range_query::get_tree_range always returned VARYING for comparisons, this patch allows to ask about those as well (they are very much like binary ops, except they take the important type from the types of the operands rather than result). Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? LGTM Aldy 2023-03-31 Jakub Jelinek PR tree-optimization/91645 * range-op-float.cc (foperator_unordered_lt::fold_range, foperator_unordered_le::fold_range, foperator_unordered_gt::fold_range, foperator_unordered_ge::fold_range, foperator_unordered_equal::fold_range): Call the ordered fold_range on ranges with cleared NaNs. * value-query.cc (range_query::get_tree_range): Handle also COMPARISON_CLASS_P trees. --- gcc/range-op-float.cc.jj2023-03-28 11:00:29.205986288 +0200 +++ gcc/range-op-float.cc 2023-03-30 15:14:12.219183066 +0200 @@ -1587,7 +1587,13 @@ public: r = range_true (type); return true; } -if (!fop_lt.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_lt.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1692,7 +1698,13 @@ public: r = range_true (type); return true; } -if (!fop_le.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_le.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1793,7 +1805,13 @@ public: r = range_true (type); return true; } -if (!fop_gt.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_gt.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -1898,7 +1916,13 @@ public: r = range_true (type); return true; } -if (!fop_ge.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_nan.clear_nan (); +if (op2.maybe_isnan ()) + op2_no_nan.clear_nan (); +if (!fop_ge.fold_range (r, type, op1_no_nan, op2_no_nan, rel)) return false; // The result is the same as the ordered version when the // comparison is true or when the operands cannot be NANs. @@ -2002,7 +2026,13 @@ public: r = range_true (type); return true; } -if (!fop_equal.fold_range (r, type, op1, op2, rel)) +frange op1_no_nan = op1; +frange op2_no_nan = op2; +if (op1.maybe_isnan ()) + op1_no_
Re: [PATCH] range-op-float: Further comparison fixes
On 3/31/23 12:45, Jakub Jelinek wrote: On Fri, Mar 31, 2023 at 09:57:54AM +0200, Jakub Jelinek via Gcc-patches wrote: and so if maybe_isnan, they always return [0, 1]. Now, thinking about it, this is unnecessary pessimization, for the case where the ... block returns range_false (type) we actually could do it also if maybe_isnan (op1, op2), because if one or both operands are NAN, the comparison will be false, and if neither is NAN, the comparison will be also false. Will fix incrementally today. Here it is. 1) the foperator_{,not_}equal::fold_range cases - we have correctly first a case handling known_isnan on either operand, return there range_false (type) - EQ, resp. range_true (type) - NE - next we handle the singleton cases, maybe_isnan + singleton isn't singleton, so these are ok - there is a missing case (not handled in this patch) where both operands are known to be zeros, but not singleton zeros - there is some !maybe_isnan (op1, op2) handling which tries to prove when the operands are certainly not equal and results in range_false (type) - EQ, resp. range_true (type) - NE - otherwise range_true_and_false (type) Now, I think (and this patch implements it) that the !maybe_isnan (op1, op2) check is unnecessary. If we prove that when ignoring maybe NANs, the ranges don't intersect and so the comparison is always false for EQ or always true for NE, if NANs can appear, it doesn't change anything on that either, for EQ if NAN appears, the result is false like what we proved for the finite ranges, for NE if NAN appears, the result is true like what we proved for the finite ranges 2) foperator_{lt,le,gt,ge}::fold_range cases - these have correctly known_isnan on either operand first and return there range_false (type) - then !maybe_isnan (op1, op2) condition guarded checks - finally range_true_and_false (type) - so do that for maybe_isnan (op1, op2) Here in the !maybe_isnan (op1, op2) guarded code we have some condition which results in range_true (type), another condition which results in range_false (type) and otherwise range_true_and_false (type). Now, the condition which results in range_false (type) can be IMHO done also for the maybe_isnan (op1, op2) cases, because it is [0, 0] if both operands are finite or [0, 0] if either operand is NAN. 3) finally, LTGT_EXPR wasn't handled at all. LTGT_EXPR is the inverse comparision to UNEQ_EXPR, I believe both raise exceptions only if either operand is sNaN, UNEQ_EXPR is true if both operands are either equal or either of the operands is NAN, while LTGT_EXPR is true if the operands compare either smaller or larger and is false if either of the operands is NAN. Ok for trunk if this passes bootstrap/regtest? LGTM Aldy 2023-03-31 Jakub Jelinek * range-op-float.cc (foperator_equal::fold_range): Perform the non-singleton handling regardless of maybe_isnan (op1, op2). (foperator_not_equal::fold_range): Likewise. (foperator_lt::fold_range, foperator_le::fold_range, foperator_gt::fold_range, foperator_ge::fold_range): Perform the real_* comparison check which results in range_false (type) even if maybe_isnan (op1, op2). Simplify. (foperator_ltgt): New class. (fop_ltgt): New variable. (floating_op_table::floating_op_table): Handle LTGT_EXPR using fop_ltgt. --- gcc/range-op-float.cc.jj2023-03-31 09:30:11.245296618 +0200 +++ gcc/range-op-float.cc 2023-03-31 11:23:04.817876083 +0200 @@ -616,7 +616,7 @@ foperator_equal::fold_range (irange &r, else r = range_false (type); } - else if (!maybe_isnan (op1, op2)) + else { // If ranges do not intersect, we know the range is not equal, // otherwise we don't know anything for sure. @@ -638,8 +638,6 @@ foperator_equal::fold_range (irange &r, else r = range_true_and_false (type); } - else -r = range_true_and_false (type); return true; } @@ -734,7 +732,7 @@ foperator_not_equal::fold_range (irange else r = range_true (type); } - else if (!maybe_isnan (op1, op2)) + else { // If ranges do not intersect, we know the range is not equal, // otherwise we don't know anything for sure. @@ -756,8 +754,6 @@ foperator_not_equal::fold_range (irange else r = range_true_and_false (type); } - else -r = range_true_and_false (type); return true; } @@ -839,17 +835,13 @@ foperator_lt::fold_range (irange &r, tre if (frelop_early_resolve (r, type, op1, op2, rel, VREL_LT)) return true; - if (op1.known_isnan () || op2.known_isnan ()) + if (op1.known_isnan () + || op2.known_isnan () + || !real_less (&op1.lower_bound (), &op2.upper_bound ())) r = range_false (type); - else if (!maybe_isnan (op1, op2))
Re: [PATCH] range-op-float: Further foperator_{,not_}equal::fold_range fix
On 3/31/23 12:57, Jakub Jelinek wrote: On Fri, Mar 31, 2023 at 12:45:10PM +0200, Jakub Jelinek via Gcc-patches wrote: - there is a missing case (not handled in this patch) where both operands are known to be zeros, but not singleton zeros This patch adds those cases. Ok for trunk if it passes bootstrap/regtest? LGTM. Thanks so much for taking care of all this. Aldy 2023-03-31 Jakub Jelinek * range-op-float.cc (foperator_equal::fold_range): If at least one of the op ranges is not singleton and neither is NaN and all 4 bounds are zero, return [1, 1]. (foperator_not_equal::fold_range): In the same case return [0, 0]. --- gcc/range-op-float.cc.jj2023-03-31 11:23:04.817876083 +0200 +++ gcc/range-op-float.cc 2023-03-31 12:51:34.757480162 +0200 @@ -616,6 +616,13 @@ foperator_equal::fold_range (irange &r, else r = range_false (type); } + else if (real_iszero (&op1.lower_bound ()) + && real_iszero (&op1.upper_bound ()) + && real_iszero (&op2.lower_bound ()) + && real_iszero (&op2.upper_bound ()) + && !maybe_isnan (op1, op2)) +// [-0.0, 0.0] == [-0.0, 0.0] or similar. +r = range_true (type); else { // If ranges do not intersect, we know the range is not equal, @@ -732,6 +739,13 @@ foperator_not_equal::fold_range (irange else r = range_true (type); } + else if (real_iszero (&op1.lower_bound ()) + && real_iszero (&op1.upper_bound ()) + && real_iszero (&op2.lower_bound ()) + && real_iszero (&op2.upper_bound ()) + && !maybe_isnan (op1, op2)) +// [-0.0, 0.0] != [-0.0, 0.0] or similar. +r = range_false (type); else { // If ranges do not intersect, we know the range is not equal, Jakub
Re: [RFC PATCH] Use ranger in the cdce pass [PR91645]
On 3/31/23 10:16, Jakub Jelinek wrote: Hi! The cdce pass among other things replaces calls like sqrt with code like if (condition(s)) ret = .IFN_SQRT (x); else ret = sqrt (x); so that in the common case when we know the argument doesn't trigger any range/domain errors we use the hardware instruction and defer to a library call just to handle the erroneous conditions. Now, on pr103559-3.c we already fold that condition during vrp2 or so, but for pr103559-{2,4}.c we don't. The following patch is an attempt to ask the ranger already during the cdce pass. Unfortunately, bootstrap/regtest of this patch found one regression: FAIL: 26_numerics/headers/cmath/functions_std_c++23.cc (test for excess errors) Excess errors: during GIMPLE pass: cdce /home/jakub/src/gcc/libstdc++-v3/testsuite/26_numerics/headers/cmath/functions_std_c++23.cc:26: internal compiler error: in operator[], at vec.h:890 0x94b822 vec::operator[](unsigned int) ../../gcc/vec.h:890 0x94ba2a vec::operator[](unsigned int) ../../gcc/value-relation.cc:705 0x94ba2a vec::operator[](unsigned int) ../../gcc/vec.h:1505 0x94ba2a equiv_oracle::add_equiv_to_block(basic_block_def*, bitmap_head*) ../../gcc/value-relation.cc:690 The problem is that if the pass asks ranger about something, then splits some basic blocks to add the above if (condition(s)) stuff and then asks the ranger again, the ranger caches stuff based on basic blocks but the pass adds more basic blocks. So, if I wanted to use ranger in the pass, is it true I'd need to do all the ranger queries in some analysis phase before changes are made to the cfg? Most likely, but Andrew had some fixes to make sure we didn't tooo bad when simple changes to the IL happened. Perhaps he has some thoughts here. Aldy Now, I've just tried and apparently already the patch I've just posted about foperator_un*::fold_range seems to fix those testcases (during vrp2 or when), so maybe we don't need to bother with cdce here and can just commit the testcases. Are they ok for trunk as incremental change to the previous patch? 2023-03-31 Jakub Jelinek PR tree-optimization/91645 * tree-call-cdce.cc: Include gimple-range.h. (gen_one_condition): Add STMT argument. Ask ranger if the comparison is known to be always false or always true and use false or true instead of the actual comparison in that case. (gen_conditions_for_domain, gen_conditions_for_pow_cst_base, gen_conditions_for_pow_int_base): Add STMT argument, pass it through. (gen_conditions_for_pow): Pass pow_call as STMT to gen_conditions_for_pow_cst_base and gen_conditions_for_pow_int_base. (gen_shrink_wrap_conditions): Pass bi_call as STMT to gen_conditions_for_domain. (pass_call_cdce::execute): Disable ranger if it has been enabled. * gcc.target/i386/pr103559-1.c: New test. * gcc.target/i386/pr103559-2.c: New test. * gcc.target/i386/pr103559-3.c: New test. * gcc.target/i386/pr103559-4.c: New test. --- gcc/tree-call-cdce.cc.jj2023-01-02 09:32:45.940944935 +0100 +++ gcc/tree-call-cdce.cc 2023-03-30 14:54:25.248544702 +0200 @@ -36,6 +36,7 @@ along with GCC; see the file COPYING3. #include "builtins.h" #include "internal-fn.h" #include "tree-dfa.h" +#include "gimple-range.h" /* This pass serves two closely-related purposes: @@ -425,12 +426,9 @@ comparison_code_if_no_nans (tree_code co null tree. */ static void -gen_one_condition (tree arg, int lbub, - enum tree_code tcode, - const char *temp_name1, - const char *temp_name2, - vec conds, - unsigned *nconds) +gen_one_condition (tree arg, int lbub, enum tree_code tcode, + const char *temp_name1, const char *temp_name2, + vec conds, unsigned *nconds, gimple *stmt) { if (!HONOR_NANS (arg)) tcode = comparison_code_if_no_nans (tcode); @@ -451,10 +449,24 @@ gen_one_condition (tree arg, int lbub, gimple_assign_set_lhs (stmt1, tempn); tempc = create_tmp_var (boolean_type_node, temp_name2); - stmt2 = gimple_build_assign (tempc, - fold_build2 (tcode, - boolean_type_node, - tempn, lbub_real_cst)); + tree tcond = build2 (tcode, boolean_type_node, arg, lbub_real_cst); + int_range_max r; + range_query *q = get_range_query (cfun); + if (q == get_global_range_query ()) +q = enable_ranger (cfun); + /* Ask the ranger whether it knows the condition will be always false or + always true. */ + if (!q->range_of_expr (r, tcond, stmt) || r.undefined_p ()) +tcond = NULL_TREE; + else if (r.upper_bound () == 0) +tcond = boolean_false_node; + else if (r.lower_bound () == 1) +tcond = boolean_true_node; + else +tcon
Re: [PATCH v2 1/2] combine: Split code out of make_compound_operation_int
Segher Boessenkool writes: > Hi! > > On Thu, Mar 09, 2023 at 12:09:59PM +, Richard Sandiford wrote: >> This patch just splits some code out of make_compound_operation_int >> into a new function called make_compound_operation_and. It is a >> prerequisite for the fix for PR106594. >> >> It might (or might not) make sense to put more of the existing >> "and" handling into the new function, so that the subreg+lshiftrt >> case can be handled through recursion rather than duplication. >> But that's certainly not necessary to fix the bug, so is at >> best stage 1 material. >> >> No behavioural change intended. > > That doesn't sound as if you are very sure about things. I'll just > pretend it says "no functional changes" :-) Heh. Stock phrase borrowed from LLVM. I'm not going to sign off with "this patch contains no bugs". :-) > (*Is* this a pure refactoring?) Yeah, this patch is a pure refactoring. It's the follow-on 2/2 part that does something useful. Thanks, Richard
Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]
On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote: > g:c23a9c87cc62bd177fd0d4db6ad34b34e1b9a31f uses nonzero_bits > information to convert sign_extends into zero_extends. > That change is semantically correct in itself, but for the > testcase in the PR, it leads to a series of unfortunate events, > as described below. That patch also does other much more obviously beneficial transforms. These things should not have been mashed together. Smaller more atomic patches are much nicer to handle :-/ > We try to combine: > > Trying 24 -> 25: >24: r116:DI=sign_extend(r115:SI) > REG_DEAD r115:SI >25: r117:SI=[r116:DI*0x4+r118:DI] > REG_DEAD r116:DI > REG_EQUAL [r116:DI*0x4+`constellation_64qam'] > > which previously succeeded, giving: > > (set (reg:SI 117 [ constellation_64qam[_5] ]) > (mem/u:SI (plus:DI (mult:DI (sign_extend:DI (reg:SI 115)) > (const_int 4 [0x4])) > (reg/f:DI 118)) [1 constellation_64qam[_5]+0 S4 A32])) > > However, nonzero_bits can tell that only the low 6 bits of r115 > can be nonzero. The commit above therefore converts the sign_extend > to a zero_extend. Using the same nonzero_bits information, we then > "expand" the zero_extend to: > > (and:DI (subreg:DI (reg:SI r115) 0) > (const_int 63)) This is more expensive already?! An "and" usually costs a real machine instruction, while subregs are usually free (just notational). The "and" can not easily be optimised away either (after combine we only have less accurate nonzero_bits, to start with). > Substituting into the mult gives the unsimplified expression: > > (mult:DI (and:DI (subreg:DI (reg:SI r115) 0) >(const_int 63)) >(const_int 4)) > > The simplification rules for mult convert this to an ashift by 2. > Then, this rule in simplify_shift_const_1: > > /* If we have (shift (logical)), move the logical to the outside >to allow it to possibly combine with another logical and the >shift to combine with another shift. This also canonicalizes to >what a ZERO_EXTRACT looks like. Also, some machines have >(and (shift)) insns. */ > > moves the shift inside the "and", so that the expression becomes: > > (and:DI (ashift:DI (subreg:DI (reg:SI r115) 0) > (const_int 2)) > (const_int 252)) > > We later recanonicalise to a mult (since this is an address): > > (and:DI (mult:DI (subreg:DI (reg:SI r115) 0) >(const_int 4)) > (const_int 252)) > > But we fail to transform this back to the natural substitution: > > (mult:DI (zero_extend:DI (reg:SI r115)) >(const_int 4)) > > There are several other cases in which make_compound_operation > needs to look more than one level down in order to complete a > compound operation. For example: > > (a) the ashiftrt handling uses extract_left_shift to look through > things like logic ops in order to find a partnering ashift > operation > > (b) the "and" handling looks through subregs, xors and iors > to find a partnerning lshiftrt > > This patch takes the same approach for mult. > > gcc/ > PR rtl-optimization/106594 > * combine.cc (make_compound_operation_and): Look through > multiplications by a power of two. > > gcc/testsuite/ > * gcc.target/aarch64/pr106594.c: New test. > --- > gcc/combine.cc | 17 + > gcc/testsuite/gcc.target/aarch64/pr106594.c | 20 > 2 files changed, 37 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106594.c > > diff --git a/gcc/combine.cc b/gcc/combine.cc > index 7d446d02cb4..36d04ad6703 100644 > --- a/gcc/combine.cc > +++ b/gcc/combine.cc > @@ -7996,6 +7996,23 @@ make_compound_operation_and (scalar_int_mode mode, rtx > x, > break; >} > > +case MULT: > + /* Recurse through a power of 2 multiplication (as can be found > + in an address), using the relationship: > + > + (and (mult X 2**N1) N2) == (mult (and X (lshifrt N2 N1)) 2**N1). */ > + if (CONST_INT_P (XEXP (x, 1)) > + && pow2p_hwi (INTVAL (XEXP (x, 1 > + { > + int shift = exact_log2 (INTVAL (XEXP (x, 1))); > + rtx sub = make_compound_operation_and (mode, XEXP (x, 0), > + mask >> shift, in_code, > + next_code); > + if (sub) > + return gen_rtx_MULT (mode, sub, XEXP (x, 1)); > + } > + break; > + > default: >break; > } > diff --git a/gcc/testsuite/gcc.target/aarch64/pr106594.c > b/gcc/testsuite/gcc.target/aarch64/pr106594.c > new file mode 100644 > index 000..beda8e050a5 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/pr106594.c > @@ -0,0 +1,20 @@ > +/* { dg-options "-O2" } */ > + > +extern const int constellation_64qam[64]; > + > +void
Re: [PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.
On 3/30/23 18:01, Hans-Peter Nilsson wrote: On Fri, 24 Mar 2023, Peter Bergner via Gcc-patches wrote: On 3/23/23 6:12 PM, Jeff Law via Gcc-patches wrote: Is there a reason why REE cannot see that our (reg:QI 4) is a param register and thus due to our ABI, already correctly sign/zero extended? I don't think REE has ever considered exploiting ABI constraints. Handling that might be a notable improvement on various targets. It'd be a great place to do some experimentation. Ok, so sounds like a good follow-on project after this patch is reviewed and committed (stage1). Thanks for your input! Agreed. I suspect that risc-v will benefit from such work as well. With that in mind, if y'all start poking at this, please loop in Raphael (on cc) who's expressed an interest in this space. Will do. I suspect that it'll be best to come up with some generic interface using target hooks like "param regs are sign/zero extended" or "call return values are sign/zero extended", etc. that targets can conditionally opt into depending on their ABI that is in effect. Pardon the arm-chair development mode but it sounds like re-inventing the TARGET_PROMOTE_* hooks... Maybe just hook up TARGET_PROMOTE_FUNCTION_MODE to ree.c (as "you" already already define it for "rs6000")? That's what we touched on up-thread. Ideally we want to wire up REE to utilize the existing mechanisms to specify ABI constraints so that we don't have to write all those macros again. jeff
Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]
Segher Boessenkool writes: > On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote: >> g:c23a9c87cc62bd177fd0d4db6ad34b34e1b9a31f uses nonzero_bits >> information to convert sign_extends into zero_extends. >> That change is semantically correct in itself, but for the >> testcase in the PR, it leads to a series of unfortunate events, >> as described below. > > That patch also does other much more obviously beneficial transforms. > These things should not have been mashed together. Smaller more atomic > patches are much nicer to handle :-/ > >> We try to combine: >> >> Trying 24 -> 25: >>24: r116:DI=sign_extend(r115:SI) >> REG_DEAD r115:SI >>25: r117:SI=[r116:DI*0x4+r118:DI] >> REG_DEAD r116:DI >> REG_EQUAL [r116:DI*0x4+`constellation_64qam'] >> >> which previously succeeded, giving: >> >> (set (reg:SI 117 [ constellation_64qam[_5] ]) >> (mem/u:SI (plus:DI (mult:DI (sign_extend:DI (reg:SI 115)) >> (const_int 4 [0x4])) >> (reg/f:DI 118)) [1 constellation_64qam[_5]+0 S4 A32])) >> >> However, nonzero_bits can tell that only the low 6 bits of r115 >> can be nonzero. The commit above therefore converts the sign_extend >> to a zero_extend. Using the same nonzero_bits information, we then >> "expand" the zero_extend to: >> >> (and:DI (subreg:DI (reg:SI r115) 0) >> (const_int 63)) > > This is more expensive already?! An "and" usually costs a real machine > instruction, while subregs are usually free (just notational). The > "and" can not easily be optimised away either (after combine we only > have less accurate nonzero_bits, to start with). Well, it's an "and" in place of a zero_extend rather than an "and" in place of a subreg. So it is one real instruction for another real instruction. But yeah, it's not exactly a simplification. The "we" here is expand_compound_operation, via simplify_and_const_int. So it's the usual thing: expand_compound_operation creates something that is perhaps more expensive, then after simplification, make_compound_operation is supposed to collapse these "expanded" perhaps-more-expensive forms back. And it's the last bit that goes wrong. >> Substituting into the mult gives the unsimplified expression: >> >> (mult:DI (and:DI (subreg:DI (reg:SI r115) 0) >>(const_int 63)) >>(const_int 4)) >> >> The simplification rules for mult convert this to an ashift by 2. >> Then, this rule in simplify_shift_const_1: >> >>/* If we have (shift (logical)), move the logical to the outside >> to allow it to possibly combine with another logical and the >> shift to combine with another shift. This also canonicalizes to >> what a ZERO_EXTRACT looks like. Also, some machines have >> (and (shift)) insns. */ >> >> moves the shift inside the "and", so that the expression becomes: >> >> (and:DI (ashift:DI (subreg:DI (reg:SI r115) 0) >> (const_int 2)) >> (const_int 252)) >> >> We later recanonicalise to a mult (since this is an address): >> >> (and:DI (mult:DI (subreg:DI (reg:SI r115) 0) >>(const_int 4)) >> (const_int 252)) >> >> But we fail to transform this back to the natural substitution: >> >> (mult:DI (zero_extend:DI (reg:SI r115)) >>(const_int 4)) >> >> There are several other cases in which make_compound_operation >> needs to look more than one level down in order to complete a >> compound operation. For example: >> >> (a) the ashiftrt handling uses extract_left_shift to look through >> things like logic ops in order to find a partnering ashift >> operation >> >> (b) the "and" handling looks through subregs, xors and iors >> to find a partnerning lshiftrt >> >> This patch takes the same approach for mult. >> >> gcc/ >> PR rtl-optimization/106594 >> * combine.cc (make_compound_operation_and): Look through >> multiplications by a power of two. >> >> gcc/testsuite/ >> * gcc.target/aarch64/pr106594.c: New test. >> --- >> gcc/combine.cc | 17 + >> gcc/testsuite/gcc.target/aarch64/pr106594.c | 20 >> 2 files changed, 37 insertions(+) >> create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106594.c >> >> diff --git a/gcc/combine.cc b/gcc/combine.cc >> index 7d446d02cb4..36d04ad6703 100644 >> --- a/gcc/combine.cc >> +++ b/gcc/combine.cc >> @@ -7996,6 +7996,23 @@ make_compound_operation_and (scalar_int_mode mode, >> rtx x, >> break; >>} >> >> +case MULT: >> + /* Recurse through a power of 2 multiplication (as can be found >> + in an address), using the relationship: >> + >> + (and (mult X 2**N1) N2) == (mult (and X (lshifrt N2 N1)) 2**N1). */ >> + if (CONST_INT_P (XEXP (x, 1)) >> + && pow2p_hwi (INTVAL (XEXP (x, 1 >> +{ >> + int shift = exact_log2 (INTVAL (XEXP (x, 1))); >> + rtx sub
Re: [PATCH] tree-optimization/107561 - reduce -Wstringop-overflow false positives
On 3/29/23 06:11, Richard Biener via Gcc-patches wrote: The following tells pointer-query to prefer a zero size when we are querying for the size range for a write into an object we've determined is of zero size. That avoids diagnostics about really varying size arguments that just get a meaningful range for example because they are multiplied by an element size. I've adjusted only one call to get_size_range since that's what I have a testcase for. I think this is the most sensible "workaround" for some of the false positives we see. Bootstrapped on x86_64-unknown-linux-gnu, testing in progress. OK if it succeeds? Thanks, Richard. PR tree-optimization/107561 * gimple-ssa-warn-access.cc (get_size_range): Add flags argument and pass it on. (check_access): When querying for the size range pass SR_ALLOW_ZERO when the known destination size is zero. * g++.dg/pr71488.C: Remove XFAILed bogus diagnostic again. * g++.dg/warn/Warray-bounds-16.C: Likewise. OK. jeff
Patch ping: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]
Hi! On Fri, Mar 24, 2023 at 10:59:36PM +0100, Jakub Jelinek via Gcc-patches wrote: > 2023-03-24 Jakub Jelinek > > PR target/109254 > * builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode > returns VOIDmode, handle it like if the register isn't used for > passing arguments at all. > (apply_result_size): If targetm.calls.get_raw_result_mode returns > VOIDmode, handle it like if the register isn't used for returning > results at all. > * target.def (get_raw_result_mode, get_raw_arg_mode): Document what it > means to return VOIDmode. > * doc/tm.texi: Regenerated. > * config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return > TARGET_SVE for P0_REGNUM. > (aarch64_function_arg_regno_p): Also return true for p0-p3. > (aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs. I'd like to ping this patch. https://gcc.gnu.org/pipermail/gcc-patches/2023-March/614594.html Thanks Jakub
Re: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]
On 3/24/23 15:59, Jakub Jelinek via Gcc-patches wrote: Hi! The testcase in the PR (which unfortunately because of my lack of experience with SVE I'm not able to turn into a runtime testcase that verifies it) is miscompiled on aarch64-linux in the regname pass, because while the function takes arguments in the p0 register, FUNCTION_ARG_REGNO_P doesn't reflect that, so DF doesn't know the register is used in register passing. It sees 2 chains with p1 register and wants to replace the second one and as DF doesn't know p0 is live at the start of the function, it will happily use p0 register even when it is used in subsequent instructions. The following patch fixes that. FUNCTION_ARG_REGNO_P returns non-zero for p0-p3 (unconditionally, seems for the floating/vector registers it doesn't conditionalize them on TARGET_FLOAT either, but if you want, I can conditionalize p0-p3 on TARGET_SVE), similarly targetm.calls.function_value_regno_p returns true for p0 register if TARGET_SVE (again for consistency, that function conditionalizes the float/vector on TARGET_FLOAT); looking at the AAPCS, seems p1-p3 could be also used to return values in case of homogenous aggregates, but it doesn't seem GCC supports putting svbool_t as a member of a structure. Now, that change broke bootstrap in libobjc and some __builtin_apply_args/__builtin_apply/__builtin_return tests. The aarch64_get_reg_raw_mode hook already documents that SVE scalable arg/return passing is fundamentally incompatible with those builtins, but unlike the floating/vector regs where it forces a fixed vector mode, I think there is no fixed mode which could be used for p0-p3. So, I have tweaked the generic code so that it uses VOIDmode return from that hook to signal that a register shouldn't be touched by __builtin_apply_args/__builtin_apply/__builtin_return despite being mentioned in FUNCTION_ARG_REGNO_P or targetm.calls.function_value_regno_p. Bootstrapped/regtested on aarch64-linux, ok for trunk? Could somebody please turn the testcase from the PR into something that can be included into the testsuite? 2023-03-24 Jakub Jelinek PR target/109254 * builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode returns VOIDmode, handle it like if the register isn't used for passing arguments at all. (apply_result_size): If targetm.calls.get_raw_result_mode returns VOIDmode, handle it like if the register isn't used for returning results at all. * target.def (get_raw_result_mode, get_raw_arg_mode): Document what it means to return VOIDmode. * doc/tm.texi: Regenerated. * config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return TARGET_SVE for P0_REGNUM. (aarch64_function_arg_regno_p): Also return true for p0-p3. (aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs. Generic bits are OK by me. The aarch64 bits looks sensible, but I'd like to give the aarch folks one more chance to chime in. So OK for the trunk Monday if you haven't heard otherwise. jeff
Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]
On Fri, Mar 31, 2023 at 03:06:41PM +0100, Richard Sandiford wrote: > Segher Boessenkool writes: > > On Thu, Mar 09, 2023 at 12:10:51PM +, Richard Sandiford wrote: > >> (and:DI (subreg:DI (reg:SI r115) 0) > >> (const_int 63)) > > > > This is more expensive already?! An "and" usually costs a real machine > > instruction, while subregs are usually free (just notational). The > > "and" can not easily be optimised away either (after combine we only > > have less accurate nonzero_bits, to start with). > > Well, it's an "and" in place of a zero_extend rather than an "and" > in place of a subreg. So it is one real instruction for another > real instruction. But yeah, it's not exactly a simplification. We should be able to optimise away the zero_extend in many cases, and we cannot do that to an "and" usually. > The "we" here is expand_compound_operation, via simplify_and_const_int. > So it's the usual thing: expand_compound_operation creates something > that is perhaps more expensive, then after simplification, > make_compound_operation is supposed to collapse these "expanded" > perhaps-more-expensive forms back. And it's the last bit that > goes wrong. Yes. And that is even unavoidable. The whole concept is a bad idea. We should not create worse things hoping that we can make it better things later again; we usually cannot make it as good anymore, or even reasonably good :-( Totally skipping this dance of course results in worse code as well, so more work is needed. > > This looks pretty good (thanks!), but as always it needs testing on more > > architectures, showing it doesn't hurt things. It should be beneficial, > > but it is not unlikely to hurt other existing backends, and we are not > > in stage 1 (we are in stage 4 even!) > > Yeah, but it's fixing a GCC 13 regression (and an important one on aarch64). That is not an excuse for (potentially) causing hundreds of new regressions, some maybe even worse. > > Do you have any such proof / indication / anything? I can start > > some run but it takes a day (or two), and I cannot start it until next > > week. > > This is an alternative presentation of the change that we discussed > a few weeks ago, and that you already tested: > > https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613486.html > > The results seem to indicate that the patch had no effect on targets > other than aarch64. [A good thing, IMO. :-) The purpose of the > patch is to fix the aarch64 regression in a minimally invasive way.] If it is the same (I don't agree at all fwiw) it has no effect on aarch64 either, other than on your single testcase. So that is a good reason to NAK this patch outside of stage 1 already. > I tried building an aarch64 linux kernel locally with -Os > -fno-schedule-insns{,2}. Completely unrealistic. No one builds anything they care for speed fo with -Os (and if people care for size, you still get better results with -O2 + some other tunings), and disabling scheduling is disastrous. > I saw code size improvements in 182 .os and a > regression in only one .o. (I was comparing individual .os because it > makes isolation easier.) > The one regression was because the optimised version had fewer pseudos, > and so something that was previously allocated to x3 was allocated to x2. > This pseudo was initialised to 0, and a preceding stack protect > instruction had the known side effect (both before and after the patch) > of setting x3 to 0. So with the x3 allocation, postreload was able to > remove the separate initialisation of x3 with 0, but with the x2 > allocation it couldn't. > > So for my kernel build it was a 182:1 split in favour of improvements, > with the one regression being a quirk rather than a true regression. For a default configuration it gave 100.033% size, a non-trivial regression. But again, I don't think this code has the same effect. > > Do you have plans to make combine not do this insane "and" thing at all? > > Or to attack the compound operation stuff head on? > > I don't have any plans to do that myself, but I agree it would be > better to get rid of the compound operation stuff if we can. Getting rid of it is easy enough, but that also loses wanted functionality (even for archs that do not have any extract functionality; these functions do more than that :-( ). > I can see why the expand/simplify/remake process seemed like > a nice idea, but in practice, there's just too much that can > go wrong. Yup. > And removing the compound stuff would go a long way > to making combine simplification more like fwprop simpliification > (unlike now, where we effectively have two separate simplification > processes for things involving *_extends and *_extracts). This has nothing to do with simplification? We just use simplify-rtx (which started out as part of combine exclusively), just like everything else. All the other transforms that are combine-only are very useful to have as well, as shown a year or so
[committed] libstdc++: Revert addition of boolean flag to net::ip::basic_endpoint
Tested powerpc64le-linux. Pushed to trunk. -- >8 -- As pointed out in P2641R1, we can use GCC's __builtin_constant_p to emulate the proposed std::is_active_member. This allows us to detect which of the union members is active during constant evaluation, so we don't need the extra bool data member. We still can't support constexpr until C++20 though, as we need to change the active member during constant evaluation. libstdc++-v3/ChangeLog: * include/experimental/internet (ip::basic_endpoint::_M_if_v6): Revert change from member function to data member. Fix for constant evaluation by detecting which union member is active. (ip::basic_endpoint::resize): Revert changes to update _M_is_v6 flag. --- libstdc++-v3/include/experimental/internet | 32 ++ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/libstdc++-v3/include/experimental/internet b/libstdc++-v3/include/experimental/internet index dff81b456ab..eb23ae21cdc 100644 --- a/libstdc++-v3/include/experimental/internet +++ b/libstdc++-v3/include/experimental/internet @@ -1531,7 +1531,6 @@ namespace ip std::_Construct(&_M_data._M_v6); _M_data._M_v6.sin6_family = __proto.family(); _M_data._M_v6.sin6_port = address_v4::_S_hton_16(__port_num); - _M_is_v6 = true; } else { @@ -1560,7 +1559,6 @@ namespace ip for (int __i = 0; __i < 16; ++__i) __s6a[__i] = __addr._M_v6._M_bytes[__i]; _M_data._M_v6.sin6_scope_id = __addr._M_v6._M_scope_id; - _M_is_v6 = true; } } @@ -1568,13 +1566,13 @@ namespace ip constexpr protocol_type protocol() const noexcept { - return _M_is_v6 ? protocol_type::v6() : protocol_type::v4(); + return _M_is_v6() ? protocol_type::v6() : protocol_type::v4(); } constexpr ip::address address() const noexcept { - if (_M_is_v6) + if (_M_is_v6()) { address_v6 __v6; const uint8_t* __s6a = _M_data._M_v6.sin6_addr.s6_addr; @@ -1601,14 +1599,12 @@ namespace ip __builtin_memcpy(_M_data._M_v6.sin6_addr.s6_addr, __addr._M_v6._M_bytes.data(), 16); _M_data._M_v6.sin6_scope_id = __addr._M_v6._M_scope_id; - _M_is_v6 = true; } else { std::_Construct(&_M_data._M_v4); _M_data._M_v4.sin_family = protocol_type::v4().family(); _M_data._M_v4.sin_addr.s_addr = __addr._M_v4._M_addr; - _M_is_v6 = false; } } @@ -1616,7 +1612,7 @@ namespace ip port() const noexcept { port_type __p = 0; - if (_M_is_v6) + if (_M_is_v6()) __p = _M_data._M_v6.sin6_port; else __p = _M_data._M_v4.sin_port; @@ -1627,7 +1623,7 @@ namespace ip port(port_type __port_num) noexcept { __port_num = address_v4::_S_hton_16(__port_num); - if (_M_is_v6) + if (_M_is_v6()) _M_data._M_v6.sin6_port = __port_num; else _M_data._M_v4.sin_port = __port_num; @@ -1639,19 +1635,11 @@ namespace ip constexpr size_t size() const noexcept - { return _M_is_v6 ? sizeof(sockaddr_in6) : sizeof(sockaddr_in); } + { return _M_is_v6() ? sizeof(sockaddr_in6) : sizeof(sockaddr_in); } void resize(size_t __s) { - __glibcxx_assert(__s >= 0); - static_assert(offsetof(sockaddr_in6, sin6_family) - == offsetof(sockaddr_in, sin_family), - "sockaddr_in::sin_family and sockaddr_in6::sin6_family " - "must be at the same offset"); - const sa_family_t __in6 = AF_INET6; - const auto* __ptr = (char*)&_M_data + offsetof(sockaddr_in, sin_family); - _M_is_v6 = __builtin_memcmp(&__in6, __ptr, sizeof(__in6)) == 0; if (__s != size()) __throw_length_error("net::ip::basic_endpoint::resize"); } @@ -1665,7 +1653,15 @@ namespace ip sockaddr_in6_M_v6; } _M_data; - bool _M_is_v6 = false; + constexpr bool + _M_is_v6() const noexcept + { + // For constexpr eval we can just detect which union member is active. + // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)). + if (std::__is_constant_evaluated()) + return __builtin_constant_p(_M_data._M_v6.sin6_family); + return _M_data._M_v6.sin6_family == AF_INET6; + } }; /** basic_endpoint comparisons -- 2.39.2
Re: [committed] libstdc++: Fix constexpr functions in
On Fri, 31 Mar 2023 at 12:06, Jonathan Wakely wrote: > > On Thu, 30 Mar 2023 at 17:01, Daniel Krügler wrote: > > > > Am Do., 30. März 2023 um 18:00 Uhr schrieb Jonathan Wakely via > > Libstdc++ : > > > > > [..] > > > > > > In fact, thinking about P2641 some more, I might revert this change. > > > Instead of adding an extra bool member to support constexpr, I think > > > I'll just remove the 'constexpr' keywords from basic_endpoint for now, > > > and implement it in terms of just inspecting the sa_family_t member of > > > the union members. And then later, once we have something like P2641, > > > we can re-add the constexpr keywords and use is_within_lifetime during > > > constant evaluation. That way we don't add a bool then need to take it > > > away again, changing the ABI each time. > > > > I was just going to make the same suggestion. > > Actually, as pointed out in Barry's P2641R1 paper, we can just use > GCC's __builtin_constant_p: > > + constexpr bool > + _M_is_v6() const noexcept > + { > + // For constexpr eval we can just detect which union member is active. > + // i.e. emulate P2641R1's std::is_active_member(&_M_data._M_v6)). > + if (std::__is_constant_evaluated()) > + return __builtin_constant_p(_M_data._M_v6.sin6_family); > + return _M_data._M_v6.sin6_family == AF_INET6; > + } Done in commit 10e573e86c6a1ed11df529288ed8fba97f876032
[committed] libstdc++: Avoid -Wmaybe-uninitialized warning in std::stop_source [PR109339]
Tested powerpc64le-linux. Pushed to trunk. -- >8 -- We pass a const-reference to *this before it's constructed, and GCC assumes that all const-references are accessed. Add the access attribute to say it's not accessed. libstdc++-v3/ChangeLog: PR libstdc++/109339 * include/std/stop_token (_Stop_state_ptr(const stop_source&)): Add attribute access with access-mode 'none'. * testsuite/30_threads/stop_token/stop_source/109339.cc: New test. --- libstdc++-v3/include/std/stop_token| 1 + .../30_threads/stop_token/stop_source/109339.cc| 10 ++ 2 files changed, 11 insertions(+) create mode 100644 libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc diff --git a/libstdc++-v3/include/std/stop_token b/libstdc++-v3/include/std/stop_token index 76aef78a7ef..c90fc786b63 100644 --- a/libstdc++-v3/include/std/stop_token +++ b/libstdc++-v3/include/std/stop_token @@ -395,6 +395,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { _Stop_state_ref() = default; + [[__gnu__::__access__(__none__, 2)]] explicit _Stop_state_ref(const stop_source&) : _M_ptr(new _Stop_state_t()) diff --git a/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc b/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc new file mode 100644 index 000..eea614eef80 --- /dev/null +++ b/libstdc++-v3/testsuite/30_threads/stop_token/stop_source/109339.cc @@ -0,0 +1,10 @@ +// { dg-options "-Wmaybe-uninitialized -Og -std=gnu++20" } +// { dg-do compile { target c++20 } } + +#include + +int main() +{ + std::stop_source ss; + // { dg-bogus "uninitialized" "PR 109339" { target *-*-* } 0 } +} -- 2.39.2
Re: [PATCH v2 2/2] combine: Try harder to form zero_extends [PR106594]
Segher Boessenkool writes: > On Fri, Mar 31, 2023 at 03:06:41PM +0100, Richard Sandiford wrote: >> This is an alternative presentation of the change that we discussed >> a few weeks ago, and that you already tested: >> >> https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613486.html >> >> The results seem to indicate that the patch had no effect on targets >> other than aarch64. [A good thing, IMO. :-) The purpose of the >> patch is to fix the aarch64 regression in a minimally invasive way.] > > If it is the same (I don't agree at all fwiw) Why don't you agree that it's behaviourally the same? It's really just a refactoring of my earlier patch. > it has no effect on aarch64 either, other than on your single > testcase. So that is a good reason to NAK this patch outside of stage > 1 already. But if it only affected this single testcase, I wouldn't see improvements in 182 kernel objects :-) >> I tried building an aarch64 linux kernel locally with -Os >> -fno-schedule-insns{,2}. > > Completely unrealistic. No one builds anything they care for speed fo > with -Os (and if people care for size, you still get better results > with -O2 + some other tunings), and disabling scheduling is disastrous. As discussed before, the point is to use these results to get an idea of the scope of the change. And if we're using size to measure the scope, it makes sense to align the compiler's goals with that, since we then need to spend less time filtering the results for oddities like different RA leading to different code layout, etc. If we want to measure speed, we should do that. But I don't expect a significant speed impact on the kernel either way (whether the patch is good or bad). Thanks, Richard
[pushed][PR109052] LRA: Implement commutative operands exchange for combining secondary memory reload and original insn
This is one more patch for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109052 The patch adds trying commutative operands exchange for recently implemented combining secondary memory reload and original insn: The patch was successfully bootstrapped and tested on x86_64. commit 378d19cfebfa2bc4f693dfc9e6f0dd993e7c45f7 Author: Vladimir N. Makarov Date: Fri Mar 31 11:04:44 2023 -0400 LRA: Implement commutative operands exchange for combining secondary memory reload and original insn The patch implements trying commutative operands exchange for combining secondary memory reload and original insn. PR rtl-optimization/109052 gcc/ChangeLog: * lra-constraints.cc: (combine_reload_insn): New function. gcc/testsuite/ChangeLog: * gcc.target/i386/pr109052-2.c: New. diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc index 405b8b92f5e..ff4e8f06063 100644 --- a/gcc/lra-constraints.cc +++ b/gcc/lra-constraints.cc @@ -5061,7 +5061,23 @@ combine_reload_insn (rtx_insn *from, rtx_insn *to) curr_insn = to; curr_id = lra_get_insn_recog_data (curr_insn); curr_static_id = curr_id->insn_static_data; - ok_p = !curr_insn_transform (true); + for (bool swapped_p = false;;) + { + ok_p = !curr_insn_transform (true); + if (ok_p || curr_static_id->commutative < 0) + break; + swap_operands (curr_static_id->commutative); + if (lra_dump_file != NULL) + { + fprintf (lra_dump_file, + "Swapping %scombined insn operands:\n", + swapped_p ? "back " : ""); + dump_insn_slim (lra_dump_file, to); + } + if (swapped_p) + break; + swapped_p = true; + } curr_insn = saved_insn; curr_id = lra_get_insn_recog_data (curr_insn); curr_static_id = curr_id->insn_static_data; diff --git a/gcc/testsuite/gcc.target/i386/pr109052-2.c b/gcc/testsuite/gcc.target/i386/pr109052-2.c new file mode 100644 index 000..337d1f49c2b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr109052-2.c @@ -0,0 +1,10 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mfpmath=both -mavx -fno-math-errno" } */ + +double foo (double a, double b) +{ + double z = __builtin_fmod (a, 3.14); + return z * b; +} + +/* { dg-final { scan-assembler-not "vmulsd\[ \t]\+%xmm\[0-9]\+, %xmm\[0-9]\+, %xmm\[0-9]\+"} } */
Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]
On Thu, 30 Mar 2023, Jason Merrill wrote: > On 3/30/23 14:53, Patrick Palka wrote: > > On Wed, 29 Mar 2023, Jason Merrill wrote: > > > > > On 3/27/23 09:30, Patrick Palka wrote: > > > > On Thu, 23 Mar 2023, Patrick Palka wrote: > > > > > > > > > r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL > > > > > template arguments not always getting marked as odr-used by > > > > > redundantly > > > > > calling mark_used on the substituted ADDR_EXPR callee of a CALL_EXPR. > > > > > This is just a narrow workaround however, since using a FUNCTION_DECL > > > > > as > > > > > a template argument alone should constitutes an odr-use; we shouldn't > > > > > need to subsequently e.g. call the function or take its address. > > > > > > Agreed. But why didn't we already wrap it in an ADDR_EXPR? Even for > > > reference tparms convert_nontype_argument should do that. > > > > Indeed we do, the commit message was just rather sloppy/inaccurate... > > I'll try to correct it. > > > > > > > > > > This patch fixes this in a more general way at template specialization > > > > > time by walking the template arguments of the specialization and > > > > > calling > > > > > mark_used on all entities used within. As before, the call to > > > > > mark_used > > > > > as it worst a no-op, but it compensates for the situation where we end > > > > > up > > > > > forming a specialization from a template context in which mark_used is > > > > > inhibited. Another approach would be to call mark_used whenever we > > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many more > > > > > redundant calls to mark_used compared to this approach. > > > > > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK > > > > > for > > > > > trunk? > > > > > > > > > > PR c++/53164 > > > > > PR c++/105848 > > > > > > > > > > gcc/cp/ChangeLog: > > > > > > > > > > * pt.cc (instantiate_class_template): Call > > > > > mark_template_arguments_used. > > > > > (tsubst_copy_and_build) : Revert r13-995 > > > > > change. > > > > > (mark_template_arguments_used): Define. > > > > > (instantiate_template): Call mark_template_arguments_used. > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > * g++.dg/template/fn-ptr3a.C: New test. > > > > > * g++.dg/template/fn-ptr4.C: New test. > > > > > --- > > > > >gcc/cp/pt.cc | 51 > > > > > > > > > >gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 > > > > >gcc/testsuite/g++.dg/template/fn-ptr4.C | 14 +++ > > > > >3 files changed, 74 insertions(+), 16 deletions(-) > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C > > > > > > > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc > > > > > index 7e4a8de0c8b..9b3cc1c 100644 > > > > > --- a/gcc/cp/pt.cc > > > > > +++ b/gcc/cp/pt.cc > > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree); > > > > >static tree enclosing_instantiation_of (tree tctx); > > > > >static void instantiate_body (tree pattern, tree args, tree d, bool > > > > > nested); > > > > >static tree maybe_dependent_member_ref (tree, tree, tsubst_flags_t, > > > > > tree); > > > > > +static void mark_template_arguments_used (tree); > > > > > /* Make the current scope suitable for access checking when we > > > > > are > > > > > processing T. T can be FUNCTION_DECL for instantiated function > > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type) > > > > > cp_unevaluated_operand = 0; > > > > > c_inhibit_evaluation_warnings = 0; > > > > >} > > > > > + > > > > > + mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args)); > > > > > + > > > > > /* Use #pragma pack from the template context. */ > > > > > saved_maximum_field_alignment = maximum_field_alignment; > > > > > maximum_field_alignment = TYPE_PRECISION (pattern); > > > > > @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t, > > > > > } > > > > > /* Remember that there was a reference to this entity. */ > > > > > - if (function != NULL_TREE) > > > > > - { > > > > > - tree inner = function; > > > > > - if (TREE_CODE (inner) == ADDR_EXPR > > > > > - && TREE_CODE (TREE_OPERAND (inner, 0)) == > > > > > FUNCTION_DECL) > > > > > - /* We should already have called mark_used when taking > > > > > the > > > > > - address of this function, but do so again anyway to > > > > > make > > > > > - sure it's odr-used: at worst this is a no-op, but if > > > > > we > > > > > - obtained this FUNCTION_DECL as part of ahead-of-time > > > > > overload > > > > > - resolution then that call to mark_used wouldn't have > > > > > marked > > > > > it > > > > > - odr-used
Re: [PATCH] aarch64, builtins: Include PR registers in FUNCTION_ARG_REGNO_P etc. [PR109254]
Thanks for the patch and sorry for the slow reply. Jakub Jelinek writes: > Hi! > > The testcase in the PR (which unfortunately because of my lack of experience > with SVE I'm not able to turn into a runtime testcase that verifies it) > is miscompiled on aarch64-linux in the regname pass, because while the > function takes arguments in the p0 register, FUNCTION_ARG_REGNO_P doesn't > reflect that, so DF doesn't know the register is used in register passing. > It sees 2 chains with p1 register and wants to replace the second one and > as DF doesn't know p0 is live at the start of the function, it will happily > use p0 register even when it is used in subsequent instructions. > > The following patch fixes that. FUNCTION_ARG_REGNO_P returns non-zero > for p0-p3 (unconditionally, seems for the floating/vector registers it > doesn't conditionalize them on TARGET_FLOAT either, but if you want, > I can conditionalize p0-p3 on TARGET_SVE), I agree doing it unconditionally makes sense. > similarly targetm.calls.function_value_regno_p returns true for p0 > register if TARGET_SVE (again for consistency, that function > conditionalizes the float/vector on TARGET_FLOAT); looking at the > AAPCS, seems p1-p3 could be also used to return values in case of > homogenous aggregates, but it doesn't seem GCC supports putting > svbool_t as a member of a structure. One testcase that uses p1-p3 for return values is: typedef __SVBool_t fixed_bool_t __attribute__((arm_sve_vector_bits(256))); struct s { fixed_bool_t x[4]; }; struct s f (struct s *ptr) { return *ptr; } compiled with -msve-vector-bits=256. > Now, that change broke bootstrap in libobjc and some > __builtin_apply_args/__builtin_apply/__builtin_return tests. The > aarch64_get_reg_raw_mode hook already documents that SVE scalable arg/return > passing is fundamentally incompatible with those builtins, but unlike > the floating/vector regs where it forces a fixed vector mode, I think > there is no fixed mode which could be used for p0-p3. So, I have tweaked > the generic code so that it uses VOIDmode return from that hook to signal > that a register shouldn't be touched by > __builtin_apply_args/__builtin_apply/__builtin_return > despite being mentioned in FUNCTION_ARG_REGNO_P or > targetm.calls.function_value_regno_p. > > Bootstrapped/regtested on aarch64-linux, ok for trunk? > > Could somebody please turn the testcase from the PR into something that > can be included into the testsuite? This seems to work: /* { dg-do run { target aarch64_sve_hw } } */ /* { dg-options "-O2 -funroll-loops" } */ #include #include svfloat32_t __attribute__((noipa)) func_demo(svfloat32_t x, svfloat32_t y, svbool_t pg) { svfloat32_t z = svadd_f32_x(pg, x, svdup_f32(0x1.800fep19f)); svbool_t cmp = svcmplt_f32(pg, z, svdup_f32(0.0f)); svfloat32_t zM1 = svsub_f32_x(pg, z, svdup_n_f32(1.0f)); z = svsel_f32(cmp, zM1, z); svfloat32_t sum = svadd_f32_x(pg, z, y); return sum; } int main() { float arr[2]; svfloat32_t x = svinsr_n_f32(svdup_f32(-0x1.880fep19f), 2.0f); svfloat32_t res = func_demo(x, svdup_f32(0.5f), svptrue_b32()); svst1_f32(svptrue_pat_b32(SV_VL2), arr, res); if (arr[0] != 786561.50 || arr[1] != -16384.50) __builtin_abort (); return 0; } > 2023-03-24 Jakub Jelinek > > PR target/109254 > * builtins.cc (apply_args_size): If targetm.calls.get_raw_arg_mode > returns VOIDmode, handle it like if the register isn't used for > passing arguments at all. > (apply_result_size): If targetm.calls.get_raw_result_mode returns > VOIDmode, handle it like if the register isn't used for returning > results at all. > * target.def (get_raw_result_mode, get_raw_arg_mode): Document what it > means to return VOIDmode. > * doc/tm.texi: Regenerated. > * config/aarch64/aarch64.cc (aarch64_function_value_regno_p): Return > TARGET_SVE for P0_REGNUM. > (aarch64_function_arg_regno_p): Also return true for p0-p3. > (aarch64_get_reg_raw_mode): Return VOIDmode for PR_REGNUM_P regs. > > @@ -7995,6 +7999,10 @@ aarch64_get_reg_raw_mode (int regno) > for SVE types are fundamentally incompatible with the > __builtin_return/__builtin_apply interface. */ > return as_a (V16QImode); > + if (PR_REGNUM_P (regno)) > +/* For SVE PR regs, indicate that they should be ignored for > + __builtin_apply/__builtin_return. */ > +return as_a (VOIDmode); Reached me with trailing whitespace, but maybe that's a mailerism. It's an interesting philosophical question whether VOIDmode is a fixed-size mode, given that VOIDmode doesn't really have a size. :-) But I'm sure in practice we already treat it like that elsewhere. It's just the first time I remember it being so explicit. OK for the aarch64 bits with aarch64_function_value_regno_p changed to use the same range as aarch64_function_arg_regno_p. Thanks, Richard
Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]
On Fri, 31 Mar 2023, Patrick Palka wrote: > On Thu, 30 Mar 2023, Jason Merrill wrote: > > > On 3/30/23 14:53, Patrick Palka wrote: > > > On Wed, 29 Mar 2023, Jason Merrill wrote: > > > > > > > On 3/27/23 09:30, Patrick Palka wrote: > > > > > On Thu, 23 Mar 2023, Patrick Palka wrote: > > > > > > > > > > > r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL > > > > > > template arguments not always getting marked as odr-used by > > > > > > redundantly > > > > > > calling mark_used on the substituted ADDR_EXPR callee of a > > > > > > CALL_EXPR. > > > > > > This is just a narrow workaround however, since using a > > > > > > FUNCTION_DECL > > > > > > as > > > > > > a template argument alone should constitutes an odr-use; we > > > > > > shouldn't > > > > > > need to subsequently e.g. call the function or take its address. > > > > > > > > Agreed. But why didn't we already wrap it in an ADDR_EXPR? Even for > > > > reference tparms convert_nontype_argument should do that. > > > > > > Indeed we do, the commit message was just rather sloppy/inaccurate... > > > I'll try to correct it. > > > > > > > > > > > > > This patch fixes this in a more general way at template > > > > > > specialization > > > > > > time by walking the template arguments of the specialization and > > > > > > calling > > > > > > mark_used on all entities used within. As before, the call to > > > > > > mark_used > > > > > > as it worst a no-op, but it compensates for the situation where we > > > > > > end > > > > > > up > > > > > > forming a specialization from a template context in which mark_used > > > > > > is > > > > > > inhibited. Another approach would be to call mark_used whenever we > > > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many more > > > > > > redundant calls to mark_used compared to this approach. > > > > > > > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK > > > > > > for > > > > > > trunk? > > > > > > > > > > > > PR c++/53164 > > > > > > PR c++/105848 > > > > > > > > > > > > gcc/cp/ChangeLog: > > > > > > > > > > > > * pt.cc (instantiate_class_template): Call > > > > > > mark_template_arguments_used. > > > > > > (tsubst_copy_and_build) : Revert r13-995 > > > > > > change. > > > > > > (mark_template_arguments_used): Define. > > > > > > (instantiate_template): Call mark_template_arguments_used. > > > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > > > * g++.dg/template/fn-ptr3a.C: New test. > > > > > > * g++.dg/template/fn-ptr4.C: New test. > > > > > > --- > > > > > >gcc/cp/pt.cc | 51 > > > > > > > > > > > >gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 > > > > > >gcc/testsuite/g++.dg/template/fn-ptr4.C | 14 +++ > > > > > >3 files changed, 74 insertions(+), 16 deletions(-) > > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C > > > > > >create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C > > > > > > > > > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc > > > > > > index 7e4a8de0c8b..9b3cc1c 100644 > > > > > > --- a/gcc/cp/pt.cc > > > > > > +++ b/gcc/cp/pt.cc > > > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree); > > > > > >static tree enclosing_instantiation_of (tree tctx); > > > > > >static void instantiate_body (tree pattern, tree args, tree d, > > > > > > bool > > > > > > nested); > > > > > >static tree maybe_dependent_member_ref (tree, tree, > > > > > > tsubst_flags_t, > > > > > > tree); > > > > > > +static void mark_template_arguments_used (tree); > > > > > > /* Make the current scope suitable for access checking when we > > > > > > are > > > > > > processing T. T can be FUNCTION_DECL for instantiated > > > > > > function > > > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type) > > > > > > cp_unevaluated_operand = 0; > > > > > > c_inhibit_evaluation_warnings = 0; > > > > > >} > > > > > > + > > > > > > + mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args)); > > > > > > + > > > > > > /* Use #pragma pack from the template context. */ > > > > > > saved_maximum_field_alignment = maximum_field_alignment; > > > > > > maximum_field_alignment = TYPE_PRECISION (pattern); > > > > > > @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t, > > > > > > } > > > > > > /* Remember that there was a reference to this entity. > > > > > > */ > > > > > > - if (function != NULL_TREE) > > > > > > - { > > > > > > - tree inner = function; > > > > > > - if (TREE_CODE (inner) == ADDR_EXPR > > > > > > - && TREE_CODE (TREE_OPERAND (inner, 0)) == > > > > > > FUNCTION_DECL) > > > > > > - /* We should already have called mark_used when taking > > > > > > the > > > > > > -address of this function, but do so again anyway
Regression with "recomputation and PR 109154"
> Attached. I also removed the bogus warning in Walloc-13.c that no longer > happens > Add recursive GORI recompuations with a depth limit. > > PR tree-optimization/109154 > gcc/ > * gimple-range-gori.cc (gori_compute::may_recompute_p): Add depth > limit. > * gimple-range-gori.h (may_recompute_p): Add depth param. > * params.opt (ranger-recompute-depth): New param. > > gcc/testsuite/ > * gcc.dg/Walloca-13.c: Remove bogus warning that is now fixed. This patch, commit r13-6945-g429a7a88438cc8, caused a test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c scan-tree-dump-not recip "reciptmp"' for cris-elf that I logged at PR109363. Perhaps it's somewhat of a similar nature as Walloca-13.c but then again it's not an bogus warning that's gone. Is it expected and I should just blankly xfail it or is it worth more attention? I'm a bit surprised that it hasn't shown up for other targets though. brgds, H-P
Re: Regression with "recomputation and PR 109154"
On 3/31/23 10:12, Hans-Peter Nilsson via Gcc-patches wrote: Attached. I also removed the bogus warning in Walloc-13.c that no longer happens Add recursive GORI recompuations with a depth limit. PR tree-optimization/109154 gcc/ * gimple-range-gori.cc (gori_compute::may_recompute_p): Add depth limit. * gimple-range-gori.h (may_recompute_p): Add depth param. * params.opt (ranger-recompute-depth): New param. gcc/testsuite/ * gcc.dg/Walloca-13.c: Remove bogus warning that is now fixed. This patch, commit r13-6945-g429a7a88438cc8, caused a test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c scan-tree-dump-not recip "reciptmp"' for cris-elf that I logged at PR109363. Perhaps it's somewhat of a similar nature as Walloca-13.c but then again it's not an bogus warning that's gone. Is it expected and I should just blankly xfail it or is it worth more attention? I'm a bit surprised that it hasn't shown up for other targets though. I already let Andrew know -- it's affecting a ton of targets. It may be the case that we just need to adjust the param for that test. jeff
Re: [PATCH] rtl-optimization/109237 - quadraticness in delete_trivially_dead_insns
On 3/22/23 04:01, Richard Biener via Gcc-patches wrote: The following addresses quadraticness in processing debug insns in delete_trivially_dead_insns and insn_live_p by using TREE_VISITED on the INSN_VAR_LOCATION_DECL to indicate a later debug bind with the same decl and no intervening real insn or debug marker. That gets rid of the NEXT_INSN walk in insn_live_p in favor of first clearing TREE_VISITED in the first loop over insn and the book-keeping of decls we set the bit since we need to clear them when visiting a real or debug marker insn. That improves the time spent in delete_trivially_dead_insns from 10.6s to 2.2s for the testcase. Bootstrapped and tested on x86_64-unknown-linux-gnu. OK? Thanks, Richard. PR rtl-optimization/109237 * cse.cc (insn_live_p): Remove NEXT_INSN walk, instead check TREE_VISITED on INSN_VAR_LOCATION_DECL. (delete_trivially_dead_insns): Maintain TREE_VISITED on active debug bind INSN_VAR_LOCATION_DECL. OK jeff
Re: Should -ffp-contract=off the default on GCC?
> On Mar 24, 2023, at 3:42 PM, Andrew Pinski wrote: > > On Fri, Mar 24, 2023 at 1:14 AM Fangrui Song via Gcc-patches > wrote: >> >> On Wed, Mar 22, 2023 at 8:52 AM Qing Zhao via Gcc-patches >> wrote: >>> >>> >>> On Mar 22, 2023, at 9:57 AM, Richard Biener via Gcc-patches wrote: On Wed, Mar 22, 2023 at 1:26 PM Alexander Monakov wrote: > > > On Wed, 22 Mar 2023, Richard Biener wrote: > >> I think it's even less realistic to expect users to know the details of >> floating-point math. So I doubt any such sentence will be helpful >> besides spreading some FUD? > > I think it's closer to "fundamental notions" rather than "details". For > users who bother to read the GCC manual there's a decent chance it > wouldn't > be for naught. > > For documentation, I was thinking > > Together with -fexcess-precision=standard, -ffp-contract=off > is necessary to ensure that rounding of intermediate results to precision > implied by the source code and the FLT_EVAL_METHOD macro is not > omitted by the compiler. that sounds good to me >>> >>> Shall we add such clarification to our Gcc13 doc? That should be helpful if >>> we keep the currently default. >>> >>> Qing > Alexander >>> >> >> While updating the documentation, consider adding information that >> #pragma STDC FP_CONTRACT OFF is ignored with -ffp-contract=fast. >> >> This surprising behavior motivated Clang to add >> -Xclang=-ffp-contract=fast-honor-pragmas >> (https://discourse.llvm.org/t/fp-contract-fast-and-pragmas/58529). > > `#pragma STDC FP_CONTRACT OFF` is not even implemented yet in GCC. > Rather we should document that :). Do we have any plan to implement this pragma? Also, do we have any plan to implement -ffp-contract=on? I am very curious on why -ffp-contract=on has not been implemented for so long time? There are some documentation on Floating Point implementation of GCC on the pragma STDC FP_CONTRACT https://gcc.gnu.org/onlinedocs/gcc-12.2.0/gcc/Floating-point-implementation.html#Floating-point-implementation " • Whether and how floating expressions are contracted when not disallowed by the FP_CONTRACT pragma (C99 and C11 6.5). Expressions are currently only contracted if -ffp-contract=fast, -funsafe-math-optimizations or -ffast-math are used. This is subject to change. • The default state for the FP_CONTRACT pragma (C99 and C11 7.12.2). This pragma is not implemented. Expressions are currently only contracted if -ffp-contract=fast, -funsafe-math-optimizations or -ffast-math are used. This is subject to change. “ So, looks like that we have documented this. But When the user read the documentation for —ffp-contract option, there is no such information. We might add a link in the documentation of -ffp-contract option to here to make it clear. Qing > It does not matter what clang does here really since GCC does not even > implement the pragma. > > Thanks, > Andrew Pinski > > >> >> >> >> -- >> 宋方睿
Re: Regression with "recomputation and PR 109154"
On 3/31/23 12:20, Jeff Law wrote: On 3/31/23 10:12, Hans-Peter Nilsson via Gcc-patches wrote: Attached. I also removed the bogus warning in Walloc-13.c that no longer happens Add recursive GORI recompuations with a depth limit. PR tree-optimization/109154 gcc/ * gimple-range-gori.cc (gori_compute::may_recompute_p): Add depth limit. * gimple-range-gori.h (may_recompute_p): Add depth param. * params.opt (ranger-recompute-depth): New param. gcc/testsuite/ * gcc.dg/Walloca-13.c: Remove bogus warning that is now fixed. This patch, commit r13-6945-g429a7a88438cc8, caused a test-suite regression; FAIL for 'gcc.dg/tree-ssa/pr23109.c scan-tree-dump-not recip "reciptmp"' for cris-elf that I logged at PR109363. Perhaps it's somewhat of a similar nature as Walloca-13.c but then again it's not an bogus warning that's gone. Is it expected and I should just blankly xfail it or is it worth more attention? I'm a bit surprised that it hasn't shown up for other targets though. I already let Andrew know -- it's affecting a ton of targets. It may be the case that we just need to adjust the param for that test. So the test fails in the testuite because a recip has been put into the code, but does it execute OK? or maybe thats not testable? with the recompuation, we make some signficant changes: e.0_1 = e; d_12 = e.0_1 * 2.0e+0; E_13 = 1.0e+0 - d_12; goto ; [INV] : if (e.0_1 > 5.00010408340855860842566471546888351440429688e-3) goto ; [INV] else goto ; [INV] : if (E_13 > 1.0e+0) goto ; [INV] else goto ; [INV] In EVRP we now determine that the branch in BB4 is never true and we reduce the testcase significantly We end up now with: [local count: 357878152]: e.0_1 = e; if (e.0_1 > 5.00010408340855860842566471546888351440429688e-3) goto ; [50.00%] else goto ; [50.00%] [local count: 178939076]: d_9 = e.0_1 * 2.0e+0; E_10 = 1.0e+0 - d_9; _16 = E_10 - 1.0e+0; reciptmp.5_21 = 1.0e+0 / d_9; And with d_9 : [frange] double [1.0001942890293094023945741355419158935546875e-2 (0x0.a3d70a3d70a3ep-6), +Inf] I guess it figures the recip is safe to put in, there will not be a divide by zero. I guess the test is no longer testing what it should be? And yes, we could set he param back to 1 for the test... add --param=ranger-recompute-depth=1 makes the "issue" go away :-) for now. Andrew
Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]
On 3/31/23 11:55, Patrick Palka wrote: On Fri, 31 Mar 2023, Patrick Palka wrote: On Thu, 30 Mar 2023, Jason Merrill wrote: On 3/30/23 14:53, Patrick Palka wrote: On Wed, 29 Mar 2023, Jason Merrill wrote: On 3/27/23 09:30, Patrick Palka wrote: On Thu, 23 Mar 2023, Patrick Palka wrote: r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL template arguments not always getting marked as odr-used by redundantly calling mark_used on the substituted ADDR_EXPR callee of a CALL_EXPR. This is just a narrow workaround however, since using a FUNCTION_DECL as a template argument alone should constitutes an odr-use; we shouldn't need to subsequently e.g. call the function or take its address. Agreed. But why didn't we already wrap it in an ADDR_EXPR? Even for reference tparms convert_nontype_argument should do that. Indeed we do, the commit message was just rather sloppy/inaccurate... I'll try to correct it. This patch fixes this in a more general way at template specialization time by walking the template arguments of the specialization and calling mark_used on all entities used within. As before, the call to mark_used as it worst a no-op, but it compensates for the situation where we end up forming a specialization from a template context in which mark_used is inhibited. Another approach would be to call mark_used whenever we substitute a TEMPLATE_PARM_INDEX, but that would result in many more redundant calls to mark_used compared to this approach. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk? PR c++/53164 PR c++/105848 gcc/cp/ChangeLog: * pt.cc (instantiate_class_template): Call mark_template_arguments_used. (tsubst_copy_and_build) : Revert r13-995 change. (mark_template_arguments_used): Define. (instantiate_template): Call mark_template_arguments_used. gcc/testsuite/ChangeLog: * g++.dg/template/fn-ptr3a.C: New test. * g++.dg/template/fn-ptr4.C: New test. --- gcc/cp/pt.cc | 51 gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 gcc/testsuite/g++.dg/template/fn-ptr4.C | 14 +++ 3 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 7e4a8de0c8b..9b3cc1c 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -220,6 +220,7 @@ static tree make_argument_pack (tree); static tree enclosing_instantiation_of (tree tctx); static void instantiate_body (tree pattern, tree args, tree d, bool nested); static tree maybe_dependent_member_ref (tree, tree, tsubst_flags_t, tree); +static void mark_template_arguments_used (tree); /* Make the current scope suitable for access checking when we are processing T. T can be FUNCTION_DECL for instantiated function @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type) cp_unevaluated_operand = 0; c_inhibit_evaluation_warnings = 0; } + + mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args)); + /* Use #pragma pack from the template context. */ saved_maximum_field_alignment = maximum_field_alignment; maximum_field_alignment = TYPE_PRECISION (pattern); @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t, } /* Remember that there was a reference to this entity. */ - if (function != NULL_TREE) - { - tree inner = function; - if (TREE_CODE (inner) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (inner, 0)) == FUNCTION_DECL) - /* We should already have called mark_used when taking the -address of this function, but do so again anyway to make -sure it's odr-used: at worst this is a no-op, but if we -obtained this FUNCTION_DECL as part of ahead-of-time overload -resolution then that call to mark_used wouldn't have marked it -odr-used yet (53164). */ - inner = TREE_OPERAND (inner, 0); - if (DECL_P (inner) - && !mark_used (inner, complain) && !(complain & tf_error)) - RETURN (error_mark_node); - } + if (function != NULL_TREE + && DECL_P (function) + && !mark_used (function, complain) && !(complain & tf_error)) + RETURN (error_mark_node); if (!maybe_fold_fn_template_args (function, complain)) return error_mark_node; @@ -21883,6 +21875,31 @@ check_instantiated_args (tree tmpl, tree args, tsubst_flags_t complain) return result; } +/* Call mark_used on each entity within the template arguments ARGS of some + template specialization, to ensure that each such entity is considered + odr-used regardless of whether the specialization was first formed in a +
Re: Regression with "recomputation and PR 109154"
On Fri, Mar 31, 2023 at 01:02:18PM -0400, Andrew MacLeod wrote: > I guess it figures the recip is safe to put in, there will not be a divide > by zero. I think the problem was that 1/d was hoisted before the loop; as long as it is guarded with the d > 0.01 or e > 0.005 condition, it is fine. The test probably should have been a runtime test, doing the main stuff in some other noipa function and doing fetestexcept after it or something similar. > I guess the test is no longer testing what it should be? > > And yes, we could set he param back to 1 for the test... > add --param=ranger-recompute-depth=1 makes the "issue" go away :-) for > now. That looks reasonable unless we rewrite the test into runtime one (but we'd then need to double check that it was really miscompiled and would fail back then in 4.0). Jakub
Re: [PATCH v4 2/2] gcc: Drop obsolete INCLUDE_PTHREAD_H (ping)
Kito Cheng writes: > It's not the RISC-V part, so this requires a global maintainer there I think? > Someone able to look at the system.h bit? It should be trivial, there's no uses left and it was added purely for a bug like this in the past (see commit message). > On Tue, Mar 14, 2023 at 8:28 AM juzhe.zh...@rivai.ai > wrote: >> >> Thank you for fixing this. I am not familiar with this. >> This generator code (genrvv-type-indexer.cc) is written by @kito. >> >> Kito ? Can you take a look at this? >> >> >> juzhe.zh...@rivai.ai >> >> From: Sam James >> Date: 2023-03-14 08:23 >> To: gcc-patches >> CC: Kito Cheng; Palmer Dabbelt; Andrew Waterman; Jim Wilson; Ju-Zhe Zhong; >> Sam James >> Subject: [PATCH v4 2/2] gcc: Drop obsolete INCLUDE_PTHREAD_H >> This is no longer used since 0a62889c7a155f8ed971860d68870dc9c46bb004, so >> let's clean it up. >> >> gcc/ChangeLog: >> * system.h: Drop unused INCLUDE_PTHREAD_H. >> >> Signed-off-by: Sam James >> --- >> gcc/system.h | 4 >> 1 file changed, 4 deletions(-) >> >> diff --git a/gcc/system.h b/gcc/system.h >> index cf45db3f97e..3fdad7abf1e 100644 >> --- a/gcc/system.h >> +++ b/gcc/system.h >> @@ -761,10 +761,6 @@ extern int vsnprintf (char *, size_t, const char *, >> va_list); >> #endif >> #endif >> -#ifdef INCLUDE_PTHREAD_H >> -#include >> -#endif >> - >> #ifdef INCLUDE_ISL >> #ifdef HAVE_isl >> #include >> -- >> 2.40.0 >> >> signature.asc Description: PGP signature
[pushed] libiberty: Remove a reference to the Glibc manual
With this we have a sole 404 on all of gcc.gnu.org (at least with documentation from trunk included). :-) Gerald --- longjmp is not specific to Glibc, and GCC supports lots of systems that do not use Glibc. Plus this link has been broken in the web version for ages without a good way to fix. libiberty/ChangeLog: * obstacks.texi (Preparing for Obstacks): Remove a (broken) reference to the Glibc manual. --- libiberty/obstacks.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libiberty/obstacks.texi b/libiberty/obstacks.texi index b2d2403210b..37d26c90f1b 100644 --- a/libiberty/obstacks.texi +++ b/libiberty/obstacks.texi @@ -172,8 +172,8 @@ The value of this variable is a pointer to a function that @code{obstack} uses when @code{obstack_chunk_alloc} fails to allocate memory. The default action is to print a message and abort. You should supply a function that either calls @code{exit} -(@pxref{Program Termination, , , libc, The GNU C Library Reference Manual}) or @code{longjmp} (@pxref{Non-Local -Exits, , , libc, The GNU C Library Reference Manual}) and doesn't return. +(@pxref{Program Termination, , , libc, The GNU C Library Reference Manual}) +or @code{longjmp} and doesn't return. @smallexample void my_obstack_alloc_failed (void) -- 2.39.2
Re: 'g++.dg/modules/modules.exp': don't leak local 'unsupported' proc [PR108899]
On 3/30/23 09:51, Alexandre Oliva wrote: On Mar 30, 2023, Alexandre Oliva wrote: If we're dropping the renaming, I suppose we could also revert Jakub's change. I suppose this patch will take care of it, pending testing... Regstrapped on x86_64-linux-gnu and also tested on arm-vx7r2 (with gcc-12), where I used to get fails after an unsupported modules.exp test, but there are no curly braces in the log files after the patch. Ok to install? [PR108899] testsuite: fix proc unsupported overriding in modules.exp The [PR] tag should go at the end of the subject line, not the start. OK with that change. The overrider of proc unsupported in modules.exp had two problems reported by Thomas Schwinge, even after Jakub Jelínek's fix: - it remained in effect while running other dejagnu testsets - it didn't quote correctly the argument list passed to it, which caused test names to be surrounded by curly braces, as in: UNSUPPORTED: {...} This patch fixes both issues, obsoleting and reverting Jakub's change, by dropping the overrider and renaming the saved proc back, and by using uplevel's argument list splicing. for gcc/testsuite/ChangeLog PR testsuite/108899 * g++.dg/modules/modules.exp (unsupported): Drop renaming. Fix quoting. --- gcc/testsuite/g++.dg/modules/modules.exp | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/gcc/testsuite/g++.dg/modules/modules.exp b/gcc/testsuite/g++.dg/modules/modules.exp index 80aa392bc7f3b..dc302d3d0af48 100644 --- a/gcc/testsuite/g++.dg/modules/modules.exp +++ b/gcc/testsuite/g++.dg/modules/modules.exp @@ -319,15 +319,11 @@ cleanup_module_files [find $DEFAULT_REPO *.gcm] # so that, after an unsupported result in dg-test, we can skip rather # than fail subsequent related tests. set module_do {"compile" "P"} -if { [info procs unsupported] != [list] \ - && [info procs saved-unsupported] == [list] } { -rename unsupported saved-unsupported - -proc unsupported { args } { - global module_do - lset module_do 1 "N" - return [saved-unsupported $args] -} +rename unsupported modules-saved-unsupported +proc unsupported { args } { +global module_do +lset module_do 1 "N" +return [uplevel 1 modules-saved-unsupported $args] } # not grouped tests, sadly tcl doesn't have negated glob @@ -412,4 +408,10 @@ foreach src [lsort [find $srcdir/$subdir {*_a.[CHX}]] { } } +# Restore the original unsupported proc, lest it will affect +# subsequent test runs, or even fail renaming if we run modules.exp +# for multiple targets/multilibs/options. +rename unsupported {} +rename modules-saved-unsupported unsupported + dg-finish
Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]
On Fri, 31 Mar 2023, Jason Merrill wrote: > On 3/31/23 11:55, Patrick Palka wrote: > > On Fri, 31 Mar 2023, Patrick Palka wrote: > > > > > On Thu, 30 Mar 2023, Jason Merrill wrote: > > > > > > > On 3/30/23 14:53, Patrick Palka wrote: > > > > > On Wed, 29 Mar 2023, Jason Merrill wrote: > > > > > > > > > > > On 3/27/23 09:30, Patrick Palka wrote: > > > > > > > On Thu, 23 Mar 2023, Patrick Palka wrote: > > > > > > > > > > > > > > > r13-995-g733a792a2b2e16 worked around the problem of > > > > > > > > FUNCTION_DECL > > > > > > > > template arguments not always getting marked as odr-used by > > > > > > > > redundantly > > > > > > > > calling mark_used on the substituted ADDR_EXPR callee of a > > > > > > > > CALL_EXPR. > > > > > > > > This is just a narrow workaround however, since using a > > > > > > > > FUNCTION_DECL > > > > > > > > as > > > > > > > > a template argument alone should constitutes an odr-use; we > > > > > > > > shouldn't > > > > > > > > need to subsequently e.g. call the function or take its address. > > > > > > > > > > > > Agreed. But why didn't we already wrap it in an ADDR_EXPR? Even > > > > > > for > > > > > > reference tparms convert_nontype_argument should do that. > > > > > > > > > > Indeed we do, the commit message was just rather sloppy/inaccurate... > > > > > I'll try to correct it. > > > > > > > > > > > > > > > > > > > This patch fixes this in a more general way at template > > > > > > > > specialization > > > > > > > > time by walking the template arguments of the specialization and > > > > > > > > calling > > > > > > > > mark_used on all entities used within. As before, the call to > > > > > > > > mark_used > > > > > > > > as it worst a no-op, but it compensates for the situation where > > > > > > > > we end > > > > > > > > up > > > > > > > > forming a specialization from a template context in which > > > > > > > > mark_used is > > > > > > > > inhibited. Another approach would be to call mark_used whenever > > > > > > > > we > > > > > > > > substitute a TEMPLATE_PARM_INDEX, but that would result in many > > > > > > > > more > > > > > > > > redundant calls to mark_used compared to this approach. > > > > > > > > > > > > > > > > Bootstrapped and regtested on x86_64-pc-linux-gnu, does this > > > > > > > > look OK > > > > > > > > for > > > > > > > > trunk? > > > > > > > > > > > > > > > > PR c++/53164 > > > > > > > > PR c++/105848 > > > > > > > > > > > > > > > > gcc/cp/ChangeLog: > > > > > > > > > > > > > > > > * pt.cc (instantiate_class_template): Call > > > > > > > > mark_template_arguments_used. > > > > > > > > (tsubst_copy_and_build) : Revert r13-995 > > > > > > > > change. > > > > > > > > (mark_template_arguments_used): Define. > > > > > > > > (instantiate_template): Call > > > > > > > > mark_template_arguments_used. > > > > > > > > > > > > > > > > gcc/testsuite/ChangeLog: > > > > > > > > > > > > > > > > * g++.dg/template/fn-ptr3a.C: New test. > > > > > > > > * g++.dg/template/fn-ptr4.C: New test. > > > > > > > > --- > > > > > > > > gcc/cp/pt.cc | 51 > > > > > > > > > > > > > > > > gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 > > > > > > > > gcc/testsuite/g++.dg/template/fn-ptr4.C | 14 +++ > > > > > > > > 3 files changed, 74 insertions(+), 16 deletions(-) > > > > > > > > create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C > > > > > > > > create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C > > > > > > > > > > > > > > > > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc > > > > > > > > index 7e4a8de0c8b..9b3cc1c 100644 > > > > > > > > --- a/gcc/cp/pt.cc > > > > > > > > +++ b/gcc/cp/pt.cc > > > > > > > > @@ -220,6 +220,7 @@ static tree make_argument_pack (tree); > > > > > > > > static tree enclosing_instantiation_of (tree tctx); > > > > > > > > static void instantiate_body (tree pattern, tree args, tree > > > > > > > > d, bool > > > > > > > > nested); > > > > > > > > static tree maybe_dependent_member_ref (tree, tree, > > > > > > > > tsubst_flags_t, > > > > > > > > tree); > > > > > > > > +static void mark_template_arguments_used (tree); > > > > > > > > /* Make the current scope suitable for access checking > > > > > > > > when we > > > > > > > > are > > > > > > > >processing T. T can be FUNCTION_DECL for instantiated > > > > > > > > function > > > > > > > > @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type) > > > > > > > > cp_unevaluated_operand = 0; > > > > > > > > c_inhibit_evaluation_warnings = 0; > > > > > > > > } > > > > > > > > + > > > > > > > > + mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS > > > > > > > > (args)); > > > > > > > > + > > > > > > > > /* Use #pragma pack from the template context. */ > > > > > > > > saved_maximum_field_alignment = maximum_field_alignment; > > > > > > > > maxim
Re: Regression with "recomputation and PR 109154"
should we do something like this to tweak the testcases? or does someone have something else in mind? Richi opened a PR for the STL failure (109350) Andrew On 3/31/23 13:37, Jakub Jelinek wrote: On Fri, Mar 31, 2023 at 01:02:18PM -0400, Andrew MacLeod wrote: I guess it figures the recip is safe to put in, there will not be a divide by zero. I think the problem was that 1/d was hoisted before the loop; as long as it is guarded with the d > 0.01 or e > 0.005 condition, it is fine. The test probably should have been a runtime test, doing the main stuff in some other noipa function and doing fetestexcept after it or something similar. I guess the test is no longer testing what it should be? And yes, we could set he param back to 1 for the test... add --param=ranger-recompute-depth=1 makes the "issue" go away :-) for now. That looks reasonable unless we rewrite the test into runtime one (but we'd then need to double check that it was really miscompiled and would fail back then in 4.0). Jakub commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8 Author: Andrew MacLeod Date: Fri Mar 31 15:42:43 2023 -0400 Adjust testcases to not produce errors.. tree-optimization/109363 gcc/testsuite/ * g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message. * gcc.dg/tree-ssa/pr23109.c: Disable better recomputations. diff --git a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C index 35fb59e0232..faad5bed074 100644 --- a/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C +++ b/gcc/testsuite/g++.dg/warn/Wstringop-overflow-4.C @@ -141,7 +141,7 @@ void test_strcpy_new_int16_t (size_t n, const size_t vals[]) int r_imin_imax = SR (INT_MIN, INT_MAX); T (S (1), new int16_t[r_imin_imax]); - T (S (2), new int16_t[r_imin_imax + 1]); // { dg-bogus "into a region of size" "pr106120" { xfail { ilp32 && c++98_only } } } + T (S (2), new int16_t[r_imin_imax + 1]); // { dg-bogus "into a region of size" "pr106120" { xfail { c++98_only } } } T (S (9), new int16_t[r_imin_imax * 2 + 1]); int r_0_imax = SR (0, INT_MAX); diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c b/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c index 7cdf1d05ee7..059f658ea20 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr23109.c @@ -1,6 +1,7 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -funsafe-math-optimizations -ftrapping-math -fdump-tree-recip -fdump-tree-lim2" } */ +/* { dg-options "-O2 -funsafe-math-optimizations -ftrapping-math -fdump-tree-recip -fdump-tree-lim2 --param=ranger-recompute-depth=1" } */ /* { dg-warning "'-fassociative-math' disabled" "" { target *-*-* } 0 } */ +/* ranger-recompute-depth prevents the optimizers from being too smart. */ double F[2] = { 0., 0. }, e = 0.;
Re: Regression with "recomputation and PR 109154"
On 3/31/23 13:48, Andrew MacLeod wrote: should we do something like this to tweak the testcases? or does someone have something else in mind? Go ahead and tweak the testcase. Unless you want to revamp it per Jakub's suggestions. jeff
Re: Regression with "recomputation and PR 109154"
On 3/31/23 15:59, Jeff Law wrote: On 3/31/23 13:48, Andrew MacLeod wrote: should we do something like this to tweak the testcases? or does someone have something else in mind? Go ahead and tweak the testcase. Unless you want to revamp it per Jakub's suggestions. not particularly :-) pushed. hopefully this smooths things a little... probably causes them to fail elsewhere now. ha! Im out for most of the rest of the weekend.. any other tweaks someone else should do... I will check in once in a while. Andrew
Re: Regression with "recomputation and PR 109154"
On 3/31/23 14:16, Andrew MacLeod wrote: On 3/31/23 15:59, Jeff Law wrote: On 3/31/23 13:48, Andrew MacLeod wrote: should we do something like this to tweak the testcases? or does someone have something else in mind? Go ahead and tweak the testcase. Unless you want to revamp it per Jakub's suggestions. not particularly :-) pushed. hopefully this smooths things a little... probably causes them to fail elsewhere now. ha! Im out for most of the rest of the weekend.. any other tweaks someone else should do... I will check in once in a while. If that causes the test to fail elsewhere, we'll know by Monday. jeff
Re: [PATCH] testsuite, analyzer: Fix up pipe-glibc.c testcase [PR107396]
On Thu, 2023-03-30 at 10:05 +0200, Jakub Jelinek wrote: > Hi! > > The gcc.dg/analyzer/pipe-glibc.c test FAILs when using recent glibc > headers > and succeeds with older headers. > The important change is that > https://sourceware.org/git/?p=glibc.git;a=commit;h=c1760eaf3b575ad174fd88b252fd16bd525fa818 > in 2021 added __attribute__ ((__malloc__ (fclose, 1))) attribute to > fdopen, > so in write_to_pipe there is an excess warning: > .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c: In function > 'write_to_pipe': > .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:28:3: warning: use of > possibly-NULL 'stream' where non-null expected [CWE-690] [-Wanalyzer- > possible-null-argument] > .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:27:12: note: (1) this > call could return NULL > .../gcc/testsuite/gcc.dg/analyzer/pipe-glibc.c:28:3: note: (2) > argument 4 ('stream') from (1) could be NULL where non-null expected > : note: argument 4 of '__builtin_fwrite' must be non-null > Strangely, nothing is reported on the read_from_pipe function, seems > fwrite/fprintf/fputc etc. are builtins in GCC and we mark the FILE * > arguments as nonnull there on the builtin declarations, while > fgetc/fread > etc. aren't builtins and glibc doesn't mark any of those using > nonnull. > Shall we change that on the glibc side? > > Anyway, because this differs based on glibc version and I think the > above warning is not the primary intention of the test, I think it is > best to tweak it so that this warning isn't reported. [...snip...] Thanks: the patch LGTM. Dave
Re: [PATCH] Implement std::advance for istreambuf_iterator using pubseekoff
On Tue, 15 Oct 2019 at 21:20, François Dumont wrote: > > Here is an update to set _M_sbuf to null if the user advance too much. > > Note that in this case the streambuf remains un-modified which is > different from the current implementation. I think it is another > enhancement. > > I also change the Debug assertion message for something more dedicated > to std::advance algo. > > François > > On 10/14/19 10:12 PM, François Dumont wrote: > > The same way I proposed to review std::copy overload for > > istreambuf_iterator we can implement std::advance using pubseekoff. > > > > It is both a cleaner implementation and avoids yet another friend > > declaration. Looks like I never sent my review of this one, it's been sitting in my draft mails for years, sorry. It looks like this will fail if the streambuf doesn't support seeking. The default behaviour for seekoff is to return -1, in which case you'll get -1 for both calls to pubseekoff, and new_pos - cur_pos will be zero, which is not equal to n, so you set the istreambuf_iterator to end-of-stream. That seems wrong, we could still advance using the old code (or just call ++ in a loop!) > > > > * include/std/streambuf > > (advance(istreambuf_iterator<>&, _Distance)): Remove friend > > declaration. > > * include/bits/streambuf_iterator.h (__copy_move_a2): Re-implement > > using > > streambuf pubseekoff. > > > > Tested under Linux x86_64. > > > > Ok to commit ? > > > > François > > >
[committed] libstdc++: Teach optimizer that empty COW strings are empty [PR107087]
Tested powerpc64le-linux. Pushed to trunk. -- >8 -- The compiler doesn't know about the invariant that the _S_empty_rep() object is immutable and so _M_length and _M_refcount are always zero. This means that we get warnings about writing possibly-non-zero length strings into buffers that can't hold them. If we teach the compiler that the empty rep is always zero length, it knows it can be copied into any buffer. For Stage 1 we might want to also consider adding this to capacity(): if (_S_empty_rep()._M_capacity != 0) __builtin_unreachable(); And this to _Rep::_M_is_leaked() and _Rep::_M_is_shared(): if (_S_empty_rep()._M_refcount != 0) __builtin_unreachable(); libstdc++-v3/ChangeLog: PR tree-optimization/107087 * include/bits/cow_string.h (basic_string::size()): Add optimizer hint that _S_empty_rep()._M_length is always zero. (basic_string::length()): Call size(). --- libstdc++-v3/include/bits/cow_string.h | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/cow_string.h b/libstdc++-v3/include/bits/cow_string.h index 1ee84e60678..b6024365d4f 100644 --- a/libstdc++-v3/include/bits/cow_string.h +++ b/libstdc++-v3/include/bits/cow_string.h @@ -907,17 +907,24 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION public: // Capacity: + /// Returns the number of characters in the string, not including any /// null-termination. size_type size() const _GLIBCXX_NOEXCEPT - { return _M_rep()->_M_length; } + { +#if _GLIBCXX_FULLY_DYNAMIC_STRING == 0 && __OPTIMIZE__ + if (_S_empty_rep()._M_length != 0) + __builtin_unreachable(); +#endif + return _M_rep()->_M_length; + } /// Returns the number of characters in the string, not including any /// null-termination. size_type length() const _GLIBCXX_NOEXCEPT - { return _M_rep()->_M_length; } + { return size(); } /// Returns the size() of the largest possible %string. size_type -- 2.39.2
Re: Regression with "recomputation and PR 109154"
> Date: Fri, 31 Mar 2023 15:48:22 -0400 > From: Andrew MacLeod via Gcc-patches > Reply-To: Andrew MacLeod > commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8 > Author: Andrew MacLeod > Date: Fri Mar 31 15:42:43 2023 -0400 > > Adjust testcases to not produce errors.. > > tree-optimization/109363 > gcc/testsuite/ > * g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message. > * gcc.dg/tree-ssa/pr23109.c: Disable better recomputations. (Needs to be spelled "PR tree-optimization/109363" for the bugzilla-marker hook to react. I'll mark manually though.) Thanks! I was about to push the --param adjustment as obvious, seeing consensus equivalent of approval in this mail thread. brgds, H-P
Re: Regression with "recomputation and PR 109154"
On 3/31/23 19:31, Hans-Peter Nilsson wrote: Date: Fri, 31 Mar 2023 15:48:22 -0400 From: Andrew MacLeod via Gcc-patches Reply-To: Andrew MacLeod commit 55bf4f0d443e5adbacfcdbbebf4b2e0c74d1dcc8 Author: Andrew MacLeod Date: Fri Mar 31 15:42:43 2023 -0400 Adjust testcases to not produce errors.. tree-optimization/109363 gcc/testsuite/ * g++.dg/warn/Wstringop-overflow-4.C: Always cehck bogus message. * gcc.dg/tree-ssa/pr23109.c: Disable better recomputations. (Needs to be spelled "PR tree-optimization/109363" for the bugzilla-marker hook to react. I'll mark manually though.) Oops! thanks :-) Andrew
Re: [PATCH 1/2] c++: improve "NTTP argument considered unused" fix [PR53164, PR105848]
On 3/31/23 15:06, Patrick Palka wrote: On Fri, 31 Mar 2023, Jason Merrill wrote: On 3/31/23 11:55, Patrick Palka wrote: On Fri, 31 Mar 2023, Patrick Palka wrote: On Thu, 30 Mar 2023, Jason Merrill wrote: On 3/30/23 14:53, Patrick Palka wrote: On Wed, 29 Mar 2023, Jason Merrill wrote: On 3/27/23 09:30, Patrick Palka wrote: On Thu, 23 Mar 2023, Patrick Palka wrote: r13-995-g733a792a2b2e16 worked around the problem of FUNCTION_DECL template arguments not always getting marked as odr-used by redundantly calling mark_used on the substituted ADDR_EXPR callee of a CALL_EXPR. This is just a narrow workaround however, since using a FUNCTION_DECL as a template argument alone should constitutes an odr-use; we shouldn't need to subsequently e.g. call the function or take its address. Agreed. But why didn't we already wrap it in an ADDR_EXPR? Even for reference tparms convert_nontype_argument should do that. Indeed we do, the commit message was just rather sloppy/inaccurate... I'll try to correct it. This patch fixes this in a more general way at template specialization time by walking the template arguments of the specialization and calling mark_used on all entities used within. As before, the call to mark_used as it worst a no-op, but it compensates for the situation where we end up forming a specialization from a template context in which mark_used is inhibited. Another approach would be to call mark_used whenever we substitute a TEMPLATE_PARM_INDEX, but that would result in many more redundant calls to mark_used compared to this approach. Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for trunk? PR c++/53164 PR c++/105848 gcc/cp/ChangeLog: * pt.cc (instantiate_class_template): Call mark_template_arguments_used. (tsubst_copy_and_build) : Revert r13-995 change. (mark_template_arguments_used): Define. (instantiate_template): Call mark_template_arguments_used. gcc/testsuite/ChangeLog: * g++.dg/template/fn-ptr3a.C: New test. * g++.dg/template/fn-ptr4.C: New test. --- gcc/cp/pt.cc | 51 gcc/testsuite/g++.dg/template/fn-ptr3a.C | 25 gcc/testsuite/g++.dg/template/fn-ptr4.C | 14 +++ 3 files changed, 74 insertions(+), 16 deletions(-) create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr3a.C create mode 100644 gcc/testsuite/g++.dg/template/fn-ptr4.C diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc index 7e4a8de0c8b..9b3cc1c 100644 --- a/gcc/cp/pt.cc +++ b/gcc/cp/pt.cc @@ -220,6 +220,7 @@ static tree make_argument_pack (tree); static tree enclosing_instantiation_of (tree tctx); static void instantiate_body (tree pattern, tree args, tree d, bool nested); static tree maybe_dependent_member_ref (tree, tree, tsubst_flags_t, tree); +static void mark_template_arguments_used (tree); /* Make the current scope suitable for access checking when we are processing T. T can be FUNCTION_DECL for instantiated function @@ -12142,6 +12143,9 @@ instantiate_class_template (tree type) cp_unevaluated_operand = 0; c_inhibit_evaluation_warnings = 0; } + + mark_template_arguments_used (INNERMOST_TEMPLATE_ARGS (args)); + /* Use #pragma pack from the template context. */ saved_maximum_field_alignment = maximum_field_alignment; maximum_field_alignment = TYPE_PRECISION (pattern); @@ -21173,22 +21177,10 @@ tsubst_copy_and_build (tree t, } /* Remember that there was a reference to this entity. */ - if (function != NULL_TREE) - { - tree inner = function; - if (TREE_CODE (inner) == ADDR_EXPR - && TREE_CODE (TREE_OPERAND (inner, 0)) == FUNCTION_DECL) - /* We should already have called mark_used when taking the -address of this function, but do so again anyway to make -sure it's odr-used: at worst this is a no-op, but if we -obtained this FUNCTION_DECL as part of ahead-of-time overload -resolution then that call to mark_used wouldn't have marked it -odr-used yet (53164). */ - inner = TREE_OPERAND (inner, 0); - if (DECL_P (inner) - && !mark_used (inner, complain) && !(complain & tf_error)) - RETURN (error_mark_node); - } + if (function != NULL_TREE + && DECL_P (function) + && !mark_used (function, complain) && !(complain & tf_error)) + RETURN (error_mark_node); if (!maybe_fold_fn_template_args (function, complain)) return error_mark_node; @@ -21883,6 +21875,31 @@ check_instantiated_args (tree tmpl, tree args, tsubst_flags_t complain) return result; } +/* Call mark_used on each entity within the template arguments ARGS of some + template specialization, to ensure that each suc