Re: [RFC][gimple] Move can_duplicate_bb_p to gimple_can_duplicate_bb_p
On Fri, 9 Oct 2020, Tom de Vries wrote: > Hi, > > The function gimple_can_duplicate_bb_p currently always returns true. > > The presence of can_duplicate_bb_p in tracer.c however suggests that > there are cases when bb's indeed cannot be duplicated. > > Move the implementation of can_duplicate_bb_p to gimple_can_duplicate_bb_p. > > Bootstrapped and reg-tested on x86_64-linux. > > Build x86_64-linux with nvptx accelerator and tested libgomp. > > No issues found. > > As corner-case check, bootstrapped and reg-tested a patch that makes > gimple_can_duplicate_bb_p always return false, resulting in > PR97333 - "[gimple_can_duplicate_bb_p == false, tree-ssa-threadupdate] > ICE in duplicate_block, at cfghooks.c:1093". > > Any comments? In principle it's correct to move this to the CFG hook since there now seem to be stmts that cannot be duplicated and thus we need to implement can_duplicate_bb_p. Some minor things below... > Thanks, > - Tom > > [gimple] Move can_duplicate_bb_p to gimple_can_duplicate_bb_p > > gcc/ChangeLog: > > 2020-10-09 Tom de Vries > > * tracer.c (cached_can_duplicate_bb_p): Use can_duplicate_block_p > instead of can_duplicate_bb_p. > (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p): Move ... > * tree-cfg.c: ... here. > * tracer.c (can_duplicate_bb_p): Move ... > * tree-cfg.c (gimple_can_duplicate_bb_p): here. > * tree-cfg.h (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p): > Declare. > > --- > gcc/tracer.c | 61 > +- > gcc/tree-cfg.c | 54 ++- > gcc/tree-cfg.h | 2 ++ > 3 files changed, 56 insertions(+), 61 deletions(-) > > diff --git a/gcc/tracer.c b/gcc/tracer.c > index e1c2b9527e5..16b46c65b14 100644 > --- a/gcc/tracer.c > +++ b/gcc/tracer.c > @@ -84,65 +84,6 @@ bb_seen_p (basic_block bb) >return bitmap_bit_p (bb_seen, bb->index); > } > > -/* Return true if gimple stmt G can be duplicated. 
*/ > -static bool > -can_duplicate_insn_p (gimple *g) > -{ > - /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be > - duplicated as part of its group, or not at all. > - The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a > - group, so the same holds there. */ > - if (is_gimple_call (g) > - && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC) > - || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT) > - || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY) > - || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY) > - || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_IDX))) > -return false; > - > - return true; > -} > - > -/* Return true if BB can be duplicated. Avoid iterating over the insns. */ > -static bool > -can_duplicate_bb_no_insn_iter_p (const_basic_block bb) > -{ > - if (bb->index < NUM_FIXED_BLOCKS) > -return false; > - > - if (gimple *g = last_stmt (CONST_CAST_BB (bb))) > -{ > - /* A transaction is a single entry multiple exit region. It > - must be duplicated in its entirety or not at all. */ > - if (gimple_code (g) == GIMPLE_TRANSACTION) > - return false; > - > - /* An IFN_UNIQUE call must be duplicated as part of its group, > - or not at all. */ > - if (is_gimple_call (g) > - && gimple_call_internal_p (g) > - && gimple_call_internal_unique_p (g)) > - return false; > -} > - > - return true; > -} > - > -/* Return true if BB can be duplicated. */ > -static bool > -can_duplicate_bb_p (const_basic_block bb) > -{ > - if (!can_duplicate_bb_no_insn_iter_p (bb)) > -return false; > - > - for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb)); > - !gsi_end_p (gsi); gsi_next (&gsi)) > -if (!can_duplicate_insn_p (gsi_stmt (gsi))) > - return false; > - > - return true; > -} > - > static sbitmap can_duplicate_bb; > > /* Cache VAL as value of can_duplicate_bb_p for BB. 
*/ > @@ -167,7 +108,7 @@ cached_can_duplicate_bb_p (const_basic_block bb) >return false; > } > > - return can_duplicate_bb_p (bb); > + return can_duplicate_block_p (bb); > } > > /* Return true if we should ignore the basic block for purposes of tracing. > */ > diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c > index 5caf3b62d69..a5677859ffc 100644 > --- a/gcc/tree-cfg.c > +++ b/gcc/tree-cfg.c > @@ -6208,11 +6208,63 @@ gimple_split_block_before_cond_jump (basic_block bb) > } > > > +/* Return true if gimple stmt G can be duplicated. */ > +bool > +can_duplicate_insn_p (gimple *g) Does this need to be exported? Please name it can_duplicate_stmt_p. It's also incomplete given the function below > +{ > + /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be > + duplicated as part of its group, or not at all. > + The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a > + group, so the same holds the
[PATCH] tree-optimization/97357 - avoid abnormals in loop splitting conditions
This avoids abnormals in another place. Bootstrap / regtest in progress on x86_64-unknown-linux-gnu. 2020-10-12 Richard Biener PR tree-optimization/97357 * tree-ssa-loop-split.c (ssa_semi_invariant_p): Abnormal SSA names are not semi invariant. * gcc.dg/pr97357.c: New testcase. --- gcc/testsuite/gcc.dg/pr97357.c | 39 ++ gcc/tree-ssa-loop-split.c | 3 +++ 2 files changed, 42 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr97357.c diff --git a/gcc/testsuite/gcc.dg/pr97357.c b/gcc/testsuite/gcc.dg/pr97357.c new file mode 100644 index 000..2b32d13a43f --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97357.c @@ -0,0 +1,39 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ +/* { dg-require-effective-target indirect_jumps } */ + +#include +#include + +void * my_malloc (size_t size); + +typedef struct glk { + struct glk *nxt; +} glk; + +typedef struct Lock +{ + glk ByteLock; +} Lock; + +static Lock *l, *lk; + +void bytelocks(glk *rethead, jmp_buf jb) +{ + glk *cur, *cur_lk; + + if (( _setjmp (jb)) == 0) +for (cur = &l->ByteLock; cur != ((glk *)0) ; cur = (cur)->nxt) +for (cur_lk = &lk->ByteLock; cur_lk != ((glk *)0); cur_lk = cur_lk->nxt) + { +glk *retrng; + +if(!rethead) + rethead = (glk *) my_malloc (sizeof(glk)); +retrng = (glk *) my_malloc (sizeof(glk)); + +retrng->nxt = rethead; + } + + return; +} diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c index 1eb6be5ddb2..46ee7c0fc14 100644 --- a/gcc/tree-ssa-loop-split.c +++ b/gcc/tree-ssa-loop-split.c @@ -977,6 +977,9 @@ ssa_semi_invariant_p (struct loop *loop, tree name, if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb)) return true; + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) +return false; + return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat); } -- 2.26.2
[PATCH] More consistently split SLP groups
This sets all entries of matches to true after successful SLP discovery so that the subsequent forced split reliably succeeds. In some cases we were picking up a possibly all-false matches state left over from building the node up from scalars. Bootstrap / regtest in progress on x86_64-unknown-linux-gnu. 2020-10-12 Richard Biener * tree-vect-slp.c (vect_analyze_slp_instance): Set matches to true after successful discovery but forced split. --- gcc/tree-vect-slp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index 495fb970e24..dd2042a4db5 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -2283,7 +2283,7 @@ vect_analyze_slp_instance (vec_info *vinfo, dump_printf_loc (MSG_NOTE, vect_location, "SLP discovery succeeded but node needs " "splitting\n"); - matches[0] = true; + memset (matches, true, group_size); matches[group_size / const_max_nunits * const_max_nunits] = false; vect_free_slp_tree (node); } -- 2.26.2
Re: [PATCH] arm: Fix multiple inheritance thunks for thumb-1 with -mpure-code
On Thu, 8 Oct 2020 at 11:58, Richard Earnshaw wrote: > > On 08/10/2020 10:07, Christophe Lyon via Gcc-patches wrote: > > On Tue, 6 Oct 2020 at 18:02, Richard Earnshaw > > wrote: > >> > >> On 29/09/2020 20:50, Christophe Lyon via Gcc-patches wrote: > >>> When mi_delta is > 255 and -mpure-code is used, we cannot load delta > >>> from code memory (like we do without -mpure-code). > >>> > >>> This patch builds the value of mi_delta into r3 with a series of > >>> movs/adds/lsls. > >>> > >>> We also do some cleanup by not emitting the function address and delta > >>> via .word directives at the end of the thunk since we don't use them > >>> with -mpure-code. > >>> > >>> No need for new testcases, this bug was already identified by > >>> eg. pr46287-3.C > >>> > >>> 2020-09-29 Christophe Lyon > >>> > >>> gcc/ > >>> * config/arm/arm.c (arm_thumb1_mi_thunk): Build mi_delta in r3 and > >>> do not emit function address and delta when -mpure-code is used. > >> > > Hi Richard, > > > > Thanks for your comments. > > > >> There are some optimizations you can make to this code. > >> > >> Firstly, for values between 256 and 510 (inclusive), it would be better > >> to just expand a mov of 255 followed by an add. > > I now see the splitted for the "Pe" constraint which I hadn't noticed > > before, so I can write something similar indeed. > > > > However, I'm note quite sure to understand the benefit in the split > > when -mpure-code is NOT used. > > Consider: > > int f3_1 (void) { return 510; } > > int f3_2 (void) { return 511; } > > Compile with -O2 -mcpu=cortex-m0: > > f3_1: > > movsr0, #255 > > lslsr0, r0, #1 > > bx lr > > f3_2: > > ldr r0, .L4 > > bx lr > > > > The splitter makes the code bigger, does it "compensate" for this by > > not having to load the constant? > > Actually the constant uses 4 more bytes, which should be taken into > > account when comparing code size, > > Yes, the size of the literal pool entry needs to be taken into account. 
> It might happen that the entry could be shared with another use of that > literal, but in general that's rare. > > > so f3_1 uses 6 bytes, and f3_2 uses 8, so as you say below three > > thumb1 instructions would be equivalent in size compared to loading > > from the literal pool. Should the 256-510 range be extended? > > It's a bit borderline at three instructions when literal pools are not > expensive to use, but in thumb1 literal pools tend to be quite small due > to the limited pc offsets we can use. I think on balance we probably > want to use the instruction sequence unless optimizing for size. > > > > > > >> This is also true for > >> the literal pools alternative as well, so should be handled before all > >> this. > > I am not sure what you mean: with -mpure-code, the above sample is compiled > > as: > > f3_1: > > movsr0, #255 > > lslsr0, r0, #1 > > bx lr > > f3_2: > > movsr0, #1 > > lslsr0, r0, #8 > > addsr0, r0, #255 > > bx lr > > > > so the "return 510" case is already handled as without -mpure-code. > > I was thinking specifically of the thunk sequence where you seem to be > emitting instructions directly rather than generating RTL. The examples > you show here are not thunks. > OK thanks for the clarification. Here is an updated version, split into 3 patches to hopefully make review easier. They apply on top of my other mpure-code patches for PR96967 and PR96770: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554956.html https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554957.html I kept it this way to make incremental changes easier to understand. Patch 1: With the hope to avoid confusion and make maintenance easier, I have updated thumb1_gen_const_int() so that it can generate either RTL or asm. This way, all the code used to build thumb-1 constants is in the same place, in case we need to improve/fix it later. We now generate shorter sequences in several cases matching your comments. 
Patch 2: Removes the equivalent loop from thumb1_movsi_insn pattern and calls thumb1_gen_const_int. Patch 3: Update of the original patch in this thread, now calls thumb1_gen_const_int. > > > > >> I also suspect (but haven't check) that the base adjustment will > >> most commonly be a multiple of the machine word size (ie 4). If that is > >> the case then you could generate n/4 and then shift it left by 2 for an > >> even greater range of literals. > > I can see there is provision for this in the !TARGET_THUMB1_ONLY case, > > I'll update my patch. > > > >> More generally, any sequence of up to > >> three thumb1 instructions will be no larger, and probably as fast as the > >> existing literal pool fall back. > >> > >> Secondly, if the value is, for example, 65536 (0x1), your code will > >> emit a mov followed by two shift-by-8 instructions; the two shifts could > >> be merged into
[wwwdocs][Patch] gcc-11 + project/gomp: OpenMP status update
This is a tiny update – and probably not the last for the GCC 11 status and especially as the project page is still linked prominently at some external pages: Update the GOMP project page a bit; some more revisions wouldn't harm, however: https://gcc.gnu.org/projects/gomp/ OK? Wording suggestions? Tobias - Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter gcc-11 + project/gomp: OpenMP status update * htdocs/gcc-11/changes.html (languages): Add another newly supported clause: device_type. * htdocs/projects/gomp/index.html: Mention offloading support; add post-2015 news items. diff --git a/htdocs/gcc-11/changes.html b/htdocs/gcc-11/changes.html index e2a32e51..f7417622 100644 --- a/htdocs/gcc-11/changes.html +++ b/htdocs/gcc-11/changes.html @@ -123,6 +123,7 @@ a work-in-progress. For Fortran, OpenMP 4.5 is now finally fully supported and OpenMP 5.0 support has been extended, including the following features which were before only available in C and C++: order(concurrent), + device_type, lastprivate with conditional modifier, if clause with simd and cancel modifiers, target data without map clause, diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html index 70e6d95a..15cb1585 100644 --- a/htdocs/projects/gomp/index.html +++ b/htdocs/projects/gomp/index.html @@ -36,6 +36,12 @@ environments. Using OpenMP's directive-based parallelism also simplifies the act of converting existing serial code to efficient parallel code. +OpenMP additionally permits to offload computations on +accelerators such as GPUs, +making use of their highly parallel computation support; if +no accelarator is available, as fallback, the computation is +then done on the host. + Project goal To remain relevant, free software development tools must support emerging technologies. By implementing OpenMP, GOMP @@ -63,6 +69,40 @@ available. 
Status +August 20, 2020 +https://www.openmp.org/wp-content/uploads/openmp-TR9.pdf";>OpenMP +Technical Report 9: Version 5.1 Public Comment Draft has been released. + +May 7, 2020 +GCC 10 has been released; it adds a number of +newly implemented OpenMP 5.0 features on top of the GCC 9 release such as +conditional lastprivate clause, scan +and loop directives, order(concurrent) and +use_device_addr clauses support, if clause on +simd construct or partial support for the +declare variant directive, getting closer to full support +of the OpenMP 5.0 standard. + +May 3, 2019 +https://gcc.gnu.org/gcc-9/";>GCC 9 has been released and +version 5.0 of the OpenMP specification is now partially supported in the C +and C++ compilers. + +November 8, 2018 +The https://www.openmp.org/specifications/";>OpenMP v5.0 +specification has been released. + +May 2, 2017 +https://gcc.gnu.org/gcc-7/";>GCC 7 has been released and +version 4.5 of the OpenMP specification is now partially supported in the +Fortran compiler; the largest missing item is structure element +mapping. + +April 27, 2016 +https://gcc.gnu.org/gcc-6/";>GCC 6 has been released and +version 4.5 of the OpenMP specification is now supported in the C and +C++ compilers + November 14, 2015 The final https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf";>OpenMP v4.5
Re: [wwwdocs][Patch] gcc-11 + project/gomp: OpenMP status update
On Mon, Oct 12, 2020 at 10:13:09AM +0200, Tobias Burnus wrote: > This is a tiny update – and probably not the last for the GCC 11 status > and especially as the project page is still linked prominently at some > external pages: > > Update the GOMP project page a bit; some more revisions wouldn't harm, > however: https://gcc.gnu.org/projects/gomp/ > > OK? Wording suggestions? Thanks, LGTM, but I'd one nit, see below. Ok with that change. > @@ -63,6 +69,40 @@ available. > > Status > > +August 20, 2020 > + href="https://www.openmp.org/wp-content/uploads/openmp-TR9.pdf";>OpenMP > +Technical Report 9: Version 5.1 Public Comment Draft has been > released. > + > +May 7, 2020 > +GCC 10 has been released; it adds a number of > +newly implemented OpenMP 5.0 features on top of the GCC 9 release such as > +conditional lastprivate clause, scan > +and loop directives, order(concurrent) and > +use_device_addr clauses support, if clause on > +simd construct or partial support for the > +declare variant directive, getting closer to full support > +of the OpenMP 5.0 standard. > + > +May 3, 2019 > +https://gcc.gnu.org/gcc-9/";>GCC 9 has been released and > +version 5.0 of the OpenMP specification is now partially supported in the C > +and C++ compilers. While gcc-9/changes.html mentions it, I think it would be useful to add the For details which features of OpenMP 5.0 are and which are not supported in the GCC 9 release see https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00628.html>this mail. sentence. Jakub
[committed][nvptx] Fix -msoft-stack-reserve-local format
Hi, Currently, in order to use the switch -msoft-stack-reserve-local with the default arg 128, you have to specify '-msoft-stack-reserve-local128'. Fix the switch format such that you specify '-msoft-stack-reserve-local=128' instead. Tested on nvptx. Committed to trunk. Thanks, - Tom [nvptx] Fix -msoft-stack-reserve-local format gcc/ChangeLog: 2020-10-12 Tom de Vries * config/nvptx/nvptx.opt (-msoft-stack-reserve-local): Rename to ... (-msoft-stack-reserve-local=): ... this. --- gcc/config/nvptx/nvptx.opt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index d6910a96cf0..38454222d42 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -37,7 +37,7 @@ msoft-stack Target Report Mask(SOFT_STACK) Use custom stacks instead of local memory for automatic storage. -msoft-stack-reserve-local +msoft-stack-reserve-local= Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) Init(128) Specify size of .local memory used for stack when the exact amount is not known.
Re: make sincos take type from intrinsic formal, not from result assignment
Hello Alexander. It seems the patch caused quite a few clang warnings: /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/gcc/builtins.c:2366:5: warning: comparison of different enumeration types in switch statement ('combined_fn' and 'built_in_function') [-Wenum-compare-switch] ... Can we please fix them? Thanks, Martin
[PATCH] IPA: prevent an UBSAN error
Prevents the following UBSAN error: ./xgcc -B. /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/torture/pr49770.C -O2 -c /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482:22: runtime error: load of value 2, which is not a valid value for type 'bool' #0 0x1fdb4d1 in modref_tree::merge(modref_tree*, vec*) /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482 #1 0x1fcadaa in merge_call_side_effects(modref_summary*, gimple*, modref_summary*, bool) /home/marxin/Programming/gcc2/gcc/ipa-modref.c:511 #2 0x1fcbadd in analyze_call /home/marxin/Programming/gcc2/gcc/ipa-modref.c:642 #3 0x1fcc061 in analyze_stmt /home/marxin/Programming/gcc2/gcc/ipa-modref.c:732 #4 0x1fccf31 in analyze_function /home/marxin/Programming/gcc2/gcc/ipa-modref.c:823 #5 0x1fd17e5 in execute /home/marxin/Programming/gcc2/gcc/ipa-modref.c:1441 #6 0x25cca6e in execute_one_pass(opt_pass*) /home/marxin/Programming/gcc2/gcc/passes.c:2509 #7 0x25cd39b in execute_pass_list_1 /home/marxin/Programming/gcc2/gcc/passes.c:2597 #8 0x25cd450 in execute_pass_list_1 /home/marxin/Programming/gcc2/gcc/passes.c:2598 #9 0x25cd4ee in execute_pass_list(function*, opt_pass*) /home/marxin/Programming/gcc2/gcc/passes.c:2608 #10 0x25c7a5a in do_per_function_toporder(void (*)(function*, void*), void*) /home/marxin/Programming/gcc2/gcc/passes.c:1726 #11 0x25cfa3f in execute_ipa_pass_list(opt_pass*) /home/marxin/Programming/gcc2/gcc/passes.c:2941 #12 0x173572d in ipa_passes /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2642 #13 0x17364ee in symbol_table::compile() /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2777 #14 0x17372d9 in symbol_table::finalize_compilation_unit() /home/marxin/Programming/gcc2/gcc/cgraphunit.c:3022 #15 0x2a1f00a in compile_file /home/marxin/Programming/gcc2/gcc/toplev.c:485 #16 0x2a27dc8 in do_compile /home/marxin/Programming/gcc2/gcc/toplev.c:2321 #17 0x2a283cc in toplev::main(int, char**) /home/marxin/Programming/gcc2/gcc/toplev.c:2460 #18 0x54f21cd in main 
/home/marxin/Programming/gcc2/gcc/main.c:39 #19 0x76f0de09 in __libc_start_main ../csu/libc-start.c:314 #20 0x9eac09 in _start (/home/marxin/Programming/gcc2/objdir/gcc/cc1plus+0x9eac09) Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Ready to be installed? Thanks, Martin gcc/ChangeLog: * ipa-modref.c (merge_call_side_effects): Clear modref_parm_map fields in the vector. --- gcc/ipa-modref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index dd59e804c0f..b815eb8cc88 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -476,7 +476,7 @@ merge_call_side_effects (modref_summary *cur_summary, auto_vec parm_map; bool changed = false; - parm_map.safe_grow (gimple_call_num_args (stmt)); + parm_map.safe_grow_cleared (gimple_call_num_args (stmt)); for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) { tree op = gimple_call_arg (stmt, i); -- 2.28.0
Re: [PATCH] IPA: prevent an UBSAN error
> Prevents the following UBSAN error: > > ./xgcc -B. > /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/torture/pr49770.C -O2 -c > /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482:22: runtime error: > load of value 2, which is not a valid value for type 'bool' > #0 0x1fdb4d1 in modref_tree::merge(modref_tree*, > vec*) > /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482 > #1 0x1fcadaa in merge_call_side_effects(modref_summary*, gimple*, > modref_summary*, bool) /home/marxin/Programming/gcc2/gcc/ipa-modref.c:511 > #2 0x1fcbadd in analyze_call > /home/marxin/Programming/gcc2/gcc/ipa-modref.c:642 > #3 0x1fcc061 in analyze_stmt > /home/marxin/Programming/gcc2/gcc/ipa-modref.c:732 > #4 0x1fccf31 in analyze_function > /home/marxin/Programming/gcc2/gcc/ipa-modref.c:823 > #5 0x1fd17e5 in execute > /home/marxin/Programming/gcc2/gcc/ipa-modref.c:1441 > #6 0x25cca6e in execute_one_pass(opt_pass*) > /home/marxin/Programming/gcc2/gcc/passes.c:2509 > #7 0x25cd39b in execute_pass_list_1 > /home/marxin/Programming/gcc2/gcc/passes.c:2597 > #8 0x25cd450 in execute_pass_list_1 > /home/marxin/Programming/gcc2/gcc/passes.c:2598 > #9 0x25cd4ee in execute_pass_list(function*, opt_pass*) > /home/marxin/Programming/gcc2/gcc/passes.c:2608 > #10 0x25c7a5a in do_per_function_toporder(void (*)(function*, void*), > void*) /home/marxin/Programming/gcc2/gcc/passes.c:1726 > #11 0x25cfa3f in execute_ipa_pass_list(opt_pass*) > /home/marxin/Programming/gcc2/gcc/passes.c:2941 > #12 0x173572d in ipa_passes > /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2642 > #13 0x17364ee in symbol_table::compile() > /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2777 > #14 0x17372d9 in symbol_table::finalize_compilation_unit() > /home/marxin/Programming/gcc2/gcc/cgraphunit.c:3022 > #15 0x2a1f00a in compile_file > /home/marxin/Programming/gcc2/gcc/toplev.c:485 > #16 0x2a27dc8 in do_compile > /home/marxin/Programming/gcc2/gcc/toplev.c:2321 > #17 0x2a283cc in toplev::main(int, char**) > 
/home/marxin/Programming/gcc2/gcc/toplev.c:2460 > #18 0x54f21cd in main /home/marxin/Programming/gcc2/gcc/main.c:39 > #19 0x76f0de09 in __libc_start_main ../csu/libc-start.c:314 > #20 0x9eac09 in _start > (/home/marxin/Programming/gcc2/objdir/gcc/cc1plus+0x9eac09) > > Patch can bootstrap on x86_64-linux-gnu and survives regression tests. > > Ready to be installed? OK (to silence valgrind). The situation here is harmless - when param_index is -1 the param_offset_known has no meaning, but we still merge it into the summary. Honza
Re: [PATCH v2] IBM Z: Change vector copysign to use bitwise operations
On 09.10.20 17:49, Ilya Leoshkevich wrote: > Bootstrapped and regtested on s390x-redhat-linux. OK for master? > > v1: https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555782.html > v1 -> v2: Use related_int_vector_mode. > > > > The vector copysign pattern incorrectly assumes that vector > if_then_else operates on bits, not on elements. This can theoretically > mislead the optimizers. Fix by changing it to use bitwise operations, > like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did > for vec_sel builtin. > > gcc/ChangeLog: > > 2020-10-07 Ilya Leoshkevich > > * config/s390/s390-protos.h (s390_build_signbit_mask): New > function. > * config/s390/s390.c (s390_contiguous_bitmask_vector_p): > Bitcast the argument to an integral mode. > (s390_expand_vec_init): Do not call > s390_contiguous_bitmask_vector_p with a scalar argument. > (s390_build_signbit_mask): New function. > * config/s390/vector.md (copysign3): Use bitwise > operations. Ok. Thanks! Andreas > --- > gcc/config/s390/s390-protos.h | 1 + > gcc/config/s390/s390.c| 44 --- > gcc/config/s390/vector.md | 28 +++--- > 3 files changed, 45 insertions(+), 28 deletions(-) > > diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h > index 6f1bc07db17..029f7289fac 100644 > --- a/gcc/config/s390/s390-protos.h > +++ b/gcc/config/s390/s390-protos.h > @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum > rtx_code, rtx, rtx, bool); > extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code); > extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); > extern void s390_expand_vec_init (rtx, rtx); > +extern rtx s390_build_signbit_mask (machine_mode); > extern rtx s390_return_addr_rtx (int, rtx); > extern rtx s390_back_chain_rtx (void); > extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx); > diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c > index 93894307d62..dbb541bbea7 100644 > --- a/gcc/config/s390/s390.c > +++ 
b/gcc/config/s390/s390.c > @@ -2467,6 +2467,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, > int *end) >rtx elt; >bool b; > > + /* Handle floats by bitcasting them to ints. */ > + op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op); > + >gcc_assert (!!start == !!end); >if (!const_vec_duplicate_p (op, &elt) >|| !CONST_INT_P (elt)) > @@ -6863,15 +6866,16 @@ s390_expand_vec_init (rtx target, rtx vals) > } > >/* Use vector gen mask or vector gen byte mask if possible. */ > - if (all_same && all_const_int > - && (XVECEXP (vals, 0, 0) == const0_rtx > - || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0), > -NULL, NULL) > - || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL))) > + if (all_same && all_const_int) > { > - emit_insn (gen_rtx_SET (target, > - gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0; > - return; > + rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); > + if (XVECEXP (vals, 0, 0) == const0_rtx > + || s390_contiguous_bitmask_vector_p (vec, NULL, NULL) > + || s390_bytemask_vector_p (vec, NULL)) > + { > + emit_insn (gen_rtx_SET (target, vec)); > + return; > + } > } > >/* Use vector replicate instructions. vlrep/vrepi/vrep */ > @@ -6949,6 +6953,30 @@ s390_expand_vec_init (rtx target, rtx vals) > } > } > > +/* Emit a vector constant that contains 1s in each element's sign bit > position > + and 0s in other positions. MODE is the desired constant's mode. */ > +extern rtx > +s390_build_signbit_mask (machine_mode mode) > +{ > + /* Generate the integral element mask value. */ > + machine_mode inner_mode = GET_MODE_INNER (mode); > + int inner_bitsize = GET_MODE_BITSIZE (inner_mode); > + wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize); > + > + /* Emit the element mask rtx. Use gen_lowpart in order to cast the > integral > + value to the desired mode. 
*/ > + machine_mode int_mode = related_int_vector_mode (mode).require (); > + rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode)); > + mask = gen_lowpart (inner_mode, mask); > + > + /* Emit the vector mask rtx by mode the element mask rtx. */ > + int nunits = GET_MODE_NUNITS (mode); > + rtvec v = rtvec_alloc (nunits); > + for (int i = 0; i < nunits; i++) > +RTVEC_ELT (v, i) = mask; > + return gen_rtx_CONST_VECTOR (mode, v); > +} > + > /* Structure to hold the initial parameters for a compare_and_swap operation > in HImode and QImode. */ > > diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md > index 2573b7d980a..e9332bad0fd 100644 > --- a/gcc/config/s390/vector.md > +++ b/gcc/config/s390/vector.md > @@ -1425,28 +1425,16 @@
[PATCH] Ignore shifts larger than precision in operator_rshift::op1_range.
Pushed as obvious. gcc/ChangeLog: PR tree-optimization/97371 * range-op.cc (operator_rshift::op1_range): Ignore shifts larger than or equal to type precision. gcc/testsuite/ChangeLog: * gcc.dg/pr97371.c: New test. --- gcc/range-op.cc| 7 +++ gcc/testsuite/gcc.dg/pr97371.c | 8 2 files changed, 15 insertions(+) create mode 100644 gcc/testsuite/gcc.dg/pr97371.c diff --git a/gcc/range-op.cc b/gcc/range-op.cc index d1a11b34894..ce6ae2de20c 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -1626,6 +1626,13 @@ operator_rshift::op1_range (irange &r, tree shift; if (op2.singleton_p (&shift)) { + // Ignore nonsensical shifts. + unsigned prec = TYPE_PRECISION (type); + if (wi::ge_p (wi::to_wide (shift), + wi::uhwi (prec, TYPE_PRECISION (TREE_TYPE (shift))), + UNSIGNED)) + return false; + // Folding the original operation may discard some impossible // ranges from the LHS. int_range_max lhs_refined; diff --git a/gcc/testsuite/gcc.dg/pr97371.c b/gcc/testsuite/gcc.dg/pr97371.c new file mode 100644 index 000..ffefad0287e --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97371.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -w" } */ + +int a, b; +void c() { + if (b >> 38) +a = b; +} -- 2.26.2
ipa-modref cleanups
Hi, this is a largely mechanical patch fixing some suboptimal data structure decisions in modref. It records three different things: 1) optimization_summaries that are used by tree-ssa-alias to disambiguate (computed by local passes or ipa execute) 2) summaries produced by local analysis and used by the ipa execute 3) summaries_lto produced by analysis when streaming is expected, streamed, used by ipa execute All three items are stored in the "summaries" data structure where 1 and 2 are mixed and differentiated by "finished" flags. This uses extra memory and also makes it impossible to use modref while producing other IPA summaries (by ipa-prop and ipa-devirt). This patch separates the summaries into three special-purpose data structures. There is one fix to propagation in ipa_merge_modref_summary_after_inlining where the check to ignore stores was placed incorrectly. This seems to lead to increased clobber disambiguations: Alias oracle query stats: refs_may_alias_p: 64266142 disambiguations, 74474762 queries ref_maybe_used_by_call_p: 142295 disambiguations, 65168507 queries call_may_clobber_ref_p: 22975 disambiguations, 28762 queries nonoverlapping_component_refs_p: 0 disambiguations, 36805 queries nonoverlapping_refs_since_match_p: 19389 disambiguations, 4 must overlaps, 75714 queries aliasing_component_refs_p: 54702 disambiguations, 759023 queries TBAA oracle: 23639134 disambiguations 56006211 queries 16113791 are in alias set 0 10615301 queries asked about the same object 125 queries asked about the same alias set 0 access volatile 3994283 are dependent in the DAG 1643577 are aritificially in conflict with void * Modref stats: modref use: 11659 disambiguations, 40203 queries modref clobber: 1509635 disambiguations, 1830233 queries 3919009 tbaa queries (2.141262 per modref query) 623504 base compares (0.340669 per modref query) PTA query stats: pt_solution_includes: 967422 disambiguations, 13605769 queries pt_solutions_intersect: 1033368 disambiguations, 13121788 queries 
Bootstrapped/regtested x86_64-linux. I plan to commit it later today if there are no complains. gcc/ChangeLog: 2020-10-11 Jan Hubicka * ipa-modref.c (modref_summaries): Remove field IPA. (class modref_summary_lto): New global variable. (class modref_summaries_lto): New. (modref_summary::modref_summary): Remove loads_lto and stores_lto. (modref_summary::~modref_summary): Remove loads_lto and stores_lto. (modref_summary::useful_p): Do not use lto_useful. (modref_records_lto): New typedef. (struct modref_summary_lto): New type. (modref_summary_lto::modref_summary_lto): New member function. (modref_summary_lto::~modref_summary_lto): New member function. (modref_summary_lto::useful_p): New member function. (modref_summary::dump): Do not handle lto. (modref_summary_lto::dump): New member function. (get_modref_function_summary): Use optimization_summary. (merge_call_side_effects): Use optimization_summary. (analyze_call): Use optimization_summary. (struct summary_ptrs): New struture. (analyze_load): Update to handle separate lto and non-lto summaries. (analyze_store): Likewise. (analyze_stmt): Likewise. (remove_summary): Break out from ... (analyze_function): ... here; update to handle seprated summaries. (modref_summaries::insert): Do not handle lto summary. (modref_summaries_lto::insert): New member function. (modref_summaries::duplicate): Do not handle lto summary. (modref_summaries_lto::duplicate): New member function. (read_modref_records): Expect nolto_ret or lto_ret to be NULL> (modref_write): Write lto summary. (read_section): Handle separated summaries. (modref_read): Initialize separated summaries. (modref_transform): Handle separated summaries. (pass_modref::execute): Turn summary to optimization_summary; handle separate summaries. (ignore_edge): Handle separate summaries. (ipa_merge_modref_summary_after_inlining): Likewise. (collapse_loads): Likewise. (modref_propagate_in_scc): Likewise. (pass_ipa_modref::execute): Likewise. (ipa_modref_c_finalize): Likewise. 
* ipa-modref.h (modref_records_lto): Remove typedef. (struct modref_summary): Remove stores_lto, loads_lto and finished fields; remove lto_useful_p member function. gcc/ChangeLog: 2020-10-12 Jan Hubicka * ipa-modref.c (GTY): (class modref_summary_lto): (class GTY): (modref_summary::modref_summary): (modref_summary::~modref_summary): (modref_summary::lto_useful_p): (modref_summary::useful_p): (struct GTY): (modref_summary_lto::modref_summary_l
[committed] d: Merge upstream dmd 3a9790525
Hi, This patch merges the D front-end implementation with upstream dmd 3a9790525. Fixes the return codes to match the documentation of Target::isVectorTypeSupported. Bootstrapped and regression tested on x86_64-linux-gnu, and committed to mainline. Regards, Iain. --- gcc/d/ChangeLog: * dmd/MERGE: Merge upstream dmd 3a9790525 * d-target.cc (Target::isVectorTypeSupported): Adjust return codes for invalid size and invalid base type. --- gcc/d/d-target.cc | 6 +++--- gcc/d/dmd/MERGE | 2 +- gcc/d/dmd/mtype.c | 8 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc index 4a38cca56b4..78f14203b5c 100644 --- a/gcc/d/d-target.cc +++ b/gcc/d/d-target.cc @@ -241,7 +241,7 @@ Target::isVectorTypeSupported (int sz, Type *type) { /* Size must be greater than zero, and a power of two. */ if (sz <= 0 || sz & (sz - 1)) -return 2; +return 3; /* __vector(void[]) is treated same as __vector(ubyte[]) */ if (type == Type::tvoid) @@ -249,7 +249,7 @@ Target::isVectorTypeSupported (int sz, Type *type) /* No support for non-trivial types, complex types, or booleans. */ if (!type->isTypeBasic () || type->iscomplex () || type->ty == Tbool) -return 3; +return 2; /* In [simd/vector extensions], which vector types are supported depends on the target. The implementation is expected to only support the vector @@ -258,7 +258,7 @@ Target::isVectorTypeSupported (int sz, Type *type) tree ctype = build_vector_type (build_ctype (type), nunits); if (!targetm.vector_mode_supported_p (TYPE_MODE (ctype))) -return 3; +return 2; return 0; } diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index 4676645f971..8a59cbde78e 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -e49192807967c6f11252683a731c5a0159ef36da +3a979052509fff8170ba80e48817377a60e78eb3 The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. 
diff --git a/gcc/d/dmd/mtype.c b/gcc/d/dmd/mtype.c index 36471557dfc..bc66be028c1 100644 --- a/gcc/d/dmd/mtype.c +++ b/gcc/d/dmd/mtype.c @@ -3824,12 +3824,12 @@ Type *TypeVector::semantic(Loc loc, Scope *sc) case 1: // no support at all error(loc, "SIMD vector types not supported on this platform"); return terror; -case 2: // invalid size -error(loc, "%d byte vector type %s is not supported on this platform", sz, toChars()); -return terror; -case 3: // invalid base type +case 2: // invalid base type error(loc, "vector type %s is not supported on this platform", toChars()); return terror; +case 3: // invalid size +error(loc, "%d byte vector type %s is not supported on this platform", sz, toChars()); +return terror; default: assert(0); } -- 2.25.1
[committed] libphobos: Override tool_timeout value in testsuite
Hi, Some of the larger tests in the phobos testsuite on occasion trigger the default timeout limit. Increasing the limit to 10 minutes should give compilation enough time to finish. Regression tested on x86_64-linux-gnu, and committed to mainline. Regards, Iain. --- libphobos/ChangeLog: * testsuite/lib/libphobos.exp: Define tool_timeout, set to 600. --- libphobos/testsuite/lib/libphobos.exp | 4 1 file changed, 4 insertions(+) diff --git a/libphobos/testsuite/lib/libphobos.exp b/libphobos/testsuite/lib/libphobos.exp index 2e9da95ac1c..790480bf95c 100644 --- a/libphobos/testsuite/lib/libphobos.exp +++ b/libphobos/testsuite/lib/libphobos.exp @@ -105,6 +105,7 @@ proc libphobos_init { args } { global gdcpaths gdcldflags global gluefile wrap_flags global ld_library_path +global tool_timeout global DEFAULT_DFLAGS # If a testcase doesn't have special options, use these. @@ -185,6 +186,9 @@ proc libphobos_init { args } { } } +# Set the default timeout for phobos tests. +set tool_timeout 600 + set_ld_library_path_env_vars libphobos_maybe_build_wrapper "${objdir}/testglue.o" -- 2.25.1
[committed] d: Fix alias protection being ignored if used before declaration.
Hi, This patch merges the D front-end implementation with upstream dmd 70aabfb51. Fixes a symbol resolver bug where a private alias becomes public if used before its declaration. Bootstrapped and regression tested on x86_64-linux-gnu, and committed to mainline, and backported to the gcc-10 and gcc-9 release branches. Regards, Iain. --- gcc/d/ChangeLog: * dmd/MERGE: Merge upstream dmd 70aabfb51 --- gcc/d/dmd/MERGE | 2 +- gcc/d/dmd/declaration.c | 7 +++ gcc/testsuite/gdc.test/fail_compilation/fail21001.d | 12 .../gdc.test/fail_compilation/imports/fail21001b.d | 5 + .../fail_compilation/imports/issue21295ast_node.d| 5 + .../fail_compilation/imports/issue21295astcodegen.d | 4 .../fail_compilation/imports/issue21295dtemplate.d | 3 +++ .../fail_compilation/imports/issue21295visitor.d | 3 +++ gcc/testsuite/gdc.test/fail_compilation/issue21295.d | 9 + 9 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gdc.test/fail_compilation/fail21001.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d create mode 100644 gcc/testsuite/gdc.test/fail_compilation/issue21295.d diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE index 8a59cbde78e..5f6193f76b7 100644 --- a/gcc/d/dmd/MERGE +++ b/gcc/d/dmd/MERGE @@ -1,4 +1,4 @@ -3a979052509fff8170ba80e48817377a60e78eb3 +70aabfb511d55f2bfbdccbac7868519d9d4b63da The first line of this file holds the git revision number of the last merge done from the dlang/dmd repository. 
diff --git a/gcc/d/dmd/declaration.c b/gcc/d/dmd/declaration.c index 08b295070b2..f490cc5a413 100644 --- a/gcc/d/dmd/declaration.c +++ b/gcc/d/dmd/declaration.c @@ -340,6 +340,13 @@ void AliasDeclaration::semantic(Scope *sc) void AliasDeclaration::aliasSemantic(Scope *sc) { //printf("AliasDeclaration::semantic() %s\n", toChars()); + +// as AliasDeclaration::semantic, in case we're called first. +// see https://issues.dlang.org/show_bug.cgi?id=21001 +storage_class |= sc->stc & STCdeprecated; +protection = sc->protection; +userAttribDecl = sc->userAttribDecl; + // TypeTraits needs to know if it's located in an AliasDeclaration sc->flags |= SCOPEalias; diff --git a/gcc/testsuite/gdc.test/fail_compilation/fail21001.d b/gcc/testsuite/gdc.test/fail_compilation/fail21001.d new file mode 100644 index 000..0faeb4038ef --- /dev/null +++ b/gcc/testsuite/gdc.test/fail_compilation/fail21001.d @@ -0,0 +1,12 @@ +/* +TEST_OUTPUT: +--- +fail_compilation/fail21001.d(12): Error: undefined identifier `Alias` +--- +*/ + +module fail21001; + +import imports.fail21001b; + +void main() { Alias var; } diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d b/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d new file mode 100644 index 000..69c1c20a356 --- /dev/null +++ b/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d @@ -0,0 +1,5 @@ +module imports.fail21001b; + +private struct S { Alias member; } + +private alias Alias = int; diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d new file mode 100644 index 000..d9298bf972d --- /dev/null +++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d @@ -0,0 +1,5 @@ +module imports.issue21295ast_node; +import imports.issue21295visitor : Visitor; +class ASTNode { +void accept(Visitor); +} diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d new file mode 100644 index 000..5eccf6ac028 --- /dev/null +++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d @@ -0,0 +1,4 @@ +module imports.issue21295astcodegen; +struct ASTCodegen { +import imports.issue21295dtemplate; +} diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d new file mode 100644 index 000..8864f9ea27e --- /dev/null +++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d @@ -0,0 +1,3 @@ +module imports.issue21295dtemplate; +import imports.issue21295ast_node; +class TemplateParameter : ASTNode { } diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d new file mode 100644 index 000..3665
[PATCH] PR target/97349 AArch64: Incorrect types for some Neon vdupq_n_<...> intrinsics
Hi all, This patch fixes the PR by adjusting the input types of the intrinsic prototypes to the ones mandated by ACLE. Turns out the tests in the testsuite were already using the correct ones, but implicit conversions hid the bug... Bootstrapped and tested on aarch64-none-linux-gnu. Pushing to master and later to the branches. Thanks, Kyrill gcc/ PR target/97349 * config/aarch64/arm_neon.h (vdupq_n_p8, vdupq_n_p16, vdupq_n_p64, vdupq_n_s8, vdupq_n_s16, vdupq_n_u8, vdupq_n_u16): Fix argument type. gcc/testsuite/ PR target/97349 * gcc.target/aarch64/simd/pr97349.c: New test. vdup-types.patch Description: vdup-types.patch
[PATCH] SLP: fix SVE issues
The patch fixes the following 2 issues: .MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12); here we miss to return the last argument as stored value. ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4]; here we miss a bail out in vect_recog_over_widening_pattern. gcc/ChangeLog: PR tree-optimization/97079 * internal-fn.c (internal_fn_stored_value_index): Handle also .MASK_STORE_LANES. * tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail out for POLY_INT_CST. gcc/testsuite/ChangeLog: PR tree-optimization/97079 * gcc.target/aarch64/sve/pr97079.c: New test. I'm running tests on x86_64-linux-gnu and I would appreciate running that on a SVE aarch64 machine. Ready for master? Thanks, Martin --- gcc/internal-fn.c | 1 + .../gcc.target/aarch64/sve/pr97079.c | 22 +++ gcc/tree-vect-patterns.c | 2 ++ 3 files changed, 25 insertions(+) create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c index 92cb3cd845a..792d2ca568a 100644 --- a/gcc/internal-fn.c +++ b/gcc/internal-fn.c @@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn) switch (fn) { case IFN_MASK_STORE: +case IFN_MASK_STORE_LANES: case IFN_SCATTER_STORE: case IFN_MASK_SCATTER_STORE: case IFN_LEN_STORE: diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c new file mode 100644 index 000..06e6a7cde94 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=armv8.2-a+sve" } */ + +void g(void); + +int a[8][3]; +int b; +void c(void) +{ + int d[] = {7, 3}; + int *e = a[0]; + int f; + b = 0; + for (; b < 8; b++) +{ + f = 0; + for (; f < 3; f++) + a[b][f] = 0; +} + g(); + *e = (long)d; +} diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c index 71e4e106202..6302bc42f46 100644 --- a/gcc/tree-vect-patterns.c +++ b/gcc/tree-vect-patterns.c @@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info 
*vinfo, tree op = gimple_op (last_stmt, first_op + i); if (TREE_CODE (op) == INTEGER_CST) unprom[i].set_op (op, vect_constant_def); + else if (TREE_CODE (op) == POLY_INT_CST) + return NULL; else if (TREE_CODE (op) == SSA_NAME) { bool op_single_use_p = true; -- 2.28.0
Re: [PATCH] SLP: fix SVE issues
On Mon, Oct 12, 2020 at 12:59 PM Martin Liška wrote: > > The patch fixes the following 2 issues: > >.MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12); > > here we miss to return the last argument as stored value. > > ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4]; > > here we miss a bail out in vect_recog_over_widening_pattern. > > gcc/ChangeLog: > > PR tree-optimization/97079 > * internal-fn.c (internal_fn_stored_value_index): Handle also > .MASK_STORE_LANES. > * tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail > out for POLY_INT_CST. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/97079 > * gcc.target/aarch64/sve/pr97079.c: New test. > > I'm running tests on x86_64-linux-gnu and I would appreciate running that > on a SVE aarch64 machine. > > Ready for master? > Thanks, > Martin > > --- > gcc/internal-fn.c | 1 + > .../gcc.target/aarch64/sve/pr97079.c | 22 +++ > gcc/tree-vect-patterns.c | 2 ++ > 3 files changed, 25 insertions(+) > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > index 92cb3cd845a..792d2ca568a 100644 > --- a/gcc/internal-fn.c > +++ b/gcc/internal-fn.c > @@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn) > switch (fn) > { > case IFN_MASK_STORE: > +case IFN_MASK_STORE_LANES: > case IFN_SCATTER_STORE: > case IFN_MASK_SCATTER_STORE: > case IFN_LEN_STORE: > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > new file mode 100644 > index 000..06e6a7cde94 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > @@ -0,0 +1,22 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -march=armv8.2-a+sve" } */ > + > +void g(void); > + > +int a[8][3]; > +int b; > +void c(void) > +{ > + int d[] = {7, 3}; > + int *e = a[0]; > + int f; > + b = 0; > + for (; b < 8; b++) > +{ > + f = 0; > + for (; f < 3; f++) > + a[b][f] = 0; > +} > + g(); > + *e = (long)d; > +} > diff --git 
a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > index 71e4e106202..6302bc42f46 100644 > --- a/gcc/tree-vect-patterns.c > +++ b/gcc/tree-vect-patterns.c > @@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo, > tree op = gimple_op (last_stmt, first_op + i); > if (TREE_CODE (op) == INTEGER_CST) > unprom[i].set_op (op, vect_constant_def); > + else if (TREE_CODE (op) == POLY_INT_CST) > + return NULL; can you make this a simpler else return NULL; at the end of the if please? Thanks, Richard. > else if (TREE_CODE (op) == SSA_NAME) > { > bool op_single_use_p = true; > -- > 2.28.0 >
Re: [PATCH] SLP: fix SVE issues
On Mon, Oct 12, 2020 at 1:26 PM Richard Biener wrote: > > On Mon, Oct 12, 2020 at 12:59 PM Martin Liška wrote: > > > > The patch fixes the following 2 issues: > > > >.MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12); > > > > here we miss to return the last argument as stored value. > > > > ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4]; > > > > here we miss a bail out in vect_recog_over_widening_pattern. > > > > gcc/ChangeLog: > > > > PR tree-optimization/97079 > > * internal-fn.c (internal_fn_stored_value_index): Handle also > > .MASK_STORE_LANES. > > * tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail > > out for POLY_INT_CST. > > > > gcc/testsuite/ChangeLog: > > > > PR tree-optimization/97079 > > * gcc.target/aarch64/sve/pr97079.c: New test. > > > > I'm running tests on x86_64-linux-gnu and I would appreciate running that > > on a SVE aarch64 machine. > > > > Ready for master? > > Thanks, > > Martin > > > > --- > > gcc/internal-fn.c | 1 + > > .../gcc.target/aarch64/sve/pr97079.c | 22 +++ > > gcc/tree-vect-patterns.c | 2 ++ > > 3 files changed, 25 insertions(+) > > create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > > > > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c > > index 92cb3cd845a..792d2ca568a 100644 > > --- a/gcc/internal-fn.c > > +++ b/gcc/internal-fn.c > > @@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn) > > switch (fn) > > { > > case IFN_MASK_STORE: > > +case IFN_MASK_STORE_LANES: > > case IFN_SCATTER_STORE: > > case IFN_MASK_SCATTER_STORE: > > case IFN_LEN_STORE: > > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > > b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > > new file mode 100644 > > index 000..06e6a7cde94 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c > > @@ -0,0 +1,22 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-O3 -march=armv8.2-a+sve" } */ > > + > > +void g(void); > > + > > +int a[8][3]; > > +int b; > > +void c(void) > > +{ > > + int 
d[] = {7, 3}; > > + int *e = a[0]; > > + int f; > > + b = 0; > > + for (; b < 8; b++) > > +{ > > + f = 0; > > + for (; f < 3; f++) > > + a[b][f] = 0; > > +} > > + g(); > > + *e = (long)d; > > +} > > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c > > index 71e4e106202..6302bc42f46 100644 > > --- a/gcc/tree-vect-patterns.c > > +++ b/gcc/tree-vect-patterns.c > > @@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo, > > tree op = gimple_op (last_stmt, first_op + i); > > if (TREE_CODE (op) == INTEGER_CST) > > unprom[i].set_op (op, vect_constant_def); > > + else if (TREE_CODE (op) == POLY_INT_CST) > > + return NULL; > > can you make this a simpler > > else > return NULL; > > at the end of the if please? Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose you tried that? (it might need further adjustments downstream). Richard. > > Thanks, > Richard. > > > else if (TREE_CODE (op) == SSA_NAME) > > { > > bool op_single_use_p = true; > > -- > > 2.28.0 > >
Re: [PATCH] SLP: fix SVE issues
On 10/12/20 1:27 PM, Richard Biener wrote: Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose you tried that? (it might need further adjustments downstream). Yes, it can. But it seemed to me like an incorrect match: /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern stmt: patt_2 = (unsigned int) ivtmp_31; /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern stmt: patt_1 = (unsigned int) POLY_INT_CST [4, 4]; /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern stmt: patt_6 = patt_2 + patt_1; I don't know whether we can do such a cast? Martin
Re: [PATCH PR96757] aarch64: ICE during GIMPLE pass: vect
"duanbo (C)" writes: >> -Original Message- >> From: Richard Sandiford [mailto:richard.sandif...@arm.com] >> Sent: Wednesday, September 30, 2020 6:38 PM >> To: duanbo (C) >> Cc: GCC Patches >> Subject: Re: [PATCH PR96757] aarch64: ICE during GIMPLE pass: vect >> >> Thanks for the update, looks good apart from… >> >> "duanbo (C)" writes: >> > @@ -4361,7 +4391,7 @@ vect_recog_mask_conversion_pattern (vec_info >> *vinfo, >> >if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1), >> >TYPE_VECTOR_SUBPARTS (vectype2)) >> > && (TREE_CODE (rhs1) == SSA_NAME >> > -|| rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0 >> > +|| !rhs1_op0_type || !rhs1_op1_type)) >> >return NULL; >> >> …I think this should be: >> >>&& (TREE_CODE (rhs1) == SSA_NAME >>|| (!rhs1_op0_type && !rhs1_op1_type)) >> >> i.e. punt only if both types are already OK. If one operand wants a specific >> mask type, we should continue to the code below and attach the chosen >> type to the comparison. >> >> Although I guess this simplifies to: >> >> if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1), >> TYPE_VECTOR_SUBPARTS (vectype2)) >> && !rhs1_op0_type >> && !rhs1_op1_type) >> return NULL; >> >> (I think the comment above the code is still accurate with this change.) 
>> >> > @@ -4393,7 +4423,16 @@ vect_recog_mask_conversion_pattern >> (vec_info *vinfo, >> >if (TREE_CODE (rhs1) != SSA_NAME) >> >{ >> > tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); >> > -pattern_stmt = gimple_build_assign (tmp, rhs1); >> > +if (rhs1_op0_type && TYPE_PRECISION (rhs1_op0_type) >> > + != TYPE_PRECISION (rhs1_type)) >> > + rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0, >> > +vectype2, stmt_vinfo); >> > +if (rhs1_op1_type && TYPE_PRECISION (rhs1_op1_type) >> > + != TYPE_PRECISION (rhs1_type)) >> >> Very minor -- I would have fixed this up before committing if it wasn't for >> the >> above -- but: GCC formatting is instead: >> >>if (rhs1_op1_type >>&& TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION >> (rhs1_type)) >> >> LGTM with those changes, thanks. >> >> Richard >> >> > + rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1, >> > +vectype2, stmt_vinfo); >> > +pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1), >> > +rhs1_op0, rhs1_op1); >> > rhs1 = tmp; >> > append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt, >> vectype2, >> > rhs1_type); > > Sorry for the late reply. > I have modified the patch according to your suggestion, and it works well. Looks good, thanks. Pushed to trunk. Richard
[patch, committed] nvptx - invoke.texi: Update default of -misa (was: [committed][nvptx] Set -misa=sm_35 by default)
On 10/9/20 1:56 PM, Tom de Vries wrote: [nvptx] Set -misa=sm_35 by default I committed the attached follow-up commit as obvious, r11-3818-g91e4e16b550540723cca824b9674c7d8c43f4849 Tobias - Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter commit 91e4e16b550540723cca824b9674c7d8c43f4849 Author: Tobias Burnus Date: Mon Oct 12 13:13:20 2020 +0200 nvptx - invoke.texi: Update default of -misa Followup to commit 383400a6078d75bbfa1216c9af2c37f7e88740c9 gcc/ChangeLog * doc/invoke.texi (nvptx's -misa): Update default to sm_35. diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index b8c807e631c..307f4f5426c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -25593,7 +25593,7 @@ Generate code for 32-bit or 64-bit ABI. @opindex march Generate code for given the specified PTX ISA (e.g.@: @samp{sm_35}). ISA strings must be lower-case. Valid ISA strings include @samp{sm_30} and -@samp{sm_35}. The default ISA is sm_30. +@samp{sm_35}. The default ISA is sm_35. @item -mmainkernel @opindex mmainkernel
[PATCH, wwwdocs] gcc-11/changes: NVPTX: Mention new -misa=sm_35 default
Hi, Mention new -misa=sm_35 default for NVPTX target in the gcc 11 release notes. See also PR target/97348. Verified using the validator. OK? Thanks, - Tom gcc-11/changes: NVPTX: Mention new -misa=sm_35 default --- htdocs/gcc-11/changes.html | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/htdocs/gcc-11/changes.html b/htdocs/gcc-11/changes.html index f7417622..37b9127a 100644 --- a/htdocs/gcc-11/changes.html +++ b/htdocs/gcc-11/changes.html @@ -225,7 +225,13 @@ a work-in-progress. - +NVPTX + + The -misa default has changed from sm_30 + to sm_35. + + +
Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization
Hi, On Thu, 8 Oct 2020 at 16:22, Christophe Lyon wrote: > > On Thu, 8 Oct 2020 at 16:08, Dennis Zhang wrote: > > > > Hi Christophe, > > > > On 08/10/2020 14:14, Christophe Lyon wrote: > > > Hi, > > > > > > > > > On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches > > > wrote: > > >> > > >> On 9/16/20 4:00 PM, Dennis Zhang wrote: > > >>> Hi all, > > >>> > > >>> This patch enables SIMD modes for MVE auto-vectorization. > > >>> In this patch, the integer and float MVE SIMD modes are returned by > > >>> arm_preferred_simd_mode (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when > > >>> MVE or MVE_FLOAT is enabled. > > >>> Then the expanders for auto-vectorization can be used for generating MVE > > >>> SIMD code. > > >>> > > >>> This patch also fixes bugs in MVE vreiterpretq_*.c tests which are > > >>> revealed by the enabled MVE SIMD modes. > > >>> The tests are for checking the MVE reinterpret intrinsics. > > >>> There are two functions in each of the tests. The two functions contain > > >>> the pattern of identical code so that they are folded in icf pass. > > >>> Because of icf, the instruction count only checks one function which is > > >>> 8. > > >>> However when the SIMD modes are enabled, the estimation of the code size > > >>> becomes smaller so that inlining is applied after icf, then the > > >>> instruction count becomes 16 which causes failure of the tests. > > >>> Because the icf is not the expected pattern to be tested but causes > > >>> above issues, -fno-ipa-icf is applied to the tests to avoid unstable > > >>> instruction count. > > >>> > > >>> This patch is separated from > > >>> https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552104.html > > >>> because this part is not strongly connected to the aim of that one so > > >>> that causing confusion. > > >>> > > >>> Regtested and bootstraped. > > >>> > > >>> Is it OK for trunk please? 
> > >>> > > >>> Thanks > > >>> Dennis > > >>> > > >>> gcc/ChangeLog: > > >>> > > >>> 2020-09-15 Dennis Zhang > > >>> > > >>>* config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD > > >>> modes. > > >>> > > > > > > Since toolchain builds work again after Jakub's divmod fix, I'm now > > > facing another build error likely caused by this patch: > > > In file included from > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/coretypes.h:449:0, > > > from > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28: > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c: > > > In function 'machine_mode arm_preferred_simd_mode(scalar_mode)': > > > ./insn-modes.h:196:71: error: temporary of non-literal type > > > 'scalar_int_mode' in a constant expression > > > #define QImode (scalar_int_mode ((scalar_int_mode::from_int) E_QImode)) > > > ^ > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28970:12: > > > note: in expansion of macro 'QImode' > > > case QImode: > > > > > > and similarly for the other cases. > > > > > > Does the build work for you? > > > > > > Thanks, > > > > > > Christophe > > > > > > > Thanks for the report. Sorry to see the error. > > I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I > > didn't get this error. > > Could you please help to show the configuration you use for your build? > > I will test and fix at once. > > > > It fails on all of them for me. Does it work for you with current > master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b) > So... I guess you are using a host with GCC more recent than 4.8.5? :-) When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds, and after manually building with the same environment in the compute farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the build failure. 
It's a matter of replacing case QImode: with case E_QImode: Is the attached patch OK? Or do we instead want to revisit the minimum gcc version required to build gcc? Thanks, Christophe > > Thanks > > Dennis gcc-4.8.5 does not accept case clauses with non-literal type, which happens for "QImode" as it expands to (scalar_int_mode ((scalar_int_mode::from_int) E_QImode)). Use E_QImode instead in arm_preferred_simd_mode, to fix the build. Same for HImode, SImode, HFmode and SFmode as introduced by a recent patch. 2020-10-12 Christophe Lyon gcc/ * config/arm/arm.c (arm_preferred_simd_mode): Use E_FOOmode instead of FOOmode. diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 5d9c995..0b8c5fa 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -28967,11 +28967,11 @@ arm_preferred_simd_mode (scalar_mode mode) if (TARGET_HAVE_MVE) switch (mode) { - case QImode: + case E_QImode: return V16QImode; - case HImode: + case E_HImode: return V8HImode; - case SImod
Re: [patch, committed] nvptx - invoke.texi: Update default of -misa (was: [committed][nvptx] Set -misa=sm_35 by default)
On 10/12/20 1:34 PM, Tobias Burnus wrote: > On 10/9/20 1:56 PM, Tom de Vries wrote: > >> [nvptx] Set -misa=sm_35 by default > I committed the attached follow-up commit as obvious, > r11-3818-g91e4e16b550540723cca824b9674c7d8c43f4849 Thanks for catching this. Thanks, - Tom
Re: [PATCH] calls.c:can_implement_as_sibling_call_p REG_PARM_STACK_SPACE check
Ping? On Fri, Oct 02, 2020 at 05:03:50PM +0930, Alan Modra wrote: https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555309.html -- Alan Modra Australia Development Lab, IBM
Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)
Martin Sebor via Gcc-patches writes: > On 10/4/20 10:51 AM, H.J. Lu via Gcc-patches wrote: >> On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool >> wrote: >>> >>> On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches >>> wrote: On Linux/x86_64, c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit commit c34db4b6f8a5d80367c709309f9b00cb32630054 Author: Jan Hubicka Date: Sat Oct 3 17:20:16 2020 +0200 Track access ranges in ipa-modref caused >>> >>> [ ... ] >>> >>> This isn't a patch. Wrong mailing list? >> >> I view this as a follow up of >> >> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html >> >> What do people think about this kind of followups? Is this appropriate >> for this mailing list? > > A number of people routinely send emails similar to these to this > list to point out regressions on their targets. I find both kinds > of emails very useful and don't mind the additional traffic. +1 FWIW. I think it's great that we have this kind of automatic CI, and this seems like a natural place to send the reports. Shovelling them into bugzilla is likely to create more work rather than less, especially since the fix turnaround should (hopefully) be short. Richard
[PATCH] fix SLP subgraph detection wrt fully shared lanes
When a VEC_PERM SLP node just permutes existing lanes this confuses the SLP subgraph detection where I tried to elide a node-based visited hash-map in a way that doesn't work. Fixed by adding such. Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed. 2020-10-12 Richard Biener * tree-vect-slp.c (vect_bb_partition_graph_r): Use visited hash-map. (vect_bb_partition_graph): Likewise. --- gcc/tree-vect-slp.c | 17 ++--- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c index dd2042a4db5..8acef6f3cef 100644 --- a/gcc/tree-vect-slp.c +++ b/gcc/tree-vect-slp.c @@ -3254,18 +3254,19 @@ static void vect_bb_partition_graph_r (bb_vec_info bb_vinfo, slp_instance instance, slp_tree node, hash_map &stmt_to_instance, - hash_map &instance_leader) + hash_map &instance_leader, + hash_set &visited) { stmt_vec_info stmt_info; unsigned i; - bool all = true; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) { bool existed_p; slp_instance &stmt_instance = stmt_to_instance.get_or_insert (stmt_info, &existed_p); if (!existed_p) - all = false; + ; else if (stmt_instance != instance) { /* If we're running into a previously marked stmt make us the @@ -3279,15 +3280,15 @@ vect_bb_partition_graph_r (bb_vec_info bb_vinfo, } stmt_instance = instance; } - /* If not all stmts had been visited we have to recurse on children. */ - if (all) + + if (visited.add (node)) return; slp_tree child; FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance, -instance_leader); +instance_leader, visited); } /* Partition the SLP graph into pieces that can be costed independently. */ @@ -3302,13 +3303,15 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo) marked stmt, make the stmts leader the current SLP graph entry. 
*/ hash_map stmt_to_instance; hash_map instance_leader; + hash_set visited; slp_instance instance; for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i) { instance_leader.put (instance, instance); vect_bb_partition_graph_r (bb_vinfo, instance, SLP_INSTANCE_TREE (instance), -stmt_to_instance, instance_leader); +stmt_to_instance, instance_leader, +visited); } /* Then collect entries to each independent subgraph. */ -- 2.26.2
Re: [PATCH] Add if-chain to switch conversion pass.
On 10/6/20 4:12 PM, Jakub Jelinek wrote: On Tue, Oct 06, 2020 at 03:48:38PM +0200, Martin Liška wrote: On 10/6/20 9:47 AM, Richard Biener wrote: But is it really extensible with the current implementation? I doubt it. I must agree with the statement. So let's make the pass properly. I would need help with the algorithm, where I'm planning to do the following steps: 1) for each BB ending with a gcond, parse index variable and its VR; I'll support: a) index == 123 ([123, 123]) b) 1 <= index && index <= 9 ([1, 9]) c) index == 123 || index == 12345 ([123, 123] [12345, 12345]) d) index != 1 ([1, 1]) e) index != 1 && index != 5 ([1, 1] [5, 5]) The fold_range_test created cases are essential to support, so f) index - 123U < 456U ([123, 456+123]) g) (unsigned) index - 123U < 456U (ditto) but the discovery should actually recurse on all of those forms, so it will handle (unsigned) index - 123U < 456U || (unsigned) index - 16384U <= 32711U etc. You can see what reassoc init_range_entry does and do something similar? All right, I started to use init_range_entry in combination with linearize_expr_tree. One problem I have is that linearize_expr_tree doesn't properly mark all statements as visited for cases like: : index2.1_1 = (unsigned int) index2_16(D); _2 = index2.1_1 + 4294967196; _3 = _2 <= 100; _5 = index2.1_1 + 4294966996; _6 = _5 <= 33; _7 = _3 | _6; if (_7 != 0) goto ; [INV] else goto ; [INV] As seen, all statements in this BB are used by the final _7 != 0 and it would be handy for me to identify all statements that should be hoisted. Any thoughts on how I can achieve that? Thanks, Martin
Re: [PATCH] Add if-chain to switch conversion pass.
On 10/7/20 10:00 AM, Richard Biener wrote: As said I'd have a BB-local pass over BBs recording the index variable and the range covered by the BB's gcond, plus recording how many excess stmts there are for eventual code motion. Only after that BB-local pass start to group BBs in a walk from dominated to dominating BBs looking for common indexes and building a case vector. The main thing is to avoid repeatedly analyzing BBs' conditions (so the first pass could also be an on-demand precompute thing) and making the case vector build optimal. I have a patch that does that using the infrastructure from tree-ssa-reassoc. Now I would like to implement the code hoisting. Am I right that we want something like: if (index == C0) goto BB_0; else { BB1_to_hoist_stmts; if (index == C1) goto BB_1; else { BB2_to_hoist_stmts; if (index == C2) goto BB_2; else goto default_BB; } } to be converted into: switch(index) { case C0: goto BB_0; case C1: BB1_to_hoist_stmts; goto BB_1; case C2: BB1_to_hoist_stmts; BB2_to_hoist_stmts; goto BB_2; default: BB1_to_hoist_stmts; BB2_to_hoist_stmts; goto default_BB; } ? Thanks, Martin
Re: [PATCH] Add if-chain to switch conversion pass.
On Mon, Oct 12, 2020 at 02:39:24PM +0200, Martin Liška wrote: > All right, I started to use init_range_entry in combination with > linearize_expr_tree. > One thing I have problem with is that linearize_expr_tree doesn't properly > mark > all statements as visited for cases like: Not sure if linearize_expr_tree is what you want; with it you run into many reassoc dependencies (e.g. having computed uids and all that). My suggestion was to just copy and tweak init_range_entry (and reuse the fold_const range step stuff). There is no need to linearize anything, for what you want it doesn't matter if you process (x | y) | (z | w) where all of x, y, z, w are some comparisons, or x | (y | (z | w)) etc. All you want to ensure is that all the logical operations feeding each GIMPLE_COND are the same (all |s or all &s), and that they make sense also for the basic blocks, then for each of the subconditions find the ranges and verify that they all use the same index. And then I think you shouldn't hoist anything either, rather check that all the blocks but the first one are no_side_effect_bb (perhaps export that one from reassoc), thus when you turn that into a switch starting at the end of first bb, you can just throw away all the non-side-effects basic blocks. Or do you instead want to allow other stmts in those bbs and check that each of them is either consumed entirely in the same bb, or is consumed in the bbs dominated by the bb that the case label for the particular case would be added for, and sink the statements to that bb? > >: > index2.1_1 = (unsigned int) index2_16(D); > _2 = index2.1_1 + 4294967196; > _3 = _2 <= 100; > _5 = index2.1_1 + 4294966996; > _6 = _5 <= 33; > _7 = _3 | _6; > if (_7 != 0) > goto ; [INV] > else > goto ; [INV] > > As seen, all statements in this BB are used by the final _7 != 0 and it would > be handy for me to identify all statements that should be hoisted. > > Thoughts how can I achieve that? Jakub
Re: [PATCH] Add if-chain to switch conversion pass.
On 10/12/20 2:44 PM, Martin Liška wrote: On 10/7/20 10:00 AM, Richard Biener wrote: As said I'd have a BB-local pass over BBs recording the index variable and the range covered by the BB's gcond, plus recording how many excess stmts there are for eventual code motion. Only after that BB-local pass start to group BBs in a walk from dominated to dominating BBs looking for common indexes and building a case vector. The main thing is to avoid repeatedly analyzing BBs' conditions (so the first pass could also be an on-demand precompute thing) and making the case vector build optimal. I have a patch that does that using the infrastructure from tree-ssa-reassoc. Now I would like to implement the code hoisting. Am I right that we want something like: if (index == C0) goto BB_0; else { BB1_to_hoist_stmts; if (index == C1) goto BB_1; else { BB2_to_hoist_stmts; if (index == C2) goto BB_2; else goto default_BB; } } to be converted into: switch(index) { case C0: goto BB_0; case C1: BB1_to_hoist_stmts; goto BB_1; case C2: BB1_to_hoist_stmts; BB2_to_hoist_stmts; goto BB_2; default: BB1_to_hoist_stmts; BB2_to_hoist_stmts; goto default_BB; } ? Reading again Richi's comment: ifcombine simply hoists any stmts without side-effects up the dominator tree and thus only requires BBs without side-effects (IIRC there's a predicate fn for that). We likely want to hoist the statements "up" to the gswitch BB? Martin Thanks, Martin
Re: [PATCH] SLP: fix SVE issues
On Mon, Oct 12, 2020 at 1:31 PM Martin Liška wrote: > > On 10/12/20 1:27 PM, Richard Biener wrote: > > Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose > > you tried that? (it might need further adjustments downstream). > > Yes, it can. But it seemed to me like an incorrect match: > > /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern > stmt: patt_2 = (unsigned int) ivtmp_31; > /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern > stmt: patt_1 = (unsigned int) POLY_INT_CST [4, 4]; > /home/marxin/Programming/testcases/pr97079-2.c:10:12: note: extra pattern > stmt: patt_6 = patt_2 + patt_1; > > dunno if we can make such a casting? We should be able to constant-fold it at least. Richard. > Martin >
RE: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization
Hi Christophe, > -Original Message- > From: Gcc-patches On Behalf Of > Christophe Lyon via Gcc-patches > Sent: 12 October 2020 12:41 > To: Dennis Zhang > Cc: Richard Earnshaw ; nd ; > gcc-patches@gcc.gnu.org; Ramana Radhakrishnan > > Subject: Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization > > Hi, > > > On Thu, 8 Oct 2020 at 16:22, Christophe Lyon > wrote: > > > > On Thu, 8 Oct 2020 at 16:08, Dennis Zhang > wrote: > > > > > > Hi Christophe, > > > > > > On 08/10/2020 14:14, Christophe Lyon wrote: > > > > Hi, > > > > > > > > > > > > On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches > > > > wrote: > > > >> > > > >> On 9/16/20 4:00 PM, Dennis Zhang wrote: > > > >>> Hi all, > > > >>> > > > >>> This patch enables SIMD modes for MVE auto-vectorization. > > > >>> In this patch, the integer and float MVE SIMD modes are returned by > > > >>> arm_preferred_simd_mode > (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when > > > >>> MVE or MVE_FLOAT is enabled. > > > >>> Then the expanders for auto-vectorization can be used for > generating MVE > > > >>> SIMD code. > > > >>> > > > >>> This patch also fixes bugs in MVE vreiterpretq_*.c tests which are > > > >>> revealed by the enabled MVE SIMD modes. > > > >>> The tests are for checking the MVE reinterpret intrinsics. > > > >>> There are two functions in each of the tests. The two functions > contain > > > >>> the pattern of identical code so that they are folded in icf pass. > > > >>> Because of icf, the instruction count only checks one function which > is 8. > > > >>> However when the SIMD modes are enabled, the estimation of the > code size > > > >>> becomes smaller so that inlining is applied after icf, then the > > > >>> instruction count becomes 16 which causes failure of the tests. > > > >>> Because the icf is not the expected pattern to be tested but causes > > > >>> above issues, -fno-ipa-icf is applied to the tests to avoid unstable > > > >>> instruction count. 
> > > >>> > > > >>> This patch is separated from > > > >>> https://gcc.gnu.org/pipermail/gcc-patches/2020- > August/552104.html > > > >>> because this part is not strongly connected to the aim of that one so > > > >>> that causing confusion. > > > >>> > > > >>> Regtested and bootstraped. > > > >>> > > > >>> Is it OK for trunk please? > > > >>> > > > >>> Thanks > > > >>> Dennis > > > >>> > > > >>> gcc/ChangeLog: > > > >>> > > > >>> 2020-09-15 Dennis Zhang > > > >>> > > > >>>* config/arm/arm.c (arm_preferred_simd_mode): Enable MVE > SIMD modes. > > > >>> > > > > > > > > Since toolchain builds work again after Jakub's divmod fix, I'm now > > > > facing another build error likely caused by this patch: > > > > In file included from > > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc- > fsf/gccsrc/gcc/coretypes.h:449:0, > > > > from > > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc- > fsf/gccsrc/gcc/config/arm/arm.c:28: > > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc- > fsf/gccsrc/gcc/config/arm/arm.c: > > > > In function 'machine_mode arm_preferred_simd_mode(scalar_mode)': > > > > ./insn-modes.h:196:71: error: temporary of non-literal type > > > > 'scalar_int_mode' in a constant expression > > > > #define QImode (scalar_int_mode ((scalar_int_mode::from_int) > E_QImode)) > > > > > > > > ^ > > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc- > fsf/gccsrc/gcc/config/arm/arm.c:28970:12: > > > > note: in expansion of macro 'QImode' > > > > case QImode: > > > > > > > > and similarly for the other cases. > > > > > > > > Does the build work for you? > > > > > > > > Thanks, > > > > > > > > Christophe > > > > > > > > > > Thanks for the report. Sorry to see the error. > > > I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I > > > didn't get this error. > > > Could you please help to show the configuration you use for your build? > > > I will test and fix at once. > > > > > > > It fails on all of them for me. 
Does it work for you with current > > master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b) > > > > So... I guess you are using a host with GCC more recent than 4.8.5? :-) > When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds, > and after manually building with the same environment in the compute > farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the > build failure. > It's a matter of replacing > case QImode: > with > case E_QImode: > > Is the attached patch OK? Or do we instead want to revisit the minimum > gcc version required to build gcc? I'd rather go with this patch as long as it passes the usual testing. Thanks, Kyrill > > Thanks, > > Christophe > > > > > Thanks > > > Dennis
Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)
On Mon, 5 Oct 2020 at 17:19, Segher Boessenkool wrote: > > On Sun, Oct 04, 2020 at 09:51:23AM -0700, H.J. Lu wrote: > > On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool > > wrote: > > > On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches > > > wrote: > > > > On Linux/x86_64, > > > > > > > > c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit > > > > commit c34db4b6f8a5d80367c709309f9b00cb32630054 > > > > Author: Jan Hubicka > > > > Date: Sat Oct 3 17:20:16 2020 +0200 > > > > > > > > Track access ranges in ipa-modref > > > > > > > > caused > > > > > > [ ... ] > > > > > > This isn't a patch. Wrong mailing list? > > > > I view this as a follow up of > > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html > > But it *isn't* a follow-up of that mail. That is my point. Most of > these messages do not finger any particular patch even, I think? > That's why I kept the reporting part manual on my side: once you know which commit introduced a failure/regression (either via bisect, or by some other way), it's not always easy to identify the gcc-patches message to which you want to reply. And as already said in this thread, we certainly want to avoid sending a regression email for each test, multiplied by the number of configurations under test. > > What do people think about this kind of followups? Is this appropriate > > for this mailing list? > > Please just use bugzilla. And report bugs there the way they should be > reported: full command lines, full description of the errors, and > everything else needed to easily reproduce the problem. > It seems some people prefer such regressions reports in bugzilla, others in gcc-patches@. In general when I report a regression I noticed in the GCC testsuite, I tend to assume that the testname and GCC configure options are sufficient for a usual contributor to reproduce. Not sure if it matches "full" and "easily" in your mind? 
With all the automated builds where the build dir is removed from the server at the end whatever the result, it does take time if I have to reproduce the problem manually before reporting. Christophe > *Actually* following up to the patch mail could be useful (but you can > then just point to the bugzilla). Sending spam to gcc-patches@ is not > useful for most users of the list. > > > Segher
Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)
On Mon, Oct 12, 2020 at 3:27 PM Christophe Lyon via Gcc-patches wrote: > > On Mon, 5 Oct 2020 at 17:19, Segher Boessenkool > wrote: > > > > On Sun, Oct 04, 2020 at 09:51:23AM -0700, H.J. Lu wrote: > > > On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool > > > wrote: > > > > On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via > > > > Gcc-patches wrote: > > > > > On Linux/x86_64, > > > > > > > > > > c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit > > > > > commit c34db4b6f8a5d80367c709309f9b00cb32630054 > > > > > Author: Jan Hubicka > > > > > Date: Sat Oct 3 17:20:16 2020 +0200 > > > > > > > > > > Track access ranges in ipa-modref > > > > > > > > > > caused > > > > > > > > [ ... ] > > > > > > > > This isn't a patch. Wrong mailing list? > > > > > > I view this as a follow up of > > > > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html > > > > But it *isn't* a follow-up of that mail. That is my point. Most of > > these messages do not finger any particular patch even, I think? > > > > That's why I kept the reporting part manual on my side: once you know > which commit introduced a failure/regression (either via bisect, or by > some other way), it's not always easy to identify the gcc-patches > message to which you want to reply. > And as already said in this thread, we certainly want to avoid sending > a regression email for each test, multiplied by the number of > configurations under test. Definitely. > > > What do people think about this kind of followups? Is this appropriate > > > for this mailing list? > > > > Please just use bugzilla. And report bugs there the way they should be > > reported: full command lines, full description of the errors, and > > everything else needed to easily reproduce the problem. > > > It seems some people prefer such regressions reports in bugzilla, > others in gcc-patches@. We also want to avoid reporting a bug for each test, multiplied by the number of configurations under test. 
> In general when I report a regression I noticed in the GCC testsuite, > I tend to assume that the testname and GCC configure options are > sufficient for a usual contributor to reproduce. > Not sure if it matches "full" and "easily" in your mind? > > With all the automated builds where the build dir is removed from the > server at the end whatever the result, it does take time if I have to > reproduce the problem manually before reporting. And that's IMHO an important step - the human sanitizing of the report - eventually checking the issue isn't already fixed or reported. Richard. > > Christophe > > > *Actually* following up to the patch mail could be useful (but you can > > then just point to the bugzilla). Sending spam to gcc-patches@ is not > > useful for most users of the list. > > > > > > Segher
[PATCH] libstdc++: Apply proposed resolution for LWG 3449 [PR95322]
Now that the frontend bug PR96805 is fixed, we can apply the proposed resolution for this issue. This slightly deviates from the proposed resolution by declaring _CI a member of take_view instead of take_view::_Sentinel, since it doesn't depend on anything within _Sentinel anymore. Tested on x86_64-pc-linux-gnu, does this look OK for trunk? libstdc++-v3/ChangeLog: PR libstdc++/95322 * include/std/ranges (take_view::_CI): Define this alias template as per LWG 3449 and remove ... (take_view::_Sentinel::_CI): ... this type alias. (take_view::_Sentinel::operator==): Adjust use of _CI accordingly. Define a second overload that accepts an iterator of the opposite constness as per LWG 3449. (take_while_view::_Sentinel::operator==): Likewise. * testsuite/std/ranges/adaptors/95322.cc: Add tests for LWG 3449. --- libstdc++-v3/include/std/ranges | 23 +-- .../testsuite/std/ranges/adaptors/95322.cc| 28 ++- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 10f1f7b525b..19bc01a3b3a 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -1662,13 +1662,15 @@ namespace views class take_view : public view_interface> { private: + template + using _CI = counted_iterator< + iterator_t<__detail::__maybe_const_t<_Const, _Vp>>>; + template struct _Sentinel { private: using _Base = __detail::__maybe_const_t<_Const, _Vp>; - using _CI = counted_iterator>; - sentinel_t<_Base> _M_end = sentinel_t<_Base>(); public: @@ -1689,7 +1691,15 @@ namespace views base() const { return _M_end; } - friend constexpr bool operator==(const _CI& __y, const _Sentinel& __x) + friend constexpr bool + operator==(const _CI<_Const>& __y, const _Sentinel& __x) + { return __y.count() == 0 || __y.base() == __x._M_end; } + + template> + requires sentinel_for, iterator_t<_Base2>> + friend constexpr bool + operator==(const _CI<_OtherConst>& __y, const _Sentinel& __x) { return __y.count() == 0 || __y.base() == 
__x._M_end; } friend _Sentinel; @@ -1839,6 +1849,13 @@ namespace views operator==(const iterator_t<_Base>& __x, const _Sentinel& __y) { return __y._M_end == __x || !std::__invoke(*__y._M_pred, *__x); } + template> + requires sentinel_for, iterator_t<_Base2>> + friend constexpr bool + operator==(const iterator_t<_Base2>& __x, const _Sentinel& __y) + { return __y._M_end == __x || !std::__invoke(*__y._M_pred, *__x); } + friend _Sentinel; }; diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc b/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc index 67bc7d33917..41785a0a8fa 100644 --- a/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc +++ b/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc @@ -26,7 +26,7 @@ using __gnu_test::test_forward_range; void test01() { - // PR libstdc++/95322 and LWG 3488 + // PR libstdc++/95322 and LWG 3448 int a[2]{1, 2}; test_forward_range v{a}; auto view1 = v | std::views::take(2); @@ -51,8 +51,34 @@ test02() VERIFY( !eq ); } +void +test03() +{ + // LWG 3449, for take_view + int a[2]{1, 2}; + test_forward_range v{a}; + auto view1 = v | std::views::transform(std::identity{}); + auto view2 = view1 | std::views::take(2); + const bool eq = std::ranges::cbegin(view2) == std::ranges::end(view2); + VERIFY( !eq ); +} + +void +test04() +{ + // LWG 3449, for take_while_view + int a[2]{1, 2}; + test_forward_range v{a}; + auto view1 = v | std::views::transform(std::identity{}); + auto view2 = view1 | std::views::take_while([] (int i) { return true; }); + const bool eq = std::ranges::cbegin(view2) == std::ranges::end(view2); + VERIFY( !eq ); +} + int main() { test01(); test02(); + test03(); + test04(); } -- 2.29.0.rc0
[PATCH] libstdc++: Apply proposed resolution for LWG 3450
libstdc++-v3/ChangeLog: * include/std/ranges (take_while_view::begin): Constrain the const overload further as per LWG 3450. (take_while_view::end): Likewise. * testsuite/std/ranges/adaptors/take_while.cc: Add test for LWG 3450. --- libstdc++-v3/include/std/ranges| 2 ++ .../testsuite/std/ranges/adaptors/take_while.cc| 10 ++ 2 files changed, 12 insertions(+) diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges index 19bc01a3b3a..55e7c4de234 100644 --- a/libstdc++-v3/include/std/ranges +++ b/libstdc++-v3/include/std/ranges @@ -1888,6 +1888,7 @@ namespace views constexpr auto begin() const requires range + && indirect_unary_predicate> { return ranges::begin(_M_base); } constexpr auto @@ -1897,6 +1898,7 @@ namespace views constexpr auto end() const requires range + && indirect_unary_predicate> { return _Sentinel(ranges::end(_M_base), std::__addressof(*_M_pred)); } }; diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc b/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc index d587127b97e..48771397f1d 100644 --- a/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc +++ b/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc @@ -70,10 +70,20 @@ test03() b = ranges::end(v); } +void +test04() +{ + // LWG 3450 + auto v = views::single(1) | views::take_while([](int& x) { return true;}); + static_assert(ranges::range); + static_assert(!ranges::range); +} + int main() { test01(); test02(); test03(); + test04(); } -- 2.29.0.rc0
Re: [PATCH, wwwdocs] gcc-11/changes: NVPTX: Mention new -misa=sm_35 default
Hi Tom, On Mon, 12 Oct 2020, Tom de Vries wrote: > Mention new -misa=sm_35 default for NVPTX target in the gcc 11 release > notes. > > See also PR target/97348. > > Verified using the validator > > OK? yes, this is okay. FWIW, I am happy to review wwwdocs patches. As nvptx maintainer you can also self approve if you desire so. As you prefer. :-) Gerald
Re: [PING][PATCH] correct handling of indices into arrays with elements larger than 1 (PR c++/96511)
On 10/11/20 9:44 PM, Jason Merrill wrote: On 10/11/20 6:45 PM, Martin Sebor wrote: On 10/9/20 9:13 AM, Jason Merrill wrote: On 10/9/20 10:51 AM, Martin Sebor wrote: On 10/8/20 1:40 PM, Jason Merrill wrote: On 10/8/20 3:18 PM, Martin Sebor wrote: On 10/7/20 3:01 PM, Jason Merrill wrote: On 10/7/20 4:11 PM, Martin Sebor wrote: ... For the various member functions, please include the comments with the definition as well as the in-class declaration. Only one access_ref member function is defined out-of-line: offset_bounded(). I've adjusted the comment and copied it above the function definition. And size_remaining, as quoted above? I have this in my tree: /* Return the maximum amount of space remaining and if non-null, set argument to the minimum. */ I'll add it when I commit the patch. I also don't see a comment above the definition of offset_bounded in the new patch? There is a comment in the latest patch. ... The goal of conditionals is to avoid overwhelming the user with excessive numbers that may not be meaningful or even relevant to the warning. I've corrected the function body, tweaked and renamed the get_range function to get_offset_range to do a better job of extracting ranges from the types of some nonconstant expressions the front end passes it, and added a new test for all this. Attached is the new revision. offset_bounded looks unchanged in the new patch. It still returns true iff either the range is a single value or one of the bounds are unrepresentable in ptrdiff_t. I'm still unclear how this corresponds to "Return true if OFFRNG is bounded to a subrange of possible offset values." I don't think you're looking at the latest patch. It has this: +/* Return true if OFFRNG is bounded to a subrange of offset values + valid for the largest possible object. 
*/ + bool access_ref::offset_bounded () const { - if (offrng[0] == offrng[1]) - return false; - tree min = TYPE_MIN_VALUE (ptrdiff_type_node); tree max = TYPE_MAX_VALUE (ptrdiff_type_node); - return offrng[0] <= wi::to_offset (min) || offrng[1] >= wi::to_offset (max); + return wi::to_offset (min) <= offrng[0] && offrng[1] <= wi::to_offset (max); } Here's a link to it in the archive: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555019.html https://gcc.gnu.org/pipermail/gcc-patches/attachments/20200928/9026783a/attachment-0003.bin Ah, yes, there are two patches in that email; the first introduces the broken offset_bounded, and the second one fixes it without mentioning that in the ChangeLog. How about moving the fix to the first patch? Sure, I can do that. Anything else or is the final version okay to commit with this adjustment? OK with that adjustment. I've done more testing and found a bug in the second patch: adding an offset in an inverted range to an existing offset range isn't as simple as adding up the bounds because they mean different things: like an anti-range, an inverted range is a union of two subranges. Instead, the upper bound needs to be extended to PTRDIFF_MAX because that is the maximum being added, and the lower bound either reset to zero if the absolute value of the maximum being added is less than it, or incremented by the absolute value otherwise. For example, given: char a[8]; char *pa = a; char *p1 = pa + i; // i's range is [3, 5] char *p2 = p1 + j; // j's range is [1, -4] the range of p2's offset isn't [4, 1] but [4, PTRDIFF_MAX] (or more precisely [4, 8] if we assume it's valid). But the range of p3's valid offset in this last pointer char *p3 = p2 + k; // k's range is [5, -4] is all of [0, PTRDIFF_MAX] (or, more accurately, [0, 8]). This may seem obvious but it took me a while at first to wrap my head around. It makes sense, but doesn't seem obvious; a bit more comment might be nice. 
I just now noticed this suggestion, after pushing both patches. I'll keep it in mind and add something later. I've tweaked access_ref::add_offset in the patch to handle this correctly. The function now ensures that every offset is in a regular range (and not an inverted one). That in turn simplifies access_ref::size_remaining. Since an inverted range is the same as an anti-range, there's no reason to exclude the latter anymore(*). The diff on top of the approved patch is attached. I've retested this new revision of the patch with Glibc and GDB/ Binutils, (the latter fails due to PR 97360), and the Linux kernel. Please let me know if you have any questions or concerns with this change. If not, I'd like to commit it sometime tomorrow. Martin [*] I was curious how often these inverted ranges/anti-ranges come up in pointer arithmetic to see if handling them is worthwhile. I instrumented GCC to print them in get_range() on master where they are only looked at in calls to built-in functions, and in another patch I'm working on where they are looked at for every pointer addition. They accoun
Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization
Hi Christophe, On 12/10/2020 12:40, Christophe Lyon wrote: Hi, On Thu, 8 Oct 2020 at 16:22, Christophe Lyon wrote: On Thu, 8 Oct 2020 at 16:08, Dennis Zhang wrote: Hi Christophe, On 08/10/2020 14:14, Christophe Lyon wrote: Hi, On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches wrote: On 9/16/20 4:00 PM, Dennis Zhang wrote: Hi all, This patch enables SIMD modes for MVE auto-vectorization. In this patch, the integer and float MVE SIMD modes are returned by arm_preferred_simd_mode (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when MVE or MVE_FLOAT is enabled. Then the expanders for auto-vectorization can be used for generating MVE SIMD code. This patch also fixes bugs in MVE vreinterpretq_*.c tests which are revealed by the enabled MVE SIMD modes. The tests are for checking the MVE reinterpret intrinsics. There are two functions in each of the tests. The two functions contain the pattern of identical code so that they are folded in icf pass. Because of icf, the instruction count only checks one function which is 8. However when the SIMD modes are enabled, the estimation of the code size becomes smaller so that inlining is applied after icf, then the instruction count becomes 16 which causes failure of the tests. Because the icf is not the expected pattern to be tested but causes above issues, -fno-ipa-icf is applied to the tests to avoid unstable instruction count. This patch is separated from https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552104.html because this part is not strongly connected to the aim of that one and so was causing confusion. Regtested and bootstrapped. Is it OK for trunk please? Thanks Dennis gcc/ChangeLog: 2020-09-15 Dennis Zhang * config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes. 
Since toolchain builds work again after Jakub's divmod fix, I'm now facing another build error likely caused by this patch: In file included from /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/coretypes.h:449:0, from /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28: /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c: In function 'machine_mode arm_preferred_simd_mode(scalar_mode)': ./insn-modes.h:196:71: error: temporary of non-literal type 'scalar_int_mode' in a constant expression #define QImode (scalar_int_mode ((scalar_int_mode::from_int) E_QImode)) ^ /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28970:12: note: in expansion of macro 'QImode' case QImode: and similarly for the other cases. Does the build work for you? Thanks, Christophe Thanks for the report. Sorry to see the error. I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I didn't get this error. Could you please help to show the configuration you use for your build? I will test and fix at once. It fails on all of them for me. Does it work for you with current master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b) So... I guess you are using a host with GCC more recent than 4.8.5? :-) When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds, and after manually building with the same environment in the compute farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the build failure. It's a matter of replacing case QImode: with case E_QImode: Is the attached patch OK? Or do we instead want to revisit the minimum gcc version required to build gcc? Thanks, Christophe I've tested your patch and it works with my other patches depending on this one. So I agree this patch is OK. Thanks for the fix. Bests Dennis
Re: [Patch] libgomp: Add, if existing, -latomic to libgomp.spec --as-needed (was: Re: [RFC] Offloading and automatic linking of libraries)
Hi all, first: *PING*. secondly, I think the change to testsuite/lib/libgomp.exp's libgomp_init is also needed. (Hence, I now added it.) I have a too new system-installed libatomic to be sure that it fails without. OK? Tobias - Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander Walter libgomp: Add, if existing, -latomic to libgomp.spec --as-needed libgomp/ChangeLog: * acinclude.m4 (LIBGOMP_CHECK_LIBATOMIC): New; set @LIBATOMICSPEC@ is the target libatomic is built. * configure.ac: Call LIBGOMP_CHECK_LIBATOMIC. * libgomp.spec.in: Add @LIBATOMICSPEC@. * testsuite/lib/libgomp.exp (libgomp_init): Add libatomic unconditionally if the lib exists. * Makefile.in: Regenerate. * configure: Regenerate. * testsuite/Makefile.in: Regenerate. libgomp/Makefile.in | 1 + libgomp/acinclude.m4 | 63 libgomp/configure | 100 +- libgomp/configure.ac | 2 + libgomp/libgomp.spec.in | 2 +- libgomp/testsuite/Makefile.in | 1 + libgomp/testsuite/lib/libgomp.exp | 20 7 files changed, 175 insertions(+), 14 deletions(-) diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in index 00d5e2919ee..a8ec69f1822 100644 --- a/libgomp/Makefile.in +++ b/libgomp/Makefile.in @@ -395,6 +395,7 @@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ INTPTR_T_KIND = @INTPTR_T_KIND@ LD = @LD@ LDFLAGS = @LDFLAGS@ +LIBATOMICSPEC = @LIBATOMICSPEC@ LIBOBJS = @LIBOBJS@ LIBS = @LIBS@ LIBTOOL = @LIBTOOL@ diff --git a/libgomp/acinclude.m4 b/libgomp/acinclude.m4 index dbf54d06db9..3d7e5e08c3a 100644 --- a/libgomp/acinclude.m4 +++ b/libgomp/acinclude.m4 @@ -365,3 +365,66 @@ if test $enable_symvers != no ; then esac fi ]) + +dnl Check whether libatomic exists +AC_DEFUN([LIBGOMP_CHECK_LIBATOMIC], [ + LIBATOMICSPEC= + libgomp_libatomic=no + + if echo " ${TARGET_CONFIGDIRS} " | grep " libatomic " > /dev/null 2>&1 ; then +libgomp_libatomic=yes; + fi + + AC_MSG_CHECKING([for target-libatomic support]) + 
AC_MSG_RESULT([$libgomp_libatomic]) + + if test "x$libgomp_libatomic" = xyes; then +dnl Check whether -Wl,--as-needed resp. -Wl,-zignore is supported +dnl +dnl Turn warnings into error to avoid testsuite breakage. So enable +dnl AC_LANG_WERROR, but there's currently (autoconf 2.64) no way to turn +dnl it off again. As a workaround, save and restore werror flag like +dnl AC_PATH_XTRA. +dnl Cf. http://gcc.gnu.org/ml/gcc-patches/2010-05/msg01889.html +ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag=$ac_[]_AC_LANG_ABBREV[]_werror_flag +AC_CACHE_CHECK([whether --as-needed/-z ignore works], + [libgomp_cv_have_as_needed], + [ + # Test for native Solaris options first. + # No whitespace after -z to pass it through -Wl. + libgomp_cv_as_needed_option="-zignore" + libgomp_cv_no_as_needed_option="-zrecord" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -Wl,$libgomp_cv_as_needed_option -lm -Wl,$libompr_cv_no_as_needed_option" + libgomp_cv_have_as_needed=no + AC_LANG_WERROR + AC_LINK_IFELSE([AC_LANG_PROGRAM([])], + [libgomp_cv_have_as_needed=yes], + [libgomp_cv_have_as_needed=no]) + LDFLAGS="$save_LDFLAGS" + if test "x$libgomp_cv_have_as_needed" = xno; then + libgomp_cv_as_needed_option="--as-needed" + libgomp_cv_no_as_needed_option="--no-as-needed" + save_LDFLAGS="$LDFLAGS" + LDFLAGS="$LDFLAGS -Wl,$libgomp_cv_as_needed_option -lm -Wl,$libgomp_cv_no_as_needed_option" + libgomp_cv_have_as_needed=no + AC_LANG_WERROR + AC_LINK_IFELSE([AC_LANG_PROGRAM([])], + [libgomp_cv_have_as_needed=yes], + [libgomp_cv_have_as_needed=no]) + LDFLAGS="$save_LDFLAGS" + fi + ac_[]_AC_LANG_ABBREV[]_werror_flag=$ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag +]) + +dnl For static libgfortran linkage, depend on libquadmath only if needed. 
+if test "x$libgomp_cv_have_as_needed" = xyes; then + LIBATOMICSPEC="$libgomp_cv_as_needed_option -latomic $libgomp_cv_no_as_needed_option" +else + LIBATOMICSPEC="-latomic" +fi + fi + + dnl For the spec file + AC_SUBST(LIBATOMICSPEC) +]) diff --git a/libgomp/configure b/libgomp/configure index e48371d5093..a4d93974084 100755 --- a/libgomp/configure +++ b/libgomp/configure @@ -630,6 +630,8 @@ ac_includes_default="\ ac_header_list= ac_func_list= +ac_c_werror_flag= +ac_c_werror_flag= ac_subst_vars='am__EXEEXT_FALSE am__EXEEXT_TRUE LTLIBOBJS @@ -653,6 +655,7 @@ USE_FORTRAN_FALSE USE_FORTRAN_TRUE link_gomp tmake_file +LIBATOMICSPEC XLDFLAGS XCFLAGS config_path @@ -11429,7 +11432,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 11432 "configure" +#line 11435 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@
Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)
On Mon, Oct 12, 2020 at 01:24:44PM +0100, Richard Sandiford wrote: > Martin Sebor via Gcc-patches writes: > > On 10/4/20 10:51 AM, H.J. Lu via Gcc-patches wrote: > >> On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool > >> wrote: > >>> > >>> On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches > >>> wrote: > On Linux/x86_64, > > c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit > commit c34db4b6f8a5d80367c709309f9b00cb32630054 > Author: Jan Hubicka > Date: Sat Oct 3 17:20:16 2020 +0200 > > Track access ranges in ipa-modref > > caused > >>> > >>> [ ... ] > >>> > >>> This isn't a patch. Wrong mailing list? > >> > >> I view this as a follow up of > >> > >> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html > >> > >> What do people think about this kind of followups? Is this appropriate > >> for this mailing list? > > > > A number of people routinely send emails similar to these to this > > list to point out regressions on their targets. I find both kinds > > of emails very useful and don't mind the additional traffic. > > +1 FWIW. I think it's great that we have this kind of automatic CI, and > this seems like a natural place to send the reports. Shovelling them into > bugzilla is likely to create more work rather than less, especially since > the fix turnaround should (hopefully) be short. But send them as reply to the patch discussion then! Segher
Re: [PATCH] libstdc++: Apply proposed resolution for LWG 3449 [PR95322]
On 12/10/20 10:19 -0400, Patrick Palka via Libstdc++ wrote: Now that the frontend bug PR96805 is fixed, we can apply the proposed resolution for this issue. This slightly deviates from the proposed resolution by declaring _CI a member of take_view instead of take_view::_Sentinel, since it doesn't depend on anything within _Sentinel anymore. Tested on x86_64-pc-linux-gnu, does this look OK for trunk? OK, thanks.
Re: [PATCH] libstdc++: Apply proposed resolution for LWG 3450
On 12/10/20 10:19 -0400, Patrick Palka via Libstdc++ wrote: libstdc++-v3/ChangeLog: * include/std/ranges (take_while_view::begin): Constrain the const overload further as per LWG 3450. (take_while_view::end): Likewise. * testsuite/std/ranges/adaptors/take_while.cc: Add test for LWG 3450. OK, thanks.
[committed] analyzer: add warnings about writes to constant regions [PR95007]
This patch adds two new warnings: -Wanalyzer-write-to-const -Wanalyzer-write-to-string-literal for code paths where the analyzer detects a write to a constant region. As noted in the documentation part of the patch, the analyzer doesn't prioritize detection of such writes, in that the state-merging logic will blithely lose the distinction between const and non-const regions. Hence false negatives are likely to arise due to state-merging. However, if the analyzer does happen to spot such a write, it seems worth reporting, hence this patch. Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. Pushed to master as r11-3829-g3175d40fc52fb8eb3c3b18cc343d773da24434fb. gcc/analyzer/ChangeLog: * analyzer.opt (Wanalyzer-write-to-const): New. (Wanalyzer-write-to-string-literal): New. * region-model-impl-calls.cc (region_model::impl_call_memcpy): Call check_for_writable_region. (region_model::impl_call_memset): Likewise. (region_model::impl_call_strcpy): Likewise. * region-model.cc (class write_to_const_diagnostic): New. (class write_to_string_literal_diagnostic): New. (region_model::check_for_writable_region): New. (region_model::set_value): Call check_for_writable_region. * region-model.h (region_model::check_for_writable_region): New decl. gcc/ChangeLog: * doc/invoke.texi: Document -Wanalyzer-write-to-const and -Wanalyzer-write-to-string-literal. gcc/testsuite/ChangeLog: PR c/83347 PR middle-end/90404 PR analyzer/95007 * gcc.dg/analyzer/write-to-const-1.c: New test. * gcc.dg/analyzer/write-to-string-literal-1.c: New test. 
--- gcc/analyzer/analyzer.opt | 8 ++ gcc/analyzer/region-model-impl-calls.cc | 6 + gcc/analyzer/region-model.cc | 117 +- gcc/analyzer/region-model.h | 3 + gcc/doc/invoke.texi | 28 + .../gcc.dg/analyzer/write-to-const-1.c| 29 + .../analyzer/write-to-string-literal-1.c | 58 + 7 files changed, 248 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/write-to-const-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/write-to-string-literal-1.c diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt index a4d384211f3..c9df6dc7673 100644 --- a/gcc/analyzer/analyzer.opt +++ b/gcc/analyzer/analyzer.opt @@ -114,6 +114,14 @@ Wanalyzer-use-of-pointer-in-stale-stack-frame Common Var(warn_analyzer_use_of_pointer_in_stale_stack_frame) Init(1) Warning Warn about code paths in which a pointer to a stale stack frame is used. +Wanalyzer-write-to-const +Common Var(warn_analyzer_write_to_const) Init(1) Warning +Warn about code paths which attempt to write to a const object. + +Wanalyzer-write-to-string-literal +Common Var(warn_analyzer_write_to_string_literal) Init(1) Warning +Warn about code paths which attempt to write to a string literal. + Wanalyzer-too-complex Common Var(warn_analyzer_too_complex) Init(0) Warning Warn if the code is too complicated for the analyzer to fully explore. diff --git a/gcc/analyzer/region-model-impl-calls.cc b/gcc/analyzer/region-model-impl-calls.cc index 009b8c3ecb0..ef84e638992 100644 --- a/gcc/analyzer/region-model-impl-calls.cc +++ b/gcc/analyzer/region-model-impl-calls.cc @@ -305,6 +305,8 @@ region_model::impl_call_memcpy (const call_details &cd) return; } + check_for_writable_region (dest_reg, cd.get_ctxt ()); + /* Otherwise, mark region's contents as unknown. */ mark_region_as_unknown (dest_reg); } @@ -346,6 +348,8 @@ region_model::impl_call_memset (const call_details &cd) } } + check_for_writable_region (dest_reg, cd.get_ctxt ()); + /* Otherwise, mark region's contents as unknown. 
*/ mark_region_as_unknown (dest_reg); return false; @@ -397,6 +401,8 @@ region_model::impl_call_strcpy (const call_details &cd) cd.maybe_set_lhs (dest_sval); + check_for_writable_region (dest_reg, cd.get_ctxt ()); + /* For now, just mark region's contents as unknown. */ mark_region_as_unknown (dest_reg); } diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc index a88a295a241..480f25a3a4b 100644 --- a/gcc/analyzer/region-model.cc +++ b/gcc/analyzer/region-model.cc @@ -1532,16 +1532,131 @@ region_model::deref_rvalue (const svalue *ptr_sval, tree ptr_tree, return m_mgr->get_symbolic_region (ptr_sval); } +/* A subclass of pending_diagnostic for complaining about writes to + constant regions of memory. */ + +class write_to_const_diagnostic +: public pending_diagnostic_subclass +{ +public: + write_to_const_diagnostic (const region *reg, tree decl) + : m_reg (reg), m_decl (decl) + {} + + const char *get_kind () const FINAL OVERRIDE + { +return "write_to_const_diagnostic"; + } + + bool operator== (const write_to_const_diagnostic &other) cons
Re: [PING][PATCH v2] combine: Don't turn (mult (extend x) 2^n) into extract [PR96998]
Segher Boessenkool writes: > On Fri, Oct 09, 2020 at 09:38:09AM +0100, Alex Coplan wrote: >> Hi Segher, >> >> On 08/10/2020 15:20, Segher Boessenkool wrote: >> > On Thu, Oct 08, 2020 at 11:21:26AM +0100, Alex Coplan wrote: >> > > Ping. The kernel is still broken on AArch64. >> > >> > You *cannot* fix a correctness bug with a combine addition. >> >> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555158.html >> explains why we do precisely that. > > And it still is wrong. > >> Also, as your own testing confirmed, the patch does indeed fix the issue. > > No, it did not. It showed that before the patch the bug was hit, and > after it it was not. It does not show the bug was solved. I agree there's a target bug here. Please see the explanation I posted in: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554518.html (especially the first sentence quoted below :-)). The situation as things stand is that aarch64 has a bug: it accepts an odd sign_extract representation of addresses, but doesn't accept that same odd form of address as an LEA. We have two options: (a) add back instructions that recognise the odd form of LEA, or (b) remove the code that accepts the odd addresses I think (b) is the way to go here. But doing that on its own would regress code quality. The reason we recognised the odd addresses in the first place was because that was the rtl that combine happened to generate for an important case. So if we go for (b) but fix the aarch64 bug strictly before the combine patch, we would need to: (1) Apply the target fix and adjust the testsuite markup to make sure that the git commit doesn't regress anyone's test results. (2) Apply the combine patch and revert the testsuite markup changes from (1). That seems like make-work, and would still show as a blip for people doing performance tracking. If you prefer, we could fix the aarch64 bug and patch combine as a single commit. 
See: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554257.html for the full patch, including the aarch64 bugfix. Thanks, Richard
Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)
On Mon, Oct 12, 2020 at 03:26:38PM +0200, Christophe Lyon wrote: > That's why I kept the reporting part manual on my side: once you know > which commit introduced a failure/regression (either via bisect, or by > some other way), it's not always easy to identify the gcc-patches > message to which you want to reply. But it *should* be: the check-in subject should be in the patch mail, or failing that, at least the changelog entries should be! > > > What do people think about this kind of followups? Is this appropriate > > > for this mailing list? > > > > Please just use bugzilla. And report bugs there the way they should be > > reported: full command lines, full description of the errors, and > > everything else needed to easily reproduce the problem. > > > It seems some people prefer such regressions reports in bugzilla, > others in gcc-patches@. If it will be resolved quickly, and by just telling the author, email is fine of course. Otherwise, you need bugzilla. > In general when I report a regression I noticed in the GCC testsuite, > I tend to assume that the testname and GCC configure options are > sufficient for a usual contributor to reproduce. > Not sure if it matches "full" and "easily" in your mind? Tests are often run with multiple sets of options. If you give enough info that people can reproduce your configuration (hint: most bug reports do *not*), all is fine of course. But in general we *do* need all info (as documented in the bug reporting instructions), or we get a frustrating "I cannot reproduce this" game. > With all the automated builds where the build dir is removed from the > server at the end whatever the result, it does take time if I have to > reproduce the problem manually before reporting. Yes, and it is *easier* to reproduce for you than for other people! > > *Actually* following up to the patch mail could be useful (but you can > > then just point to the bugzilla). Sending spam to gcc-patches@ is not > > useful for most users of the list. 
^^^ Still my main point. Segher
[PUSHED] operator_trunc_mod::wi_fold: Return VARYING for mod by zero.
Division by zero should return VARYING, otherwise we propagate undefined all over the ranger and cause bad things to happen :). This fixes MOD 0 to also return VARYING. This is Andrew's patch. I forgot to use --author for proper patch attribution. Tested on x86-64 Linux. Pushed to trunk. gcc/ChangeLog: PR tree-optimization/97378 * range-op.cc (operator_trunc_mod::wi_fold): Return VARYING for mod by zero. gcc/testsuite/ChangeLog: * gcc.dg/pr97378.c: New test. --- gcc/range-op.cc| 6 +++--- gcc/testsuite/gcc.dg/pr97378.c | 15 +++ 2 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr97378.c diff --git a/gcc/range-op.cc b/gcc/range-op.cc index ce6ae2de20c..6108de367ad 100644 --- a/gcc/range-op.cc +++ b/gcc/range-op.cc @@ -1359,7 +1359,7 @@ operator_div::wi_fold (irange &r, tree type, // If we're definitely dividing by zero, there's nothing to do. if (wi_zero_p (type, divisor_min, divisor_max)) { - r.set_undefined (); + r.set_varying (type); return; } @@ -2624,10 +2624,10 @@ operator_trunc_mod::wi_fold (irange &r, tree type, signop sign = TYPE_SIGN (type); unsigned prec = TYPE_PRECISION (type); - // Mod 0 is undefined. Return undefined. + // Mod 0 is undefined. if (wi_zero_p (type, rh_lb, rh_ub)) { - r.set_undefined (); + r.set_varying (type); return; } diff --git a/gcc/testsuite/gcc.dg/pr97378.c b/gcc/testsuite/gcc.dg/pr97378.c new file mode 100644 index 000..27e4a1f4321 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97378.c @@ -0,0 +1,15 @@ +// { dg-do compile } +// { dg-options "-O2" } + +int a, b, c; +void d() { +e : { + long f; + long *g = &f; + if ((a != 0) - (b = 0)) +; + else +a &= (*g %= a *= c) >= (*g || f); + goto e; +} +} -- 2.26.2
Re: [PING][PATCH v2] combine: Don't turn (mult (extend x) 2^n) into extract [PR96998]
On Mon, Oct 12, 2020 at 05:19:58PM +0100, Richard Sandiford wrote: > Segher Boessenkool writes: > > On Fri, Oct 09, 2020 at 09:38:09AM +0100, Alex Coplan wrote: > >> Hi Segher, > >> > >> On 08/10/2020 15:20, Segher Boessenkool wrote: > >> > On Thu, Oct 08, 2020 at 11:21:26AM +0100, Alex Coplan wrote: > >> > > Ping. The kernel is still broken on AArch64. > >> > > >> > You *cannot* fix a correctness bug with a combine addition. > >> > >> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555158.html > >> explains why we do precisely that. > > > > And it still is wrong. > > > >> Also, as your own testing confirmed, the patch does indeed fix the issue. > > > > No, it did not. It showed that before the patch the bug was hit, and > > after it it was not. It does not show the bug was solved. > > I agree there's a target bug here. Please see the explanation I posted > in: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554518.html > (especially the first sentence quoted below :-)). > > The situation as things stand is that aarch64 has a bug: it accepts > an odd sign_extract representation of addresses, but doesn't accept > that same odd form of address as an LEA. We have two options: > > (a) add back instructions that recognise the odd form of LEA, or > (b) remove the code that accepts the odd addresses > > I think (b) is the way to go here. Either seems to be fine. > But doing that on its own > would regress code quality. The reason we recognised the odd > addresses in the first place was because that was the rtl that > combine happened to generate for an important case. > > So if we go for (b) but fix the aarch64 bug strictly before the > combine patch, we would need to: This is necessary to be able to evaluate what such a combine patch does in practice -- so there is no other way. > (1) Apply the target fix and adjust the testsuite markup to make sure > that the git commit doesn't regress anyone's test results. 
It is normal to regress the testsuite for a little while. > (2) Apply the combine patch and revert the testsuite markup changes > from (1). > > That seems like make-work, and would still show as a blip for > people doing performance tracking. Yes, that is make-work. Just regress the testsuite. You do not even have to apply the target patch first (but you need to send it as separate patch, so that other people can test it!) > If you prefer, we could fix the aarch64 bug and patch combine as a > single commit. See: > > https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554257.html > > for the full patch, including the aarch64 bugfix. I need separate patches, so that I can see what the current combine does, without ICEing all over. That is all. Send it as a series of two patches, or something. Segher
[committed] libstdc++: Update doxyfile to Doxygen 1.8.20 format
libstdc++-v3/ChangeLog: * doc/doxygen/user.cfg.in: Update to Doxygen 1.8.20 format. Tested x86_64-linux. Committed to trunk. commit c840700852c69b2bf7a73df77d8d9f0658330dae Author: Jonathan Wakely Date: Mon Oct 12 18:14:01 2020 libstdc++: Update doxyfile to Doxygen 1.8.20 format libstdc++-v3/ChangeLog: * doc/doxygen/user.cfg.in: Update to Doxygen 1.8.20 format. diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in b/libstdc++-v3/doc/doxygen/user.cfg.in index 4495b1c9ede..9b49a15d31b 100644 --- a/libstdc++-v3/doc/doxygen/user.cfg.in +++ b/libstdc++-v3/doc/doxygen/user.cfg.in @@ -1,4 +1,4 @@ -# Doxyfile 1.8.14 +# Doxyfile 1.8.20 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -17,10 +17,10 @@ # Project related configuration options #--- -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all text -# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv -# built into libc) for the transcoding. See +# This tag specifies the encoding used for all characters in the configuration +# file that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. @@ -93,6 +93,14 @@ ALLOW_UNICODE_NAMES= NO OUTPUT_LANGUAGE= English +# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all generated output in the proper direction. +# Possible values are: None, LTR, RTL and Context. +# The default value is: None. 
+ +OUTPUT_TEXT_DIRECTION = None + # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -179,6 +187,16 @@ SHORT_NAMES= @shortname@ JAVADOC_AUTOBRIEF = NO +# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line +# such as +# /*** +# as being the beginning of a Javadoc-style comment "banner". If set to NO, the +# Javadoc-style will behave just like regular comments and it will not be +# interpreted by doxygen. +# The default value is: NO. + +JAVADOC_BANNER = NO + # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus @@ -199,6 +217,14 @@ QT_AUTOBRIEF = NO MULTILINE_CPP_IS_BRIEF = YES +# By default Python docstrings are displayed as preformatted text and doxygen's +# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the +# doxygen's special commands can be used and the contents of the docstring +# documentation blocks is shown as doxygen documentation. +# The default value is: YES. + +PYTHON_DOCSTRING = YES + # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. @@ -228,17 +254,15 @@ TAB_SIZE = 8 # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. 
+# When you need a literal { or } or , in the value part of an alias you have to +# escape them by means of a backslash (\), this can lead to conflicts with the +# commands \{ and \} for these it is advised to use the version @{ and @} or use +# a double escape (\\{ and \\}) ALIASES= "doctodo=@todo Needs documentation! See http://gcc.gnu.org/onlinedocs/libstdc++/manual/documentation_style.html"; \ "headername{1}=Instead, include \<\1\>." \ "headername{2}=Instead, include \<\1\> or \<\2\>." -# This tag can be used to specify a number of word-keyword mappings (TCL only). -# A mapping has the form "name=value". For example adding "class=itcl::class" -# will allow you to use the command class in the itcl::class meaning. - -TCL_SUBST = - # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all @@ -267,17 +291,26 @@ OPTIMIZE_FOR_FORTRAN = NO OPTIMIZE_OUT
[committed] libstdc++: Include C++17 features in doxygen API docs
libstdc++-v3/ChangeLog: * doc/doxygen/user.cfg.in (PREDEFINED): Use __cplusplus=201703L so that C++17 features are documented. Tested x86_64-linux. Committed to trunk. commit 925eb6a0fbf5aa6e99e8b461a41cbeeab507afcd Author: Jonathan Wakely Date: Mon Oct 12 18:14:01 2020 libstdc++: Include C++17 features in doxygen API docs libstdc++-v3/ChangeLog: * doc/doxygen/user.cfg.in (PREDEFINED): Use __cplusplus=201703L so that C++17 features are documented. diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in b/libstdc++-v3/doc/doxygen/user.cfg.in index 19f8ffd8230..4495b1c9ede 100644 --- a/libstdc++-v3/doc/doxygen/user.cfg.in +++ b/libstdc++-v3/doc/doxygen/user.cfg.in @@ -2250,7 +2250,7 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. -PREDEFINED = __cplusplus=201402L \ +PREDEFINED = __cplusplus=201703L \ __GTHREADS \ _GLIBCXX_HAS_GTHREADS \ _GLIBCXX_HAVE_TLS \
Re: make sincos take type from intrinsic formal, not from result assignment
Hello, Martin, On Oct 12, 2020, Martin Liška wrote: > It seems the patch caused quite some clang warnings: > /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/gcc/builtins.c:2366:5: > warning: comparison of different enumeration types in switch statement > ('combined_fn' and 'built_in_function') [-Wenum-compare-switch] > ... Thanks for the report. > Can we please fix them? Here's the patch I'm just about to begin regstrapping. I'll check it in, as obvious, once I'm done. Thanks again, mathfn_built_in_type case type fix Martin Liška reported warnings about type mismatches in the cases in the recently-introduced mathfn_built_in_type. This patch adjusts the macros to use the combined_fn enumerators rather than the (currently same-numbered) built_in_function ones. for gcc/ChangeLog * builtins.c (mathfn_built_in_type): Use CFN_ enumerators. --- gcc/builtins.c | 26 +- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gcc/builtins.c b/gcc/builtins.c index 3a77da2..3f799e5 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -2451,37 +2451,37 @@ tree mathfn_built_in_type (combined_fn fn) { #define CASE_MATHFN(MATHFN)\ - case BUILT_IN_##MATHFN: \ + case CFN_BUILT_IN_##MATHFN: \ return double_type_node; \ - case BUILT_IN_##MATHFN##F: \ + case CFN_BUILT_IN_##MATHFN##F: \ return float_type_node;\ - case BUILT_IN_##MATHFN##L: \ + case CFN_BUILT_IN_##MATHFN##L: \ return long_double_type_node; #define CASE_MATHFN_FLOATN(MATHFN) \ CASE_MATHFN(MATHFN) \ - case BUILT_IN_##MATHFN##F16: \ + case CFN_BUILT_IN_##MATHFN##F16: \ return float16_type_node; \ - case BUILT_IN_##MATHFN##F32: \ + case CFN_BUILT_IN_##MATHFN##F32: \ return float32_type_node; \ - case BUILT_IN_##MATHFN##F64: \ + case CFN_BUILT_IN_##MATHFN##F64: \ return float64_type_node; \ - case BUILT_IN_##MATHFN##F128:\ + case CFN_BUILT_IN_##MATHFN##F128:\ return float128_type_node; \ - case BUILT_IN_##MATHFN##F32X:\ + case CFN_BUILT_IN_##MATHFN##F32X:\ return float32x_type_node; \ - case 
BUILT_IN_##MATHFN##F64X:\ + case CFN_BUILT_IN_##MATHFN##F64X:\ return float64x_type_node; \ - case BUILT_IN_##MATHFN##F128X: \ + case CFN_BUILT_IN_##MATHFN##F128X: \ return float128x_type_node; /* Similar to above, but appends _R after any F/L suffix. */ #define CASE_MATHFN_REENT(MATHFN) \ - case BUILT_IN_##MATHFN##_R: \ + case CFN_BUILT_IN_##MATHFN##_R: \ return double_type_node; \ - case BUILT_IN_##MATHFN##F_R: \ + case CFN_BUILT_IN_##MATHFN##F_R: \ return float_type_node;\ - case BUILT_IN_##MATHFN##L_R: \ + case CFN_BUILT_IN_##MATHFN##L_R: \ return long_double_type_node; switch (fn) -- Alexandre Oliva, happy hacker https://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer
Merge from master to gccgo branch
I merged master revision 3175d40fc52fb8eb3c3b18cc343d773da24434fb to the gccgo branch. Ian
[PUSHED] Do not ignore failures from gimple_range_calc_op2.
From: Andrew MacLeod [posting on behalf of Andrew] We were ignoring the return value if op2 returned false and getting garbage ranges propagated. Tested on x86-64 Linux. Pushed. gcc/ChangeLog: PR tree-optimization/97381 * gimple-range-gori.cc (gori_compute::compute_operand2_range): If a range cannot be calculated through operand 2, return false. gcc/testsuite/ChangeLog: * gcc.dg/pr97381.c: New test. --- gcc/gimple-range-gori.cc | 5 +++-- gcc/testsuite/gcc.dg/pr97381.c | 13 + 2 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr97381.c diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc index 986427669a7..c4bfc658319 100644 --- a/gcc/gimple-range-gori.cc +++ b/gcc/gimple-range-gori.cc @@ -920,8 +920,9 @@ gori_compute::compute_operand2_range (irange &r, gimple *stmt, expr_range_in_bb (op2_range, op2, gimple_bb (stmt)); // Intersect with range for op2 based on lhs and op1. - if (gimple_range_calc_op2 (r, stmt, lhs, op1_range)) -op2_range.intersect (r); + if (!gimple_range_calc_op2 (r, stmt, lhs, op1_range)) +return false; + op2_range.intersect (r); gimple *src_stmt = SSA_NAME_DEF_STMT (op2); // If def stmt is outside of this BB, then name must be an import. diff --git a/gcc/testsuite/gcc.dg/pr97381.c b/gcc/testsuite/gcc.dg/pr97381.c new file mode 100644 index 000..947692cb1f6 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr97381.c @@ -0,0 +1,13 @@ +// { dg-do compile } +// { dg-options "-O2" } + +int a; +void b() { + char c = 27; + for (; c <= 85; c += 1) { +a /= 148372120 * c; +if (a) + for (;;) +; + } +} -- 2.26.2
Re: [PATCH] MIPS/libphobos: Fix switchcontext.S assembly for MIPS I ISA
On Thu, 8 Oct 2020, Iain Buclaw wrote: > > Noticed in a build of a MIPS I toolchain. I have no way to run MIPS > > regression-testing right now, however in `libopcodes' the L.D and S.D > > instructions are strict aliases valid for the MIPS II and higher ISAs, and > > just to double-check that I have built MIPS32r2 GCC with and without the > > change applied and verified with `objdump' that the respective target > > objects produced are identical. > > > > OK to apply to trunk, and -- as a fatal compilation error -- to backport > > to active release branches? > > > > Fine with me, thanks. Applied to trunk, thanks. Jakub, Richard: I should have cc-ed you for the backports to GCC 8/9/10. OK to backport as a fatal build failure fix, or shall we leave this as it stands? FAOD the L.D and S.D assembly instructions have been supported in binutils as long as the MIPS port has, i.e. from: commit 45b1470513cfef2af6fd5532d33a54a840b4600a Author: Ian Lance Taylor Date: Wed Aug 18 19:40:37 1993 + Maciej
[r11-3827 Regression] FAIL: g++.dg/asan/asan_test.C -O2 (test for excess errors) on Linux/x86_64
On Linux/x86_64, 83685efd5fd1623cfc4e4c435ce2773d95d458d1 is the first bad commit commit 83685efd5fd1623cfc4e4c435ce2773d95d458d1 Author: Martin Sebor Date: Fri Oct 9 14:48:43 2020 -0600 Generalize compute_objsize to return maximum size/offset instead of failing (PR middle-end/97023). caused FAIL: gcc.dg/Wstringop-overflow-47.c (test for warnings, line 29) FAIL: gcc.dg/Wstringop-overflow-47.c (test for warnings, line 32) FAIL: gcc.dg/Wstringop-overflow-47.c (test for warnings, line 37) FAIL: g++.dg/asan/asan_test.C -O2 (test for excess errors) with GCC configured with Configured with: ../../gcc/configure --prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3827/usr --enable-clocale=gnu --with-system-zlib --with-demangler-in-ld --with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl --enable-libmpx x86_64-linux --disable-bootstrap To reproduce: $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Wstringop-overflow-47.c --target_board='unix{-m32\ -march=cascadelake}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/Wstringop-overflow-47.c --target_board='unix{-m64\ -march=cascadelake}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m32}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m32\ -march=cascadelake}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m64}'" $ cd {build_dir}/gcc && make check RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m64\ -march=cascadelake}'" (Please do not reply to this email, for question about this report, contact me at skpgkp2 at gmail dot com)
Re: [PATCH 2a/5] rs6000, vec_rlnm builtin fix arguments
Will, Segher: This patch fixes an error in how the vec_rlnm() builtin parameters are handled. The current tests for this builtin are compile only. The issue was found in the patch that adds the 128-bit operands to the vec_rlnm() builtin. The new test for the 128-bit operands is a compile and run test. Re-tested the patch on Power 9 with no regression errors. Carl - gcc/ChangeLog 2020-10-08 Carl Love * config/rs6000/altivec.h (vec_rlnm): Fix bug in argument generation. --- gcc/config/rs6000/altivec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 8a2dcda0144..f7720d136c9 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -183,7 +183,7 @@ #define vec_recipdiv __builtin_vec_recipdiv #define vec_rlmi __builtin_vec_rlmi #define vec_vrlnm __builtin_vec_rlnm -#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b))) +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c))) #define vec_rsqrt __builtin_vec_rsqrt #define vec_rsqrte __builtin_vec_rsqrte #define vec_signed __builtin_vec_vsigned -- 2.17.1
Re: [PATCH 3/5] Add TI to TD (128-bit DFP) and TD to TI support
Will, Segher: This patch adds support for converting to/from 128-bit integers and 128-bit decimal floating point formats. Updated ChangeLog comments. Fixed up comments in the test program. Re-tested the patch on Power 9 with no regression errors. Carl --- gcc/ChangeLog 2020-10-12 Carl Love * config/rs6000/dfp.md (floattitd2, fixtdti2): New define_insns. * config/rs6000/rs6000-call.c (P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P): New overloaded definitions. gcc/testsuite/ChangeLog 2020-10-12 Carl Love * gcc.target/powerpc/int_128bit-runnable.c: Add 128-bit DFP conversion tests. --- gcc/config/rs6000/dfp.md | 14 + gcc/config/rs6000/rs6000-call.c | 4 ++ .../gcc.target/powerpc/int_128bit-runnable.c | 61 +++ 3 files changed, 79 insertions(+) diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md index 8f822732bac..0e82e315fee 100644 --- a/gcc/config/rs6000/dfp.md +++ b/gcc/config/rs6000/dfp.md @@ -222,6 +222,13 @@ "dcffixq %0,%1" [(set_attr "type" "dfp")]) +(define_insn "floattitd2" + [(set (match_operand:TD 0 "gpc_reg_operand" "=d") + (float:TD (match_operand:TI 1 "gpc_reg_operand" "v")))] + "TARGET_POWER10" + "dcffixqq %0,%1" + [(set_attr "type" "dfp")]) + ;; Convert a decimal64/128 to a decimal64/128 whose value is an integer. ;; This is the first stage of converting it to an integer type. 
@@ -241,6 +248,13 @@ "TARGET_DFP" "dctfix %0,%1" [(set_attr "type" "dfp")]) + +(define_insn "fixtdti2" + [(set (match_operand:TI 0 "gpc_reg_operand" "=v") + (fix:TI (match_operand:TD 1 "gpc_reg_operand" "d")))] + "TARGET_POWER10" + "dctfixqq %0,%1" + [(set_attr "type" "dfp")]) ;; Decimal builtin support diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index 87fff5c1c80..8d00a25d806 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -4967,6 +4967,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V2DI, 0 }, { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P, +RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, @@ -5074,6 +5076,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_bool_V2DI, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P, +RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P, diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c index 85ad544e22b..9d281850ee3 100644 --- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c @@ -38,6 +38,7 @@ #if DEBUG #include #include +#include void print_i128(__int128_t val) @@ -59,6 +60,13 @@ int main () __int128_t arg1, result; __uint128_t uarg2; + _Decimal128 arg1_dfp128, result_dfp128, expected_result_dfp128; + + 
struct conv_t { +__uint128_t u128; +_Decimal128 d128; + } conv, conv2; + vector signed long long int vec_arg1_di, vec_arg2_di; vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di; vector unsigned long long int vec_uresult_di; @@ -2249,6 +2257,59 @@ int main () abort(); #endif } + + /* DFP to __int128 and __int128 to DFP conversions */ + /* Print the DFP value as an unsigned int so we can see the bit patterns. */ + conv.u128 = 0x2208ULL; + conv.u128 = (conv.u128 << 64) | 0x4ULL; //DFP bit pattern for integer 4 + expected_result_dfp128 = conv.d128; + arg1 = 4; + + conv.d128 = (_Decimal128) arg1; + + result_dfp128 = (_Decimal128) arg1; + if (((conv.u128 >>64) != 0x2208ULL) && + ((conv.u128 & 0x) != 0x4ULL)) { +#if DEBUG +printf("ERROR: convert int128 value "); +print_i128 (arg1); +conv.d128 = result_dfp128; +printf("\nto DFP value 0x%llx %llx (printed as hex bit string) ", + (unsigned long long)((conv.u128) >>64), + (unsigned long long)((conv.u128) & 0x)); + +conv.d128 = expected_result_dfp128; +printf("\ndoes not match expected_result = 0x%llx %llx\n\n", + (unsigned long long) (conv.u128>>64), + (unsigned long long) (conv.u128 &
Re: [PATCH 2b/5] RS6000 add 128-bit Integer Operations
Will, Segher: This patch adds the 128-bit integer support for divide, modulo, shift, compare of 128-bit integers instructions and builtin support. Fixed the references to 128-bit in ChangeLog that got missed in the last go round. Fixed missing spaces in emit_insn calls. Re-tested the patch on Power 9 with no regression errors. Carl -- gcc/ChangeLog 2020-10-08 Carl Love * config/rs6000/altivec.h (vec_signextq, vec_dive, vec_mod): Add define for new builtins. * config/rs6000/altivec.md (UNSPEC_VMULEUD, UNSPEC_VMULESD, UNSPEC_VMULOUD, UNSPEC_VMULOSD): New unspecs. (altivec_eqv1ti, altivec_gtv1ti, altivec_gtuv1ti, altivec_vmuleud, altivec_vmuloud, altivec_vmulesd, altivec_vmulosd, altivec_vrlq, altivec_vrlqmi, altivec_vrlqmi_inst, altivec_vrlqnm, altivec_vrlqnm_inst, altivec_vslq, altivec_vsrq, altivec_vsraq, altivec_vcmpequt_p, altivec_vcmpgtst_p, altivec_vcmpgtut_p): New define_insn. (vec_widen_umult_even_v2di, vec_widen_smult_even_v2di, vec_widen_umult_odd_v2di, vec_widen_smult_odd_v2di, altivec_vrlqmi, altivec_vrlqnm): New define_expands. * config/rs6000/rs6000-builtin.def (VCMPEQUT_P, VCMPGTST_P, VCMPGTUT_P): Add macro expansions. (VCMPGTUT, VCMPGTST, VCMPEQUT, CMPNET, CMPGE_1TI, CMPGE_U1TI, CMPLE_1TI, CMPLE_U1TI, VNOR_V1TI_UNS, VNOR_V1TI, VCMPNET_P, VCMPAET_P, VSIGNEXTSD2Q, VMULEUD, VMULESD, VMULOUD, VMULOSD, VRLQ, VSLQ, VSRQ, VSRAQ, VRLQNM, DIV_V1TI, UDIV_V1TI, DIVES_V1TI, DIVEU_V1TI, MODS_V1TI, MODU_V1TI, VRLQMI): New macro expansions. (VRLQ, VSLQ, VSRQ, VSRAQ, DIVE, MOD, SIGNEXT): New overload expansions. 
* config/rs6000/rs6000-call.c (P10_BUILTIN_VCMPEQUT, P10V_BUILTIN_CMPGE_1TI, P10V_BUILTIN_CMPGE_U1TI, P10V_BUILTIN_VCMPGTUT, P10V_BUILTIN_VCMPGTST, P10V_BUILTIN_CMPLE_1TI, P10V_BUILTIN_VCMPLE_U1TI, P10V_BUILTIN_DIV_V1TI, P10V_BUILTIN_UDIV_V1TI, P10V_BUILTIN_VMULESD, P10V_BUILTIN_VMULEUD, P10V_BUILTIN_VMULOSD, P10V_BUILTIN_VMULOUD, P10V_BUILTIN_VNOR_V1TI, P10V_BUILTIN_VNOR_V1TI_UNS, P10V_BUILTIN_VRLQ, P10V_BUILTIN_VRLQMI, P10V_BUILTIN_VRLQNM, P10V_BUILTIN_VSLQ, P10V_BUILTIN_VSRQ, P10V_BUILTIN_VSRAQ, P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_VCMPEQUT_P, P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_CMPNET, P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P, P10V_BUILTIN_VSIGNEXTSD2Q, P10V_BUILTIN_DIVES_V1TI, P10V_BUILTIN_MODS_V1TI, P10V_BUILTIN_MODU_V1TI): New overloaded definitions. (rs6000_gimple_fold_builtin) [P10V_BUILTIN_VCMPEQUT, P10_BUILTIN_CMPNET, P10_BUILTIN_CMPGE_1TI, P10_BUILTIN_CMPGE_U1TI, P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPGTST, P10_BUILTIN_CMPLE_1TI, P10_BUILTIN_CMPLE_U1TI]: New case statements. (rs6000_init_builtins) [bool_V1TI_type_node, int_ftype_int_v1ti_v1ti]: New assignments. (altivec_init_builtins): New E_V1TImode case statement. (builtin_function_type)[P10_BUILTIN_128BIT_VMULEUD, P10_BUILTIN_128BIT_VMULOUD, P10_BUILTIN_128BIT_DIVEU_V1TI, P10_BUILTIN_128BIT_MODU_V1TI, P10_BUILTIN_CMPGE_U1TI, P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPEQUT]: New case statements. * config/rs6000/r6000.c (rs6000_handle_altivec_attribute)[E_TImode, E_V1TImode]: New case statements. * config/rs6000/r6000.h (rs6000_builtin_type_index): New enum value RS6000_BTI_bool_V1TI. * config/rs6000/vector.md (vector_gtv1ti,vector_nltv1ti, vector_gtuv1ti, vector_nltuv1ti, vector_ngtv1ti, vector_ngtuv1ti, vector_eq_v1ti_p, vector_ne_v1ti_p, vector_ae_v1ti_p, vector_gt_v1ti_p, vector_gtu_v1ti_p, vrotlv1ti3, vashlv1ti3, vlshrv1ti3, vashrv1ti3): New define_expands. 
* config/rs6000/vsx.md (UNSPEC_VSX_DIVSQ, UNSPEC_VSX_DIVUQ, UNSPEC_VSX_DIVESQ, UNSPEC_VSX_DIVEUQ, UNSPEC_VSX_MODSQ, UNSPEC_VSX_MODUQ): New unspecs. (vsx_div_v1ti, vsx_udiv_v1ti, vsx_dives_v1ti, vsx_diveu_v1ti, vsx_mods_v1ti, vsx_modu_v1ti, xxswapd_v1ti, vsx_sign_extend_v2di_v1ti): New define_insns. (vcmpnet): New define_expand. * gcc/doc/extend.texi: Add documentation for the new builtins vec_rl, vec_rlmi, vec_rlnm, vec_sl, vec_sr, vec_sra, vec_mule, vec_mulo, vec_div, vec_dive, vec_mod, vec_cmpeq, vec_cmpne, vec_cmpgt, vec_cmplt, vec_cmpge, vec_cmple, vec_all_eq, vec_all_ne, vec_all_gt, vec_all_lt, vec_all_ge, vec_all_le, vec_any_eq, vec_any_ne, vec_any_gt, vec_any_lt, vec_any_ge, vec_any_le. gcc/testsuite/ChangeLog 2020-10-08 Carl Love * gcc.target/powerpc/int_128bit-runnable.c: New test file. --- gcc/config/rs6000/altivec.h |4 + gc
Re: [PATCH 4/5] Test 128-bit shifts for just the int128 type.
Will, Segher: Patch 4 adds the vector 128-bit integer shift instruction support for the V1TI type. This patch also renames and moves the VSX_TI iterator from vsx.md to VEC_TI in vector.md. The uses of VEC_TI are also updated. Re-tested the patch on Power 9 with no regression errors. Carl gcc/ChangeLog 2020-10-12 Carl Love * config/rs6000/altivec.md (altivec_vslq, altivec_vsrq): Rename to altivec_vslq_, altivec_vsrq_, mode VEC_TI. * config/rs6000/vector.md (VEC_TI): Was named VSX_TI in vsx.md. (vashlv1ti3): Change to vashl3, mode VEC_TI. (vlshrv1ti3): Change to vlshr3, mode VEC_TI. * config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator. Update uses of VSX_TI to VEC_TI. gcc/testsuite/ChangeLog 2020-10-12 Carl Love gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left tests. --- gcc/config/rs6000/altivec.md | 16 - gcc/config/rs6000/vector.md | 27 --- gcc/config/rs6000/vsx.md | 33 +-- .../gcc.target/powerpc/int_128bit-runnable.c | 16 +++-- 4 files changed, 52 insertions(+), 40 deletions(-) diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index e9623bc3285..9b70830ae00 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2220,10 +2220,10 @@ "vsl %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vslq" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") -(match_operand:V1TI 2 "vsx_register_operand" "v")))] +(define_insn "altivec_vslq_" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v") +(match_operand:VEC_TI 2 "vsx_register_operand" "v")))] "TARGET_POWER10" /* Shift amount in needs to be in bits[57:63] of 128-bit operand. 
*/ "vslq %0,%1,%2" @@ -2237,10 +2237,10 @@ "vsr %0,%1,%2" [(set_attr "type" "vecsimple")]) -(define_insn "altivec_vsrq" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] +(define_insn "altivec_vsrq_" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v") + (match_operand:VEC_TI 2 "vsx_register_operand" "v")))] "TARGET_POWER10" /* Shift amount in needs to be in bits[57:63] of 128-bit operand. */ "vsrq %0,%1,%2" diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index c2ae74fbe92..b2f17063ac9 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -26,6 +26,9 @@ ;; Vector int modes (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI]) +;; 128-bit int modes +(define_mode_iterator VEC_TI [V1TI TI]) + ;; Vector int modes for parity (define_mode_iterator VEC_IP [V8HI V4SI @@ -1627,17 +1630,17 @@ "") ;; No immediate version of this 128-bit instruction -(define_expand "vashlv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") -(match_operand:V1TI 2 "vsx_register_operand" "v")))] +(define_expand "vashl3" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand") +(match_operand:VEC_TI 2 "vsx_register_operand")))] "TARGET_POWER10" { /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. 
*/ - rtx tmp = gen_reg_rtx (V1TImode); + rtx tmp = gen_reg_rtx (mode); emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vslq (operands[0], operands[1], tmp)); + emit_insn(gen_altivec_vslq_ (operands[0], operands[1], tmp)); DONE; }) @@ -1650,17 +1653,17 @@ "") ;; No immediate version of this 128-bit instruction -(define_expand "vlshrv1ti3" - [(set (match_operand:V1TI 0 "vsx_register_operand" "=v") - (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v") - (match_operand:V1TI 2 "vsx_register_operand" "v")))] +(define_expand "vlshr3" + [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v") + (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand") + (match_operand:VEC_TI 2 "vsx_register_operand")))] "TARGET_POWER10" { /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */ - rtx tmp = gen_reg_rtx (V1TImode); + rtx tmp = gen_reg_rtx (mode); emit_insn (gen_xxswapd_v1ti (tmp, operands[2])); - emit_insn (gen_altivec_vsrq (oper
Re: [PATCH 5/5] Conversions between 128-bit integer and floating point values.
Will, Segher: This patch adds support for converting to/from 128-bit integers and 128-bit decimal floating point formats using the new P10 instructions dcffixqq and dctfixqq. The new instructions are only used on P10 HW, otherwise the conversions continue to use the existing SW routines. The files fixkfti-sw.c and fixunskfti-sw.c are renamed versions of fixkfti.c and fixunskfti.c respectively. The function names in the files were updated with the rename as well as some white spaces fixes. Fixed a typo in the ChangeLog noted by Will. Removed the target ppc_native_128bit from the test case as we no longer have the 128-bit flag. Re-tested the patch on Power 9 with no regression errors. Carl -- gcc/ChangeLog 2020-10-12 Carl Love * config/rs6000/rs6000.md (floatti2, floatunsti2, fix_truncti2, fixuns_truncti2): Add define_insn for mode IEEE 128. * libgcc/config/rs6000/fixkfti.c: Renamed to fixkfti-sw.c. Change calls of __fixkfti to __fixkfti_sw. * libgcc/config/rs6000/fixunskfti.c: Renamed to fixunskfti-sw.c. Change calls of __fixunskfti to __fixunskfti_sw. * libgcc/config/rs6000/float128-hw.c (__floattikf_hw, __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw): New functions. * libgcc/config/rs6000/float128-ifunc.c (SW_OR_HW_ISA3_1): New macro. (__floattikf_resolve, __floatuntikf_resolve, __fixkfti_resolve, __fixunskfti_resolve): Add resolve functions. (__floattikf, __floatuntikf, __fixkfti, __fixunskfti): New functions. * libgcc/config/rs6000/float128-sed (floattitf, __floatuntitf, __fixtfti, __fixunstfti): Add editor commands to change names. * libgcc/config/rs6000/float128-sed-hw (__floattitf, __floatuntitf, __fixtfti, __fixunstfti): Add editor commands to change names. * libgcc/config/rs6000/floattikf.c: Renamed to floattikf-sw.c. * libgcc/config/rs6000/floatuntikf.c: Renamed to floatuntikf-sw.c. 
* libgcc/config/rs6000/quad-float128.h (__floattikf_sw, __floatuntikf_sw, __fixkfti_sw, __fixunskfti_sw, __floattikf_hw, __floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw, __floattikf, __floatuntikf, __fixkfti, __fixunskfti): New extern declarations. * libgcc/config/rs6000/t-float128 (floattikf, floatuntikf, fixkfti, fixunskfti): Remove file names from fp128_ppc_funcs. (floattikf-sw, floatuntikf-sw, fixkfti-sw, fixunskfti-sw): Add file names to fp128_ppc_funcs. gcc/testsuite/ChangeLog 2020-10-12 Carl Love * gcc.target/powerpc/fp128_conversions.c: New file. --- gcc/config/rs6000/rs6000.md | 36 +++ .../gcc.target/powerpc/fp128_conversions.c| 283 ++ .../config/rs6000/{fixkfti.c => fixkfti-sw.c} | 4 +- .../rs6000/{fixunskfti.c => fixunskfti-sw.c} | 4 +- libgcc/config/rs6000/float128-hw.c| 24 ++ libgcc/config/rs6000/float128-ifunc.c | 44 ++- libgcc/config/rs6000/float128-sed | 4 + libgcc/config/rs6000/float128-sed-hw | 4 + .../rs6000/{floattikf.c => floattikf-sw.c}| 4 +- .../{floatuntikf.c => floatuntikf-sw.c} | 4 +- libgcc/config/rs6000/quad-float128.h | 17 +- libgcc/config/rs6000/t-float128 | 3 +- 12 files changed, 411 insertions(+), 20 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/fp128_conversions.c rename libgcc/config/rs6000/{fixkfti.c => fixkfti-sw.c} (96%) rename libgcc/config/rs6000/{fixunskfti.c => fixunskfti-sw.c} (96%) rename libgcc/config/rs6000/{floattikf.c => floattikf-sw.c} (96%) rename libgcc/config/rs6000/{floatuntikf.c => floatuntikf-sw.c} (96%) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 694ff70635e..5db5d0b4505 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -6390,6 +6390,42 @@ xscvsxddp %x0,%x1" [(set_attr "type" "fp")]) +(define_insn "floatti2" + [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v") + (float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvsqqp %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn 
"floatunsti2" + [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v") + (unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvuqqp %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn "fix_truncti2" + [(set (match_operand:TI 0 "vsx_register_operand" "=v") + (fix:TI (match_operand:IEEE128 1 "vsx_register_operand" "v")))] + "TARGET_POWER10" +{ + return "xscvqpsqz %0,%1"; +} + [(set_attr "type" "fp")]) + +(define_insn "fixuns_truncti2" + [(set (match_operand:TI 0 "vsx_register_operand" "=v") + (unsigned_fix:TI (match_operand:IEEE128 1 "vsx_regi
Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations
Will, Segher: Patch 1, adds the 128-bit sign extension instruction support and corresponding builtin support. Removed the blank line per Will's latest feedback. Retested the patch on Power 9 with no regression errors. Carl -- gcc/ChangeLog 2020-10-08 Carl Love * config/rs6000/altivec.h (vec_signextll, vec_signexti): Add define for new builtins. * config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL): Add overloaded builtin definitions. (VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D): Add builtin expansions. * config/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VEC_VSIGNEXTLL): Add overloaded argument definitions. * config/rs6000/vsx.md: Make define_insn vsx_sign_extend_si_v2di visible. * doc/extend.texi: Add documentation for the vec_signexti and vec_signextll builtins. gcc/testsuite/ChangeLog 2020-10-08 Carl Love * gcc.target/powerpc/p9-sign_extend-runnable.c: New test case. --- gcc/config/rs6000/altivec.h | 2 + gcc/config/rs6000/rs6000-builtin.def | 9 ++ gcc/config/rs6000/rs6000-call.c | 13 ++ gcc/config/rs6000/vsx.md | 2 +- gcc/doc/extend.texi | 15 ++ .../powerpc/p9-sign_extend-runnable.c | 128 ++ 6 files changed, 168 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index f7720d136c9..562c5273f71 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -494,6 +494,8 @@ #define vec_xlx __builtin_vec_vextulx #define vec_xrx __builtin_vec_vexturx +#define vec_signexti __builtin_vec_vsignexti +#define vec_signextll __builtin_vec_vsignextll #endif /* Predicates. 
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index e91a48ddf5f..4c2e9460949 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -2715,6 +2715,8 @@ BU_P9V_OVERLOAD_1 (VPRTYBD, "vprtybd") BU_P9V_OVERLOAD_1 (VPRTYBQ,"vprtybq") BU_P9V_OVERLOAD_1 (VPRTYBW,"vprtybw") BU_P9V_OVERLOAD_1 (VPARITY_LSBB, "vparity_lsbb") +BU_P9V_OVERLOAD_1 (VSIGNEXTI, "vsignexti") +BU_P9V_OVERLOAD_1 (VSIGNEXTLL, "vsignextll") /* 2 argument functions added in ISA 3.0 (power9). */ BU_P9_2 (CMPRB,"byte_in_range",CONST, cmprb) @@ -2726,6 +2728,13 @@ BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range") BU_P9_OVERLOAD_2 (CMPRB2, "byte_in_either_range") BU_P9_OVERLOAD_2 (CMPEQB, "byte_in_set") +/* Sign extend builtins that work on ISA 3.0, but not defined until ISA 3.1. */ +BU_P9V_AV_1 (VSIGNEXTSB2W, "vsignextsb2w", CONST, vsx_sign_extend_qi_v4si) +BU_P9V_AV_1 (VSIGNEXTSH2W, "vsignextsh2w", CONST, vsx_sign_extend_hi_v4si) +BU_P9V_AV_1 (VSIGNEXTSB2D, "vsignextsb2d", CONST, vsx_sign_extend_qi_v2di) +BU_P9V_AV_1 (VSIGNEXTSH2D, "vsignextsh2d", CONST, vsx_sign_extend_hi_v2di) +BU_P9V_AV_1 (VSIGNEXTSW2D, "vsignextsw2d", CONST, vsx_sign_extend_si_v2di) + /* Builtins for scalar instructions added in ISA 3.1 (power10). 
*/ BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged) BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm) diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c index a8b520834c7..9e514a01012 100644 --- a/gcc/config/rs6000/rs6000-call.c +++ b/gcc/config/rs6000/rs6000-call.c @@ -5527,6 +5527,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_INTSI, RS6000_BTI_INTSI }, + /* Sign extend builtins that work work on ISA 3.0, not added until ISA 3.1 */ + { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSB2W, +RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSH2W, +RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 }, + + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSB2D, +RS6000_BTI_V2DI, RS6000_BTI_V16QI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSH2D, +RS6000_BTI_V2DI, RS6000_BTI_V8HI, 0, 0 }, + { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSW2D, +RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, + /* Overloaded built-in functions for ISA3.1 (power10). */ { P10_BUILTIN_VEC_CLRL, P10V_BUILTIN_VCLRLB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 }, diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 4ff52455fd3..31fcffe8f33 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4787,7 +4787,7 @@ "vextsh2 %0,%1" [(set_attr "type" "vecexts")]) -(define_insn "*vsx_sign_extend_si_v2di" +(define_insn "vsx_sign_exte
Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations
Hi! On Wed, Oct 07, 2020 at 04:08:12PM -0500, will schmidt wrote: > On Mon, 2020-10-05 at 11:51 -0700, Carl Love wrote: > > +/* Sign extend builtins that work on ISA 3.0, but not defined until ISA > > 3.1. */ > > I have mixed feelings about straddling the ISA 3.0 and 3.1 ; but not > sure how to properly improve. (I defer). The builtins are not defined in the ISA. The instructions generated by these builtins are ISA 3.0 insns, but the builtins themselves were only defined contemporary with ISA 3.1. I don't know how to write that comment more clearly. Well, maybe we have to write it out, not everything is best explained in few words? Segher
Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations
Hi! On Mon, Oct 12, 2020 at 01:15:32PM -0700, Carl Love wrote: > Patch 1, adds the 128-bit sign extension instruction support and > corresponding builtin support. > * config/rs6000/altivec.h (vec_signextll, vec_signexti): Add define > for new builtins. > * config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL): Add > overloaded builtin definitions. > (VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D): > Add builtin expansions. > * config/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI, > P9V_BUILTIN_VEC_VSIGNEXTLL): Add overloaded argument definitions. > * config/rs6000/vsx.md: Make define_insn vsx_sign_extend_si_v2di > visible. > * doc/extend.texi: Add documentation for the vec_signexti and > vec_signextll builtins. > +uThe following sign extension builtins are provided. Typo ("uThe"). Probably should be a colon at the end, while you're at it. > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c > @@ -0,0 +1,128 @@ > +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } > */ Why only on Linux? (And everything in gcc.target/powerpc/ is powerpc* always, so could just be *-*-linux*). Looks good otherwise. Segher
RE: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations
On Mon, 2020-10-12 at 15:43 -0500, Segher Boessenkool wrote: > Hi! > > On Wed, Oct 07, 2020 at 04:08:12PM -0500, will schmidt wrote: > > On Mon, 2020-10-05 at 11:51 -0700, Carl Love wrote: > > > +/* Sign extend builtins that work on ISA 3.0, but not defined > > > until ISA 3.1. */ > > > > I have mixed feelings about straddling the ISA 3.0 and 3.1 ; but > > not > > sure how to properly improve. (I defer). > > The builtins are not defined in the ISA. The instructions generated > by > these builtins are ISA 3.0 insns, but the builtins themselves were > only > defined contemporary with ISA 3.1. > > I don't know how to write that comment more clearly. Well, maybe we > have to write it out, not everything is best explained in few words? > OK, we can just drop the comment altogether. Carl
[Ada, FYI] make sin and cos generics inlineable
Enable the sincos optimization within callers of these (single-argument) elementary functions. Regstrapped on x86_64-linux-gnu, approved by Arno, checking in. for gcc/ada/ChangeLog * libgnat/a-ngelfu.ads (Sin, Cos): Make the single-argument functions inline. --- gcc/ada/libgnat/a-ngelfu.ads |2 ++ 1 file changed, 2 insertions(+) diff --git a/gcc/ada/libgnat/a-ngelfu.ads b/gcc/ada/libgnat/a-ngelfu.ads index 1a8e176..70f9b7a 100644 --- a/gcc/ada/libgnat/a-ngelfu.ads +++ b/gcc/ada/libgnat/a-ngelfu.ads @@ -92,6 +92,7 @@ is and then (if Left = 0.0 then "**"'Result = 0.0); function Sin (X : Float_Type'Base) return Float_Type'Base with + Inline, Post => Sin'Result in -1.0 .. 1.0 and then (if X = 0.0 then Sin'Result = 0.0); @@ -101,6 +102,7 @@ is and then (if X = 0.0 then Sin'Result = 0.0); function Cos (X : Float_Type'Base) return Float_Type'Base with + Inline, Post => Cos'Result in -1.0 .. 1.0 and then (if X = 0.0 then Cos'Result = 1.0); -- Alexandre Oliva, happy hacker https://FSFLA.org/blogs/lxo/ Free Software Activist GNU Toolchain Engineer
Fix tramp3d misoptimization
Hi, this patch fixes the tramp3d ICE with PGO. It turned out to be caused by a misupdate in ignore_edge that I introduced in the previous patch, which made us not compute SCCs correctly with -fno-lto. While looking for the problem I proofread the sources, fortified them against the situation where we insert a summary for no good reason, and noticed a problem where early ipa-modref disabled itself in some cases. I also noticed that param_index is streamed as uhwi while it is signed (that wastes file space). Bootstrapping/regtesting x86_64-linux, will commit it tomorrow if that passes. gcc/ChangeLog: 2020-10-13 Jan Hubicka PR ipa/97389 * ipa-modref.c (dump_lto_records): Fix formatting of dump file. (modref_summary::dump): Do not check loads to be non-null. (modref_summary_lto::dump): Do not check loads to be non-null. (merge_call_side_effects): Improve debug output. (analyze_call): Crash when cur_summary->loads is NULL. (analyze_function): Update. (modref_summaries::insert): Insert only into summaries, not optimization_summaries. (modref_summaries::duplicate): Likewise; crash when loads or stores are NULL. (modref_summaries_lto::duplicate): Crash when loads or stores are NULL. (write_modref_records): param_index is signed. (read_modref_records): param_index is signed. (modref_write): Crash when loads or stores are NULL. (read_section): Compensate previous change. (pass_modref::execute): Do not check optimization_summaries to be non-NULL. (ignore_edge): Fix. (compute_parm_map): Fix formatting. (modref_propagate_in_scc): Do not expect loads/stores to be NULL. diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c index 1d4eaf8d7ad..d78cba44fe7 100644 --- a/gcc/ipa-modref.c +++ b/gcc/ipa-modref.c @@ -298,7 +298,7 @@ dump_lto_records (modref_records_lto *tt, FILE *out) r->ref ? 
get_alias_set (r->ref) : 0); if (r->every_access) { - fprintf (out, " Every access\n"); + fprintf (out, " Every access\n"); continue; } size_t k; @@ -314,16 +314,10 @@ dump_lto_records (modref_records_lto *tt, FILE *out) void modref_summary::dump (FILE *out) { - if (loads) -{ - fprintf (out, " loads:\n"); - dump_records (loads, out); -} - if (stores) -{ - fprintf (out, " stores:\n"); - dump_records (stores, out); -} + fprintf (out, " loads:\n"); + dump_records (loads, out); + fprintf (out, " stores:\n"); + dump_records (stores, out); } /* Dump summary. */ @@ -331,16 +325,10 @@ modref_summary::dump (FILE *out) void modref_summary_lto::dump (FILE *out) { - if (loads) -{ - fprintf (out, " loads:\n"); - dump_lto_records (loads, out); -} - if (stores) -{ - fprintf (out, " stores:\n"); - dump_lto_records (stores, out); -} + fprintf (out, " loads:\n"); + dump_lto_records (loads, out); + fprintf (out, " stores:\n"); + dump_lto_records (stores, out); } /* Get function summary for FUNC if it exists, return NULL otherwise. 
*/ @@ -530,16 +518,19 @@ ignore_stores_p (tree caller, int flags) bool merge_call_side_effects (modref_summary *cur_summary, gimple *stmt, modref_summary *callee_summary, -bool ignore_stores) +bool ignore_stores, cgraph_node *callee_node) { auto_vec parm_map; bool changed = false; + if (dump_file) +fprintf (dump_file, " - Merging side effects of %s with parm map:", +callee_node->dump_name ()); + parm_map.safe_grow_cleared (gimple_call_num_args (stmt)); for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) { tree op = gimple_call_arg (stmt, i); - STRIP_NOPS (op); if (TREE_CODE (op) == SSA_NAME && SSA_NAME_IS_DEFAULT_DEF (op) && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) @@ -563,17 +554,17 @@ merge_call_side_effects (modref_summary *cur_summary, parm_map[i].parm_index = -2; else parm_map[i].parm_index = -1; + if (dump_file) + fprintf (dump_file, " %i", parm_map[i].parm_index); } + if (dump_file) +fprintf (dump_file, "\n"); /* Merge with callee's summary. */ - if (cur_summary->loads) -changed |= cur_summary->loads->merge (callee_summary->loads, &parm_map); + changed |= cur_summary->loads->merge (callee_summary->loads, &parm_map); if (!ignore_stores) -{ - if (cur_summary->stores) - changed |= cur_summary->stores->merge (callee_summary->stores, - &parm_map); -} +changed |= cur_summary->stores->merge (callee_summary->stores, + &parm_map); return changed; } @@ -672,8 +663,7 @@ analyze_call (modref_summary *cur_summary, { if (ignore
[committed] analyzer: handle static callbacks [PR97258]
The analyzer's initial worklist was only populated with non-static functions in the TU (along with those that look promising for call summaries). Hence some static functions that were never explicitly called but could be called via function pointers were not being analyzed. This patch remedies this by ensuring that functions that escape as function pointers get added to the worklist, if they haven't been already. Another fix would be to simply analyze all functions that we have a body for, but too much of the testsuite relies on static test functions not being directly analyzed. Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu. Pushed to master as r11-3840-gaf66094d037793773eb8a49597866457f2f6a104. gcc/analyzer/ChangeLog: PR analyzer/97258 * engine.cc (impl_region_model_context::on_escaped_function): New vfunc. (exploded_graph::add_function_entry): Use m_functions_with_enodes to implement idempotency. (add_any_callbacks): New. (exploded_graph::build_initial_worklist): Use the above to find callbacks that are reachable from global initializers. (exploded_graph::on_escaped_function): New. * exploded-graph.h (impl_region_model_context::on_escaped_function): New decl. (exploded_graph::on_escaped_function): New decl. (exploded_graph::m_functions_with_enodes): New field. * region-model-reachability.cc (reachable_regions::reachable_regions): Replace "store" param with "model" param; use it to initialize m_model. (reachable_regions::add): When getting the svalue for the region, call get_store_value on the model rather than using an initial value. (reachable_regions::mark_escaped_clusters): Add ctxt param and use it to call on_escaped_function when a function_region escapes. * region-model-reachability.h (reachable_regions::reachable_regions): Replace "store" param with "model" param. (reachable_regions::mark_escaped_clusters): Add ctxt param. (reachable_regions::m_model): New field. 
* region-model.cc (region_model::handle_unrecognized_call): Update for change in reachable_regions ctor. (region_model::handle_unrecognized_call): Pass ctxt to mark_escaped_clusters. (region_model::get_reachable_svalues): Update for change in reachable_regions ctor. (region_model::get_initial_value_for_global): Read-only variables keep their initial values. * region-model.h (region_model_context::on_escaped_function): New vfunc. (noop_region_model_context::on_escaped_function): New. gcc/testsuite/ChangeLog: PR analyzer/97258 * gcc.dg/analyzer/callbacks-1.c: New test. * gcc.dg/analyzer/callbacks-2.c: New test. * gcc.dg/analyzer/callbacks-3.c: New test. --- gcc/analyzer/engine.cc | 70 + gcc/analyzer/exploded-graph.h | 8 +++ gcc/analyzer/region-model-reachability.cc | 19 -- gcc/analyzer/region-model-reachability.h| 8 ++- gcc/analyzer/region-model.cc| 13 ++-- gcc/analyzer/region-model.h | 5 ++ gcc/testsuite/gcc.dg/analyzer/callbacks-1.c | 25 gcc/testsuite/gcc.dg/analyzer/callbacks-2.c | 22 +++ gcc/testsuite/gcc.dg/analyzer/callbacks-3.c | 19 ++ 9 files changed, 175 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-1.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-2.c create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-3.c diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc index 0e79254ad60..65d7495f26f 100644 --- a/gcc/analyzer/engine.cc +++ b/gcc/analyzer/engine.cc @@ -143,6 +143,12 @@ impl_region_model_context::on_unknown_change (const svalue *sval, smap->on_unknown_change (sval, is_mutable, m_ext_state); } +void +impl_region_model_context::on_escaped_function (tree fndecl) +{ + m_eg->on_escaped_function (fndecl); +} + /* class setjmp_svalue : public svalue. */ /* Implementation of svalue::accept vfunc for setjmp_svalue. */ @@ -1931,6 +1937,15 @@ exploded_graph::~exploded_graph () exploded_node * exploded_graph::add_function_entry (function *fun) { + /* Be idempotent. 
*/ + if (m_functions_with_enodes.contains (fun)) +{ + logger * const logger = get_logger (); + if (logger) + logger->log ("entrypoint for %qE already exists", fun->decl); + return NULL; +} + program_point point = program_point::from_function_entry (m_sg, fun); program_state state (m_ext_state); state.push_frame (m_ext_state, fun); @@ -1942,6 +1957,9 @@ exploded_graph::add_function_entry (function *fun) /* We should never fail to add such a node. */ gcc_assert (enode); add_edge (m_origin, enode, NULL); + + m_functions_with_enodes.add (fun); + return enode; } @@
Ping: [PATCH 0/2] Rework adding Power10 IEEE 128-bit min, max, and conditional move
Ping the following two patches to add IEEE 128-bit minimum, maximum, and conditional move support: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554460.html https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554461.html -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797
Ping: [PATCH 0/9] PowerPC: Patches to enable changing the long double default to IEEE 128-bit on little endian PowerPC 64-bit Linux systems
Ping the following 9 patches to add support for building a GCC toolchain where the default long double is IEEE 128-bit floating point instead of the IBM extended double floating point. The first patch was revised with input from Joseph Myers. I will list that patch in this list. Most of these patches are independent of each other, so if there are problems with some of the patches, please look at the other patches after that patch: Patch #1: Map built-in long double functions to an alternate name if long double is IEEE 128-bit. https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555855.html Patch #2: Update error messages about mixing __float128 and __ibm128 to deal with long double being IEEE 128-bit: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554767.html Patch #3: Rework libgcc 128-bit floating point conversion support: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554768.html Patch #4: Add support in libgcc to convert between IEEE 128-bit and the three Decimal types: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554770.html Patch #5: Fix some tests that break if long double is IEEE 128-bit: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554772.html Patch #6: Map the 'q' built-ins to 'l' built-ins if long double is IEEE 128-bit: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554773.html Patch #7: Update the power10 built-in functions for IEEE 128-bit support to support long double if long double is IEEE 128-bit: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554774.html Patch #8: Change tests that use an explicit '__ieee128' keyword to use the keyword we document ('__float128'): https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554775.html Patch #9: If long double is IEEE 128-bit, use the pack_ibm128 built-in instead of the pack_longdouble built-in in libgcc: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554776.html -- Michael Meissner, IBM IBM, M/S 2506R, 550 King Street, 
Littleton, MA 01460-6245, USA email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797
Re: [PATCH 2a/5] rs6000, vec_rlnm builtin fix arguments
On Mon, Oct 12, 2020 at 01:15:39PM -0700, Carl Love wrote: > This patch fixes an error in how the vec_rlnm() builtin parameters are > handled. The current test for this builtin are compile only. The > issue was found in the path that adds the 128-bit operands to the > vec_rlnm() builtin. The new test for the 128-bit operands is a compile > and run test. > * config/rs6000/altivec.h (vec_rlnm): Fix bug in argument generation. > diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h > index 8a2dcda0144..f7720d136c9 100644 > --- a/gcc/config/rs6000/altivec.h > +++ b/gcc/config/rs6000/altivec.h > @@ -183,7 +183,7 @@ > #define vec_recipdiv __builtin_vec_recipdiv > #define vec_rlmi __builtin_vec_rlmi > #define vec_vrlnm __builtin_vec_rlnm > -#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b))) > +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c))) > #define vec_rsqrt __builtin_vec_rsqrt > #define vec_rsqrte __builtin_vec_rsqrte > #define vec_signed __builtin_vec_vsigned That patch is fine of course, thanks! Is there some testcase that trips over the old definition? That would have been good to have. Segher
Re: [PATCH 2b/5] RS6000 add 128-bit Integer Operations
Hi! On Wed, Oct 07, 2020 at 04:53:11PM -0500, will schmidt wrote: > > +;; AIX does not support extended mnemonic xxswapd. Use the basic > > +;; mnemonic xxpermdi instead. > > I'd wonder if there can be additional logic using ( DEFAULT_ABI == > ABI_AIX ) sort of check to resolve this. It looks like this same > comment exists in multiple places throughout our *.md files, so not > something that needs to be solved here today. ABI_AIX just tests the *ABI*, whether we have function descriptors mostly. But there is TARGET_AIX to test if we are running on AIX. The problem with generating different assembler code on AIX is that we then have to do more testing as well, have more opportunities to get things wrong. But it might be worth it for xxpermdi, the extended mnemonics improve readability a lot. On the other hand, once we start this, where will it end :-) > > new file mode 100644 > > index 000..85ad544e22b > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c > > @@ -0,0 +1,2254 @@ > > +/* { dg-do run } */ > > +/* { dg-options "-mcpu=power10 -O2 -save-temps" } */ > > +/* { dg-require-effective-target power10_hw } */ > > +/* { dg-require-effective-target ppc_native_128bit } */ > > I don't see any other uses of the target option for ppc_native_128bit > in my tree ? I don't see where it is defined, even? Segher
PING [PATCH] Enable GCC support for Intel Key Locker extension
Hongyu Wang 于2020年9月21日周一 下午1:30写道: > > Hi: > > This patch is about to support Intel Key Locker extension. > > Key Locker provides a mechanism to encrypt and decrypt data with an AES key without having access to the raw key value. > > For more details, please refer to https://software.intel.com/content/dam/develop/external/us/en/documents/343965-intel-key-locker-specification.pdf . > > Bootstrap ok, regression test on i386/x86 backend is ok. > > OK for master? > > gcc/ChangeLog > > * common/config/i386/cpuinfo.h (get_available_features): > Detect KL, AESKLE and WIDEKL features. > * common/config/i386/i386-common.c > (OPTION_MASK_ISA_KL_SET): New. > (OPTION_MASK_ISA_WIDEKL_SET): Likewise. > (OPTION_MASK_ISA_KL_UNSET): Likewise. > (OPTION_MASK_ISA_WIDEKL_UNSET): Likewise. > (OPTION_MASK_ISA2_AVX2_UNSET): Likewise. > (OPTION_MASK_ISA2_AVX_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE4_2_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE4_1_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE4_UNSET): Likewise. > (OPTION_MASK_ISA2_SSSE3_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE3_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE2_UNSET): Likewise. > (OPTION_MASK_ISA2_SSE_UNSET): Likewise. > (ix86_handle_option): Handle kl and widekl, add dependency chain > for KL and SSE2. > * common/config/i386/i386-cpuinfo.h (enum processor_features): > (FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL): New. > * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY > for KL, AESKLE and WIDEKL. > * config.gcc: Add keylockerintrin.h. > * doc/invoke.texi: Document new option -mkl and -mwidekl. > * doc/extend.texi: Document kl and widekl. > * config/i386/constraints.md > (Y1, Y2, Y3, Y4, Y5, Y6, Y7): New register constraints. > * config/i386/cpuid.h (bit_KL, bit_AESKLE, bit_WIDEKL): New. > * config/i386/i386-builtin-types.def ((UINT, UINT, V2DI, V2DI, PVOID), > (UINT, UINT, V2DI, PVOID), (VOID, V2DI, V2DI, V2DI, UINT), > (UINT8, PV2DI, V2DI, PCVOID), (UINT8, PV2DI, PCV2DI, PCVOID)): New > function types. 
> * config/i386/i386-builtin.def: Add > __builtin_ia32_loadiwkey, > __builtin_ia32_aesdec128kl_u8, > __builtin_ia32_aesdec256kl_u8, > __builtin_ia32_aesenc128kl_u8, > __builtin_ia32_aesenc256kl_u8, > __builtin_ia32_aesdecwide128kl_u8, > __builtin_ia32_aesdecwide256kl_u8, > __builtin_ia32_aesencwide128kl_u8, > __builtin_ia32_aesencwide256kl_u8, > __builtin_ia32_encodekey128_u32, > __builtin_ia32_encodekey256_u32. > * config/i386/i386-c.c (ix86_target_macros_internal): Handle > kl and widekl. > * config/i386/i386-options.c (isa2_opts): Add -mkl and -mwidekl. > (ix86_option_override_internal): Handle KL and WIDEKL. > (ix86_valid_target_attribute_inner_p): Add attribute for kl and widekl. > * config/i386/i386-expand.c > (ix86_expand_builtin): Expand Keylocker Builtins. > * config/i386/i386.h (TARGET_KL): New. > (TARGET_KL_P): Likewise. > (TARGET_WIDEKL): Likewise. > (TARGET_WIDEKL_P): Likewise. > (PTA_KL): Likewise. > (PTA_WIDEKL): Likewise. > (enum reg_class): Add 7 new SSE register classes. > (REG_CLASS_NAMES): Likewise. > (REG_CLASS_CONTENTS): Likewise. > * config/i386/i386.opt: Add new option mkl and mwidekl. > * config/i386/keylockerintrin.h: New header file for Keylocker. > * config/i386/immintrin.h: Include keylockerintrin.h. > * config/i386/sse.md (UNSPECV_LOADIWKEY): New. > (UNSPECV_AESDEC128KLU8): Likewise. > (UNSPECV_AESENC128KLU8): Likewise. > (UNSPECV_AESDEC256KLU8): Likewise. > (UNSPECV_AESENC256KLU8): Likewise. > (UNSPECV_AESDECWIDE128KLU8): Likewise. > (UNSPECV_AESENCWIDE128KLU8): Likewise. > (UNSPECV_AESDECWIDE256KLU8): Likewise. > (UNSPECV_AESENCWIDE256KLU8): Likewise. > (UNSPECV_ENCODEKEY128U32): Likewise. > (UNSPECV_ENCODEKEY256U32): Likewise. > (loadiwkey): New insn pattern. > (encodekey128u32): Likewise. > (encodekey256u32): Likewise. > (aesu8): Likewise. > (aesu8): Likewise. > > gcc/testsuite/ChangeLog > > * gcc.target/i386/keylocker-aesdec128kl.c: New test. > * gcc.target/i386/keylocker-aesdec256kl.c: Likewise. 
> * gcc.target/i386/keylocker-aesdecwide128kl.c: Likewise.
Re: [PUSHED] operator_trunc_mod::wi_fold: Return VARYING for mod by zero.
On Mon, Oct 12, 2020 at 6:57 PM Aldy Hernandez via Gcc-patches wrote: > > Division by zero should return VARYING, otherwise we propagate undefine all > over the > ranger and cause bad things to happen :) So we never should propagate UNDEFINED? >. This fixes MOD 0 to also return VARYING. > > This is Andrew's patch. I forgot to use --author for proper patch > attribution. > > Tested on x86-64 Linux. > > Pushed to trunk. > > gcc/ChangeLog: > > PR tree-optimization/97378 > * range-op.cc (operator_trunc_mod::wi_fold): Return VARYING for mod > by zero. > > gcc/testsuite/ChangeLog: > > * gcc.dg/pr97378.c: New test. > --- > gcc/range-op.cc| 6 +++--- > gcc/testsuite/gcc.dg/pr97378.c | 15 +++ > 2 files changed, 18 insertions(+), 3 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/pr97378.c > > diff --git a/gcc/range-op.cc b/gcc/range-op.cc > index ce6ae2de20c..6108de367ad 100644 > --- a/gcc/range-op.cc > +++ b/gcc/range-op.cc > @@ -1359,7 +1359,7 @@ operator_div::wi_fold (irange &r, tree type, >// If we're definitely dividing by zero, there's nothing to do. >if (wi_zero_p (type, divisor_min, divisor_max)) > { > - r.set_undefined (); > + r.set_varying (type); >return; > } > > @@ -2624,10 +2624,10 @@ operator_trunc_mod::wi_fold (irange &r, tree type, >signop sign = TYPE_SIGN (type); >unsigned prec = TYPE_PRECISION (type); > > - // Mod 0 is undefined. Return undefined. > + // Mod 0 is undefined. >if (wi_zero_p (type, rh_lb, rh_ub)) > { > - r.set_undefined (); > + r.set_varying (type); >return; > } > > diff --git a/gcc/testsuite/gcc.dg/pr97378.c b/gcc/testsuite/gcc.dg/pr97378.c > new file mode 100644 > index 000..27e4a1f4321 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/pr97378.c > @@ -0,0 +1,15 @@ > +// { dg-do compile } > +// { dg-options "-O2" } > + > +int a, b, c; > +void d() { > +e : { > + long f; > + long *g = &f; > + if ((a != 0) - (b = 0)) > +; > + else > +a &= (*g %= a *= c) >= (*g || f); > + goto e; > +} > +} > -- > 2.26.2 >
[PATCH v2] pass: Run cleanup passes before SLP [PR96789]
Hi! >> Can you repeat the compile-time measurement there? I also wonder >> whether we should worry about compile-time at -O[12] when SLP is not run. >> Thus, probably rename the cleanup pass to pre_slp_scalar_cleanup and >> gate it on && flag_slp_vectorize > > Good idea, will evaluate it. > Sorry for the late update. I evaluated compilation time on SPEC2017 INT bmks, for several option sets: A1: -Ofast -funroll-loops A2: -O1 A3: -O1 -funroll-loops A4: -O2 A5: -O2 -funroll-loops and for several guard conditions: C1: no loops after cunroll (the previous version) C2: any outermost loop unrolled C3: C1 + C2 C4: C1 + C2 + SLP only C5: C2 + SLP only (the current version) Compilation time increase percentages table: A1 A2 A3 A4 A5 C1: 0.74% 0.07% -0.25% 0.00% 0.10% C2: 0.21% 0.00% -0.19% 0.00% 0.71% C3: 0.21% 0.00% -0.06% 0.30% 0.00% C4: 0.21% -0.07% -0.38% 0.20% -0.19% C5: 0.08% 0.00% -0.38% -0.10% -0.05% C2 is a better guard than C1 (C2/A1 better than C1/A1). SLP guard is good from C2/A5 vs. C5/A5. btw, the data could have some noises especially when the difference is very small. Bootstrapped/regtested on powerpc64le-linux-gnu P8. Is it ok for trunk? BR, Kewen - gcc/ChangeLog: PR tree-optimization/96789 * passes.c (class pass_pre_slp_scalar_cleanup): New class. (make_pass_pre_slp_scalar_cleanup): New function. (pass_data_pre_slp_scalar_cleanup): New pass data. (execute_one_pass): Add support for TODO_force_next_scalar_cleanup. (pending_TODOs): Init. * passes.def (pass_pre_slp_scalar_cleanup): New pass, add pass_fre and pass_dse as its children. * timevar.def (TV_SCALAR_CLEANUP): New timevar. * tree-pass.h (TODO_force_next_scalar_cleanup): New TODO flag. (make_pass_pre_slp_scalar_cleanup): New declare. (pending_TODOs): Likewise. * tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1): Extend to set father_bbs for outermost loop. 
(tree_unroll_loops_completely): Once any outermost loop gets unrolled, set outermost_unrolled and further flag return value with TODO_force_next_scalar_cleanup. gcc/testsuite/ChangeLog: PR tree-optimization/96789 * gcc.dg/tree-ssa/ssa-dse-28.c: Adjust. * gcc.dg/tree-ssa/ssa-dse-29.c: Likewise. * gcc.dg/vect/bb-slp-41.c: Likewise. * gcc.dg/tree-ssa/pr96789.c: New test. diff --git a/gcc/passes.c b/gcc/passes.c index 6ff31ec37d7..eb938d72a42 100644 --- a/gcc/passes.c +++ b/gcc/passes.c @@ -71,6 +71,8 @@ using namespace gcc; The variable current_pass is also used for statistics and plugins. */ opt_pass *current_pass; +unsigned int pending_TODOs = 0; + /* Most passes are single-instance (within their context) and thus don't need to implement cloning, but passes that support multiple instances *must* provide their own implementation of the clone method. @@ -731,7 +733,54 @@ make_pass_late_compilation (gcc::context *ctxt) return new pass_late_compilation (ctxt); } +/* Pre-SLP scalar cleanup, it has several cleanup passes like FRE, DSE. 
*/ + +namespace { + +const pass_data pass_data_pre_slp_scalar_cleanup = +{ + GIMPLE_PASS, /* type */ + "*pre_slp_scalar_cleanup", /* name */ + OPTGROUP_LOOP, /* optinfo_flags */ + TV_SCALAR_CLEANUP, /* tv_id */ + ( PROP_cfg | PROP_ssa ), /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_pre_slp_scalar_cleanup : public gimple_opt_pass +{ +public: + pass_pre_slp_scalar_cleanup (gcc::context *ctxt) +: gimple_opt_pass (pass_data_pre_slp_scalar_cleanup, ctxt) + { + } + + virtual bool + gate (function *) + { +return flag_tree_slp_vectorize + && (pending_TODOs & TODO_force_next_scalar_cleanup); + } + + virtual unsigned int + execute (function *) + { +pending_TODOs &= ~TODO_force_next_scalar_cleanup; +return 0; + } +}; // class pass_pre_slp_scalar_cleanup + +} // anon namespace + +gimple_opt_pass * +make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt) +{ + return new pass_pre_slp_scalar_cleanup (ctxt); +} /* Set the static pass number of pass PASS to ID and record that in the mapping from static pass number to pass. */ @@ -2538,6 +2587,12 @@ execute_one_pass (opt_pass *pass) return true; } + if (todo_after & TODO_force_next_scalar_cleanup) +{ + todo_after &= ~TODO_force_next_scalar_cleanup; + pending_TODOs |= TODO_force_next_scalar_cleanup; +} + do_per_function (clear_last_verified, NULL); do_per_function (update_properties_after_pass, pass); diff --git a/gcc/passes.def b/gcc/passes.def index c0098d755bf..c74add75068 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -288,11 +288,16 @@ along with GCC; see the file COPYING3. If not se