Re: [RFC][gimple] Move can_duplicate_bb_p to gimple_can_duplicate_bb_p

2020-10-12 Thread Richard Biener
On Fri, 9 Oct 2020, Tom de Vries wrote:

> Hi,
> 
> The function gimple_can_duplicate_bb_p currently always returns true.
> 
> The presence of can_duplicate_bb_p in tracer.c however suggests that
> there are cases when bb's indeed cannot be duplicated.
> 
> Move the implementation of can_duplicate_bb_p to gimple_can_duplicate_bb_p.
> 
> Bootstrapped and reg-tested on x86_64-linux.
> 
> Build x86_64-linux with nvptx accelerator and tested libgomp.
> 
> No issues found.
> 
> As corner-case check, bootstrapped and reg-tested a patch that makes
> gimple_can_duplicate_bb_p always return false, resulting in
> PR97333 - "[gimple_can_duplicate_bb_p == false, tree-ssa-threadupdate]
> ICE in duplicate_block, at cfghooks.c:1093".
> 
> Any comments?

In principle it's correct to move this to the CFG hook since there
now seem to be stmts that cannot be duplicated and thus we need
to implement can_duplicate_bb_p.

Some minor things below...

> Thanks,
> - Tom
> 
> [gimple] Move can_duplicate_bb_p to gimple_can_duplicate_bb_p
> 
> gcc/ChangeLog:
> 
> 2020-10-09  Tom de Vries  
> 
>   * tracer.c (cached_can_duplicate_bb_p): Use can_duplicate_block_p
>   instead of can_duplicate_bb_p.
>   (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p): Move ...
>   * tree-cfg.c: ... here.
>   * tracer.c (can_duplicate_bb_p): Move ...
>   * tree-cfg.c (gimple_can_duplicate_bb_p): here.
>   * tree-cfg.h (can_duplicate_insn_p, can_duplicate_bb_no_insn_iter_p):
>   Declare.
> 
> ---
>  gcc/tracer.c   | 61 
> +-
>  gcc/tree-cfg.c | 54 ++-
>  gcc/tree-cfg.h |  2 ++
>  3 files changed, 56 insertions(+), 61 deletions(-)
> 
> diff --git a/gcc/tracer.c b/gcc/tracer.c
> index e1c2b9527e5..16b46c65b14 100644
> --- a/gcc/tracer.c
> +++ b/gcc/tracer.c
> @@ -84,65 +84,6 @@ bb_seen_p (basic_block bb)
>return bitmap_bit_p (bb_seen, bb->index);
>  }
>  
> -/* Return true if gimple stmt G can be duplicated.  */
> -static bool
> -can_duplicate_insn_p (gimple *g)
> -{
> -  /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
> - duplicated as part of its group, or not at all.
> - The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a
> - group, so the same holds there.  */
> -  if (is_gimple_call (g)
> -  && (gimple_call_internal_p (g, IFN_GOMP_SIMT_ENTER_ALLOC)
> -   || gimple_call_internal_p (g, IFN_GOMP_SIMT_EXIT)
> -   || gimple_call_internal_p (g, IFN_GOMP_SIMT_VOTE_ANY)
> -   || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_BFLY)
> -   || gimple_call_internal_p (g, IFN_GOMP_SIMT_XCHG_IDX)))
> -return false;
> -
> -  return true;
> -}
> -
> -/* Return true if BB can be duplicated.  Avoid iterating over the insns.  */
> -static bool
> -can_duplicate_bb_no_insn_iter_p (const_basic_block bb)
> -{
> -  if (bb->index < NUM_FIXED_BLOCKS)
> -return false;
> -
> -  if (gimple *g = last_stmt (CONST_CAST_BB (bb)))
> -{
> -  /* A transaction is a single entry multiple exit region.  It
> -  must be duplicated in its entirety or not at all.  */
> -  if (gimple_code (g) == GIMPLE_TRANSACTION)
> - return false;
> -
> -  /* An IFN_UNIQUE call must be duplicated as part of its group,
> -  or not at all.  */
> -  if (is_gimple_call (g)
> -   && gimple_call_internal_p (g)
> -   && gimple_call_internal_unique_p (g))
> - return false;
> -}
> -
> -  return true;
> -}
> -
> -/* Return true if BB can be duplicated.  */
> -static bool
> -can_duplicate_bb_p (const_basic_block bb)
> -{
> -  if (!can_duplicate_bb_no_insn_iter_p (bb))
> -return false;
> -
> -  for (gimple_stmt_iterator gsi = gsi_start_bb (CONST_CAST_BB (bb));
> -   !gsi_end_p (gsi); gsi_next (&gsi))
> -if (!can_duplicate_insn_p (gsi_stmt (gsi)))
> -  return false;
> -
> -  return true;
> -}
> -
>  static sbitmap can_duplicate_bb;
>  
>  /* Cache VAL as value of can_duplicate_bb_p for BB.  */
> @@ -167,7 +108,7 @@ cached_can_duplicate_bb_p (const_basic_block bb)
>return false;
>  }
>  
> -  return can_duplicate_bb_p (bb);
> +  return can_duplicate_block_p (bb);
>  }
>  
>  /* Return true if we should ignore the basic block for purposes of tracing.  
> */
> diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
> index 5caf3b62d69..a5677859ffc 100644
> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -6208,11 +6208,63 @@ gimple_split_block_before_cond_jump (basic_block bb)
>  }
>  
>  
> +/* Return true if gimple stmt G can be duplicated.  */
> +bool
> +can_duplicate_insn_p (gimple *g)

Does this need to be exported?  Please name it
can_duplicate_stmt_p.  It's also incomplete given the
function below

> +{
> +  /* An IFN_GOMP_SIMT_ENTER_ALLOC/IFN_GOMP_SIMT_EXIT call must be
> + duplicated as part of its group, or not at all.
> + The IFN_GOMP_SIMT_VOTE_ANY and IFN_GOMP_SIMT_XCHG_* are part of such a
> + group, so the same holds the

[PATCH] tree-optimization/97357 - avoid abnormals in loop splitting conditions

2020-10-12 Thread Richard Biener
This avoids abnormals in another place.

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

2020-10-12  Richard Biener  

PR tree-optimization/97357
* tree-ssa-loop-split.c (ssa_semi_invariant_p): Abnormal
SSA names are not semi invariant.

* gcc.dg/pr97357.c: New testcase.
---
 gcc/testsuite/gcc.dg/pr97357.c | 39 ++
 gcc/tree-ssa-loop-split.c  |  3 +++
 2 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr97357.c

diff --git a/gcc/testsuite/gcc.dg/pr97357.c b/gcc/testsuite/gcc.dg/pr97357.c
new file mode 100644
index 000..2b32d13a43f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97357.c
@@ -0,0 +1,39 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-require-effective-target indirect_jumps } */
+
+#include 
+#include 
+
+void * my_malloc (size_t size);
+
+typedef struct glk {
+  struct glk *nxt;
+} glk;
+
+typedef struct Lock
+{
+ glk ByteLock;
+} Lock;
+
+static Lock *l, *lk;
+
+void bytelocks(glk *rethead, jmp_buf jb)
+{
+  glk *cur, *cur_lk;
+
+  if (( _setjmp (jb)) == 0)
+for (cur = &l->ByteLock; cur != ((glk *)0) ; cur = (cur)->nxt)
+for (cur_lk = &lk->ByteLock; cur_lk != ((glk *)0); cur_lk = 
cur_lk->nxt)
+  {
+glk *retrng;
+
+if(!rethead)
+  rethead = (glk *) my_malloc (sizeof(glk));
+retrng = (glk *) my_malloc (sizeof(glk));
+
+retrng->nxt = rethead;
+  }
+
+ return;
+}
diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
index 1eb6be5ddb2..46ee7c0fc14 100644
--- a/gcc/tree-ssa-loop-split.c
+++ b/gcc/tree-ssa-loop-split.c
@@ -977,6 +977,9 @@ ssa_semi_invariant_p (struct loop *loop, tree name,
   if (!def_bb || !flow_bb_inside_loop_p (loop, def_bb))
 return true;
 
+  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name))
+return false;
+
   return stmt_semi_invariant_p_1 (loop, def, skip_head, stmt_stat);
 }
 
-- 
2.26.2


[PATCH] More consistently split SLP groups

2020-10-12 Thread Richard Biener
This appropriately makes matches all true after successful SLP discovery
to reliably succeed splitting.  We were picking up an eventual all
false built-up from scalars state in some cases.

Bootstrap / regtest in progress on x86_64-unknown-linux-gnu.

2020-10-12  Richard Biener  

* tree-vect-slp.c (vect_analyze_slp_instance): Set matches to true
after successful discovery but forced split.
---
 gcc/tree-vect-slp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 495fb970e24..dd2042a4db5 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -2283,7 +2283,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
 "SLP discovery succeeded but node needs "
 "splitting\n");
- matches[0] = true;
+ memset (matches, true, group_size);
  matches[group_size / const_max_nunits * const_max_nunits] = false;
  vect_free_slp_tree (node);
}
-- 
2.26.2


Re: [PATCH] arm: Fix multiple inheritance thunks for thumb-1 with -mpure-code

2020-10-12 Thread Christophe Lyon via Gcc-patches
On Thu, 8 Oct 2020 at 11:58, Richard Earnshaw
 wrote:
>
> On 08/10/2020 10:07, Christophe Lyon via Gcc-patches wrote:
> > On Tue, 6 Oct 2020 at 18:02, Richard Earnshaw
> >  wrote:
> >>
> >> On 29/09/2020 20:50, Christophe Lyon via Gcc-patches wrote:
> >>> When mi_delta is > 255 and -mpure-code is used, we cannot load delta
> >>> from code memory (like we do without -mpure-code).
> >>>
> >>> This patch builds the value of mi_delta into r3 with a series of
> >>> movs/adds/lsls.
> >>>
> >>> We also do some cleanup by not emitting the function address and delta
> >>> via .word directives at the end of the thunk since we don't use them
> >>> with -mpure-code.
> >>>
> >>> No need for new testcases, this bug was already identified by
> >>> eg. pr46287-3.C
> >>>
> >>> 2020-09-29  Christophe Lyon  
> >>>
> >>>   gcc/
> >>>   * config/arm/arm.c (arm_thumb1_mi_thunk): Build mi_delta in r3 and
> >>>   do not emit function address and delta when -mpure-code is used.
> >>
> > Hi Richard,
> >
> > Thanks for your comments.
> >
> >> There are some optimizations you can make to this code.
> >>
> >> Firstly, for values between 256 and 510 (inclusive), it would be better
> >> to just expand a mov of 255 followed by an add.
> > I now see the splitter for the "Pe" constraint which I hadn't noticed
> > before, so I can write something similar indeed.
> >
> > However, I'm not quite sure I understand the benefit in the split
> > when -mpure-code is NOT used.
> > Consider:
> > int f3_1 (void) { return 510; }
> > int f3_2 (void) { return 511; }
> > Compile with -O2 -mcpu=cortex-m0:
> > f3_1:
> > movsr0, #255
> > lslsr0, r0, #1
> > bx  lr
> > f3_2:
> > ldr r0, .L4
> > bx  lr
> >
> > The splitter makes the code bigger, does it "compensate" for this by
> > not having to load the constant?
> > Actually the constant uses 4 more bytes, which should be taken into
> > account when comparing code size,
>
> Yes, the size of the literal pool entry needs to be taken into account.
>  It might happen that the entry could be shared with another use of that
> literal, but in general that's rare.
>
> > so f3_1 uses 6 bytes, and f3_2 uses 8, so as you say below three
> > thumb1 instructions would be equivalent in size compared to loading
> > from the literal pool. Should the 256-510 range be extended?
>
> It's a bit borderline at three instructions when literal pools are not
> expensive to use, but in thumb1 literal pools tend to be quite small due
> to the limited pc offsets we can use.  I think on balance we probably
> want to use the instruction sequence unless optimizing for size.
>
> >
> >
> >> This is also true for
> >> the literal pools alternative as well, so should be handled before all
> >> this.
> > I am not sure what you mean: with -mpure-code, the above sample is compiled 
> > as:
> > f3_1:
> > movsr0, #255
> > lslsr0, r0, #1
> > bx  lr
> > f3_2:
> > movsr0, #1
> > lslsr0, r0, #8
> > addsr0, r0, #255
> > bx  lr
> >
> > so the "return 510" case is already handled as without -mpure-code.
>
> I was thinking specifically of the thunk sequence where you seem to be
> emitting instructions directly rather than generating RTL.  The examples
> you show here are not thunks.
>
OK thanks for the clarification.

Here is an updated version, split into 3 patches to hopefully make
review easier.
They apply on top of my other mpure-code patches for PR96967 and PR96770:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554956.html
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554957.html

I kept it this way to make incremental changes easier to understand.

Patch 1: With the hope to avoid confusion and make maintenance easier,
I have updated thumb1_gen_const_int() so that it can generate either RTL or
asm. This way, all the code used to build thumb-1 constants is in the
same place,
 in case we need to improve/fix it later. We now generate shorter sequences in
several cases matching your comments.

Patch 2: Removes the equivalent loop from thumb1_movsi_insn pattern and
calls thumb1_gen_const_int.

Patch 3: Update of the original patch in this thread, now calls
thumb1_gen_const_int.

>
> >
> >>  I also suspect (but haven't checked) that the base adjustment will
> >> most commonly be a multiple of the machine word size (ie 4).  If that is
> >> the case then you could generate n/4 and then shift it left by 2 for an
> >> even greater range of literals.
> > I can see there is provision for this in the !TARGET_THUMB1_ONLY case,
> > I'll update my patch.
> >
> >>  More generally, any sequence of up to
> >> three thumb1 instructions will be no larger, and probably as fast as the
> >> existing literal pool fall back.
> >>
> >> Secondly, if the value is, for example, 65536 (0x10000), your code will
> >> emit a mov followed by two shift-by-8 instructions; the two shifts could
> >> be merged into 

[wwwdocs][Patch] gcc-11 + project/gomp: OpenMP status update

2020-10-12 Thread Tobias Burnus

This is a tiny update – and probably not the last for the GCC 11 status
and especially as the project page is still linked prominently at some
external pages:

Update the GOMP project page a bit; some more revisions wouldn't harm,
however: https://gcc.gnu.org/projects/gomp/

OK? Wording suggestions?

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
gcc-11 + project/gomp: OpenMP status update

	* htdocs/gcc-11/changes.html (languages): Add another newly
	supported clause: device_type.
	* htdocs/projects/gomp/index.html: Mention offloading support; add
	post-2015 news items.

diff --git a/htdocs/gcc-11/changes.html b/htdocs/gcc-11/changes.html
index e2a32e51..f7417622 100644
--- a/htdocs/gcc-11/changes.html
+++ b/htdocs/gcc-11/changes.html
@@ -123,6 +123,7 @@ a work-in-progress.
   For Fortran, OpenMP 4.5 is now finally fully supported and OpenMP 5.0
   support has been extended, including the following features which were
   before only available in C and C++: order(concurrent),
+  device_type,
   lastprivate with conditional modifier,
   if clause with simd and cancel
   modifiers, target data without map clause,
diff --git a/htdocs/projects/gomp/index.html b/htdocs/projects/gomp/index.html
index 70e6d95a..15cb1585 100644
--- a/htdocs/projects/gomp/index.html
+++ b/htdocs/projects/gomp/index.html
@@ -36,6 +36,12 @@ environments. Using OpenMP's directive-based parallelism also
 simplifies the act of converting existing serial code to
 efficient parallel code.
 
+OpenMP additionally permits to offload computations on
+accelerators such as GPUs,
+making use of their highly parallel computation support; if
+no accelerator is available, as fallback, the computation is
+then done on the host.
+
 Project goal
 To remain relevant, free software development tools must
 support emerging technologies. By implementing OpenMP, GOMP
@@ -63,6 +69,40 @@ available.
 
 Status
 
+August 20, 2020
+https://www.openmp.org/wp-content/uploads/openmp-TR9.pdf";>OpenMP
+Technical Report 9: Version 5.1 Public Comment Draft has been released.
+
+May 7, 2020
+GCC 10 has been released; it adds a number of
+newly implemented OpenMP 5.0 features on top of the GCC 9 release such as
+conditional lastprivate clause, scan
+and loop directives, order(concurrent) and
+use_device_addr clauses support, if clause on
+simd construct or partial support for the
+declare variant directive, getting closer to full support
+of the OpenMP 5.0 standard.
+
+May 3, 2019
+https://gcc.gnu.org/gcc-9/";>GCC 9 has been released and
+version 5.0 of the OpenMP specification is now partially supported in the C
+and C++ compilers.
+
+November 8, 2018
+The https://www.openmp.org/specifications/";>OpenMP v5.0
+specification has been released.
+
+May 2, 2017
+https://gcc.gnu.org/gcc-7/";>GCC 7 has been released and
+version 4.5 of the OpenMP specification is now partially supported in the
+Fortran compiler; the largest missing item is structure element
+mapping.
+
+April 27, 2016
+https://gcc.gnu.org/gcc-6/";>GCC 6 has been released and
+version 4.5 of the OpenMP specification is now supported in the C and
+C++ compilers
+
 November 14, 2015
 The final https://www.openmp.org/wp-content/uploads/openmp-4.5.pdf";>OpenMP v4.5


Re: [wwwdocs][Patch] gcc-11 + project/gomp: OpenMP status update

2020-10-12 Thread Jakub Jelinek via Gcc-patches
On Mon, Oct 12, 2020 at 10:13:09AM +0200, Tobias Burnus wrote:
> This is a tiny update – and probably not the last for the GCC 11 status
> and especially as the project page is still linked prominently at some
> external pages:
> 
> Update the GOMP project page a bit; some more revisions wouldn't harm,
> however: https://gcc.gnu.org/projects/gomp/
> 
> OK? Wording suggestions?

Thanks, LGTM, but I have one nit, see below.  Ok with that change.

> @@ -63,6 +69,40 @@ available.
>  
>  Status
>  
> +August 20, 2020
> + href="https://www.openmp.org/wp-content/uploads/openmp-TR9.pdf";>OpenMP
> +Technical Report 9: Version 5.1 Public Comment Draft has been 
> released.
> +
> +May 7, 2020
> +GCC 10 has been released; it adds a number of
> +newly implemented OpenMP 5.0 features on top of the GCC 9 release such as
> +conditional lastprivate clause, scan
> +and loop directives, order(concurrent) and
> +use_device_addr clauses support, if clause on
> +simd construct or partial support for the
> +declare variant directive, getting closer to full support
> +of the OpenMP 5.0 standard.
> +
> +May 3, 2019
> +https://gcc.gnu.org/gcc-9/";>GCC 9 has been released and
> +version 5.0 of the OpenMP specification is now partially supported in the C
> +and C++ compilers.

While gcc-9/changes.html mentions it, I think it would be useful to add the
For details which features of OpenMP 5.0 are and which are not supported in the 
GCC 9 release see
https://gcc.gnu.org/ml/gcc-patches/2018-11/msg00628.html>this mail.
sentence.

Jakub



[committed][nvptx] Fix -msoft-stack-reserve-local format

2020-10-12 Thread Tom de Vries
Hi,

Currently, in order to use the switch -msoft-stack-reserve-local with the
default arg 128, you have to specify '-msoft-stack-reserve-local128'.

Fix the switch format such that you specify '-msoft-stack-reserve-local=128'
instead.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[nvptx] Fix -msoft-stack-reserve-local format

gcc/ChangeLog:

2020-10-12  Tom de Vries  

* config/nvptx/nvptx.opt (-msoft-stack-reserve-local): Rename to ...
(-msoft-stack-reserve-local=): ... this.

---
 gcc/config/nvptx/nvptx.opt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index d6910a96cf0..38454222d42 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -37,7 +37,7 @@ msoft-stack
 Target Report Mask(SOFT_STACK)
 Use custom stacks instead of local memory for automatic storage.
 
-msoft-stack-reserve-local
+msoft-stack-reserve-local=
 Target Report Joined RejectNegative UInteger Var(nvptx_softstack_size) 
Init(128)
 Specify size of .local memory used for stack when the exact amount is not 
known.
 


Re: make sincos take type from intrinsic formal, not from result assignment

2020-10-12 Thread Martin Liška

Hello Alexander.

It seems the patch caused quite some clang warnings:

/home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/gcc/builtins.c:2366:5:
 warning: comparison of different enumeration types in switch statement 
('combined_fn' and 'built_in_function') [-Wenum-compare-switch]
...

Can we please fix them?
Thanks,
Martin


[PATCH] IPA: prevent an UBSAN error

2020-10-12 Thread Martin Liška

Prevents the following UBSAN error:

./xgcc -B. /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/torture/pr49770.C 
-O2 -c
/home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482:22: runtime error: load 
of value 2, which is not a valid value for type 'bool'
#0 0x1fdb4d1 in modref_tree::merge(modref_tree*, 
vec*) 
/home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482
#1 0x1fcadaa in merge_call_side_effects(modref_summary*, gimple*, 
modref_summary*, bool) /home/marxin/Programming/gcc2/gcc/ipa-modref.c:511
#2 0x1fcbadd in analyze_call 
/home/marxin/Programming/gcc2/gcc/ipa-modref.c:642
#3 0x1fcc061 in analyze_stmt 
/home/marxin/Programming/gcc2/gcc/ipa-modref.c:732
#4 0x1fccf31 in analyze_function 
/home/marxin/Programming/gcc2/gcc/ipa-modref.c:823
#5 0x1fd17e5 in execute /home/marxin/Programming/gcc2/gcc/ipa-modref.c:1441
#6 0x25cca6e in execute_one_pass(opt_pass*) 
/home/marxin/Programming/gcc2/gcc/passes.c:2509
#7 0x25cd39b in execute_pass_list_1 
/home/marxin/Programming/gcc2/gcc/passes.c:2597
#8 0x25cd450 in execute_pass_list_1 
/home/marxin/Programming/gcc2/gcc/passes.c:2598
#9 0x25cd4ee in execute_pass_list(function*, opt_pass*) 
/home/marxin/Programming/gcc2/gcc/passes.c:2608
#10 0x25c7a5a in do_per_function_toporder(void (*)(function*, void*), 
void*) /home/marxin/Programming/gcc2/gcc/passes.c:1726
#11 0x25cfa3f in execute_ipa_pass_list(opt_pass*) 
/home/marxin/Programming/gcc2/gcc/passes.c:2941
#12 0x173572d in ipa_passes 
/home/marxin/Programming/gcc2/gcc/cgraphunit.c:2642
#13 0x17364ee in symbol_table::compile() 
/home/marxin/Programming/gcc2/gcc/cgraphunit.c:2777
#14 0x17372d9 in symbol_table::finalize_compilation_unit() 
/home/marxin/Programming/gcc2/gcc/cgraphunit.c:3022
#15 0x2a1f00a in compile_file /home/marxin/Programming/gcc2/gcc/toplev.c:485
#16 0x2a27dc8 in do_compile /home/marxin/Programming/gcc2/gcc/toplev.c:2321
#17 0x2a283cc in toplev::main(int, char**) 
/home/marxin/Programming/gcc2/gcc/toplev.c:2460
#18 0x54f21cd in main /home/marxin/Programming/gcc2/gcc/main.c:39
#19 0x76f0de09 in __libc_start_main ../csu/libc-start.c:314
#20 0x9eac09 in _start 
(/home/marxin/Programming/gcc2/objdir/gcc/cc1plus+0x9eac09)

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

* ipa-modref.c (merge_call_side_effects): Clear modref_parm_map
fields in the vector.
---
 gcc/ipa-modref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index dd59e804c0f..b815eb8cc88 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -476,7 +476,7 @@ merge_call_side_effects (modref_summary *cur_summary,
   auto_vec  parm_map;
   bool changed = false;
 
-  parm_map.safe_grow (gimple_call_num_args (stmt));

+  parm_map.safe_grow_cleared (gimple_call_num_args (stmt));
   for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
 {
   tree op = gimple_call_arg (stmt, i);
--
2.28.0



Re: [PATCH] IPA: prevent an UBSAN error

2020-10-12 Thread Jan Hubicka
> Prevents the following UBSAN error:
> 
> ./xgcc -B. 
> /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/torture/pr49770.C -O2 -c
> /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482:22: runtime error: 
> load of value 2, which is not a valid value for type 'bool'
> #0 0x1fdb4d1 in modref_tree::merge(modref_tree*, 
> vec*) 
> /home/marxin/Programming/gcc2/gcc/ipa-modref-tree.h:482
> #1 0x1fcadaa in merge_call_side_effects(modref_summary*, gimple*, 
> modref_summary*, bool) /home/marxin/Programming/gcc2/gcc/ipa-modref.c:511
> #2 0x1fcbadd in analyze_call 
> /home/marxin/Programming/gcc2/gcc/ipa-modref.c:642
> #3 0x1fcc061 in analyze_stmt 
> /home/marxin/Programming/gcc2/gcc/ipa-modref.c:732
> #4 0x1fccf31 in analyze_function 
> /home/marxin/Programming/gcc2/gcc/ipa-modref.c:823
> #5 0x1fd17e5 in execute 
> /home/marxin/Programming/gcc2/gcc/ipa-modref.c:1441
> #6 0x25cca6e in execute_one_pass(opt_pass*) 
> /home/marxin/Programming/gcc2/gcc/passes.c:2509
> #7 0x25cd39b in execute_pass_list_1 
> /home/marxin/Programming/gcc2/gcc/passes.c:2597
> #8 0x25cd450 in execute_pass_list_1 
> /home/marxin/Programming/gcc2/gcc/passes.c:2598
> #9 0x25cd4ee in execute_pass_list(function*, opt_pass*) 
> /home/marxin/Programming/gcc2/gcc/passes.c:2608
> #10 0x25c7a5a in do_per_function_toporder(void (*)(function*, void*), 
> void*) /home/marxin/Programming/gcc2/gcc/passes.c:1726
> #11 0x25cfa3f in execute_ipa_pass_list(opt_pass*) 
> /home/marxin/Programming/gcc2/gcc/passes.c:2941
> #12 0x173572d in ipa_passes 
> /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2642
> #13 0x17364ee in symbol_table::compile() 
> /home/marxin/Programming/gcc2/gcc/cgraphunit.c:2777
> #14 0x17372d9 in symbol_table::finalize_compilation_unit() 
> /home/marxin/Programming/gcc2/gcc/cgraphunit.c:3022
> #15 0x2a1f00a in compile_file 
> /home/marxin/Programming/gcc2/gcc/toplev.c:485
> #16 0x2a27dc8 in do_compile 
> /home/marxin/Programming/gcc2/gcc/toplev.c:2321
> #17 0x2a283cc in toplev::main(int, char**) 
> /home/marxin/Programming/gcc2/gcc/toplev.c:2460
> #18 0x54f21cd in main /home/marxin/Programming/gcc2/gcc/main.c:39
> #19 0x76f0de09 in __libc_start_main ../csu/libc-start.c:314
> #20 0x9eac09 in _start 
> (/home/marxin/Programming/gcc2/objdir/gcc/cc1plus+0x9eac09)
> 
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?

OK (to silence valgrind).  The situation here is harmless - when
param_index is -1 the param_offset_known has no meaning, but we still
merge it into the summary.

Honza


Re: [PATCH v2] IBM Z: Change vector copysign to use bitwise operations

2020-10-12 Thread Andreas Krebbel via Gcc-patches
On 09.10.20 17:49, Ilya Leoshkevich wrote:
> Bootstrapped and regtested on s390x-redhat-linux.  OK for master?
> 
> v1: https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555782.html
> v1 -> v2: Use related_int_vector_mode.
> 
> 
> 
> The vector copysign pattern incorrectly assumes that vector
> if_then_else operates on bits, not on elements.  This can theoretically
> mislead the optimizers.  Fix by changing it to use bitwise operations,
> like commit 2930bb321794 ("PR94613: Fix vec_sel builtin for IBM Z") did
> for vec_sel builtin.
> 
> gcc/ChangeLog:
> 
> 2020-10-07  Ilya Leoshkevich  
> 
>   * config/s390/s390-protos.h (s390_build_signbit_mask): New
>   function.
>   * config/s390/s390.c (s390_contiguous_bitmask_vector_p):
>   Bitcast the argument to an integral mode.
>   (s390_expand_vec_init): Do not call
>   s390_contiguous_bitmask_vector_p with a scalar argument.
>   (s390_build_signbit_mask): New function.
>   * config/s390/vector.md (copysign3): Use bitwise
>   operations.

Ok. Thanks!

Andreas

> ---
>  gcc/config/s390/s390-protos.h |  1 +
>  gcc/config/s390/s390.c| 44 ---
>  gcc/config/s390/vector.md | 28 +++---
>  3 files changed, 45 insertions(+), 28 deletions(-)
> 
> diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
> index 6f1bc07db17..029f7289fac 100644
> --- a/gcc/config/s390/s390-protos.h
> +++ b/gcc/config/s390/s390-protos.h
> @@ -121,6 +121,7 @@ extern void s390_expand_vec_compare_cc (rtx, enum 
> rtx_code, rtx, rtx, bool);
>  extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
>  extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
>  extern void s390_expand_vec_init (rtx, rtx);
> +extern rtx s390_build_signbit_mask (machine_mode);
>  extern rtx s390_return_addr_rtx (int, rtx);
>  extern rtx s390_back_chain_rtx (void);
>  extern rtx_insn *s390_emit_call (rtx, rtx, rtx, rtx);
> diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
> index 93894307d62..dbb541bbea7 100644
> --- a/gcc/config/s390/s390.c
> +++ b/gcc/config/s390/s390.c
> @@ -2467,6 +2467,9 @@ s390_contiguous_bitmask_vector_p (rtx op, int *start, 
> int *end)
>rtx elt;
>bool b;
>  
> +  /* Handle floats by bitcasting them to ints.  */
> +  op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);
> +
>gcc_assert (!!start == !!end);
>if (!const_vec_duplicate_p (op, &elt)
>|| !CONST_INT_P (elt))
> @@ -6863,15 +6866,16 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  
>/* Use vector gen mask or vector gen byte mask if possible.  */
> -  if (all_same && all_const_int
> -  && (XVECEXP (vals, 0, 0) == const0_rtx
> -   || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
> -NULL, NULL)
> -   || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
> +  if (all_same && all_const_int)
>  {
> -  emit_insn (gen_rtx_SET (target,
> -   gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0;
> -  return;
> +  rtx vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
> +  if (XVECEXP (vals, 0, 0) == const0_rtx
> +   || s390_contiguous_bitmask_vector_p (vec, NULL, NULL)
> +   || s390_bytemask_vector_p (vec, NULL))
> + {
> +   emit_insn (gen_rtx_SET (target, vec));
> +   return;
> + }
>  }
>  
>/* Use vector replicate instructions.  vlrep/vrepi/vrep  */
> @@ -6949,6 +6953,30 @@ s390_expand_vec_init (rtx target, rtx vals)
>  }
>  }
>  
> +/* Emit a vector constant that contains 1s in each element's sign bit 
> position
> +   and 0s in other positions.  MODE is the desired constant's mode.  */
> +extern rtx
> +s390_build_signbit_mask (machine_mode mode)
> +{
> +  /* Generate the integral element mask value.  */
> +  machine_mode inner_mode = GET_MODE_INNER (mode);
> +  int inner_bitsize = GET_MODE_BITSIZE (inner_mode);
> +  wide_int mask_val = wi::set_bit_in_zero (inner_bitsize - 1, inner_bitsize);
> +
> +  /* Emit the element mask rtx.  Use gen_lowpart in order to cast the 
> integral
> + value to the desired mode.  */
> +  machine_mode int_mode = related_int_vector_mode (mode).require ();
> +  rtx mask = immed_wide_int_const (mask_val, GET_MODE_INNER (int_mode));
> +  mask = gen_lowpart (inner_mode, mask);
> +
> +  /* Emit the vector mask rtx by mode the element mask rtx.  */
> +  int nunits = GET_MODE_NUNITS (mode);
> +  rtvec v = rtvec_alloc (nunits);
> +  for (int i = 0; i < nunits; i++)
> +RTVEC_ELT (v, i) = mask;
> +  return gen_rtx_CONST_VECTOR (mode, v);
> +}
> +
>  /* Structure to hold the initial parameters for a compare_and_swap operation
> in HImode and QImode.  */
>  
> diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
> index 2573b7d980a..e9332bad0fd 100644
> --- a/gcc/config/s390/vector.md
> +++ b/gcc/config/s390/vector.md
> @@ -1425,28 +1425,16 @@

[PATCH] Ignore shifts larger than precision in operator_rshift::op1_range.

2020-10-12 Thread Aldy Hernandez via Gcc-patches
Pushed as obvious.

gcc/ChangeLog:

PR tree-optimization/97371
* range-op.cc (operator_rshift::op1_range): Ignore shifts larger than
or equal to type precision.

gcc/testsuite/ChangeLog:

* gcc.dg/pr97371.c: New test.
---
 gcc/range-op.cc| 7 +++
 gcc/testsuite/gcc.dg/pr97371.c | 8 
 2 files changed, 15 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr97371.c

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index d1a11b34894..ce6ae2de20c 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1626,6 +1626,13 @@ operator_rshift::op1_range (irange &r,
   tree shift;
   if (op2.singleton_p (&shift))
 {
+  // Ignore nonsensical shifts.
+  unsigned prec = TYPE_PRECISION (type);
+  if (wi::ge_p (wi::to_wide (shift),
+   wi::uhwi (prec, TYPE_PRECISION (TREE_TYPE (shift))),
+   UNSIGNED))
+   return false;
+
   // Folding the original operation may discard some impossible
   // ranges from the LHS.
   int_range_max lhs_refined;
diff --git a/gcc/testsuite/gcc.dg/pr97371.c b/gcc/testsuite/gcc.dg/pr97371.c
new file mode 100644
index 000..ffefad0287e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97371.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -w" } */
+
+int a, b;
+void c() {
+  if (b >> 38)
+a = b;
+}
-- 
2.26.2



ipa-modref cleanups

2020-10-12 Thread Jan Hubicka
Hi,
this is a largely mechanical patch fixing some suboptimal data structure decisions
in modref.  It records three different things
 1) optimization_summaries that are used by tree-ssa-alias to disambiguate
(computed by local passes or ipa execute)
 2) summaries produced by local analysis and used by the ipa execute 
 3) summaries_lto produced by analysis when streaming is expected,
streamed, used by ipa execute 

All three items are stored in the "summaries" data structure, where 1 and 2
are mixed and differentiated by "finished" flags.

This uses extra memory and also makes it impossible to use modref while producing
other IPA summaries (by ipa-prop and ipa-devirt).  This patch separates the
summaries into three special purpose datastructures.

There is one fix to propagation in ipa_merge_modref_summary_after_inlining,
where the check to ignore stores was placed incorrectly. This seems to lead
to increased clobber disambiguations:

Alias oracle query stats:
  refs_may_alias_p: 64266142 disambiguations, 74474762 queries
  ref_maybe_used_by_call_p: 142295 disambiguations, 65168507 queries
  call_may_clobber_ref_p: 22975 disambiguations, 28762 queries
  nonoverlapping_component_refs_p: 0 disambiguations, 36805 queries
  nonoverlapping_refs_since_match_p: 19389 disambiguations, 4 must 
overlaps, 75714 queries
  aliasing_component_refs_p: 54702 disambiguations, 759023 queries
  TBAA oracle: 23639134 disambiguations 56006211 queries
   16113791 are in alias set 0
   10615301 queries asked about the same object
   125 queries asked about the same alias set
   0 access volatile
   3994283 are dependent in the DAG
   1643577 are aritificially in conflict with void *

Modref stats:
  modref use: 11659 disambiguations, 40203 queries
  modref clobber: 1509635 disambiguations, 1830233 queries
  3919009 tbaa queries (2.141262 per modref query)
  623504 base compares (0.340669 per modref query)

PTA query stats:
  pt_solution_includes: 967422 disambiguations, 13605769 queries
  pt_solutions_intersect: 1033368 disambiguations, 13121788 queries

Bootstrapped/regtested x86_64-linux. I plan to commit it later today if there
are no complaints.

gcc/ChangeLog:

2020-10-11  Jan Hubicka  

* ipa-modref.c (modref_summaries): Remove field IPA.
(class modref_summary_lto): New global variable.
(class modref_summaries_lto): New.
(modref_summary::modref_summary): Remove loads_lto and stores_lto.
(modref_summary::~modref_summary): Remove loads_lto and stores_lto.
(modref_summary::useful_p): Do not use lto_useful.
(modref_records_lto): New typedef.
(struct modref_summary_lto): New type.
(modref_summary_lto::modref_summary_lto): New member function.
(modref_summary_lto::~modref_summary_lto): New member function.
(modref_summary_lto::useful_p): New member function.
(modref_summary::dump): Do not handle lto.
(modref_summary_lto::dump): New member function.
(get_modref_function_summary): Use optimization_summary.
(merge_call_side_effects): Use optimization_summary.
(analyze_call): Use optimization_summary.
(struct summary_ptrs): New structure.
(analyze_load): Update to handle separate lto and non-lto summaries.
(analyze_store): Likewise.
(analyze_stmt): Likewise.
(remove_summary): Break out from ...
(analyze_function): ... here; update to handle separated summaries.
(modref_summaries::insert): Do not handle lto summary.
(modref_summaries_lto::insert): New member function.
(modref_summaries::duplicate): Do not handle lto summary.
(modref_summaries_lto::duplicate): New member function.
(read_modref_records): Expect nolto_ret or lto_ret to be NULL.
(modref_write): Write lto summary.
(read_section): Handle separated summaries.
(modref_read): Initialize separated summaries.
(modref_transform): Handle separated summaries.
(pass_modref::execute): Turn summary to optimization_summary; handle
separate summaries.
(ignore_edge): Handle separate summaries.
(ipa_merge_modref_summary_after_inlining): Likewise.
(collapse_loads): Likewise.
(modref_propagate_in_scc): Likewise.
(pass_ipa_modref::execute): Likewise.
(ipa_modref_c_finalize): Likewise.
* ipa-modref.h (modref_records_lto): Remove typedef.
(struct modref_summary): Remove stores_lto, loads_lto and finished
fields; remove lto_useful_p member function.
gcc/ChangeLog:

2020-10-12  Jan Hubicka  

* ipa-modref.c (GTY):
(class modref_summary_lto):
(class GTY):
(modref_summary::modref_summary):
(modref_summary::~modref_summary):
(modref_summary::lto_useful_p):
(modref_summary::useful_p):
(struct GTY):
(modref_summary_lto::modref_summary_l

[committed] d: Merge upstream dmd 3a9790525

2020-10-12 Thread Iain Buclaw via Gcc-patches
Hi,

This patch merges the D front-end implementation with upstream dmd
3a9790525.  Fixes the return codes to match the documentation of
Target::isVectorTypeSupported.

Bootstrapped and regression tested on x86_64-linux-gnu, and committed to
mainline.

Regards,
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 3a9790525
* d-target.cc (Target::isVectorTypeSupported): Adjust return codes for
invalid size and invalid base type.
---
 gcc/d/d-target.cc | 6 +++---
 gcc/d/dmd/MERGE   | 2 +-
 gcc/d/dmd/mtype.c | 8 
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc
index 4a38cca56b4..78f14203b5c 100644
--- a/gcc/d/d-target.cc
+++ b/gcc/d/d-target.cc
@@ -241,7 +241,7 @@ Target::isVectorTypeSupported (int sz, Type *type)
 {
   /* Size must be greater than zero, and a power of two.  */
   if (sz <= 0 || sz & (sz - 1))
-return 2;
+return 3;
 
   /* __vector(void[]) is treated same as __vector(ubyte[])  */
   if (type == Type::tvoid)
@@ -249,7 +249,7 @@ Target::isVectorTypeSupported (int sz, Type *type)
 
   /* No support for non-trivial types, complex types, or booleans.  */
   if (!type->isTypeBasic () || type->iscomplex () || type->ty == Tbool)
-return 3;
+return 2;
 
   /* In [simd/vector extensions], which vector types are supported depends on
  the target.  The implementation is expected to only support the vector
@@ -258,7 +258,7 @@ Target::isVectorTypeSupported (int sz, Type *type)
   tree ctype = build_vector_type (build_ctype (type), nunits);
 
   if (!targetm.vector_mode_supported_p (TYPE_MODE (ctype)))
-return 3;
+return 2;
 
   return 0;
 }
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 4676645f971..8a59cbde78e 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-e49192807967c6f11252683a731c5a0159ef36da
+3a979052509fff8170ba80e48817377a60e78eb3
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/mtype.c b/gcc/d/dmd/mtype.c
index 36471557dfc..bc66be028c1 100644
--- a/gcc/d/dmd/mtype.c
+++ b/gcc/d/dmd/mtype.c
@@ -3824,12 +3824,12 @@ Type *TypeVector::semantic(Loc loc, Scope *sc)
 case 1: // no support at all
 error(loc, "SIMD vector types not supported on this platform");
 return terror;
-case 2: // invalid size
-error(loc, "%d byte vector type %s is not supported on this platform", 
sz, toChars());
-return terror;
-case 3: // invalid base type
+case 2: // invalid base type
 error(loc, "vector type %s is not supported on this platform", 
toChars());
 return terror;
+case 3: // invalid size
+error(loc, "%d byte vector type %s is not supported on this platform", 
sz, toChars());
+return terror;
 default:
 assert(0);
 }
-- 
2.25.1



[committed] libphobos: Override tool_timeout value in testsuite

2020-10-12 Thread Iain Buclaw via Gcc-patches
Hi,

Some of the larger tests in the phobos testsuite on occasion trigger the
default timeout limit.  Increasing the limit to 10 minutes should give
compilation enough time to finish.

Regression tested on x86_64-linux-gnu, and committed to mainline.

Regards,
Iain.

---
libphobos/ChangeLog:

* testsuite/lib/libphobos.exp: Define tool_timeout, set to 600.
---
 libphobos/testsuite/lib/libphobos.exp | 4 
 1 file changed, 4 insertions(+)

diff --git a/libphobos/testsuite/lib/libphobos.exp 
b/libphobos/testsuite/lib/libphobos.exp
index 2e9da95ac1c..790480bf95c 100644
--- a/libphobos/testsuite/lib/libphobos.exp
+++ b/libphobos/testsuite/lib/libphobos.exp
@@ -105,6 +105,7 @@ proc libphobos_init { args } {
 global gdcpaths gdcldflags
 global gluefile wrap_flags
 global ld_library_path
+global tool_timeout
 global DEFAULT_DFLAGS
 
 # If a testcase doesn't have special options, use these.
@@ -185,6 +186,9 @@ proc libphobos_init { args } {
}
 }
 
+# Set the default timeout for phobos tests.
+set tool_timeout 600
+
 set_ld_library_path_env_vars
 
 libphobos_maybe_build_wrapper "${objdir}/testglue.o"
-- 
2.25.1



[committed] d: Fix alias protection being ignored if used before declaration.

2020-10-12 Thread Iain Buclaw via Gcc-patches
Hi,

This patch merges the D front-end implementation with upstream dmd
70aabfb51.  Fixes a symbol resolver bug where a private alias becomes
public if used before its declaration.

Bootstrapped and regression tested on x86_64-linux-gnu, and committed to
mainline, and backported to the gcc-10 and gcc-9 release branches.

Regards,
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd 70aabfb51
---
 gcc/d/dmd/MERGE  |  2 +-
 gcc/d/dmd/declaration.c  |  7 +++
 gcc/testsuite/gdc.test/fail_compilation/fail21001.d  | 12 
 .../gdc.test/fail_compilation/imports/fail21001b.d   |  5 +
 .../fail_compilation/imports/issue21295ast_node.d|  5 +
 .../fail_compilation/imports/issue21295astcodegen.d  |  4 
 .../fail_compilation/imports/issue21295dtemplate.d   |  3 +++
 .../fail_compilation/imports/issue21295visitor.d |  3 +++
 gcc/testsuite/gdc.test/fail_compilation/issue21295.d |  9 +
 9 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/fail21001.d
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d
 create mode 100644 
gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d
 create mode 100644 
gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d
 create mode 100644 
gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d
 create mode 100644 
gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/issue21295.d

diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index 8a59cbde78e..5f6193f76b7 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-3a979052509fff8170ba80e48817377a60e78eb3
+70aabfb511d55f2bfbdccbac7868519d9d4b63da
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/declaration.c b/gcc/d/dmd/declaration.c
index 08b295070b2..f490cc5a413 100644
--- a/gcc/d/dmd/declaration.c
+++ b/gcc/d/dmd/declaration.c
@@ -340,6 +340,13 @@ void AliasDeclaration::semantic(Scope *sc)
 void AliasDeclaration::aliasSemantic(Scope *sc)
 {
 //printf("AliasDeclaration::semantic() %s\n", toChars());
+
+// as AliasDeclaration::semantic, in case we're called first.
+// see https://issues.dlang.org/show_bug.cgi?id=21001
+storage_class |= sc->stc & STCdeprecated;
+protection = sc->protection;
+userAttribDecl = sc->userAttribDecl;
+
 // TypeTraits needs to know if it's located in an AliasDeclaration
 sc->flags |= SCOPEalias;
 
diff --git a/gcc/testsuite/gdc.test/fail_compilation/fail21001.d 
b/gcc/testsuite/gdc.test/fail_compilation/fail21001.d
new file mode 100644
index 000..0faeb4038ef
--- /dev/null
+++ b/gcc/testsuite/gdc.test/fail_compilation/fail21001.d
@@ -0,0 +1,12 @@
+/*
+TEST_OUTPUT:
+---
+fail_compilation/fail21001.d(12): Error: undefined identifier `Alias`
+---
+*/
+
+module fail21001;
+
+import imports.fail21001b;
+
+void main() { Alias var; }
diff --git a/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d
new file mode 100644
index 000..69c1c20a356
--- /dev/null
+++ b/gcc/testsuite/gdc.test/fail_compilation/imports/fail21001b.d
@@ -0,0 +1,5 @@
+module imports.fail21001b;
+
+private struct S { Alias member; }
+
+private alias Alias = int;
diff --git 
a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d
new file mode 100644
index 000..d9298bf972d
--- /dev/null
+++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295ast_node.d
@@ -0,0 +1,5 @@
+module imports.issue21295ast_node;
+import imports.issue21295visitor : Visitor;
+class ASTNode {
+void accept(Visitor);
+}
diff --git 
a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d
new file mode 100644
index 000..5eccf6ac028
--- /dev/null
+++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295astcodegen.d
@@ -0,0 +1,4 @@
+module imports.issue21295astcodegen;
+struct ASTCodegen {
+import imports.issue21295dtemplate;
+}
diff --git 
a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d
new file mode 100644
index 000..8864f9ea27e
--- /dev/null
+++ b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295dtemplate.d
@@ -0,0 +1,3 @@
+module imports.issue21295dtemplate;
+import imports.issue21295ast_node;
+class TemplateParameter : ASTNode { }
diff --git 
a/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d 
b/gcc/testsuite/gdc.test/fail_compilation/imports/issue21295visitor.d
new file mode 100644
index 000..3665

[PATCH] PR target/97349 AArch64: Incorrect types for some Neon vdupq_n_<...> intrinsics

2020-10-12 Thread Kyrylo Tkachov via Gcc-patches
Hi all,

This patch fixes the PR by adjusting the input types of the intrinsic 
prototypes to the ones mandated by ACLE
Turns out the tests in the testsuite were already using the correct ones, but 
implicit conversions hid the bug...

Bootstrapped and tested on aarch64-none-linux-gnu.
Pushing to master and later the branches.
Thanks,
Kyrill

gcc/
PR target/97349
* config/aarch64/arm_neon.h (vdupq_n_p8, vdupq_n_p16, vdupq_n_p64,
vdupq_n_s8, vdupq_n_s16, vdupq_n_u8, vdupq_n_u16): Fix argument type.

gcc/testsuite/
PR target/97349
* gcc.target/aarch64/simd/pr97349.c: New test.


vdup-types.patch
Description: vdup-types.patch


[PATCH] SLP: fix SVE issues

2020-10-12 Thread Martin Liška

The patch fixes the following 2 issues:

  .MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12);

here we miss to return the last argument as stored value.

ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4];

here we miss a bail out in vect_recog_over_widening_pattern.

gcc/ChangeLog:

PR tree-optimization/97079
* internal-fn.c (internal_fn_stored_value_index): Handle also
.MASK_STORE_LANES.
* tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail
out for POLY_INT_CST.

gcc/testsuite/ChangeLog:

PR tree-optimization/97079
* gcc.target/aarch64/sve/pr97079.c: New test.

I'm running tests on x86_64-linux-gnu and I would appreciate running that
on a SVE aarch64 machine.

Ready for master?
Thanks,
Martin

---
 gcc/internal-fn.c |  1 +
 .../gcc.target/aarch64/sve/pr97079.c  | 22 +++
 gcc/tree-vect-patterns.c  |  2 ++
 3 files changed, 25 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c

diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 92cb3cd845a..792d2ca568a 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn)
   switch (fn)
 {
 case IFN_MASK_STORE:
+case IFN_MASK_STORE_LANES:
 case IFN_SCATTER_STORE:
 case IFN_MASK_SCATTER_STORE:
 case IFN_LEN_STORE:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
new file mode 100644
index 000..06e6a7cde94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+sve" } */
+
+void g(void);
+
+int a[8][3];
+int b;
+void c(void)
+{
+  int d[] = {7, 3};
+  int *e = a[0];
+  int f;
+  b = 0;
+  for (; b < 8; b++)
+{
+  f = 0;
+  for (; f < 3; f++)
+   a[b][f] = 0;
+}
+  g();
+  *e = (long)d;
+}
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 71e4e106202..6302bc42f46 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
   tree op = gimple_op (last_stmt, first_op + i);
   if (TREE_CODE (op) == INTEGER_CST)
unprom[i].set_op (op, vect_constant_def);
+  else if (TREE_CODE (op) == POLY_INT_CST)
+   return NULL;
   else if (TREE_CODE (op) == SSA_NAME)
{
  bool op_single_use_p = true;
--
2.28.0



Re: [PATCH] SLP: fix SVE issues

2020-10-12 Thread Richard Biener via Gcc-patches
On Mon, Oct 12, 2020 at 12:59 PM Martin Liška  wrote:
>
> The patch fixes the following 2 issues:
>
>.MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12);
>
> here we miss to return the last argument as stored value.
>
> ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4];
>
> here we miss a bail out in vect_recog_over_widening_pattern.
>
> gcc/ChangeLog:
>
> PR tree-optimization/97079
> * internal-fn.c (internal_fn_stored_value_index): Handle also
> .MASK_STORE_LANES.
> * tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail
> out for POLY_INT_CST.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/97079
> * gcc.target/aarch64/sve/pr97079.c: New test.
>
> I'm running tests on x86_64-linux-gnu and I would appreciate running that
> on a SVE aarch64 machine.
>
> Ready for master?
> Thanks,
> Martin
>
> ---
>   gcc/internal-fn.c |  1 +
>   .../gcc.target/aarch64/sve/pr97079.c  | 22 +++
>   gcc/tree-vect-patterns.c  |  2 ++
>   3 files changed, 25 insertions(+)
>   create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 92cb3cd845a..792d2ca568a 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn)
> switch (fn)
>   {
>   case IFN_MASK_STORE:
> +case IFN_MASK_STORE_LANES:
>   case IFN_SCATTER_STORE:
>   case IFN_MASK_SCATTER_STORE:
>   case IFN_LEN_STORE:
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
> new file mode 100644
> index 000..06e6a7cde94
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -march=armv8.2-a+sve" } */
> +
> +void g(void);
> +
> +int a[8][3];
> +int b;
> +void c(void)
> +{
> +  int d[] = {7, 3};
> +  int *e = a[0];
> +  int f;
> +  b = 0;
> +  for (; b < 8; b++)
> +{
> +  f = 0;
> +  for (; f < 3; f++)
> +   a[b][f] = 0;
> +}
> +  g();
> +  *e = (long)d;
> +}
> diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> index 71e4e106202..6302bc42f46 100644
> --- a/gcc/tree-vect-patterns.c
> +++ b/gcc/tree-vect-patterns.c
> @@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
> tree op = gimple_op (last_stmt, first_op + i);
> if (TREE_CODE (op) == INTEGER_CST)
> unprom[i].set_op (op, vect_constant_def);
> +  else if (TREE_CODE (op) == POLY_INT_CST)
> +   return NULL;

can you make this a simpler

  else
 return NULL;

at the end of the if please?

Thanks,
Richard.

> else if (TREE_CODE (op) == SSA_NAME)
> {
>   bool op_single_use_p = true;
> --
> 2.28.0
>


Re: [PATCH] SLP: fix SVE issues

2020-10-12 Thread Richard Biener via Gcc-patches
On Mon, Oct 12, 2020 at 1:26 PM Richard Biener
 wrote:
>
> On Mon, Oct 12, 2020 at 12:59 PM Martin Liška  wrote:
> >
> > The patch fixes the following 2 issues:
> >
> >.MASK_STORE_LANES (&a, 4B, max_mask_34, vect_array.12);
> >
> > here we miss to return the last argument as stored value.
> >
> > ivtmp_32 = ivtmp_31 + POLY_INT_CST [4, 4];
> >
> > here we miss a bail out in vect_recog_over_widening_pattern.
> >
> > gcc/ChangeLog:
> >
> > PR tree-optimization/97079
> > * internal-fn.c (internal_fn_stored_value_index): Handle also
> > .MASK_STORE_LANES.
> > * tree-vect-patterns.c (vect_recog_over_widening_pattern): Bail
> > out for POLY_INT_CST.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR tree-optimization/97079
> > * gcc.target/aarch64/sve/pr97079.c: New test.
> >
> > I'm running tests on x86_64-linux-gnu and I would appreciate running that
> > on a SVE aarch64 machine.
> >
> > Ready for master?
> > Thanks,
> > Martin
> >
> > ---
> >   gcc/internal-fn.c |  1 +
> >   .../gcc.target/aarch64/sve/pr97079.c  | 22 +++
> >   gcc/tree-vect-patterns.c  |  2 ++
> >   3 files changed, 25 insertions(+)
> >   create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
> >
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 92cb3cd845a..792d2ca568a 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -3795,6 +3795,7 @@ internal_fn_stored_value_index (internal_fn fn)
> > switch (fn)
> >   {
> >   case IFN_MASK_STORE:
> > +case IFN_MASK_STORE_LANES:
> >   case IFN_SCATTER_STORE:
> >   case IFN_MASK_SCATTER_STORE:
> >   case IFN_LEN_STORE:
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c 
> > b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
> > new file mode 100644
> > index 000..06e6a7cde94
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr97079.c
> > @@ -0,0 +1,22 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -march=armv8.2-a+sve" } */
> > +
> > +void g(void);
> > +
> > +int a[8][3];
> > +int b;
> > +void c(void)
> > +{
> > +  int d[] = {7, 3};
> > +  int *e = a[0];
> > +  int f;
> > +  b = 0;
> > +  for (; b < 8; b++)
> > +{
> > +  f = 0;
> > +  for (; f < 3; f++)
> > +   a[b][f] = 0;
> > +}
> > +  g();
> > +  *e = (long)d;
> > +}
> > diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
> > index 71e4e106202..6302bc42f46 100644
> > --- a/gcc/tree-vect-patterns.c
> > +++ b/gcc/tree-vect-patterns.c
> > @@ -1571,6 +1571,8 @@ vect_recog_over_widening_pattern (vec_info *vinfo,
> > tree op = gimple_op (last_stmt, first_op + i);
> > if (TREE_CODE (op) == INTEGER_CST)
> > unprom[i].set_op (op, vect_constant_def);
> > +  else if (TREE_CODE (op) == POLY_INT_CST)
> > +   return NULL;
>
> can you make this a simpler
>
>   else
>  return NULL;
>
> at the end of the if please?

Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose
you tried that? (it might need further adjustments downstream).

Richard.

>
> Thanks,
> Richard.
>
> > else if (TREE_CODE (op) == SSA_NAME)
> > {
> >   bool op_single_use_p = true;
> > --
> > 2.28.0
> >


Re: [PATCH] SLP: fix SVE issues

2020-10-12 Thread Martin Liška

On 10/12/20 1:27 PM, Richard Biener wrote:

Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose
you tried that? (it might need further adjustments downstream).


Yes, it can. But it seemed to me like an incorrect match:

/home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
stmt: patt_2 = (unsigned int) ivtmp_31;
/home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
stmt: patt_1 = (unsigned int) POLY_INT_CST [4, 4];
/home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
stmt: patt_6 = patt_2 + patt_1;

dunno if we can make such a casting?

Martin



Re: [PATCH PR96757] aarch64: ICE during GIMPLE pass: vect

2020-10-12 Thread Richard Sandiford via Gcc-patches
"duanbo (C)"  writes:
>> -Original Message-
>> From: Richard Sandiford [mailto:richard.sandif...@arm.com]
>> Sent: Wednesday, September 30, 2020 6:38 PM
>> To: duanbo (C) 
>> Cc: GCC Patches 
>> Subject: Re: [PATCH PR96757] aarch64: ICE during GIMPLE pass: vect
>> 
>> Thanks for the update, looks good apart from…
>> 
>> "duanbo (C)"  writes:
>> > @@ -4361,7 +4391,7 @@ vect_recog_mask_conversion_pattern (vec_info
>> *vinfo,
>> >if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
>> >TYPE_VECTOR_SUBPARTS (vectype2))
>> >  && (TREE_CODE (rhs1) == SSA_NAME
>> > -|| rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0
>> > +|| !rhs1_op0_type || !rhs1_op1_type))
>> >return NULL;
>> 
>> …I think this should be:
>> 
>>&& (TREE_CODE (rhs1) == SSA_NAME
>>|| (!rhs1_op0_type && !rhs1_op1_type))
>> 
>> i.e. punt only if both types are already OK.  If one operand wants a specific
>> mask type, we should continue to the code below and attach the chosen
>> type to the comparison.
>> 
>> Although I guess this simplifies to:
>> 
>>   if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
>> TYPE_VECTOR_SUBPARTS (vectype2))
>>   && !rhs1_op0_type
>>   && !rhs1_op1_type)
>> return NULL;
>> 
>> (I think the comment above the code is still accurate with this change.)
>> 
>> > @@ -4393,7 +4423,16 @@ vect_recog_mask_conversion_pattern
>> (vec_info *vinfo,
>> >if (TREE_CODE (rhs1) != SSA_NAME)
>> >{
>> >  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
>> > -pattern_stmt = gimple_build_assign (tmp, rhs1);
>> > +if (rhs1_op0_type && TYPE_PRECISION (rhs1_op0_type)
>> > +  != TYPE_PRECISION (rhs1_type))
>> > +  rhs1_op0 = build_mask_conversion (vinfo, rhs1_op0,
>> > +vectype2, stmt_vinfo);
>> > +if (rhs1_op1_type && TYPE_PRECISION (rhs1_op1_type)
>> > +  != TYPE_PRECISION (rhs1_type))
>> 
>> Very minor -- I would have fixed this up before committing if it wasn't for 
>> the
>> above -- but: GCC formatting is instead:
>> 
>>if (rhs1_op1_type
>>&& TYPE_PRECISION (rhs1_op1_type) != TYPE_PRECISION
>> (rhs1_type))
>> 
>> LGTM with those changes, thanks.
>> 
>> Richard
>> 
>> > +  rhs1_op1 = build_mask_conversion (vinfo, rhs1_op1,
>> > +vectype2, stmt_vinfo);
>> > +pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
>> > +rhs1_op0, rhs1_op1);
>> >  rhs1 = tmp;
>> >  append_pattern_def_seq (vinfo, stmt_vinfo, pattern_stmt,
>> vectype2,
>> >  rhs1_type);
>
> Sorry for the late reply.
> I have modified the patch according to your suggestion, and it works well.

Looks good, thanks.  Pushed to trunk.

Richard


[patch, committed] nvptx - invoke.texi: Update default of -misa (was: [committed][nvptx] Set -misa=sm_35 by default)

2020-10-12 Thread Tobias Burnus

On 10/9/20 1:56 PM, Tom de Vries wrote:


[nvptx] Set -misa=sm_35 by default

I committed the attached follow-up commit as obvious,
r11-3818-g91e4e16b550540723cca824b9674c7d8c43f4849

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
commit 91e4e16b550540723cca824b9674c7d8c43f4849
Author: Tobias Burnus 
Date:   Mon Oct 12 13:13:20 2020 +0200

nvptx - invoke.texi: Update default of -misa

Followup to commit 383400a6078d75bbfa1216c9af2c37f7e88740c9

gcc/ChangeLog
* doc/invoke.texi (nvptx's -misa): Update default to sm_35.

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b8c807e631c..307f4f5426c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -25593,7 +25593,7 @@ Generate code for 32-bit or 64-bit ABI.
 @opindex march
 Generate code for given the specified PTX ISA (e.g.@: @samp{sm_35}).  ISA
 strings must be lower-case.  Valid ISA strings include @samp{sm_30} and
-@samp{sm_35}.  The default ISA is sm_30.
+@samp{sm_35}.  The default ISA is sm_35.
 
 @item -mmainkernel
 @opindex mmainkernel


[PATCH, wwwdocs] gcc-11/changes: NVPTX: Mention new -misa=sm_35 default

2020-10-12 Thread Tom de Vries
Hi,

Mention new -misa=sm_35 default for NVPTX target in the gcc 11 release notes.

See also PR target/97348.

Verified using the validator

OK?

Thanks,
- Tom

gcc-11/changes: NVPTX: Mention new -misa=sm_35 default

---
 htdocs/gcc-11/changes.html | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/htdocs/gcc-11/changes.html b/htdocs/gcc-11/changes.html
index f7417622..37b9127a 100644
--- a/htdocs/gcc-11/changes.html
+++ b/htdocs/gcc-11/changes.html
@@ -225,7 +225,13 @@ a work-in-progress.
 
 
 
-
+NVPTX
+
+  The -misa default has changed from sm_30
+  to sm_35.
+  
+
+
 
 
 


Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization

2020-10-12 Thread Christophe Lyon via Gcc-patches
Hi,


On Thu, 8 Oct 2020 at 16:22, Christophe Lyon  wrote:
>
> On Thu, 8 Oct 2020 at 16:08, Dennis Zhang  wrote:
> >
> > Hi Christophe,
> >
> > On 08/10/2020 14:14, Christophe Lyon wrote:
> > > Hi,
> > >
> > >
> > > On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches
> > >  wrote:
> > >>
> > >> On 9/16/20 4:00 PM, Dennis Zhang wrote:
> > >>> Hi all,
> > >>>
> > >>> This patch enables SIMD modes for MVE auto-vectorization.
> > >>> In this patch, the integer and float MVE SIMD modes are returned by
> > >>> arm_preferred_simd_mode (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when
> > >>> MVE or MVE_FLOAT is enabled.
> > >>> Then the expanders for auto-vectorization can be used for generating MVE
> > >>> SIMD code.
> > >>>
> > >>> This patch also fixes bugs in MVE vreinterpretq_*.c tests which are
> > >>> revealed by the enabled MVE SIMD modes.
> > >>> The tests are for checking the MVE reinterpret intrinsics.
> > >>> There are two functions in each of the tests. The two functions contain
> > >>> the pattern of identical code so that they are folded in icf pass.
> > >>> Because of icf, the instruction count only checks one function which is 
> > >>> 8.
> > >>> However when the SIMD modes are enabled, the estimation of the code size
> > >>> becomes smaller so that inlining is applied after icf, then the
> > >>> instruction count becomes 16 which causes failure of the tests.
> > >>> Because the icf is not the expected pattern to be tested but causes
> > >>> above issues, -fno-ipa-icf is applied to the tests to avoid unstable
> > >>> instruction count.
> > >>>
> > >>> This patch is separated from
> > >>> https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552104.html
> > >>> because this part is not strongly connected to the aim of that one so
> > >>> that causing confusion.
> > >>>
> > >>> Regtested and bootstrapped.
> > >>>
> > >>> Is it OK for trunk please?
> > >>>
> > >>> Thanks
> > >>> Dennis
> > >>>
> > >>> gcc/ChangeLog:
> > >>>
> > >>> 2020-09-15  Dennis Zhang  
> > >>>
> > >>>* config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD 
> > >>> modes.
> > >>>
> > >
> > > Since toolchain builds work again after Jakub's divmod fix, I'm now
> > > facing another build error likely caused by this patch:
> > > In file included from
> > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/coretypes.h:449:0,
> > >   from
> > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28:
> > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:
> > > In function 'machine_mode arm_preferred_simd_mode(scalar_mode)':
> > > ./insn-modes.h:196:71: error: temporary of non-literal type
> > > 'scalar_int_mode' in a constant expression
> > >   #define QImode (scalar_int_mode ((scalar_int_mode::from_int) E_QImode))
> > > ^
> > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28970:12:
> > > note: in expansion of macro 'QImode'
> > > case QImode:
> > >
> > > and similarly for the other cases.
> > >
> > > Does the build work for you?
> > >
> > > Thanks,
> > >
> > > Christophe
> > >
> >
> > Thanks for the report. Sorry to see the error.
> > I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I
> > didn't get this error.
> > Could you please help to show the configuration you use for your build?
> > I will test and fix at once.
> >
>
> It fails on all of them for me. Does it work for you with current
> master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b)
>

So... I guess you are using a host with GCC more recent than 4.8.5? :-)
When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds,
and after manually building with the same environment in the compute
farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the
build failure.
It's a matter of replacing
case QImode:
with
case E_QImode:

Is the attached patch OK? Or do we instead want to revisit the minimum
gcc version required to build gcc?

Thanks,

Christophe


> > Thanks
> > Dennis
gcc-4.8.5 does not accept case clauses with non-literal type, which
happens for "QImode" as it expands to (scalar_int_mode
((scalar_int_mode::from_int) E_QImode)).

Use E_QImode instead in arm_preferred_simd_mode, to fix the
build. Same for HImode, SImode, HFmode and SFmode as introduced by a
recent patch.


2020-10-12  Christophe Lyon  

gcc/
* config/arm/arm.c (arm_preferred_simd_mode): Use E_FOOmode
instead of FOOmode.

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 5d9c995..0b8c5fa 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -28967,11 +28967,11 @@ arm_preferred_simd_mode (scalar_mode mode)
   if (TARGET_HAVE_MVE)
 switch (mode)
   {
-  case QImode:
+  case E_QImode:
return V16QImode;
-  case HImode:
+  case E_HImode:
return V8HImode;
-  case SImod

Re: [patch, committed] nvptx - invoke.texi: Update default of -misa (was: [committed][nvptx] Set -misa=sm_35 by default)

2020-10-12 Thread Tom de Vries
On 10/12/20 1:34 PM, Tobias Burnus wrote:
> On 10/9/20 1:56 PM, Tom de Vries wrote:
> 
>> [nvptx] Set -misa=sm_35 by default
> I committed the attached follow-up commit as obvious,
> r11-3818-g91e4e16b550540723cca824b9674c7d8c43f4849

Thanks for catching this.

Thanks,
- Tom


Re: [PATCH] calls.c:can_implement_as_sibling_call_p REG_PARM_STACK_SPACE check

2020-10-12 Thread Alan Modra via Gcc-patches
Ping?

On Fri, Oct 02, 2020 at 05:03:50PM +0930, Alan Modra wrote:
https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555309.html

-- 
Alan Modra
Australia Development Lab, IBM


Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)

2020-10-12 Thread Richard Sandiford via Gcc-patches
Martin Sebor via Gcc-patches  writes:
> On 10/4/20 10:51 AM, H.J. Lu via Gcc-patches wrote:
>> On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool
>>  wrote:
>>>
>>> On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches 
>>> wrote:
>>>> On Linux/x86_64,
>>>>
>>>> c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit
>>>> commit c34db4b6f8a5d80367c709309f9b00cb32630054
>>>> Author: Jan Hubicka 
>>>> Date:   Sat Oct 3 17:20:16 2020 +0200
>>>>
>>>>  Track access ranges in ipa-modref
>>>>
>>>> caused
>>>
>>> [ ... ]
>>>
>>> This isn't a patch.  Wrong mailing list?
>> 
>> I view this as a follow up of
>> 
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html
>> 
>> What do people think about this kind of followups?  Is this appropriate
>> for this mailing list?
>
> A number of people routinely send emails similar to these to this
> list to point out regressions on their targets.  I find both kinds
> of emails very useful and don't mind the additional traffic.

+1 FWIW.  I think it's great that we have this kind of automatic CI, and
this seems like a natural place to send the reports.  Shovelling them into
bugzilla is likely to create more work rather than less, especially since
the fix turnaround should (hopefully) be short.

Richard


[PATCH] fix SLP subgraph detection wrt fully shared lanes

2020-10-12 Thread Richard Biener
When a VEC_PERM SLP node just permutes existing lanes this confuses
the SLP subgraph detection where I tried to elide a node-based
visited hash-map in a way that doesn't work.  Fixed by adding such.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2020-10-12  Richard Biener  

* tree-vect-slp.c (vect_bb_partition_graph_r): Use visited
hash-map.
(vect_bb_partition_graph): Likewise.
---
 gcc/tree-vect-slp.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index dd2042a4db5..8acef6f3cef 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -3254,18 +3254,19 @@ static void
 vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
   slp_instance instance, slp_tree node,
   hash_map 
&stmt_to_instance,
-  hash_map 
&instance_leader)
+  hash_map 
&instance_leader,
+  hash_set &visited)
 {
   stmt_vec_info stmt_info;
   unsigned i;
-  bool all = true;
+
   FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
 {
   bool existed_p;
   slp_instance &stmt_instance
= stmt_to_instance.get_or_insert (stmt_info, &existed_p);
   if (!existed_p)
-   all = false;
+   ;
   else if (stmt_instance != instance)
{
  /* If we're running into a previously marked stmt make us the
@@ -3279,15 +3280,15 @@ vect_bb_partition_graph_r (bb_vec_info bb_vinfo,
}
   stmt_instance = instance;
 }
-  /* If not all stmts had been visited we have to recurse on children.  */
-  if (all)
+
+  if (visited.add (node))
 return;
 
   slp_tree child;
   FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
   vect_bb_partition_graph_r (bb_vinfo, instance, child, stmt_to_instance,
-instance_leader);
+instance_leader, visited);
 }
 
 /* Partition the SLP graph into pieces that can be costed independently.  */
@@ -3302,13 +3303,15 @@ vect_bb_partition_graph (bb_vec_info bb_vinfo)
  marked stmt, make the stmts leader the current SLP graph entry.  */
   hash_map stmt_to_instance;
   hash_map instance_leader;
+  hash_set visited;
   slp_instance instance;
   for (unsigned i = 0; bb_vinfo->slp_instances.iterate (i, &instance); ++i)
 {
   instance_leader.put (instance, instance);
   vect_bb_partition_graph_r (bb_vinfo,
 instance, SLP_INSTANCE_TREE (instance),
-stmt_to_instance, instance_leader);
+stmt_to_instance, instance_leader,
+visited);
 }
 
   /* Then collect entries to each independent subgraph.  */
-- 
2.26.2


Re: [PATCH] Add if-chain to switch conversion pass.

2020-10-12 Thread Martin Liška

On 10/6/20 4:12 PM, Jakub Jelinek wrote:

On Tue, Oct 06, 2020 at 03:48:38PM +0200, Martin Liška wrote:

On 10/6/20 9:47 AM, Richard Biener wrote:

But is it really extensible with the current implementation?  I doubt so.


I must agree with the statement. So let's make the pass properly.
I would need help with the algorithm where I'm planning to do the following
steps:

1) for each BB ending with a gcond, parse the index variable and its VR;
I'll support:
a) index == 123 ([123, 123])
b) 1 <= index && index <= 9 ([1, 9])
c) index == 123 || index == 12345 ([123, 123] [12345, 12345])
d) index != 1 ([1, 1])
e) index != 1 && index != 5 ([1, 1] [5, 5])


The fold_range_test created cases are essential to support, so
f) index - 123U < 456U ([123, 456+123])
g) (unsigned) index - 123U < 456U (ditto)
but the discovery should actually recurse on all of those forms, so it will
handle
(unsigned) index - 123U < 456U || (unsigned) index - 16384U <= 32711U
etc.
You can see what reassoc init_range_entry does and do something similar?


All right, I started to use init_range_entry in combination with 
linearize_expr_tree.
One thing I have a problem with is that linearize_expr_tree doesn't properly mark
all statements as visited for cases like:

   :
  index2.1_1 = (unsigned int) index2_16(D);
  _2 = index2.1_1 + 4294967196;
  _3 = _2 <= 100;
  _5 = index2.1_1 + 4294966996;
  _6 = _5 <= 33;
  _7 = _3 | _6;
  if (_7 != 0)
goto ; [INV]
  else
goto ; [INV]

As seen, all statements in this BB are used by the final _7 != 0 and it would
be handy for me to identify all statements that should be hoisted.

Thoughts how can I achieve that?
Thanks,
Martin


Re: [PATCH] Add if-chain to switch conversion pass.

2020-10-12 Thread Martin Liška

On 10/7/20 10:00 AM, Richard Biener wrote:

As said I'd have a BB-local pass over BBs recording the index variable
and the range covered by the BBs gcond, plus recording how many excess
stmts there are for eventual code motion.

Only after that BB-local pass start to group BBs in a walk from dominated to
dominating BBs looking for common indexes and building a case vector.

The main thing is to avoid repeatedly analyzing BBs conditions (so the first
pass could also be an on-demand precompute thing) and making the
case vector build optimal.


I have a patch that does that using the infrastructure from tree-ssa-reassoc.
Now I would like to implement the code hoisting. Am I right that we want
something like:

if (index == C0)
  goto BB_0;
else
{
  BB1_to_hoist_stmts;
  if (index == C1)
goto BB_1;
  else
  {
BB2_to_hoist_stmts;
if (index == C2)
  goto BB_2;
else
  goto default_BB;
  }
}

be converted into:

switch(index)
{
   case C0:
 goto BB_0;
   case C1:
 BB1_to_hoist_stmts;
 goto BB_1;
   case C2:
 BB1_to_hoist_stmts;
 BB2_to_hoist_stmts;
 goto BB_2;
   else:
 BB1_to_hoist_stmts;
 BB2_to_hoist_stmts;
 goto default_BB;
}

?

Thanks,
Martin


Re: [PATCH] Add if-chain to switch conversion pass.

2020-10-12 Thread Jakub Jelinek via Gcc-patches
On Mon, Oct 12, 2020 at 02:39:24PM +0200, Martin Liška wrote:
> All right, I started to use init_range_entry in combination with 
> linearize_expr_tree.
> One thing I have problem with is that linearize_expr_tree doesn't properly 
> mark
> all statements as visited for cases like:

Not sure if linearize_expr_tree is what you want, then you run into many
reassoc dependencies (e.g. having computed uids and all that).
My suggestion was to just copy and tweak init_range_entry (and reuse the
fold_const range step stuff).
There is no need to linearize anything, for what you want it doesn't matter
if you process (x | y) | (z | w) where all of x, y, z, w are some
comparisons, or x | (y | (z | w)) etc.  All you want to ensure is that
all the logical operations feeding each GIMPLE_COND are the same (all |s or
all &s), and that they make sense also for the basic blocks, then for each
of the subconditions find the ranges and verify that they all use the same
index.  And then I think you shouldn't hoist anything either, rather check
that all the blocks but the first one are no_side_effect_bb (perhaps export
that one from reassoc), thus when you turn that into a switch starting at
the end of first bb, you can just throw away all the non-side-effects basic
blocks.  Or do you instead want to allow other stmts in those bbs and check
that either it is consumed all in the same bb, or it is consumed in the bbs
dominated by the bb that the case label for the particular case would be
added for, and sink the statements to that bb?
> 
>:
>   index2.1_1 = (unsigned int) index2_16(D);
>   _2 = index2.1_1 + 4294967196;
>   _3 = _2 <= 100;
>   _5 = index2.1_1 + 4294966996;
>   _6 = _5 <= 33;
>   _7 = _3 | _6;
>   if (_7 != 0)
> goto ; [INV]
>   else
> goto ; [INV]
> 
> As seen, all statements in this BB are used by the final _7 != 0 and it would
> be handy for me to identify all statements that should be hoisted.
> 
> Thoughts how can I achieve that?

Jakub



Re: [PATCH] Add if-chain to switch conversion pass.

2020-10-12 Thread Martin Liška

On 10/12/20 2:44 PM, Martin Liška wrote:

On 10/7/20 10:00 AM, Richard Biener wrote:

As said I'd have a BB-local pass over BBs recording the index variable
and the range covered by the BBs gcond, plus recording how many excess
stmts there are for eventual code motion.

Only after that BB-local pass start to group BBs in a walk from dominated to
dominating BBs looking for common indexes and building a case vector.

The main thing is to avoid repeatedly analyzing BBs conditions (so the first
pass could also be an on-demand precompute thing) and making the
case vector build optimal.


I have a patch that does that using the infrastructure from tree-ssa-reassoc.
Now I would like to implement the code hoisting. Am I right that we want
something like:

if (index == C0)
   goto BB_0;
else
{
   BB1_to_hoist_stmts;
   if (index == C1)
     goto BB_1;
   else
   {
     BB2_to_hoist_stmts;
     if (index == C2)
   goto BB_2;
     else
   goto default_BB;
   }
}

be converted into:

switch(index)
{
    case C0:
  goto BB_0;
    case C1:
  BB1_to_hoist_stmts;
  goto BB_1;
    case C2:
  BB1_to_hoist_stmts;
  BB2_to_hoist_stmts;
  goto BB_2;
    else:
  BB1_to_hoist_stmts;
  BB2_to_hoist_stmts;
  goto default_BB;
}

?


Reading again Richi's comment:


ifcombine simply hoists any stmts without side-effects up the
dominator tree and thus only requires BBs without side-effects
(IIRC there's a predicate fn for that).


we likely want to hoist the statements "up" to the gswitch BB?

Martin



Thanks,
Martin




Re: [PATCH] SLP: fix SVE issues

2020-10-12 Thread Richard Biener via Gcc-patches
On Mon, Oct 12, 2020 at 1:31 PM Martin Liška  wrote:
>
> On 10/12/20 1:27 PM, Richard Biener wrote:
> > Btw, POLY_INT_CST can likely be handled the same as INTEGER_CST - I suppose
> > you tried that? (it might need further adjustments downstream).
>
> Yes, it can. But it seemed to me like an incorrect match:
>
> /home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
> stmt: patt_2 = (unsigned int) ivtmp_31;
> /home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
> stmt: patt_1 = (unsigned int) POLY_INT_CST [4, 4];
> /home/marxin/Programming/testcases/pr97079-2.c:10:12: note:   extra pattern 
> stmt: patt_6 = patt_2 + patt_1;
>
> dunno if we can make such a casting?

We should be able to constant-fold it at least.

Richard.

> Martin
>


RE: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization

2020-10-12 Thread Kyrylo Tkachov via Gcc-patches
Hi Christophe,

> -Original Message-
> From: Gcc-patches  On Behalf Of
> Christophe Lyon via Gcc-patches
> Sent: 12 October 2020 12:41
> To: Dennis Zhang 
> Cc: Richard Earnshaw ; nd ;
> gcc-patches@gcc.gnu.org; Ramana Radhakrishnan
> 
> Subject: Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization
> 
> Hi,
> 
> 
> On Thu, 8 Oct 2020 at 16:22, Christophe Lyon 
> wrote:
> >
> > On Thu, 8 Oct 2020 at 16:08, Dennis Zhang 
> wrote:
> > >
> > > Hi Christophe,
> > >
> > > On 08/10/2020 14:14, Christophe Lyon wrote:
> > > > Hi,
> > > >
> > > >
> > > > On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches
> > > >  wrote:
> > > >>
> > > >> On 9/16/20 4:00 PM, Dennis Zhang wrote:
> > > >>> Hi all,
> > > >>>
> > > >>> This patch enables SIMD modes for MVE auto-vectorization.
> > > >>> In this patch, the integer and float MVE SIMD modes are returned by
> > > >>> arm_preferred_simd_mode
> (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when
> > > >>> MVE or MVE_FLOAT is enabled.
> > > >>> Then the expanders for auto-vectorization can be used for
> generating MVE
> > > >>> SIMD code.
> > > >>>
> > > > >>> This patch also fixes bugs in MVE vreinterpretq_*.c tests which are
> > > >>> revealed by the enabled MVE SIMD modes.
> > > >>> The tests are for checking the MVE reinterpret intrinsics.
> > > >>> There are two functions in each of the tests. The two functions
> contain
> > > >>> the pattern of identical code so that they are folded in icf pass.
> > > >>> Because of icf, the instruction count only checks one function which
> is 8.
> > > >>> However when the SIMD modes are enabled, the estimation of the
> code size
> > > >>> becomes smaller so that inlining is applied after icf, then the
> > > >>> instruction count becomes 16 which causes failure of the tests.
> > > >>> Because the icf is not the expected pattern to be tested but causes
> > > >>> above issues, -fno-ipa-icf is applied to the tests to avoid unstable
> > > >>> instruction count.
> > > >>>
> > > >>> This patch is separated from
> > > >>> https://gcc.gnu.org/pipermail/gcc-patches/2020-
> August/552104.html
> > > >>> because this part is not strongly connected to the aim of that one so
> > > >>> that causing confusion.
> > > >>>
> > > > >>> Regtested and bootstrapped.
> > > >>>
> > > >>> Is it OK for trunk please?
> > > >>>
> > > >>> Thanks
> > > >>> Dennis
> > > >>>
> > > >>> gcc/ChangeLog:
> > > >>>
> > > >>> 2020-09-15  Dennis Zhang  
> > > >>>
> > > >>>* config/arm/arm.c (arm_preferred_simd_mode): Enable MVE
> SIMD modes.
> > > >>>
> > > >
> > > > Since toolchain builds work again after Jakub's divmod fix, I'm now
> > > > facing another build error likely caused by this patch:
> > > > In file included from
> > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-
> fsf/gccsrc/gcc/coretypes.h:449:0,
> > > >   from
> > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-
> fsf/gccsrc/gcc/config/arm/arm.c:28:
> > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-
> fsf/gccsrc/gcc/config/arm/arm.c:
> > > > In function 'machine_mode arm_preferred_simd_mode(scalar_mode)':
> > > > ./insn-modes.h:196:71: error: temporary of non-literal type
> > > > 'scalar_int_mode' in a constant expression
> > > >   #define QImode (scalar_int_mode ((scalar_int_mode::from_int)
> E_QImode))
> > > > 
> > > > ^
> > > > /tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-
> fsf/gccsrc/gcc/config/arm/arm.c:28970:12:
> > > > note: in expansion of macro 'QImode'
> > > > case QImode:
> > > >
> > > > and similarly for the other cases.
> > > >
> > > > Does the build work for you?
> > > >
> > > > Thanks,
> > > >
> > > > Christophe
> > > >
> > >
> > > Thanks for the report. Sorry to see the error.
> > > I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I
> > > didn't get this error.
> > > Could you please help to show the configuration you use for your build?
> > > I will test and fix at once.
> > >
> >
> > It fails on all of them for me. Does it work for you with current
> > master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b)
> >
> 
> So... I guess you are using a host with GCC more recent than 4.8.5? :-)
> When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds,
> and after manually building with the same environment in the compute
> farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the
> build failure.
> It's a matter of replacing
> case QImode:
> with
> case E_QImode:
> 
> Is the attached patch OK? Or do we instead want to revisit the minimum
> gcc version required to build gcc?

I'd rather go with this patch as long as it passes the usual testing.
Thanks,
Kyrill

> 
> Thanks,
> 
> Christophe
> 
> 
> > > Thanks
> > > Dennis


Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)

2020-10-12 Thread Christophe Lyon via Gcc-patches
On Mon, 5 Oct 2020 at 17:19, Segher Boessenkool
 wrote:
>
> On Sun, Oct 04, 2020 at 09:51:23AM -0700, H.J. Lu wrote:
> > On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool
> >  wrote:
> > > On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches 
> > > wrote:
> > > > On Linux/x86_64,
> > > >
> > > > c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit
> > > > commit c34db4b6f8a5d80367c709309f9b00cb32630054
> > > > Author: Jan Hubicka 
> > > > Date:   Sat Oct 3 17:20:16 2020 +0200
> > > >
> > > > Track access ranges in ipa-modref
> > > >
> > > > caused
> > >
> > > [ ... ]
> > >
> > > This isn't a patch.  Wrong mailing list?
> >
> > I view this as a follow up of
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html
>
> But it *isn't* a follow-up of that mail.  That is my point.  Most of
> these messages do not finger any particular patch even, I think?
>

That's why I kept the reporting part manual on my side: once you know
which commit introduced a failure/regression (either via bisect, or by
some other way), it's not always easy to identify the gcc-patches
message to which you want to reply.
And as already said in this thread, we certainly want to avoid sending
a regression email for each test, multiplied by the number of
configurations under test.

> > What do people think about this kind of followups?  Is this appropriate
> > for this mailing list?
>
> Please just use bugzilla.  And report bugs there the way they should be
> reported: full command lines, full description of the errors, and
> everything else needed to easily reproduce the problem.
>
It seems some people prefer such regression reports in bugzilla,
others in gcc-patches@.

In general when I report a regression I noticed in the GCC testsuite,
I tend to assume that the testname and GCC configure options are
sufficient for a usual contributor to reproduce.
Not sure if it matches "full" and "easily" in your mind?

With all the automated builds where the build dir is removed from the
server at the end whatever the result, it does take time if I have to
reproduce the problem manually before reporting.

Christophe

> *Actually* following up to the patch mail could be useful (but you can
> then just point to the bugzilla).  Sending spam to gcc-patches@ is not
> useful for most users of the list.
>
>
> Segher


Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)

2020-10-12 Thread Richard Biener via Gcc-patches
On Mon, Oct 12, 2020 at 3:27 PM Christophe Lyon via Gcc-patches
 wrote:
>
> On Mon, 5 Oct 2020 at 17:19, Segher Boessenkool
>  wrote:
> >
> > On Sun, Oct 04, 2020 at 09:51:23AM -0700, H.J. Lu wrote:
> > > On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool
> > >  wrote:
> > > > On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via 
> > > > Gcc-patches wrote:
> > > > > On Linux/x86_64,
> > > > >
> > > > > c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit
> > > > > commit c34db4b6f8a5d80367c709309f9b00cb32630054
> > > > > Author: Jan Hubicka 
> > > > > Date:   Sat Oct 3 17:20:16 2020 +0200
> > > > >
> > > > > Track access ranges in ipa-modref
> > > > >
> > > > > caused
> > > >
> > > > [ ... ]
> > > >
> > > > This isn't a patch.  Wrong mailing list?
> > >
> > > I view this as a follow up of
> > >
> > > https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html
> >
> > But it *isn't* a follow-up of that mail.  That is my point.  Most of
> > these messages do not finger any particular patch even, I think?
> >
>
> That's why I kept the reporting part manual on my side: once you know
> which commit introduced a failure/regression (either via bisect, or by
> some other way), it's not always easy to identify the gcc-patches
> message to which you want to reply.
> And as already said in this thread, we certainly want to avoid sending
> a regression email for each test, multiplied by the number of
> configurations under test.

Definitely.

> > > What do people think about this kind of followups?  Is this appropriate
> > > for this mailing list?
> >
> > Please just use bugzilla.  And report bugs there the way they should be
> > reported: full command lines, full description of the errors, and
> > everything else needed to easily reproduce the problem.
> >
> It seems some people prefer such regression reports in bugzilla,
> others in gcc-patches@.

We also want to avoid reporting a bug for each test, multiplied by the
number of configurations under test.

> In general when I report a regression I noticed in the GCC testsuite,
> I tend to assume that the testname and GCC configure options are
> sufficient for a usual contributor to reproduce.
> Not sure if it matches "full" and "easily" in your mind?
>
> With all the automated builds where the build dir is removed from the
> server at the end whatever the result, it does take time if I have to
> reproduce the problem manually before reporting.

And that's IMHO an important step - the human sanitizing of the
report - eventually checking the issue isn't already fixed or reported.

Richard.

>
> Christophe
>
> > *Actually* following up to the patch mail could be useful (but you can
> > then just point to the bugzilla).  Sending spam to gcc-patches@ is not
> > useful for most users of the list.
> >
> >
> > Segher


[PATCH] libstdc++: Apply proposed resolution for LWG 3449 [PR95322]

2020-10-12 Thread Patrick Palka via Gcc-patches
Now that the frontend bug PR96805 is fixed, we can apply the proposed
resolution for this issue.

This slightly deviates from the proposed resolution by declaring _CI a
member of take_view instead of take_view::_Sentinel, since it doesn't
depend on anything within _Sentinel anymore.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

libstdc++-v3/ChangeLog:

PR libstdc++/95322
* include/std/ranges (take_view::_CI): Define this alias
template as per LWG 3449 and remove ...
(take_view::_Sentinel::_CI): ... this type alias.
(take_view::_Sentinel::operator==): Adjust use of _CI
accordingly.  Define a second overload that accepts an iterator
of the opposite constness as per LWG 3449.
(take_while_view::_Sentinel::operator==): Likewise.
* testsuite/std/ranges/adaptors/95322.cc: Add tests for LWG 3449.
---
 libstdc++-v3/include/std/ranges   | 23 +--
 .../testsuite/std/ranges/adaptors/95322.cc| 28 ++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 10f1f7b525b..19bc01a3b3a 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1662,13 +1662,15 @@ namespace views
 class take_view : public view_interface>
 {
 private:
+  template
+   using _CI = counted_iterator<
+ iterator_t<__detail::__maybe_const_t<_Const, _Vp>>>;
+
   template
struct _Sentinel
{
private:
  using _Base = __detail::__maybe_const_t<_Const, _Vp>;
- using _CI = counted_iterator>;
-
  sentinel_t<_Base> _M_end = sentinel_t<_Base>();
 
public:
@@ -1689,7 +1691,15 @@ namespace views
  base() const
  { return _M_end; }
 
- friend constexpr bool operator==(const _CI& __y, const _Sentinel& __x)
+ friend constexpr bool
+ operator==(const _CI<_Const>& __y, const _Sentinel& __x)
+ { return __y.count() == 0 || __y.base() == __x._M_end; }
+
+ template>
+   requires sentinel_for, iterator_t<_Base2>>
+ friend constexpr bool
+ operator==(const _CI<_OtherConst>& __y, const _Sentinel& __x)
  { return __y.count() == 0 || __y.base() == __x._M_end; }
 
  friend _Sentinel;
@@ -1839,6 +1849,13 @@ namespace views
  operator==(const iterator_t<_Base>& __x, const _Sentinel& __y)
  { return __y._M_end == __x || !std::__invoke(*__y._M_pred, *__x); }
 
+ template>
+   requires sentinel_for, iterator_t<_Base2>>
+ friend constexpr bool
+ operator==(const iterator_t<_Base2>& __x, const _Sentinel& __y)
+ { return __y._M_end == __x || !std::__invoke(*__y._M_pred, *__x); }
+
  friend _Sentinel;
};
 
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc
index 67bc7d33917..41785a0a8fa 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/95322.cc
@@ -26,7 +26,7 @@ using __gnu_test::test_forward_range;
 void
 test01()
 {
-  // PR libstdc++/95322 and LWG 3488
+  // PR libstdc++/95322 and LWG 3448
   int a[2]{1, 2};
   test_forward_range v{a};
   auto view1 = v | std::views::take(2);
@@ -51,8 +51,34 @@ test02()
   VERIFY( !eq );
 }
 
+void
+test03()
+{
+  // LWG 3449, for take_view
+  int a[2]{1, 2};
+  test_forward_range v{a};
+  auto view1 = v | std::views::transform(std::identity{});
+  auto view2 = view1 | std::views::take(2);
+  const bool eq = std::ranges::cbegin(view2) == std::ranges::end(view2);
+  VERIFY( !eq );
+}
+
+void
+test04()
+{
+  // LWG 3449, for take_while_view
+  int a[2]{1, 2};
+  test_forward_range v{a};
+  auto view1 = v | std::views::transform(std::identity{});
+  auto view2 = view1 | std::views::take_while([] (int i) { return true; });
+  const bool eq = std::ranges::cbegin(view2) == std::ranges::end(view2);
+  VERIFY( !eq );
+}
+
 int main()
 {
   test01();
   test02();
+  test03();
+  test04();
 }
-- 
2.29.0.rc0



[PATCH] libstdc++: Apply proposed resolution for LWG 3450

2020-10-12 Thread Patrick Palka via Gcc-patches
libstdc++-v3/ChangeLog:

* include/std/ranges (take_while_view::begin): Constrain the
const overload further as per LWG 3450.
(take_while_view::end): Likewise.
* testsuite/std/ranges/adaptors/take_while.cc: Add test for LWG
3450.
---
 libstdc++-v3/include/std/ranges|  2 ++
 .../testsuite/std/ranges/adaptors/take_while.cc| 10 ++
 2 files changed, 12 insertions(+)

diff --git a/libstdc++-v3/include/std/ranges b/libstdc++-v3/include/std/ranges
index 19bc01a3b3a..55e7c4de234 100644
--- a/libstdc++-v3/include/std/ranges
+++ b/libstdc++-v3/include/std/ranges
@@ -1888,6 +1888,7 @@ namespace views
 
   constexpr auto
   begin() const requires range
+   && indirect_unary_predicate>
   { return ranges::begin(_M_base); }
 
   constexpr auto
@@ -1897,6 +1898,7 @@ namespace views
 
   constexpr auto
   end() const requires range
+   && indirect_unary_predicate>
   { return _Sentinel(ranges::end(_M_base),
   std::__addressof(*_M_pred)); }
 };
diff --git a/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc 
b/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc
index d587127b97e..48771397f1d 100644
--- a/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc
+++ b/libstdc++-v3/testsuite/std/ranges/adaptors/take_while.cc
@@ -70,10 +70,20 @@ test03()
   b = ranges::end(v);
 }
 
+void
+test04()
+{
+  // LWG 3450: a predicate taking int& makes only the non-const view a range
+  auto v = views::single(1) | views::take_while([](int& x) { return true;});
+  static_assert(ranges::range<decltype(v)>);
+  static_assert(!ranges::range<const decltype(v)>);
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
 }
-- 
2.29.0.rc0



Re: [PATCH, wwwdocs] gcc-11/changes: NVPTX: Mention new -misa=sm_35 default

2020-10-12 Thread Gerald Pfeifer
Hi Tom,

On Mon, 12 Oct 2020, Tom de Vries wrote:
> Mention new -misa=sm_35 default for NVPTX target in the gcc 11 release 
> notes.
> 
> See also PR target/97348.
> 
> Verified using the validator
> 
> OK?

yes, this is okay.

FWIW, I am happy to review wwwdocs patches.  As nvptx maintainer
you can also self approve if you desire so.  As you prefer. :-)

Gerald


Re: [PING][PATCH] correct handling of indices into arrays with elements larger than 1 (PR c++/96511)

2020-10-12 Thread Martin Sebor via Gcc-patches

On 10/11/20 9:44 PM, Jason Merrill wrote:

On 10/11/20 6:45 PM, Martin Sebor wrote:

On 10/9/20 9:13 AM, Jason Merrill wrote:

On 10/9/20 10:51 AM, Martin Sebor wrote:

On 10/8/20 1:40 PM, Jason Merrill wrote:

On 10/8/20 3:18 PM, Martin Sebor wrote:

On 10/7/20 3:01 PM, Jason Merrill wrote:

On 10/7/20 4:11 PM, Martin Sebor wrote:

...

For the various member functions, please include the 
comments with the definition as well as the in-class 
declaration.


Only one access_ref member function is defined 
out-of-line: offset_bounded().  I've adjusted the comment 
and copied it above

the function definition.


And size_remaining, as quoted above?


I have this in my tree:

/* Return the maximum amount of space remaining and if non-null, set
    argument to the minimum.  */

I'll add it when I commit the patch.



I also don't see a comment above the definition of offset_bounded 
in the new patch?


There is a comment in the latest patch.

...
The goal of conditionals is to avoid overwhelming the user 
with

excessive numbers that may not be meaningful or even relevant
to the warning.  I've corrected the function body, tweaked 
and
renamed the get_range function to get_offset_range to do a 
better

job of extracting ranges from the types of some nonconstant
expressions the front end passes it, and added a new test for
all this.  Attached is the new revision.


offset_bounded looks unchanged in the new patch.  It still 
returns true iff either the range is a single value or one of the 
bounds is unrepresentable in ptrdiff_t.  I'm still unclear how 
this corresponds to "Return true if OFFRNG is bounded to a 
subrange of possible offset values."


I don't think you're looking at the latest patch.  It has this:

+/* Return true if OFFRNG is bounded to a subrange of offset values
+   valid for the largest possible object.  */
+
  bool
  access_ref::offset_bounded () const
  {
-  if (offrng[0] == offrng[1])
-    return false;
-
    tree min = TYPE_MIN_VALUE (ptrdiff_type_node);
    tree max = TYPE_MAX_VALUE (ptrdiff_type_node);
-  return offrng[0] <= wi::to_offset (min) || offrng[1] >= 
wi::to_offset (max);
+  return wi::to_offset (min) <= offrng[0] && offrng[1] <= 
wi::to_offset (max);

  }

Here's a link to it in the archive:

https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555019.html
https://gcc.gnu.org/pipermail/gcc-patches/attachments/20200928/9026783a/attachment-0003.bin 







Ah, yes, there are two patches in that email; the first introduces 
the broken offset_bounded, and the second one fixes it without 
mentioning that in the ChangeLog.  How about moving the fix to the 
first patch?


Sure, I can do that.  Anything else or is the final version okay
to commit with this adjustment?


OK with that adjustment.


I've done more testing and found a bug in the second patch: adding
an offset in an inverted range to an existing offset range isn't as
simple as adding up the bounds because they mean different things:
like an anti-range, an inverted range is a union of two subranges.
Instead, the upper bound needs to be extended to PTRDIFF_MAX because
that is the maximum being added, and the lower bound either reset to
zero if the absolute value of the maximum being added is less than
it, or incremented by the absolute value otherwise.

For example, given:

   char a[8];
   char *pa = a;
   char *p1 = pa + i;   // i's range is [3, 5]
   char *p2 = p1 + j;   // j's range is [1, -4]

the range of p2's offset isn't [4, 1] but [4, PTRDIFF_MAX] (or more
precisely [4, 8] if we assume it's valid).  But the range of p3's
valid offset in this last pointer

   char *p3 = p2 + k;   // k's range is [5, -4]

is all of [0, PTRDIFF_MAX] (or, more accurately, [0, 8]).

This may seem obvious but it took me a while at first to wrap my head
around.


It makes sense, but doesn't seem obvious; a bit more comment might be nice.


I just now noticed this suggestion, after pushing both patches.
I'll keep it in mind and add something later.




I've tweaked access_ref::add_offset in the patch to handle this
correctly.  The function now ensures that every offset is in
a regular range (and not an inverted one).  That in turn simplifies
access_ref::size_remaining.  Since an inverted range is the same as
an anti-range, there's no reason to exclude the latter anymore(*).
The diff on top of the approved patch is attached.

I've retested this new revision of the patch with Glibc and GDB/
Binutils, (the latter fails due to PR 97360), and the Linux kernel.

Please let me know if you have any questions or concerns with
this change.  If not, I'd like to commit it sometime tomorrow.

Martin

[*] I was curious how often these inverted ranges/anti-ranges come
up in pointer arithmetic to see if handling them is worthwhile.  I
instrumented GCC to print them in get_range() on master where they
are only looked at in calls to built-in functions, and in another
patch I'm working on where they are looked at for every pointer
addition.  They accoun

Re: Ping: [PATCH][Arm] Enable MVE SIMD modes for vectorization

2020-10-12 Thread Dennis Zhang via Gcc-patches

Hi Christophe,

On 12/10/2020 12:40, Christophe Lyon wrote:

Hi,


On Thu, 8 Oct 2020 at 16:22, Christophe Lyon  wrote:


On Thu, 8 Oct 2020 at 16:08, Dennis Zhang  wrote:


Hi Christophe,

On 08/10/2020 14:14, Christophe Lyon wrote:

Hi,


On Tue, 6 Oct 2020 at 15:37, Dennis Zhang via Gcc-patches
 wrote:


On 9/16/20 4:00 PM, Dennis Zhang wrote:

Hi all,

This patch enables SIMD modes for MVE auto-vectorization.
In this patch, the integer and float MVE SIMD modes are returned by
arm_preferred_simd_mode (TARGET_VECTORIZE_PREFERRED_SIMD_MODE hook) when
MVE or MVE_FLOAT is enabled.
Then the expanders for auto-vectorization can be used for generating MVE
SIMD code.

This patch also fixes bugs in MVE vreinterpretq_*.c tests which are
revealed by the enabled MVE SIMD modes.
The tests are for checking the MVE reinterpret intrinsics.
There are two functions in each of the tests. The two functions contain
the pattern of identical code so that they are folded in icf pass.
Because of icf, the instruction count only checks one function which is 8.
However when the SIMD modes are enabled, the estimation of the code size
becomes smaller so that inlining is applied after icf, then the
instruction count becomes 16 which causes failure of the tests.
Because the icf is not the expected pattern to be tested but causes
above issues, -fno-ipa-icf is applied to the tests to avoid unstable
instruction count.

This patch is separated from
https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552104.html
because this part is not strongly connected to the aim of that one,
which was causing confusion.

Regtested and bootstrapped.

Is it OK for trunk please?

Thanks
Dennis

gcc/ChangeLog:

2020-09-15  Dennis Zhang  

* config/arm/arm.c (arm_preferred_simd_mode): Enable MVE SIMD modes.



Since toolchain builds work again after Jakub's divmod fix, I'm now
facing another build error likely caused by this patch:
In file included from
/tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/coretypes.h:449:0,
   from
/tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28:
/tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:
In function 'machine_mode arm_preferred_simd_mode(scalar_mode)':
./insn-modes.h:196:71: error: temporary of non-literal type
'scalar_int_mode' in a constant expression
   #define QImode (scalar_int_mode ((scalar_int_mode::from_int) E_QImode))
 ^
/tmp/2601185_2.tmpdir/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/config/arm/arm.c:28970:12:
note: in expansion of macro 'QImode'
 case QImode:

and similarly for the other cases.

Does the build work for you?

Thanks,

Christophe



Thanks for the report. Sorry to see the error.
I tested it for arm-none-eabi and arm-none-linux-gnueabi targets. I
didn't get this error.
Could you please help to show the configuration you use for your build?
I will test and fix at once.



It fails on all of them for me. Does it work for you with current
master? (r11-3720-gf18eeb6b958acd5e1590ca4a73231486b749be9b)



So... I guess you are using a host with GCC more recent than 4.8.5? :-)
When I build manually on ubuntu-16.04 with gcc-5.4, the build succeeds,
and after manually building with the same environment in the compute
farm I use for validation (RHEL 7, gcc-4.8.5), I managed to reproduce the
build failure.
It's a matter of replacing
case QImode:
with
case E_QImode:

Is the attached patch OK? Or do we instead want to revisit the minimum
gcc version required to build gcc?

Thanks,

Christophe



I've tested your patch and it works with my other patches depending on 
this one. So I agree this patch is OK. Thanks for the fix.


Bests
Dennis


Re: [Patch] libgomp: Add, if existing, -latomic to libgomp.spec --as-needed (was: Re: [RFC] Offloading and automatic linking of libraries)

2020-10-12 Thread Tobias Burnus

Hi all,

first: *PING*.

secondly, I think the change to testsuite/lib/libgomp.exp's libgomp_init
is also needed.
(Hence, I now added it.) I have a too new system-installed libatomic to
be sure that
it fails without.

OK?

Tobias

-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter
libgomp: Add, if existing, -latomic to libgomp.spec --as-needed

libgomp/ChangeLog:

	* acinclude.m4 (LIBGOMP_CHECK_LIBATOMIC): New; set
	@LIBATOMICSPEC@ if the target libatomic is built.
	* configure.ac: Call LIBGOMP_CHECK_LIBATOMIC.
	* libgomp.spec.in: Add @LIBATOMICSPEC@.
	* testsuite/lib/libgomp.exp (libgomp_init): Add libatomic
	unconditionally if the lib exists.
	* Makefile.in: Regenerate.
	* configure: Regenerate.
	* testsuite/Makefile.in: Regenerate.

 libgomp/Makefile.in   |   1 +
 libgomp/acinclude.m4  |  63 
 libgomp/configure | 100 +-
 libgomp/configure.ac  |   2 +
 libgomp/libgomp.spec.in   |   2 +-
 libgomp/testsuite/Makefile.in |   1 +
 libgomp/testsuite/lib/libgomp.exp |  20 
 7 files changed, 175 insertions(+), 14 deletions(-)

diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in
index 00d5e2919ee..a8ec69f1822 100644
--- a/libgomp/Makefile.in
+++ b/libgomp/Makefile.in
@@ -395,6 +395,7 @@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 INTPTR_T_KIND = @INTPTR_T_KIND@
 LD = @LD@
 LDFLAGS = @LDFLAGS@
+LIBATOMICSPEC = @LIBATOMICSPEC@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
diff --git a/libgomp/acinclude.m4 b/libgomp/acinclude.m4
index dbf54d06db9..3d7e5e08c3a 100644
--- a/libgomp/acinclude.m4
+++ b/libgomp/acinclude.m4
@@ -365,3 +365,66 @@ if test $enable_symvers != no ; then
 esac
 fi
 ])
+
+dnl Check whether the target libatomic is built; if so, set LIBATOMICSPEC to link it (as-needed where the linker supports that).
+AC_DEFUN([LIBGOMP_CHECK_LIBATOMIC], [
+  LIBATOMICSPEC=
+  libgomp_libatomic=no
+
+  if echo " ${TARGET_CONFIGDIRS} " | grep " libatomic " > /dev/null 2>&1 ; then
+libgomp_libatomic=yes;
+  fi
+
+  AC_MSG_CHECKING([for target-libatomic support])
+  AC_MSG_RESULT([$libgomp_libatomic])
+
+  if test "x$libgomp_libatomic" = xyes; then
+dnl Check whether -Wl,--as-needed resp. -Wl,-zignore is supported
+dnl
+dnl Turn warnings into error to avoid testsuite breakage.  So enable
+dnl AC_LANG_WERROR, but there's currently (autoconf 2.64) no way to turn
+dnl it off again.  As a workaround, save and restore werror flag like
+dnl AC_PATH_XTRA.
+dnl Cf. http://gcc.gnu.org/ml/gcc-patches/2010-05/msg01889.html
+ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag=$ac_[]_AC_LANG_ABBREV[]_werror_flag
+AC_CACHE_CHECK([whether --as-needed/-z ignore works],
+  [libgomp_cv_have_as_needed],
+  [
+  # Test for native Solaris options first.
+  # No whitespace after -z to pass it through -Wl.
+  libgomp_cv_as_needed_option="-zignore"
+  libgomp_cv_no_as_needed_option="-zrecord"
+  save_LDFLAGS="$LDFLAGS"
+  LDFLAGS="$LDFLAGS -Wl,$libgomp_cv_as_needed_option -lm -Wl,$libgomp_cv_no_as_needed_option"
+  libgomp_cv_have_as_needed=no
+  AC_LANG_WERROR
+  AC_LINK_IFELSE([AC_LANG_PROGRAM([])],
+		 [libgomp_cv_have_as_needed=yes],
+		 [libgomp_cv_have_as_needed=no])
+  LDFLAGS="$save_LDFLAGS"
+  if test "x$libgomp_cv_have_as_needed" = xno; then
+	libgomp_cv_as_needed_option="--as-needed"
+	libgomp_cv_no_as_needed_option="--no-as-needed"
+	save_LDFLAGS="$LDFLAGS"
+	LDFLAGS="$LDFLAGS -Wl,$libgomp_cv_as_needed_option -lm -Wl,$libgomp_cv_no_as_needed_option"
+	libgomp_cv_have_as_needed=no
+	AC_LANG_WERROR
+	AC_LINK_IFELSE([AC_LANG_PROGRAM([])],
+		   [libgomp_cv_have_as_needed=yes],
+		   [libgomp_cv_have_as_needed=no])
+	LDFLAGS="$save_LDFLAGS"
+  fi
+  ac_[]_AC_LANG_ABBREV[]_werror_flag=$ac_xsave_[]_AC_LANG_ABBREV[]_werror_flag
+])
+
+dnl Depend on libatomic only if needed, when the linker supports an as-needed option.
+if test "x$libgomp_cv_have_as_needed" = xyes; then
+  LIBATOMICSPEC="$libgomp_cv_as_needed_option -latomic $libgomp_cv_no_as_needed_option"
+else
+  LIBATOMICSPEC="-latomic"
+fi
+  fi
+
+  dnl For the spec file
+  AC_SUBST(LIBATOMICSPEC)
+])
diff --git a/libgomp/configure b/libgomp/configure
index e48371d5093..a4d93974084 100755
--- a/libgomp/configure
+++ b/libgomp/configure
@@ -630,6 +630,8 @@ ac_includes_default="\
 
 ac_header_list=
 ac_func_list=
+ac_c_werror_flag=
+ac_c_werror_flag=
 ac_subst_vars='am__EXEEXT_FALSE
 am__EXEEXT_TRUE
 LTLIBOBJS
@@ -653,6 +655,7 @@ USE_FORTRAN_FALSE
 USE_FORTRAN_TRUE
 link_gomp
 tmake_file
+LIBATOMICSPEC
 XLDFLAGS
 XCFLAGS
 config_path
@@ -11429,7 +11432,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11432 "configure"
+#line 11435 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ 

Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)

2020-10-12 Thread Segher Boessenkool
On Mon, Oct 12, 2020 at 01:24:44PM +0100, Richard Sandiford wrote:
> Martin Sebor via Gcc-patches  writes:
> > On 10/4/20 10:51 AM, H.J. Lu via Gcc-patches wrote:
> >> On Sat, Oct 3, 2020 at 5:57 PM Segher Boessenkool
> >>  wrote:
> >>>
> >>> On Sat, Oct 03, 2020 at 12:21:04PM -0700, sunil.k.pandey via Gcc-patches 
> >>> wrote:
>  On Linux/x86_64,
> 
>  c34db4b6f8a5d80367c709309f9b00cb32630054 is the first bad commit
>  commit c34db4b6f8a5d80367c709309f9b00cb32630054
>  Author: Jan Hubicka 
>  Date:   Sat Oct 3 17:20:16 2020 +0200
> 
>   Track access ranges in ipa-modref
> 
>  caused
> >>>
> >>> [ ... ]
> >>>
> >>> This isn't a patch.  Wrong mailing list?
> >> 
> >> I view this as a follow up of
> >> 
> >> https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555314.html
> >> 
> >> What do people think about this kind of followups?  Is this appropriate
> >> for this mailing list?
> >
> > A number of people routinely send emails similar to these to this
> > list to point out regressions on their targets.  I find both kinds
> > of emails very useful and don't mind the additional traffic.
> 
> +1 FWIW.  I think it's great that we have this kind of automatic CI, and
> this seems like a natural place to send the reports.  Shovelling them into
> bugzilla is likely to create more work rather than less, especially since
> the fix turnaround should (hopefully) be short.

But send them as reply to the patch discussion then!


Segher


Re: [PATCH] libstdc++: Apply proposed resolution for LWG 3449 [PR95322]

2020-10-12 Thread Jonathan Wakely via Gcc-patches

On 12/10/20 10:19 -0400, Patrick Palka via Libstdc++ wrote:

Now that the frontend bug PR96805 is fixed, we can apply the proposed
resolution for this issue.

This slightly deviates from the proposed resolution by declaring _CI a
member of take_view instead of take_view::_Sentinel, since it doesn't
depend on anything within _Sentinel anymore.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?


OK, thanks.



Re: [PATCH] libstdc++: Apply proposed resolution for LWG 3450

2020-10-12 Thread Jonathan Wakely via Gcc-patches

On 12/10/20 10:19 -0400, Patrick Palka via Libstdc++ wrote:

libstdc++-v3/ChangeLog:

* include/std/ranges (take_while_view::begin): Constrain the
const overload further as per LWG 3450.
(take_while_view::end): Likewise.
* testsuite/std/ranges/adaptors/take_while.cc: Add test for LWG
3450.


OK, thanks.



[committed] analyzer: add warnings about writes to constant regions [PR95007]

2020-10-12 Thread David Malcolm via Gcc-patches
This patch adds two new warnings:
  -Wanalyzer-write-to-const
  -Wanalyzer-write-to-string-literal
for code paths where the analyzer detects a write to a constant region.

As noted in the documentation part of the patch, the analyzer doesn't
prioritize detection of such writes, in that the state-merging logic
will blithely lose the distinction between const and non-const regions.
Hence false negatives are likely to arise due to state-merging.

However, if the analyzer does happen to spot such a write, it seems worth
reporting, hence this patch.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-3829-g3175d40fc52fb8eb3c3b18cc343d773da24434fb.

gcc/analyzer/ChangeLog:
* analyzer.opt (Wanalyzer-write-to-const): New.
(Wanalyzer-write-to-string-literal): New.
* region-model-impl-calls.cc (region_model::impl_call_memcpy):
Call check_for_writable_region.
(region_model::impl_call_memset): Likewise.
(region_model::impl_call_strcpy): Likewise.
* region-model.cc (class write_to_const_diagnostic): New.
(class write_to_string_literal_diagnostic): New.
(region_model::check_for_writable_region): New.
(region_model::set_value): Call check_for_writable_region.
* region-model.h (region_model::check_for_writable_region): New
decl.

gcc/ChangeLog:
* doc/invoke.texi: Document -Wanalyzer-write-to-const and
-Wanalyzer-write-to-string-literal.

gcc/testsuite/ChangeLog:
PR c/83347
PR middle-end/90404
PR analyzer/95007
* gcc.dg/analyzer/write-to-const-1.c: New test.
* gcc.dg/analyzer/write-to-string-literal-1.c: New test.
---
 gcc/analyzer/analyzer.opt |   8 ++
 gcc/analyzer/region-model-impl-calls.cc   |   6 +
 gcc/analyzer/region-model.cc  | 117 +-
 gcc/analyzer/region-model.h   |   3 +
 gcc/doc/invoke.texi   |  28 +
 .../gcc.dg/analyzer/write-to-const-1.c|  29 +
 .../analyzer/write-to-string-literal-1.c  |  58 +
 7 files changed, 248 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/write-to-const-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/write-to-string-literal-1.c

diff --git a/gcc/analyzer/analyzer.opt b/gcc/analyzer/analyzer.opt
index a4d384211f3..c9df6dc7673 100644
--- a/gcc/analyzer/analyzer.opt
+++ b/gcc/analyzer/analyzer.opt
@@ -114,6 +114,14 @@ Wanalyzer-use-of-pointer-in-stale-stack-frame
 Common Var(warn_analyzer_use_of_pointer_in_stale_stack_frame) Init(1) Warning
 Warn about code paths in which a pointer to a stale stack frame is used.
 
+Wanalyzer-write-to-const
+Common Var(warn_analyzer_write_to_const) Init(1) Warning
+Warn about code paths which attempt to write to a const object.
+
+Wanalyzer-write-to-string-literal
+Common Var(warn_analyzer_write_to_string_literal) Init(1) Warning
+Warn about code paths which attempt to write to a string literal.
+
 Wanalyzer-too-complex
 Common Var(warn_analyzer_too_complex) Init(0) Warning
 Warn if the code is too complicated for the analyzer to fully explore.
diff --git a/gcc/analyzer/region-model-impl-calls.cc 
b/gcc/analyzer/region-model-impl-calls.cc
index 009b8c3ecb0..ef84e638992 100644
--- a/gcc/analyzer/region-model-impl-calls.cc
+++ b/gcc/analyzer/region-model-impl-calls.cc
@@ -305,6 +305,8 @@ region_model::impl_call_memcpy (const call_details &cd)
return;
 }
 
+  check_for_writable_region (dest_reg, cd.get_ctxt ());
+
   /* Otherwise, mark region's contents as unknown.  */
   mark_region_as_unknown (dest_reg);
 }
@@ -346,6 +348,8 @@ region_model::impl_call_memset (const call_details &cd)
}
 }
 
+  check_for_writable_region (dest_reg, cd.get_ctxt ());
+
   /* Otherwise, mark region's contents as unknown.  */
   mark_region_as_unknown (dest_reg);
   return false;
@@ -397,6 +401,8 @@ region_model::impl_call_strcpy (const call_details &cd)
 
   cd.maybe_set_lhs (dest_sval);
 
+  check_for_writable_region (dest_reg, cd.get_ctxt ());
+
   /* For now, just mark region's contents as unknown.  */
   mark_region_as_unknown (dest_reg);
 }
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index a88a295a241..480f25a3a4b 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -1532,16 +1532,131 @@ region_model::deref_rvalue (const svalue *ptr_sval, 
tree ptr_tree,
   return m_mgr->get_symbolic_region (ptr_sval);
 }
 
+/* A subclass of pending_diagnostic for complaining about writes to
+   constant regions of memory.  */
+
+class write_to_const_diagnostic
+: public pending_diagnostic_subclass
+{
+public:
+  write_to_const_diagnostic (const region *reg, tree decl)
+  : m_reg (reg), m_decl (decl)
+  {}
+
+  const char *get_kind () const FINAL OVERRIDE
+  {
+return "write_to_const_diagnostic";
+  }
+
+  bool operator== (const write_to_const_diagnostic &other) cons

Re: [PING][PATCH v2] combine: Don't turn (mult (extend x) 2^n) into extract [PR96998]

2020-10-12 Thread Richard Sandiford via Gcc-patches
Segher Boessenkool  writes:
> On Fri, Oct 09, 2020 at 09:38:09AM +0100, Alex Coplan wrote:
>> Hi Segher,
>> 
>> On 08/10/2020 15:20, Segher Boessenkool wrote:
>> > On Thu, Oct 08, 2020 at 11:21:26AM +0100, Alex Coplan wrote:
>> > > Ping. The kernel is still broken on AArch64.
>> > 
>> > You *cannot* fix a correctness bug with a combine addition.
>> 
>> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555158.html
>> explains why we do precisely that.
>
> And it still is wrong.
>
>> Also, as your own testing confirmed, the patch does indeed fix the issue.
>
> No, it did not.  It showed that before the patch the bug was hit, and
> after it it was not.  It does not show the bug was solved.

I agree there's a target bug here.  Please see the explanation I posted
in: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554518.html
(especially the first sentence quoted below :-)).

The situation as things stand is that aarch64 has a bug: it accepts
an odd sign_extract representation of addresses, but doesn't accept
that same odd form of address as an LEA.  We have two options:

(a) add back instructions that recognise the odd form of LEA, or
(b) remove the code that accepts the odd addresses

I think (b) is the way to go here.  But doing that on its own
would regress code quality.  The reason we recognised the odd
addresses in the first place was because that was the rtl that
combine happened to generate for an important case.

So if we go for (b) but fix the aarch64 bug strictly before the
combine patch, we would need to:

(1) Apply the target fix and adjust the testsuite markup to make sure
that the git commit doesn't regress anyone's test results.

(2) Apply the combine patch and revert the testsuite markup changes
from (1).

That seems like make-work, and would still show as a blip for
people doing performance tracking.

If you prefer, we could fix the aarch64 bug and patch combine as a
single commit.  See:

https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554257.html

for the full patch, including the aarch64 bugfix.

Thanks,
Richard


Re: [r11-3641 Regression] FAIL: gcc.dg/torture/pta-ptrarith-1.c -Os scan-tree-dump alias "ESCAPED = {[^\n}]* i f [^\n}]*}" on Linux/x86_64 (-m32 -march=cascadelake)

2020-10-12 Thread Segher Boessenkool
On Mon, Oct 12, 2020 at 03:26:38PM +0200, Christophe Lyon wrote:
> That's why I kept the reporting part manual on my side: once you know
> which commit introduced a failure/regression (either via bisect, or by
> some other way), it's not always easy to identify the gcc-patches
> message to which you want to reply.

But it *should* be: the check-in subject should be in the patch mail, or
failing that, at least the changelog entries should be!

> > > What do people think about this kind of followups?  Is this appropriate
> > > for this mailing list?
> >
> > Please just use bugzilla.  And report bugs there the way they should be
> > reported: full command lines, full description of the errors, and
> > everything else needed to easily reproduce the problem.
> >
> It seems some people prefer such regressions reports in bugzilla,
> others in gcc-patches@.

If it will be resolved quickly, and by just telling the author, email is
fine of course.  Otherwise, you need bugzilla.

> In general when I report a regression I noticed in the GCC testsuite,
> I tend to assume that the testname and GCC configure options are
> sufficient for a usual contributor to reproduce.
> Not sure if it matches "full" and "easily" in your mind?

Tests are often run with multiple sets of options.  If you give enough
info that people can reproduce your configuration (hint: most bug
reports do *not*), all is fine of course.  But in general we *do* need
all info (as documented in the bug reporting instructions), or we get
a frustrating "I cannot reproduce this" game.

> With all the automated builds where the build dir is removed from the
> server at the end whatever the result, it does take time if I have to
> reproduce the problem manually before reporting.

Yes, and it is *easier* to reproduce for you than for other people!

> > *Actually* following up to the patch mail could be useful (but you can
> > then just point to the bugzilla).  Sending spam to gcc-patches@ is not
> > useful for most users of the list.

^^^ Still my main point.


Segher


[PUSHED] operator_trunc_mod::wi_fold: Return VARYING for mod by zero.

2020-10-12 Thread Aldy Hernandez via Gcc-patches
Division by zero should return VARYING, otherwise we propagate undefined all 
over the
ranger and cause bad things to happen :).  This fixes MOD 0 to also return 
VARYING.

This is Andrew's patch.  I forgot to use --author for proper patch
attribution.

Tested on x86-64 Linux.

Pushed to trunk.

gcc/ChangeLog:

PR tree-optimization/97378
* range-op.cc (operator_trunc_mod::wi_fold): Return VARYING for mod by 
zero.

gcc/testsuite/ChangeLog:

* gcc.dg/pr97378.c: New test.
---
 gcc/range-op.cc|  6 +++---
 gcc/testsuite/gcc.dg/pr97378.c | 15 +++
 2 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr97378.c

diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index ce6ae2de20c..6108de367ad 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -1359,7 +1359,7 @@ operator_div::wi_fold (irange &r, tree type,
   // If we're definitely dividing by zero, there's nothing to do.
   if (wi_zero_p (type, divisor_min, divisor_max))
 {
-  r.set_undefined ();
+  r.set_varying (type);
   return;
 }
 
@@ -2624,10 +2624,10 @@ operator_trunc_mod::wi_fold (irange &r, tree type,
   signop sign = TYPE_SIGN (type);
   unsigned prec = TYPE_PRECISION (type);
 
-  // Mod 0 is undefined.  Return undefined.
+  // Mod 0 is undefined.
   if (wi_zero_p (type, rh_lb, rh_ub))
 {
-  r.set_undefined ();
+  r.set_varying (type);
   return;
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr97378.c b/gcc/testsuite/gcc.dg/pr97378.c
new file mode 100644
index 000..27e4a1f4321
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97378.c
@@ -0,0 +1,15 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+int a, b, c;
+void d() {
+e : {
+  long f;
+  long *g = &f;
+  if ((a != 0) - (b = 0))
+;
+  else
+a &= (*g %= a *= c) >= (*g || f);
+  goto e;
+}
+}
-- 
2.26.2



Re: [PING][PATCH v2] combine: Don't turn (mult (extend x) 2^n) into extract [PR96998]

2020-10-12 Thread Segher Boessenkool
On Mon, Oct 12, 2020 at 05:19:58PM +0100, Richard Sandiford wrote:
> Segher Boessenkool  writes:
> > On Fri, Oct 09, 2020 at 09:38:09AM +0100, Alex Coplan wrote:
> >> Hi Segher,
> >> 
> >> On 08/10/2020 15:20, Segher Boessenkool wrote:
> >> > On Thu, Oct 08, 2020 at 11:21:26AM +0100, Alex Coplan wrote:
> >> > > Ping. The kernel is still broken on AArch64.
> >> > 
> >> > You *cannot* fix a correctness bug with a combine addition.
> >> 
> >> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/555158.html
> >> explains why we do precisely that.
> >
> > And it still is wrong.
> >
> >> Also, as your own testing confirmed, the patch does indeed fix the issue.
> >
> > No, it did not.  It showed that before the patch the bug was hit, and
> > after it it was not.  It does not show the bug was solved.
> 
> I agree there's a target bug here.  Please see the explanation I posted
> in: https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554518.html
> (especially the first sentence quoted below :-)).
> 
> The situation as things stand is that aarch64 has a bug: it accepts
> an odd sign_extract representation of addresses, but doesn't accept
> that same odd form of address as an LEA.  We have two options:
> 
> (a) add back instructions that recognise the odd form of LEA, or
> (b) remove the code that accepts the odd addresses
> 
> I think (b) is the way to go here.

Either seems to be fine.

> But doing that on its own
> would regress code quality.  The reason we recognised the odd
> addresses in the first place was because that was the rtl that
> combine happened to generate for an important case.
> 
> So if we go for (b) but fix the aarch64 bug strictly before the
> combine patch, we would need to:

This is necessary to be able to evaluate what such a combine patch
does in practice -- so there is no other way.

> (1) Apply the target fix and adjust the testsuite markup to make sure
> that the git commit doesn't regress anyone's test results.

It is normal to regress the testsuite for a little while.

> (2) Apply the combine patch and revert the testsuite markup changes
> from (1).
> 
> That seems like make-work, and would still show as a blip for
> people doing performance tracking.

Yes, that is make-work.  Just regress the testsuite.

You do not even have to apply the target patch first (but you need to
send it as separate patch, so that other people can test it!)

> If you prefer, we could fix the aarch64 bug and patch combine as a
> single commit.  See:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554257.html
> 
> for the full patch, including the aarch64 bugfix.

I need separate patches, so that I can see what the current combine
does, without ICEing all over.  That is all.  Send it as a series of two
patches, or something.


Segher


[committed] libstdc++: Update doxyfile to Doxygen 1.8.20 format

2020-10-12 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in: Update to Doxygen 1.8.20 format.

Tested x86_64-linux. Committed to trunk.

commit c840700852c69b2bf7a73df77d8d9f0658330dae
Author: Jonathan Wakely 
Date:   Mon Oct 12 18:14:01 2020

libstdc++: Update doxyfile to Doxygen 1.8.20 format

libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in: Update to Doxygen 1.8.20 format.

diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in 
b/libstdc++-v3/doc/doxygen/user.cfg.in
index 4495b1c9ede..9b49a15d31b 100644
--- a/libstdc++-v3/doc/doxygen/user.cfg.in
+++ b/libstdc++-v3/doc/doxygen/user.cfg.in
@@ -1,4 +1,4 @@
-# Doxyfile 1.8.14
+# Doxyfile 1.8.20
 
 # This file describes the settings to be used by the documentation system
 # doxygen (www.doxygen.org) for a project.
@@ -17,10 +17,10 @@
 # Project related configuration options
 #---
 
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all 
text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See
+# This tag specifies the encoding used for all characters in the configuration
+# file that follow. The default is UTF-8 which is also the encoding used for 
all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
 # https://www.gnu.org/software/libiconv/ for the list of possible encodings.
 # The default value is: UTF-8.
 
@@ -93,6 +93,14 @@ ALLOW_UNICODE_NAMES= NO
 
 OUTPUT_LANGUAGE= English
 
+# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all generated output in the proper direction.
+# Possible values are: None, LTR, RTL and Context.
+# The default value is: None.
+
+OUTPUT_TEXT_DIRECTION  = None
+
 # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
 # descriptions after the members that are listed in the file and class
 # documentation (similar to Javadoc). Set to NO to disable this.
@@ -179,6 +187,16 @@ SHORT_NAMES= @shortname@
 
 JAVADOC_AUTOBRIEF  = NO
 
+# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line
+# such as
+# /***
+# as being the beginning of a Javadoc-style comment "banner". If set to NO, the
+# Javadoc-style will behave just like regular comments and it will not be
+# interpreted by doxygen.
+# The default value is: NO.
+
+JAVADOC_BANNER = NO
+
 # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
 # line (until the first dot) of a Qt-style comment as the brief description. If
 # set to NO, the Qt-style will behave just like regular Qt-style comments (thus
@@ -199,6 +217,14 @@ QT_AUTOBRIEF   = NO
 
 MULTILINE_CPP_IS_BRIEF = YES
 
+# By default Python docstrings are displayed as preformatted text and doxygen's
+# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the
+# doxygen's special commands can be used and the contents of the docstring
+# documentation blocks is shown as doxygen documentation.
+# The default value is: YES.
+
+PYTHON_DOCSTRING   = YES
+
 # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits 
the
 # documentation from any documented member that it re-implements.
 # The default value is: YES.
@@ -228,17 +254,15 @@ TAB_SIZE   = 8
 # "Side Effects:". You can put \n's in the value part of an alias to insert
 # newlines (in the resulting output). You can put ^^ in the value part of an
 # alias to insert a newline as if a physical newline was in the original file.
+# When you need a literal { or } or , in the value part of an alias you have to
+# escape them by means of a backslash (\), this can lead to conflicts with the
+# commands \{ and \} for these it is advised to use the version @{ and @} or 
use
+# a double escape (\\{ and \\})
 
 ALIASES= "doctodo=@todo Needs documentation! See 
http://gcc.gnu.org/onlinedocs/libstdc++/manual/documentation_style.html"; \
  "headername{1}=Instead, include \<\1\>." \
  "headername{2}=Instead, include \<\1\> or \<\2\>."
 
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST  =
-
 # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
sources
 # only. Doxygen will then generate output that is more tailored for C. For
 # instance, some of the names that are used will be different. The list of all
@@ -267,17 +291,26 @@ OPTIMIZE_FOR_FORTRAN   = NO
 
 OPTIMIZE_OUT

[committed] libstdc++: Include C++17 features in doxygen API docs

2020-10-12 Thread Jonathan Wakely via Gcc-patches
libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in (PREDEFINED): Use __cplusplus=201703L
so that C++17 features are documented.

Tested x86_64-linux. Committed to trunk.

commit 925eb6a0fbf5aa6e99e8b461a41cbeeab507afcd
Author: Jonathan Wakely 
Date:   Mon Oct 12 18:14:01 2020

libstdc++: Include C++17 features in doxygen API docs

libstdc++-v3/ChangeLog:

* doc/doxygen/user.cfg.in (PREDEFINED): Use __cplusplus=201703L
so that C++17 features are documented.

diff --git a/libstdc++-v3/doc/doxygen/user.cfg.in 
b/libstdc++-v3/doc/doxygen/user.cfg.in
index 19f8ffd8230..4495b1c9ede 100644
--- a/libstdc++-v3/doc/doxygen/user.cfg.in
+++ b/libstdc++-v3/doc/doxygen/user.cfg.in
@@ -2250,7 +2250,7 @@ INCLUDE_FILE_PATTERNS  =
 # recursively expanded use the := operator instead of the = operator.
 # This tag requires that the tag ENABLE_PREPROCESSING is set to YES.
 
-PREDEFINED = __cplusplus=201402L \
+PREDEFINED = __cplusplus=201703L \
  __GTHREADS \
  _GLIBCXX_HAS_GTHREADS \
  _GLIBCXX_HAVE_TLS \


Re: make sincos take type from intrinsic formal, not from result assignment

2020-10-12 Thread Alexandre Oliva
Hello, Martin,

On Oct 12, 2020, Martin Liška  wrote:

> It seems the patch caused quite some clang warnings:

> /home/marxin/BIG/buildbot/buildworker/marxinbox-gcc-clang/build/gcc/builtins.c:2366:5:
> warning: comparison of different enumeration types in switch statement
> ('combined_fn' and 'built_in_function') [-Wenum-compare-switch]
> ...

Thanks for the report.

> Can we please fix them?

Here's the patch I'm just about to begin regstrapping.
I'll check it in, as obvious, once I'm done.

Thanks again,


mathfn_built_in_type case type fix

Martin Liška reported warnings about type mismatches in the cases in
the recently-introduced mathfn_built_in_type.  This patch adjusts the
macros to use the combined_fn enumerators rather than the
(currently same-numbered) built_in_function ones.

for  gcc/ChangeLog

* builtins.c (mathfn_built_in_type): Use CFN_ enumerators.
---
 gcc/builtins.c |   26 +-
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 3a77da2..3f799e5 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -2451,37 +2451,37 @@ tree
 mathfn_built_in_type (combined_fn fn)
 {
 #define CASE_MATHFN(MATHFN)\
-  case BUILT_IN_##MATHFN:  \
+  case CFN_BUILT_IN_##MATHFN:  \
 return double_type_node;   \
-  case BUILT_IN_##MATHFN##F:   \
+  case CFN_BUILT_IN_##MATHFN##F:   \
 return float_type_node;\
-  case BUILT_IN_##MATHFN##L:   \
+  case CFN_BUILT_IN_##MATHFN##L:   \
 return long_double_type_node;
 
 #define CASE_MATHFN_FLOATN(MATHFN) \
   CASE_MATHFN(MATHFN)  \
-  case BUILT_IN_##MATHFN##F16: \
+  case CFN_BUILT_IN_##MATHFN##F16: \
 return float16_type_node;  \
-  case BUILT_IN_##MATHFN##F32: \
+  case CFN_BUILT_IN_##MATHFN##F32: \
 return float32_type_node;  \
-  case BUILT_IN_##MATHFN##F64: \
+  case CFN_BUILT_IN_##MATHFN##F64: \
 return float64_type_node;  \
-  case BUILT_IN_##MATHFN##F128:\
+  case CFN_BUILT_IN_##MATHFN##F128:\
 return float128_type_node; \
-  case BUILT_IN_##MATHFN##F32X:\
+  case CFN_BUILT_IN_##MATHFN##F32X:\
 return float32x_type_node; \
-  case BUILT_IN_##MATHFN##F64X:\
+  case CFN_BUILT_IN_##MATHFN##F64X:\
 return float64x_type_node; \
-  case BUILT_IN_##MATHFN##F128X:   \
+  case CFN_BUILT_IN_##MATHFN##F128X:   \
 return float128x_type_node;
 
 /* Similar to above, but appends _R after any F/L suffix.  */
 #define CASE_MATHFN_REENT(MATHFN) \
-  case BUILT_IN_##MATHFN##_R:  \
+  case CFN_BUILT_IN_##MATHFN##_R:  \
 return double_type_node;   \
-  case BUILT_IN_##MATHFN##F_R: \
+  case CFN_BUILT_IN_##MATHFN##F_R: \
 return float_type_node;\
-  case BUILT_IN_##MATHFN##L_R: \
+  case CFN_BUILT_IN_##MATHFN##L_R: \
 return long_double_type_node;
 
   switch (fn)


-- 
Alexandre Oliva, happy hacker
https://FSFLA.org/blogs/lxo/
Free Software Activist
GNU Toolchain Engineer


Merge from master to gccgo branch

2020-10-12 Thread Ian Lance Taylor via Gcc-patches
I merged master revision 3175d40fc52fb8eb3c3b18cc343d773da24434fb to
the gccgo branch.

Ian


[PUSHED] Do not ignore failures from gimple_range_calc_op2.

2020-10-12 Thread Aldy Hernandez via Gcc-patches
From: Andrew MacLeod 

[posting on behalf of Andrew]

We were ignoring the return value if op2 returned false and getting garbage 
ranges propagated.

Tested on x86-64 Linux.

Pushed.

gcc/ChangeLog:

PR tree-optimization/97381
* gimple-range-gori.cc (gori_compute::compute_operand2_range): If a 
range cannot be
calculated through operand 2, return false.

gcc/testsuite/ChangeLog:

* gcc.dg/pr97381.c: New test.
---
 gcc/gimple-range-gori.cc   |  5 +++--
 gcc/testsuite/gcc.dg/pr97381.c | 13 +
 2 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr97381.c

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index 986427669a7..c4bfc658319 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -920,8 +920,9 @@ gori_compute::compute_operand2_range (irange &r, gimple 
*stmt,
   expr_range_in_bb (op2_range, op2, gimple_bb (stmt));
 
   // Intersect with range for op2 based on lhs and op1.
-  if (gimple_range_calc_op2 (r, stmt, lhs, op1_range))
-op2_range.intersect (r);
+  if (!gimple_range_calc_op2 (r, stmt, lhs, op1_range))
+return false;
+  op2_range.intersect (r);
 
   gimple *src_stmt = SSA_NAME_DEF_STMT (op2);
   // If def stmt is outside of this BB, then name must be an import.
diff --git a/gcc/testsuite/gcc.dg/pr97381.c b/gcc/testsuite/gcc.dg/pr97381.c
new file mode 100644
index 000..947692cb1f6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr97381.c
@@ -0,0 +1,13 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+int a;
+void b() {
+  char c = 27;
+  for (; c <= 85; c += 1) {
+a /= 148372120 * c;
+if (a)
+  for (;;)
+;
+  }
+}
-- 
2.26.2



Re: [PATCH] MIPS/libphobos: Fix switchcontext.S assembly for MIPS I ISA

2020-10-12 Thread Maciej W. Rozycki
On Thu, 8 Oct 2020, Iain Buclaw wrote:

> >  Noticed in a build of a MIPS I toolchain.  I have no way to run MIPS 
> > regression-testing right now, however in `libopcodes' the L.D and S.D 
> > instructions are strict aliases valid for the MIPS II and higher ISAs, and 
> > just to double-check that I have built MIPS32r2 GCC with and without the 
> > change applied and verified with `objdump' that the respective target 
> > objects produced are identical.
> > 
> >  OK to apply to trunk, and -- as a fatal compilation error -- to backport 
> > to active release branches?
> > 
> 
> Fine with me, thanks.

 Applied to trunk, thanks.

 Jakub, Richard: I should have cc-ed you for the backports to GCC 8/9/10.  
OK to backport as a fatal build failure fix, or shall we leave this as it 
stands?  FAOD the L.D and S.D assembly instructions have been supported in 
binutils as long as the MIPS port has, i.e. from:

commit 45b1470513cfef2af6fd5532d33a54a840b4600a
Author: Ian Lance Taylor 
Date:   Wed Aug 18 19:40:37 1993 +

  Maciej


[r11-3827 Regression] FAIL: g++.dg/asan/asan_test.C -O2 (test for excess errors) on Linux/x86_64

2020-10-12 Thread sunil.k.pandey via Gcc-patches
On Linux/x86_64,

83685efd5fd1623cfc4e4c435ce2773d95d458d1 is the first bad commit
commit 83685efd5fd1623cfc4e4c435ce2773d95d458d1
Author: Martin Sebor 
Date:   Fri Oct 9 14:48:43 2020 -0600

Generalize compute_objsize to return maximum size/offset instead of failing 
(PR middle-end/97023).

caused

FAIL: gcc.dg/Wstringop-overflow-47.c  (test for warnings, line 29)
FAIL: gcc.dg/Wstringop-overflow-47.c  (test for warnings, line 32)
FAIL: gcc.dg/Wstringop-overflow-47.c  (test for warnings, line 37)
FAIL: g++.dg/asan/asan_test.C   -O2  (test for excess errors)

with GCC configured with

Configured with: ../../gcc/configure 
--prefix=/local/skpandey/gccwork/toolwork/gcc-bisect-master/master/r11-3827/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gcc.dg/Wstringop-overflow-47.c --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="dg.exp=gcc.dg/Wstringop-overflow-47.c --target_board='unix{-m64\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m32\ 
-march=cascadelake}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check 
RUNTESTFLAGS="asan.exp=g++.dg/asan/asan_test.C --target_board='unix{-m64\ 
-march=cascadelake}'"

(Please do not reply to this email; for questions about this report, contact me
at skpgkp2 at gmail dot com)


Re: [PATCH 2a/5] rs6000, vec_rlnm builtin fix arguments

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:

This patch fixes an error in how the vec_rlnm() builtin parameters are
handled.  The current tests for this builtin are compile only.  The
issue was found in the patch that adds the 128-bit operands to the
vec_rlnm() builtin.  The new test for the 128-bit operands is a compile
and run test.

Re-tested the patch on Power 9 with no regression errors.

Carl

-

gcc/ChangeLog

2020-10-08  Carl Love  

* config/rs6000/altivec.h (vec_rlnm): Fix bug in argument generation.
---
 gcc/config/rs6000/altivec.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 8a2dcda0144..f7720d136c9 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -183,7 +183,7 @@
 #define vec_recipdiv __builtin_vec_recipdiv
 #define vec_rlmi __builtin_vec_rlmi
 #define vec_vrlnm __builtin_vec_rlnm
-#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
+#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c)))
 #define vec_rsqrt __builtin_vec_rsqrt
 #define vec_rsqrte __builtin_vec_rsqrte
 #define vec_signed __builtin_vec_vsigned
-- 
2.17.1




Re: [PATCH 3/5] Add TI to TD (128-bit DFP) and TD to TI support

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:
 
This patch adds support for converting to/from 128-bit integers and
128-bit decimal floating point formats.
 
Updated ChangeLog comments.  Fixed up comments in the test program.

Re-tested the patch on Power 9 with no regression errors.
   
Carl

---

gcc/ChangeLog

2020-10-12  Carl Love  
* config/rs6000/dfp.md (floattitd2, fixtdti2): New define_insns.
* config/rs6000/rs6000-call.c (P10V_BUILTIN_VCMPNET_P, 
P10V_BUILTIN_VCMPAET_P):
New overloaded definitions.

gcc/testsuite/ChangeLog

2020-10-12  Carl Love  
* gcc.target/powerpc/int_128bit-runnable.c: Add 128-bit DFP
conversion tests.
---
 gcc/config/rs6000/dfp.md  | 14 +
 gcc/config/rs6000/rs6000-call.c   |  4 ++
 .../gcc.target/powerpc/int_128bit-runnable.c  | 61 +++
 3 files changed, 79 insertions(+)

diff --git a/gcc/config/rs6000/dfp.md b/gcc/config/rs6000/dfp.md
index 8f822732bac..0e82e315fee 100644
--- a/gcc/config/rs6000/dfp.md
+++ b/gcc/config/rs6000/dfp.md
@@ -222,6 +222,13 @@
   "dcffixq %0,%1"
   [(set_attr "type" "dfp")])
 
+(define_insn "floattitd2"
+  [(set (match_operand:TD 0 "gpc_reg_operand" "=d")
+   (float:TD (match_operand:TI 1 "gpc_reg_operand" "v")))]
+  "TARGET_POWER10"
+  "dcffixqq %0,%1"
+  [(set_attr "type" "dfp")])
+
 ;; Convert a decimal64/128 to a decimal64/128 whose value is an integer.
 ;; This is the first stage of converting it to an integer type.
 
@@ -241,6 +248,13 @@
   "TARGET_DFP"
   "dctfix %0,%1"
   [(set_attr "type" "dfp")])
+
+(define_insn "fixtdti2"
+  [(set (match_operand:TI 0 "gpc_reg_operand" "=v")
+   (fix:TI (match_operand:TD 1 "gpc_reg_operand" "d")))]
+  "TARGET_POWER10"
+  "dctfixqq %0,%1"
+  [(set_attr "type" "dfp")])
 
 ;; Decimal builtin support
 
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 87fff5c1c80..8d00a25d806 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -4967,6 +4967,8 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
 RS6000_BTI_bool_V2DI, 0 },
   { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
 RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P9V_BUILTIN_VEC_VCMPNE_P, P10V_BUILTIN_VCMPNET_P,
+RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
 
   { P9V_BUILTIN_VEC_VCMPNE_P, P9V_BUILTIN_VCMPNEFP_P,
 RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
@@ -5074,6 +5076,8 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
 RS6000_BTI_bool_V2DI, 0 },
   { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
 RS6000_BTI_INTSI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 },
+  { P9V_BUILTIN_VEC_VCMPAE_P, P10V_BUILTIN_VCMPAET_P,
+RS6000_BTI_INTSI, RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, 0 },
   { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEFP_P,
 RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
   { P9V_BUILTIN_VEC_VCMPAE_P, P9V_BUILTIN_VCMPAEDP_P,
diff --git a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
index 85ad544e22b..9d281850ee3 100644
--- a/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
@@ -38,6 +38,7 @@
 #if DEBUG
 #include 
 #include 
+#include 
 
 
 void print_i128(__int128_t val)
@@ -59,6 +60,13 @@ int main ()
   __int128_t arg1, result;
   __uint128_t uarg2;
 
+  _Decimal128 arg1_dfp128, result_dfp128, expected_result_dfp128;
+
+  struct conv_t {
+__uint128_t u128;
+_Decimal128 d128;
+  } conv, conv2;
+
   vector signed long long int vec_arg1_di, vec_arg2_di;
   vector unsigned long long int vec_uarg1_di, vec_uarg2_di, vec_uarg3_di;
   vector unsigned long long int vec_uresult_di;
@@ -2249,6 +2257,59 @@ int main ()
 abort();
 #endif
   }
+  
+  /* DFP to __int128 and __int128 to DFP conversions */
+  /* Print the DFP value as an unsigned int so we can see the bit patterns.  */
+  conv.u128 = 0x2208ULL;
+  conv.u128 = (conv.u128 << 64) | 0x4ULL;   //DFP bit pattern for integer 4
+  expected_result_dfp128 = conv.d128;
 
+  arg1 = 4;
+
+  conv.d128 = (_Decimal128) arg1;
+
+  result_dfp128 = (_Decimal128) arg1;
+  if (((conv.u128 >>64) != 0x2208ULL) &&
+  ((conv.u128 & 0x) != 0x4ULL)) {
+#if DEBUG
+printf("ERROR:  convert int128 value ");
+print_i128 (arg1);
+conv.d128 = result_dfp128;
+printf("\nto DFP value 0x%llx %llx (printed as hex bit string) ",
+  (unsigned long long)((conv.u128) >>64),
+  (unsigned long long)((conv.u128) & 0x));
+
+conv.d128 = expected_result_dfp128;
+printf("\ndoes not match expected_result = 0x%llx %llx\n\n",
+  (unsigned long long) (conv.u128>>64),
+  (unsigned long long) (conv.u128 & 

Re: [PATCH 2b/5] RS6000 add 128-bit Integer Operations

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:

This patch adds instruction and builtin support for 128-bit integer
divide, modulo, shift, and compare operations.

Fixed the references to 128-bit in ChangeLog that got missed in the
last go round.

Fixed missing spaces in emit_insn calls.

Re-tested the patch on Power 9 with no regression errors.

Carl
--


gcc/ChangeLog

2020-10-08  Carl Love  
* config/rs6000/altivec.h (vec_signextq, vec_dive, vec_mod): Add define
for new builtins.
* config/rs6000/altivec.md (UNSPEC_VMULEUD, UNSPEC_VMULESD,
UNSPEC_VMULOUD, UNSPEC_VMULOSD): New unspecs.
(altivec_eqv1ti, altivec_gtv1ti, altivec_gtuv1ti, altivec_vmuleud,
altivec_vmuloud, altivec_vmulesd, altivec_vmulosd, altivec_vrlq,
altivec_vrlqmi, altivec_vrlqmi_inst, altivec_vrlqnm,
altivec_vrlqnm_inst, altivec_vslq, altivec_vsrq, altivec_vsraq,
altivec_vcmpequt_p, altivec_vcmpgtst_p, altivec_vcmpgtut_p): New
define_insn.
(vec_widen_umult_even_v2di, vec_widen_smult_even_v2di,
vec_widen_umult_odd_v2di, vec_widen_smult_odd_v2di, altivec_vrlqmi,
altivec_vrlqnm): New define_expands.
* config/rs6000/rs6000-builtin.def (VCMPEQUT_P, VCMPGTST_P,
VCMPGTUT_P): Add macro expansions.
(VCMPGTUT, VCMPGTST, VCMPEQUT, CMPNET, CMPGE_1TI,
CMPGE_U1TI, CMPLE_1TI, CMPLE_U1TI, VNOR_V1TI_UNS, VNOR_V1TI, VCMPNET_P,
VCMPAET_P, VSIGNEXTSD2Q, VMULEUD, VMULESD, VMULOUD, VMULOSD, VRLQ,
VSLQ, VSRQ, VSRAQ, VRLQNM, DIV_V1TI, UDIV_V1TI, DIVES_V1TI, DIVEU_V1TI,
MODS_V1TI, MODU_V1TI, VRLQMI): New macro expansions.
(VRLQ, VSLQ, VSRQ, VSRAQ, DIVE, MOD, SIGNEXT): New overload expansions.
* config/rs6000/rs6000-call.c (P10_BUILTIN_VCMPEQUT,
P10V_BUILTIN_CMPGE_1TI, P10V_BUILTIN_CMPGE_U1TI,
P10V_BUILTIN_VCMPGTUT, P10V_BUILTIN_VCMPGTST,
P10V_BUILTIN_CMPLE_1TI, P10V_BUILTIN_VCMPLE_U1TI,
P10V_BUILTIN_DIV_V1TI, P10V_BUILTIN_UDIV_V1TI,
P10V_BUILTIN_VMULESD, P10V_BUILTIN_VMULEUD,
P10V_BUILTIN_VMULOSD, P10V_BUILTIN_VMULOUD,
P10V_BUILTIN_VNOR_V1TI, P10V_BUILTIN_VNOR_V1TI_UNS,
P10V_BUILTIN_VRLQ, P10V_BUILTIN_VRLQMI,
P10V_BUILTIN_VRLQNM, P10V_BUILTIN_VSLQ,
P10V_BUILTIN_VSRQ, P10V_BUILTIN_VSRAQ,
P10V_BUILTIN_VCMPGTUT_P, P10V_BUILTIN_VCMPGTST_P,
P10V_BUILTIN_VCMPEQUT_P, P10V_BUILTIN_VCMPGTUT_P,
P10V_BUILTIN_VCMPGTST_P, P10V_BUILTIN_CMPNET,
P10V_BUILTIN_VCMPNET_P, P10V_BUILTIN_VCMPAET_P,
P10V_BUILTIN_VSIGNEXTSD2Q, P10V_BUILTIN_DIVES_V1TI,
P10V_BUILTIN_MODS_V1TI, P10V_BUILTIN_MODU_V1TI):
New overloaded definitions.
(rs6000_gimple_fold_builtin) [P10V_BUILTIN_VCMPEQUT,
P10_BUILTIN_CMPNET, P10_BUILTIN_CMPGE_1TI,
P10_BUILTIN_CMPGE_U1TI, P10_BUILTIN_VCMPGTUT,
P10_BUILTIN_VCMPGTST, P10_BUILTIN_CMPLE_1TI,
P10_BUILTIN_CMPLE_U1TI]: New case statements.
(rs6000_init_builtins) [bool_V1TI_type_node, int_ftype_int_v1ti_v1ti]:
New assignments.
(altivec_init_builtins): New E_V1TImode case statement.
(builtin_function_type)[P10_BUILTIN_128BIT_VMULEUD,
P10_BUILTIN_128BIT_VMULOUD, P10_BUILTIN_128BIT_DIVEU_V1TI,
P10_BUILTIN_128BIT_MODU_V1TI, P10_BUILTIN_CMPGE_U1TI,
P10_BUILTIN_VCMPGTUT, P10_BUILTIN_VCMPEQUT]: New case statements.
* config/rs6000/rs6000.c (rs6000_handle_altivec_attribute)[E_TImode,
E_V1TImode]: New case statements.
* config/rs6000/rs6000.h (rs6000_builtin_type_index): New enum
value RS6000_BTI_bool_V1TI.
* config/rs6000/vector.md (vector_gtv1ti,vector_nltv1ti,
vector_gtuv1ti, vector_nltuv1ti, vector_ngtv1ti, vector_ngtuv1ti,
vector_eq_v1ti_p, vector_ne_v1ti_p, vector_ae_v1ti_p,
vector_gt_v1ti_p, vector_gtu_v1ti_p, vrotlv1ti3, vashlv1ti3,
vlshrv1ti3, vashrv1ti3): New define_expands.
* config/rs6000/vsx.md (UNSPEC_VSX_DIVSQ, UNSPEC_VSX_DIVUQ,
UNSPEC_VSX_DIVESQ, UNSPEC_VSX_DIVEUQ, UNSPEC_VSX_MODSQ,
UNSPEC_VSX_MODUQ): New unspecs.
(vsx_div_v1ti, vsx_udiv_v1ti, vsx_dives_v1ti, vsx_diveu_v1ti,
vsx_mods_v1ti, vsx_modu_v1ti, xxswapd_v1ti, vsx_sign_extend_v2di_v1ti):
New define_insns.
(vcmpnet): New define_expand.
* gcc/doc/extend.texi: Add documentation for the new builtins vec_rl,
vec_rlmi, vec_rlnm, vec_sl, vec_sr, vec_sra, vec_mule, vec_mulo,
vec_div, vec_dive, vec_mod, vec_cmpeq, vec_cmpne, vec_cmpgt, vec_cmplt,
vec_cmpge, vec_cmple, vec_all_eq, vec_all_ne, vec_all_gt, vec_all_lt,
vec_all_ge, vec_all_le, vec_any_eq, vec_any_ne, vec_any_gt, vec_any_lt,
vec_any_ge, vec_any_le.

gcc/testsuite/ChangeLog

2020-10-08 Carl Love  
* gcc.target/powerpc/int_128bit-runnable.c: New test file.
---
 gcc/config/rs6000/altivec.h   |4 +
 gc

Re: [PATCH 4/5] Test 128-bit shifts for just the int128 type.

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:

Patch 4 adds the vector 128-bit integer shift instruction support for
the V1TI type.

This patch also renames and moves the VSX_TI iterator from vsx.md to
VEC_TI in vector.md.  The uses of VEC_TI are also updated.

Re-tested the patch on Power 9 with no regression errors.

Carl



gcc/ChangeLog

2020-10-12  Carl Love  
* config/rs6000/altivec.md (altivec_vslq, altivec_vsrq):
Rename to altivec_vslq_, altivec_vsrq_, mode VEC_TI.
* config/rs6000/vector.md (VEC_TI): Was named VSX_TI in vsx.md.
(vashlv1ti3): Change to vashl3, mode VEC_TI.
(vlshrv1ti3): Change to vlshr3, mode VEC_TI.
* config/rs6000/vsx.md (VSX_TI): Remove define_mode_iterator. Update
uses of VSX_TI to VEC_TI.

gcc/testsuite/ChangeLog

2020-10-12  Carl Love  
* gcc.target/powerpc/int_128bit-runnable.c: Add shift_right, shift_left
tests.
---
 gcc/config/rs6000/altivec.md  | 16 -
 gcc/config/rs6000/vector.md   | 27 ---
 gcc/config/rs6000/vsx.md  | 33 +--
 .../gcc.target/powerpc/int_128bit-runnable.c  | 16 +++--
 4 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index e9623bc3285..9b70830ae00 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2220,10 +2220,10 @@
   "vsl %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "altivec_vslq"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-   (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-(match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vslq_"
+  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+   (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+(match_operand:VEC_TI 2 "vsx_register_operand" "v")))]
   "TARGET_POWER10"
   /* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
   "vslq %0,%1,%2"
@@ -2237,10 +2237,10 @@
   "vsr %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
-(define_insn "altivec_vsrq"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-   (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-  (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_insn "altivec_vsrq_"
+  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+   (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand" "v")
+  (match_operand:VEC_TI 2 "vsx_register_operand" 
"v")))]
   "TARGET_POWER10"
   /* Shift amount in needs to be in bits[57:63] of 128-bit operand. */
   "vsrq %0,%1,%2"
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index c2ae74fbe92..b2f17063ac9 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -26,6 +26,9 @@
 ;; Vector int modes
 (define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
 
+;; 128-bit int modes
+(define_mode_iterator VEC_TI [V1TI TI])
+
 ;; Vector int modes for parity
 (define_mode_iterator VEC_IP [V8HI
  V4SI
@@ -1627,17 +1630,17 @@
   "")
 
 ;; No immediate version of this 128-bit instruction
-(define_expand "vashlv1ti3"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-   (ashift:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-(match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vashl3"
+  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+   (ashift:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+(match_operand:VEC_TI 2 "vsx_register_operand")))]
   "TARGET_POWER10"
 {
   /* Shift amount in needs to be put in bits[57:63] of 128-bit operand2. */
-  rtx tmp = gen_reg_rtx (V1TImode);
+  rtx tmp = gen_reg_rtx (mode);
 
   emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
-  emit_insn (gen_altivec_vslq (operands[0], operands[1], tmp));
+  emit_insn(gen_altivec_vslq_ (operands[0], operands[1], tmp));
   DONE;
 })
 
@@ -1650,17 +1653,17 @@
   "")
 
 ;; No immediate version of this 128-bit instruction
-(define_expand "vlshrv1ti3"
-  [(set (match_operand:V1TI 0 "vsx_register_operand" "=v")
-   (lshiftrt:V1TI (match_operand:V1TI 1 "vsx_register_operand" "v")
-  (match_operand:V1TI 2 "vsx_register_operand" "v")))]
+(define_expand "vlshr3"
+  [(set (match_operand:VEC_TI 0 "vsx_register_operand" "=v")
+   (lshiftrt:VEC_TI (match_operand:VEC_TI 1 "vsx_register_operand")
+  (match_operand:VEC_TI 2 "vsx_register_operand")))]
   "TARGET_POWER10"
 {
   /* Shift amount in needs to be put into bits[57:63] of 128-bit operand2. */
-  rtx tmp = gen_reg_rtx (V1TImode);
+  rtx tmp = gen_reg_rtx (mode);
 
   emit_insn (gen_xxswapd_v1ti (tmp, operands[2]));
-  emit_insn (gen_altivec_vsrq (oper

Re: [PATCH 5/5] Conversions between 128-bit integer and floating point values.

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:
 
This patch adds support for converting to/from 128-bit integers and
128-bit decimal floating point formats using the new P10 instructions
dcffixqq and dctfixqq.  The new instructions are only used on P10 HW,
otherwise the conversions continue to use the existing SW routines.

The files fixkfti-sw.c and fixunskfti-sw.c are renamed versions of
fixkfti.c and fixunskfti.c respectively.  The function names in the
files were updated with the rename as well as some whitespace fixes.

Fixed a typo in the ChangeLog noted by Will.

Removed the target ppc_native_128bit from the test case as we no longer
have the 128-bit flag.

Re-tested the patch on Power 9 with no regression errors.
Carl

--



gcc/ChangeLog

2020-10-12  Carl Love  
* config/rs6000/rs6000.md (floatti2, floatunsti2,
fix_truncti2, fixuns_truncti2): Add
define_insn for mode IEEE 128.
* libgcc/config/rs6000/fixkfti.c: Renamed to fixkfti-sw.c.
Change calls of __fixkfti to __fixkfti_sw.
* libgcc/config/rs6000/fixunskfti.c: Renamed to fixunskfti-sw.c.
Change calls of __fixunskfti to __fixunskfti_sw.
* libgcc/config/rs6000/float128-hw.c (__floattikf_hw,
__floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw):
New functions.
* libgcc/config/rs6000/float128-ifunc.c (SW_OR_HW_ISA3_1):
New macro.
(__floattikf_resolve, __floatuntikf_resolve, __fixkfti_resolve,
__fixunskfti_resolve): Add resolve functions.
(__floattikf, __floatuntikf, __fixkfti, __fixunskfti): New
functions.
* libgcc/config/rs6000/float128-sed (__floattitf, __floatuntitf,
__fixtfti, __fixunstfti): Add editor commands to change
names.
* libgcc/config/rs6000/float128-sed-hw (__floattitf,
__floatuntitf, __fixtfti, __fixunstfti): Add editor commands
to change names.
* libgcc/config/rs6000/floattikf.c: Renamed to floattikf-sw.c.
* libgcc/config/rs6000/floatuntikf.c: Renamed to floatuntikf-sw.c.
* libgcc/config/rs6000/quad-float128.h (__floattikf_sw,
__floatuntikf_sw, __fixkfti_sw, __fixunskfti_sw, __floattikf_hw,
__floatuntikf_hw, __fixkfti_hw, __fixunskfti_hw, __floattikf,
__floatuntikf, __fixkfti, __fixunskfti): New extern declarations.
* libgcc/config/rs6000/t-float128 (floattikf, floatuntikf,
fixkfti, fixunskfti): Remove file names from fp128_ppc_funcs.
(floattikf-sw, floatuntikf-sw, fixkfti-sw, fixunskfti-sw): Add
file names to fp128_ppc_funcs.

gcc/testsuite/ChangeLog

2020-10-12  Carl Love  
* gcc.target/powerpc/fp128_conversions.c: New file.
---
 gcc/config/rs6000/rs6000.md   |  36 +++
 .../gcc.target/powerpc/fp128_conversions.c| 283 ++
 .../config/rs6000/{fixkfti.c => fixkfti-sw.c} |   4 +-
 .../rs6000/{fixunskfti.c => fixunskfti-sw.c}  |   4 +-
 libgcc/config/rs6000/float128-hw.c|  24 ++
 libgcc/config/rs6000/float128-ifunc.c |  44 ++-
 libgcc/config/rs6000/float128-sed |   4 +
 libgcc/config/rs6000/float128-sed-hw  |   4 +
 .../rs6000/{floattikf.c => floattikf-sw.c}|   4 +-
 .../{floatuntikf.c => floatuntikf-sw.c}   |   4 +-
 libgcc/config/rs6000/quad-float128.h  |  17 +-
 libgcc/config/rs6000/t-float128   |   3 +-
 12 files changed, 411 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/fp128_conversions.c
 rename libgcc/config/rs6000/{fixkfti.c => fixkfti-sw.c} (96%)
 rename libgcc/config/rs6000/{fixunskfti.c => fixunskfti-sw.c} (96%)
 rename libgcc/config/rs6000/{floattikf.c => floattikf-sw.c} (96%)
 rename libgcc/config/rs6000/{floatuntikf.c => floatuntikf-sw.c} (96%)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 694ff70635e..5db5d0b4505 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6390,6 +6390,42 @@
xscvsxddp %x0,%x1"
   [(set_attr "type" "fp")])
 
+(define_insn "floatti2"
+  [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
+   (float:IEEE128 (match_operand:TI 1 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  return  "xscvsqqp %0,%1";
+}
+  [(set_attr "type" "fp")])
+
+(define_insn "floatunsti2"
+  [(set (match_operand:IEEE128 0 "vsx_register_operand" "=v")
+   (unsigned_float:IEEE128 (match_operand:TI 1 "vsx_register_operand" 
"v")))]
+  "TARGET_POWER10"
+{
+  return  "xscvuqqp %0,%1";
+}
+  [(set_attr "type" "fp")])
+
+(define_insn "fix_truncti2"
+  [(set (match_operand:TI 0 "vsx_register_operand" "=v")
+   (fix:TI (match_operand:IEEE128 1 "vsx_register_operand" "v")))]
+  "TARGET_POWER10"
+{
+  return  "xscvqpsqz %0,%1";
+}
+  [(set_attr "type" "fp")])
+
+(define_insn "fixuns_truncti2"
+  [(set (match_operand:TI 0 "vsx_register_operand" "=v")
+   (unsigned_fix:TI (match_operand:IEEE128 1 "vsx_regi

Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations

2020-10-12 Thread Carl Love via Gcc-patches
Will, Segher:

Patch 1, adds the 128-bit sign extension instruction support and
corresponding builtin support.

Removed the blank line per Will's latest feedback.

Retested the patch on Power 9 with no regression errors.

Carl
--

gcc/ChangeLog

2020-10-08  Carl Love  
* config/rs6000/altivec.h (vec_signextll, vec_signexti): Add define
for new builtins.
* config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL):  Add
overloaded builtin definitions.
(VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D):
Add builtin expansions.
* config/rs6000/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI,
P9V_BUILTIN_VEC_VSIGNEXTLL): Add overloaded argument definitions.
* config/rs6000/vsx.md: Make define_insn vsx_sign_extend_si_v2di
visible.
* doc/extend.texi:  Add documentation for the vec_signexti and
vec_signextll builtins.

gcc/testsuite/ChangeLog

2020-10-08  Carl Love  
* gcc.target/powerpc/p9-sign_extend-runnable.c:  New test case.
---
 gcc/config/rs6000/altivec.h   |   2 +
 gcc/config/rs6000/rs6000-builtin.def  |   9 ++
 gcc/config/rs6000/rs6000-call.c   |  13 ++
 gcc/config/rs6000/vsx.md  |   2 +-
 gcc/doc/extend.texi   |  15 ++
 .../powerpc/p9-sign_extend-runnable.c | 128 ++
 6 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index f7720d136c9..562c5273f71 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -494,6 +494,8 @@
 
 #define vec_xlx __builtin_vec_vextulx
 #define vec_xrx __builtin_vec_vexturx
+#define vec_signexti  __builtin_vec_vsignexti
+#define vec_signextll __builtin_vec_vsignextll
 #endif
 
 /* Predicates.
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index e91a48ddf5f..4c2e9460949 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2715,6 +2715,8 @@ BU_P9V_OVERLOAD_1 (VPRTYBD,   "vprtybd")
 BU_P9V_OVERLOAD_1 (VPRTYBQ,"vprtybq")
 BU_P9V_OVERLOAD_1 (VPRTYBW,"vprtybw")
 BU_P9V_OVERLOAD_1 (VPARITY_LSBB,   "vparity_lsbb")
+BU_P9V_OVERLOAD_1 (VSIGNEXTI,  "vsignexti")
+BU_P9V_OVERLOAD_1 (VSIGNEXTLL, "vsignextll")
 
 /* 2 argument functions added in ISA 3.0 (power9).  */
 BU_P9_2 (CMPRB,"byte_in_range",CONST,  cmprb)
@@ -2726,6 +2728,13 @@ BU_P9_OVERLOAD_2 (CMPRB, "byte_in_range")
 BU_P9_OVERLOAD_2 (CMPRB2,  "byte_in_either_range")
 BU_P9_OVERLOAD_2 (CMPEQB,  "byte_in_set")
 
+/* Sign extend builtins that work on ISA 3.0, but not defined until ISA 3.1.  
*/
+BU_P9V_AV_1 (VSIGNEXTSB2W, "vsignextsb2w", CONST,  
vsx_sign_extend_qi_v4si)
+BU_P9V_AV_1 (VSIGNEXTSH2W, "vsignextsh2w", CONST,  
vsx_sign_extend_hi_v4si)
+BU_P9V_AV_1 (VSIGNEXTSB2D, "vsignextsb2d", CONST,  
vsx_sign_extend_qi_v2di)
+BU_P9V_AV_1 (VSIGNEXTSH2D, "vsignextsh2d", CONST,  
vsx_sign_extend_hi_v2di)
+BU_P9V_AV_1 (VSIGNEXTSW2D, "vsignextsw2d", CONST,  
vsx_sign_extend_si_v2di)
+
 /* Builtins for scalar instructions added in ISA 3.1 (power10).  */
 BU_P10_MISC_2 (CFUGED, "cfuged", CONST, cfuged)
 BU_P10_MISC_2 (CNTLZDM, "cntlzdm", CONST, cntlzdm)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index a8b520834c7..9e514a01012 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -5527,6 +5527,19 @@ const struct altivec_builtin_types 
altivec_overloaded_builtins[] = {
 RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
 RS6000_BTI_INTSI, RS6000_BTI_INTSI },
 
+  /* Sign extend builtins that work work on ISA 3.0, not added until ISA 3.1 */
+  { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSB2W,
+RS6000_BTI_V4SI, RS6000_BTI_V16QI, 0, 0 },
+  { P9V_BUILTIN_VEC_VSIGNEXTI, P9V_BUILTIN_VSIGNEXTSH2W,
+RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
+
+  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSB2D,
+RS6000_BTI_V2DI, RS6000_BTI_V16QI, 0, 0 },
+  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSH2D,
+RS6000_BTI_V2DI, RS6000_BTI_V8HI, 0, 0 },
+  { P9V_BUILTIN_VEC_VSIGNEXTLL, P9V_BUILTIN_VSIGNEXTSW2D,
+RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+
   /* Overloaded built-in functions for ISA3.1 (power10). */
   { P10_BUILTIN_VEC_CLRL, P10V_BUILTIN_VCLRLB,
 RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_UINTSI, 0 },
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4ff52455fd3..31fcffe8f33 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -4787,7 +4787,7 @@
   "vextsh2 %0,%1"
   [(set_attr "type" "vecexts")])
 
-(define_insn "*vsx_sign_extend_si_v2di"
+(define_insn "vsx_sign_exte

Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations

2020-10-12 Thread Segher Boessenkool
Hi!

On Wed, Oct 07, 2020 at 04:08:12PM -0500, will schmidt wrote:
> On Mon, 2020-10-05 at 11:51 -0700, Carl Love wrote:
> > +/* Sign extend builtins that work on ISA 3.0, but not defined until ISA 
> > 3.1.  */
> 
> I have mixed feelings about straddling the ISA 3.0 and 3.1 ; but not
> sure how to properly improve.  (I defer).

The builtins are not defined in the ISA.  The instructions generated by
these builtins are ISA 3.0 insns, but the builtins themselves were only
defined contemporary with ISA 3.1.

I don't know how to write that comment more clearly.  Well, maybe we
have to write it out, not everything is best explained in few words?


Segher


Re: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations

2020-10-12 Thread Segher Boessenkool
Hi!

On Mon, Oct 12, 2020 at 01:15:32PM -0700, Carl Love wrote:
> Patch 1, adds the 128-bit sign extension instruction support and
> corresponding builtin support.

>   * config/rs6000/altivec.h (vec_signextll, vec_signexti): Add define
>   for new builtins.
>   * config/rs6000/rs6000-builtin.def (VSIGNEXTI, VSIGNEXTLL):  Add
>   overloaded builtin definitions.
>   (VSIGNEXTSB2W, VSIGNEXTSH2W, VSIGNEXTSB2D, VSIGNEXTSH2D,VSIGNEXTSW2D):
>   Add builtin expansions.
>   * config/rs6000-call.c (P9V_BUILTIN_VEC_VSIGNEXTI,
>   P9V_BUILTIN_VEC_VSIGNEXTLL): Add overloaded argument definitions.
>   * config/rs6000/vsx.md: Make define_insn vsx_sign_extend_si_v2di
>   visible.
>   * doc/extend.texi:  Add documentation for the vec_signexti and
>   vec_signextll builtins.

> +uThe following sign extension builtins are provided.

Typo ("uThe").  Probably should be a colon at the end, while you're at it.

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/p9-sign_extend-runnable.c
> @@ -0,0 +1,128 @@
> +/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } 
> */

Why only on Linux?  (And everything in gcc.target/powerpc/ is powerpc*
always, so could just be *-*-linux*).

Looks good otherwise.


Segher


RE: [PATCH 1/5] RS6000 Add 128-bit Binary Integer sign extend operations

2020-10-12 Thread Carl Love via Gcc-patches
On Mon, 2020-10-12 at 15:43 -0500, Segher Boessenkool wrote:
> Hi!
> 
> On Wed, Oct 07, 2020 at 04:08:12PM -0500, will schmidt wrote:
> > On Mon, 2020-10-05 at 11:51 -0700, Carl Love wrote:
> > > +/* Sign extend builtins that work on ISA 3.0, but not defined
> > > until ISA 3.1.  */
> > 
> > I have mixed feelings about straddling the ISA 3.0 and 3.1 ; but
> > not
> > sure how to properly improve.  (I defer).
> 
> The builtins are not defined in the ISA.  The instructions generated
> by
> these builtins are ISA 3.0 insns, but the builtins themselves were
> only
> defined contemporary with ISA 3.1.
> 
> I don't know how to write that comment more clearly.  Well, maybe we
> have to write it out, not everything is best explained in few words?
> 

OK, we can just drop the comment altogether.

 Carl 



[Ada, FYI] make sin and cos generics inlineable

2020-10-12 Thread Alexandre Oliva


Enable the sincos optimization within callers of these
(single-argument) elementary functions.

Regstrapped on x86_64-linux-gnu, approved by Arno, checking in.

for  gcc/ada/ChangeLog

* libgnat/a-ngelfu.ads (Sin, Cos): Make the single-argument
functions inline.
---
 gcc/ada/libgnat/a-ngelfu.ads |2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/ada/libgnat/a-ngelfu.ads b/gcc/ada/libgnat/a-ngelfu.ads
index 1a8e176..70f9b7a 100644
--- a/gcc/ada/libgnat/a-ngelfu.ads
+++ b/gcc/ada/libgnat/a-ngelfu.ads
@@ -92,6 +92,7 @@ is
and then (if Left  = 0.0 then "**"'Result = 0.0);
 
function Sin (X : Float_Type'Base) return Float_Type'Base with
+ Inline,
  Post => Sin'Result in -1.0 .. 1.0
and then (if X = 0.0 then Sin'Result = 0.0);
 
@@ -101,6 +102,7 @@ is
and then (if X = 0.0 then Sin'Result = 0.0);
 
function Cos (X : Float_Type'Base) return Float_Type'Base with
+ Inline,
  Post => Cos'Result in -1.0 .. 1.0
and then (if X = 0.0 then Cos'Result = 1.0);
 

-- 
Alexandre Oliva, happy hacker
https://FSFLA.org/blogs/lxo/
Free Software Activist
GNU Toolchain Engineer


Fix tramp3d misoptimization

2020-10-12 Thread Jan Hubicka
Hi,
this patch fixes the tramp3d ICE with PGO.  It turned out to be caused by a
misupdate in ignore_edge that I introduced in the previous patch, which made us
not compute SCCs correctly with -fno-lto.

While looking for the problem I proofread the sources and also fortified them
against the situation where we insert a summary for no good reason, and noticed
a problem where early ipa-modref disabled itself in some cases.
I also noticed that param_index is streamed as uhwi while it is signed (that
wastes file space).

Bootstrapping/regtesting x86_64-linux, will commit it tomorrow if that passes.

gcc/ChangeLog:

2020-10-13  Jan Hubicka  

PR ipa/97389
* ipa-modref.c (dump_lto_records): Fix formatting of dump file.
(modref_summary::dump): Do not check loads to be non-null.
(modref_summary_lto::dump): Do not check loads to be non-null.
(merge_call_side_effects): Improve debug output.
(analyze_call): Crash when cur_summary->loads is NULL.
(analyze_function): Update.
(modref_summaries::insert): Insert only into summaries, not
optimization_summaries.
(modref_summaries::duplicate): Likewise; crash when loads or stores
are NULL.
(modref_summaries_lto::duplicate): Crash when loads or stores are NULL.
(write_modref_records): param_index is signed.
(read_modref_records): param_index is signed.
(modref_write): Crash when loads or stores are NULL.
(read_section): Compensate previous change.
(pass_modref::execute): Do not check optimization_summaries to be
non-NULL.
(ignore_edge): Fix.
(compute_parm_map): Fix formatting.
(modref_propagate_in_scc): Do not expect loads/stores to be NULL.

diff --git a/gcc/ipa-modref.c b/gcc/ipa-modref.c
index 1d4eaf8d7ad..d78cba44fe7 100644
--- a/gcc/ipa-modref.c
+++ b/gcc/ipa-modref.c
@@ -298,7 +298,7 @@ dump_lto_records (modref_records_lto *tt, FILE *out)
   r->ref ? get_alias_set (r->ref) : 0);
  if (r->every_access)
{
- fprintf (out, "  Every access\n");
+ fprintf (out, "  Every access\n");
  continue;
}
  size_t k;
@@ -314,16 +314,10 @@ dump_lto_records (modref_records_lto *tt, FILE *out)
 void
 modref_summary::dump (FILE *out)
 {
-  if (loads)
-{
-  fprintf (out, "  loads:\n");
-  dump_records (loads, out);
-}
-  if (stores)
-{
-  fprintf (out, "  stores:\n");
-  dump_records (stores, out);
-}
+  fprintf (out, "  loads:\n");
+  dump_records (loads, out);
+  fprintf (out, "  stores:\n");
+  dump_records (stores, out);
 }
 
 /* Dump summary.  */
@@ -331,16 +325,10 @@ modref_summary::dump (FILE *out)
 void
 modref_summary_lto::dump (FILE *out)
 {
-  if (loads)
-{
-  fprintf (out, "  loads:\n");
-  dump_lto_records (loads, out);
-}
-  if (stores)
-{
-  fprintf (out, "  stores:\n");
-  dump_lto_records (stores, out);
-}
+  fprintf (out, "  loads:\n");
+  dump_lto_records (loads, out);
+  fprintf (out, "  stores:\n");
+  dump_lto_records (stores, out);
 }
 
 /* Get function summary for FUNC if it exists, return NULL otherwise.  */
@@ -530,16 +518,19 @@ ignore_stores_p (tree caller, int flags)
 bool
 merge_call_side_effects (modref_summary *cur_summary,
 gimple *stmt, modref_summary *callee_summary,
-bool ignore_stores)
+bool ignore_stores, cgraph_node *callee_node)
 {
   auto_vec  parm_map;
   bool changed = false;
 
+  if (dump_file)
+fprintf (dump_file, " - Merging side effects of %s with parm map:",
+callee_node->dump_name ());
+
   parm_map.safe_grow_cleared (gimple_call_num_args (stmt));
   for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
 {
   tree op = gimple_call_arg (stmt, i);
-  STRIP_NOPS (op);
   if (TREE_CODE (op) == SSA_NAME
  && SSA_NAME_IS_DEFAULT_DEF (op)
  && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL)
@@ -563,17 +554,17 @@ merge_call_side_effects (modref_summary *cur_summary,
parm_map[i].parm_index = -2;
   else
parm_map[i].parm_index = -1;
+  if (dump_file)
+   fprintf (dump_file, " %i", parm_map[i].parm_index);
 }
+  if (dump_file)
+fprintf (dump_file, "\n");
 
   /* Merge with callee's summary.  */
-  if (cur_summary->loads)
-changed |= cur_summary->loads->merge (callee_summary->loads, &parm_map);
+  changed |= cur_summary->loads->merge (callee_summary->loads, &parm_map);
   if (!ignore_stores)
-{
-  if (cur_summary->stores)
-   changed |= cur_summary->stores->merge (callee_summary->stores,
-  &parm_map);
-}
+changed |= cur_summary->stores->merge (callee_summary->stores,
+  &parm_map);
   return changed;
 }
 
@@ -672,8 +663,7 @@ analyze_call (modref_summary *cur_summary,
 {
   if (ignore

[committed] analyzer: handle static callbacks [PR97258]

2020-10-12 Thread David Malcolm via Gcc-patches
The analyzer's initial worklist was only populated with non-static
functions in the TU (along with those that look promising for call
summaries).  Hence some static functions that were never explicitly
called but could be called via function pointers were not being
analyzed.

This patch remedies this by ensuring that functions that escape as
function pointers get added to the worklist, if they haven't been
already.  Another fix would be to simply analyze all functions that
we have a body for, but too much of the testsuite relies on static
test functions not being directly analyzed.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-3840-gaf66094d037793773eb8a49597866457f2f6a104.

gcc/analyzer/ChangeLog:
PR analyzer/97258
* engine.cc (impl_region_model_context::on_escaped_function): New
vfunc.
(exploded_graph::add_function_entry): Use m_functions_with_enodes
to implement idempotency.
(add_any_callbacks): New.
(exploded_graph::build_initial_worklist): Use the above to find
callbacks that are reachable from global initializers.
(exploded_graph::on_escaped_function): New.
* exploded-graph.h
(impl_region_model_context::on_escaped_function): New decl.
(exploded_graph::on_escaped_function): New decl.
(exploded_graph::m_functions_with_enodes): New field.
* region-model-reachability.cc
(reachable_regions::reachable_regions): Replace "store" param with
"model" param; use it to initialize m_model.
(reachable_regions::add): When getting the svalue for the region,
call get_store_value on the model rather than using an initial
value.
(reachable_regions::mark_escaped_clusters): Add ctxt param and
use it to call on_escaped_function when a function_region escapes.
* region-model-reachability.h
(reachable_regions::reachable_regions): Replace "store" param with
"model" param.
(reachable_regions::mark_escaped_clusters): Add ctxt param.
(reachable_regions::m_model): New field.
* region-model.cc (region_model::handle_unrecognized_call): Update
for change in reachable_regions ctor.
(region_model::handle_unrecognized_call): Pass ctxt to
mark_escaped_clusters.
(region_model::get_reachable_svalues): Update for change in
reachable_regions ctor.
(region_model::get_initial_value_for_global): Read-only variables
keep their initial values.
* region-model.h (region_model_context::on_escaped_function): New
vfunc.
(noop_region_model_context::on_escaped_function): New.

gcc/testsuite/ChangeLog:
PR analyzer/97258
* gcc.dg/analyzer/callbacks-1.c: New test.
* gcc.dg/analyzer/callbacks-2.c: New test.
* gcc.dg/analyzer/callbacks-3.c: New test.
---
 gcc/analyzer/engine.cc  | 70 +
 gcc/analyzer/exploded-graph.h   |  8 +++
 gcc/analyzer/region-model-reachability.cc   | 19 --
 gcc/analyzer/region-model-reachability.h|  8 ++-
 gcc/analyzer/region-model.cc| 13 ++--
 gcc/analyzer/region-model.h |  5 ++
 gcc/testsuite/gcc.dg/analyzer/callbacks-1.c | 25 
 gcc/testsuite/gcc.dg/analyzer/callbacks-2.c | 22 +++
 gcc/testsuite/gcc.dg/analyzer/callbacks-3.c | 19 ++
 9 files changed, 175 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-2.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/callbacks-3.c

diff --git a/gcc/analyzer/engine.cc b/gcc/analyzer/engine.cc
index 0e79254ad60..65d7495f26f 100644
--- a/gcc/analyzer/engine.cc
+++ b/gcc/analyzer/engine.cc
@@ -143,6 +143,12 @@ impl_region_model_context::on_unknown_change (const svalue 
*sval,
 smap->on_unknown_change (sval, is_mutable, m_ext_state);
 }
 
+void
+impl_region_model_context::on_escaped_function (tree fndecl)
+{
+  m_eg->on_escaped_function (fndecl);
+}
+
 /* class setjmp_svalue : public svalue.  */
 
 /* Implementation of svalue::accept vfunc for setjmp_svalue.  */
@@ -1931,6 +1937,15 @@ exploded_graph::~exploded_graph ()
 exploded_node *
 exploded_graph::add_function_entry (function *fun)
 {
+  /* Be idempotent.  */
+  if (m_functions_with_enodes.contains (fun))
+{
+  logger * const logger = get_logger ();
+   if (logger)
+   logger->log ("entrypoint for %qE already exists", fun->decl);
+  return NULL;
+}
+
   program_point point = program_point::from_function_entry (m_sg, fun);
   program_state state (m_ext_state);
   state.push_frame (m_ext_state, fun);
@@ -1942,6 +1957,9 @@ exploded_graph::add_function_entry (function *fun)
   /* We should never fail to add such a node.  */
   gcc_assert (enode);
   add_edge (m_origin, enode, NULL);
+
+  m_functions_with_enodes.add (fun);
+
   return enode;
 }
 
@@

Ping: [PATCH 0/2] Rework adding Power10 IEEE 128-bit min, max, and conditional move

2020-10-12 Thread Michael Meissner via Gcc-patches
Ping the following two patches to add IEEE 128-bit minimum, maximum, and
conditional move support:

https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554460.html
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554461.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Ping: [PATCH 0/9] PowerPC: Patches to enable changing the long double default to IEEE 128-bit on little endian PowerPC 64-bit Linux systems

2020-10-12 Thread Michael Meissner via Gcc-patches
Ping the following 9 patches to add support for building a GCC toolchain where
the default long double is IEEE 128-bit floating point instead of the IBM
extended double floating point.

The first patch was revised with input from Joseph Myers.  I will list that
patch in this list.  Most of these patches are independent of each other, so if
there are problems with some of the patches, please look at the other patches
after that patch:

Patch #1: Map built-in long double functions to an alternate name if long
double is IEEE 128-bit.
https://gcc.gnu.org/pipermail/gcc-patches/2020-October/555855.html

Patch #2: Update error messages about mixing __float128 and __ibm128 to deal
with long double being IEEE 128-bit:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554767.html

Patch #3: Rework libgcc 128-bit floating point conversion support:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554768.html

Patch #4: Add support in libgcc to convert between IEEE 128-bit and the three
Decimal types:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554770.html

Patch #5: Fix some tests that break if long double is IEEE 128-bit:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554772.html

Patch #6: Map the 'q' built-ins to 'l' built-ins if long double is IEEE
128-bit:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554773.html

Patch #7: Update the power10 built-in functions for IEEE 128-bit support to
support long double if long double is IEEE 128-bit:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554774.html

Patch #8: Change tests that use an explicit '__ieee128' keyword to use the
keyword we document ('__float128'):
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554775.html

Patch #9: If long double is IEEE 128-bit, use the pack_ibm128 built-in instead
of the pack_longdouble built-in in libgcc:
https://gcc.gnu.org/pipermail/gcc-patches/2020-September/554776.html

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797


Re: [PATCH 2a/5] rs6000, vec_rlnm builtin fix arguments

2020-10-12 Thread Segher Boessenkool
On Mon, Oct 12, 2020 at 01:15:39PM -0700, Carl Love wrote:
> This patch fixes an error in how the vec_rlnm() builtin parameters are
> handled.  The current test for this builtin are compile only.  The
> issue was found in the path that adds the 128-bit operands to the
> vec_rlnm() builtin.  The new test for the 128-bit operands is a compile
> and run test.

>   * config/rs6000/altivec.h (vec_rlnm): Fix bug in argument generation.

> diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
> index 8a2dcda0144..f7720d136c9 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -183,7 +183,7 @@
>  #define vec_recipdiv __builtin_vec_recipdiv
>  #define vec_rlmi __builtin_vec_rlmi
>  #define vec_vrlnm __builtin_vec_rlnm
> -#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((c)<<8)|(b)))
> +#define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c)))
>  #define vec_rsqrt __builtin_vec_rsqrt
>  #define vec_rsqrte __builtin_vec_rsqrte
>  #define vec_signed __builtin_vec_vsigned

That patch is fine of course, thanks!  Is there some testcase that trips
over the old definition?  That would have been good to have.


Segher


Re: [PATCH 2b/5] RS6000 add 128-bit Integer Operations

2020-10-12 Thread Segher Boessenkool
Hi!

On Wed, Oct 07, 2020 at 04:53:11PM -0500, will schmidt wrote:
> > +;; AIX does not support extended mnemonic xxswapd.  Use the basic
> > +;; mnemonic xxpermdi instead.
> 
> I'd wonder if there can be additional logic using ( DEFAULT_ABI ==
> ABI_AIX ) sort of check to resolve this.  It looks like this same
> comment exists in multiple places througout our *.md files, so not
> something that needs to be solved here today. 

ABI_AIX just tests the *ABI*, whether we have function descriptors
mostly.  But there is TARGET_AIX to test if we are running on AIX.

The problem with generating different assembler code on AIX is that we
then have to do more testing as well, have more opportunities to get
things wrong.  But it might be worth it for xxpermdi, the extended
mnemonics improve readability a lot.  On the other hand, once we start
this, where will it end :-)

> > new file mode 100644
> > index 000..85ad544e22b
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/powerpc/int_128bit-runnable.c
> > @@ -0,0 +1,2254 @@
> > +/* { dg-do run } */
> > +/* { dg-options "-mcpu=power10 -O2 -save-temps" } */
> > +/* { dg-require-effective-target power10_hw } */
> > +/* { dg-require-effective-target ppc_native_128bit } */
> 
> I don't see any other uses of the target option for ppc_native_128bit
> in my tree ?

I don't see where it is defined, even?


Segher


PING [PATCH] Enable GCC support for Intel Key Locker extension

2020-10-12 Thread Hongyu Wang via Gcc-patches
Hongyu Wang  于2020年9月21日周一 下午1:30写道:
>
> Hi:
>
> This patch is about to support Intel Key Locker extension.
>
> Key Locker provides a mechanism to encrypt and decrypt data with an AES
> key without having access to the raw key value.
>
> For more details, please refer to
https://software.intel.com/content/dam/develop/external/us/en/documents/343965-intel-key-locker-specification.pdf
.
>
> Bootstrap ok, regression test on i386/x86 backend is ok.
>
> OK for master?
>
> gcc/ChangeLog
>
> * common/config/i386/cpuinfo.h (get_available_features):
> Detect KL, AESKLE and WIDEKL features.
> * common/config/i386/i386-common.c
> (OPTION_MASK_ISA_KL_SET): New.
> (OPTION_MASK_ISA_WIDEKL_SET): Likewise.
> (OPTION_MASK_ISA_KL_UNSET): Likewise.
> (OPTION_MASK_ISA_WIDEKL_UNSET): Likewise.
> (OPTION_MASK_ISA2_AVX2_UNSET): Likewise.
> (OPTION_MASK_ISA2_AVX_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE4_2_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE4_1_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE4_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSSE3_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE3_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE2_UNSET): Likewise.
> (OPTION_MASK_ISA2_SSE_UNSET): Likewise.
> (ix86_handle_option): Handle kl and widekl, add dependency
chain
> for KL and SSE2.
> * common/config/i386/i386-cpuinfo.h (enum processor_features):
> (FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL): New.
> * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY
> for KL, AESKLE and WIDEKL.
> * config.gcc: Add keylockerintrin.h.
> * doc/invoke.texi: Document new option -mkl and -mwidekl.
> * doc/extend.texi: Document kl and widekl.
> * config/i386/constraints.md
> (Y1, Y2, Y3, Y4, Y5, Y6, Y7): New register constraints.
> * config/i386/cpuid.h (bit_KL, bit_AESKLE, bit_WIDEKL): New.
> * config/i386/i386-builtin-types.def ((UINT, UINT, V2DI,
V2DI, PVOID),
> (UINT, UINT, V2DI, PVOID), (VOID, V2DI, V2DI, V2DI, UINT),
> (UINT8, PV2DI, V2DI, PCVOID), (UINT8, PV2DI, PCV2DI,
PCVOID)): New
> function types.
> * config/i386/i386-builtin.def: Add
> __builtin_ia32_loadiwkey,
> __builtin_ia32_aesdec128kl_u8,
> __builtin_ia32_aesdec256kl_u8,
> __builtin_ia32_aesenc128kl_u8,
> __builtin_ia32_aesenc256kl_u8,
> __builtin_ia32_aesdecwide128kl_u8,
> __builtin_ia32_aesdecwide256kl_u8,
> __builtin_ia32_aesencwide128kl_u8,
> __builtin_ia32_aesencwide256kl_u8,
> __builtin_ia32_encodekey128_u32,
> __builtin_ia32_encodekey256_u32.
> * config/i386/i386-c.c (ix86_target_macros_internal): Handle
> kl and widekl.
> * config/i386/i386-options.c (isa2_opts): Add -mkl and
-mwidekl.
> (ix86_option_override_internal): Handle KL and WIDEKL.
> (ix86_valid_target_attribute_inner_p): Add attribute for kl
and widekl.
> * config/i386/i386-expand.c
> (ix86_expand_builtin): Expand Keylocker Builtins.
> * config/i386/i386.h (TARGET_KL): New.
> (TARGET_KL_P): Likewise.
> (TARGET_WIDEKL): Likewise.
> (TARGET_WIDEKL_P): Likewise.
> (PTA_KL): Likewise.
> (PTA_WIDEKL): Likewise.
> (enum reg_class): Add 7 new SSE register classes.
> (REG_CLASS_NAMES): Likewise.
> (REG_CLASS_CONTENTS): Likewise.
> * config/i386/i386.opt: Add new option mkl and mwidekl.
> * config/i386/keylockerintrin.h: New header file for
Keylocker.
> * config/i386/immintrin.h: Include keylockerintrin.h.
> * config/i386/sse.md (UNSPECV_LOADIWKEY): New.
> (UNSPECV_AESDEC128KLU8): Likewise.
> (UNSPECV_AESENC128KLU8): Likewise.
> (UNSPECV_AESDEC256KLU8): Likewise.
> (UNSPECV_AESENC256KLU8): Likewise.
> (UNSPECV_AESDECWIDE128KLU8): Likewise.
> (UNSPECV_AESENCWIDE128KLU8): Likewise.
> (UNSPECV_AESDECWIDE256KLU8): Likewise.
> (UNSPECV_AESENCWIDE256KLU8): Likewise.
> (UNSPECV_ENCODEKEY128U32): Likewise.
> (UNSPECV_ENCODEKEY256U32): Likewise.
> (loadiwkey): New insn pattern.
> (encodekey128u32): Likewise.
> (encodekey256u32): Likewise.
> (aesu8): Likewise.
> (aesu8): Likewise.
>
> gcc/testsuite/ChangeLog
>
> * gcc.target/i386/keylocker-aesdec128kl.c: New test.
> * gcc.target/i386/keylocker-aesdec256kl.c: Likewise.
> * gcc.target/i386/keylocker-aesdecwide128kl.c: Likewise.

Re: [PUSHED] operator_trunc_mod::wi_fold: Return VARYING for mod by zero.

2020-10-12 Thread Richard Biener via Gcc-patches
On Mon, Oct 12, 2020 at 6:57 PM Aldy Hernandez via Gcc-patches
 wrote:
>
> Division by zero should return VARYING, otherwise we propagate undefine all 
> over the
> ranger and cause bad things to happen :)

So we never should propagate UNDEFINED?

>.  This fixes MOD 0 to also return VARYING.
>
> This is Andrew's patch.  I forgot to use --author for proper patch
> attribution.
>
> Tested on x86-64 Linux.
>
> Pushed to trunk.
>
> gcc/ChangeLog:
>
> PR tree-optimization/97378
> * range-op.cc (operator_trunc_mod::wi_fold): Return VARYING for mod 
> by zero.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr97378.c: New test.
> ---
>  gcc/range-op.cc|  6 +++---
>  gcc/testsuite/gcc.dg/pr97378.c | 15 +++
>  2 files changed, 18 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr97378.c
>
> diff --git a/gcc/range-op.cc b/gcc/range-op.cc
> index ce6ae2de20c..6108de367ad 100644
> --- a/gcc/range-op.cc
> +++ b/gcc/range-op.cc
> @@ -1359,7 +1359,7 @@ operator_div::wi_fold (irange &r, tree type,
>// If we're definitely dividing by zero, there's nothing to do.
>if (wi_zero_p (type, divisor_min, divisor_max))
>  {
> -  r.set_undefined ();
> +  r.set_varying (type);
>return;
>  }
>
> @@ -2624,10 +2624,10 @@ operator_trunc_mod::wi_fold (irange &r, tree type,
>signop sign = TYPE_SIGN (type);
>unsigned prec = TYPE_PRECISION (type);
>
> -  // Mod 0 is undefined.  Return undefined.
> +  // Mod 0 is undefined.
>if (wi_zero_p (type, rh_lb, rh_ub))
>  {
> -  r.set_undefined ();
> +  r.set_varying (type);
>return;
>  }
>
> diff --git a/gcc/testsuite/gcc.dg/pr97378.c b/gcc/testsuite/gcc.dg/pr97378.c
> new file mode 100644
> index 000..27e4a1f4321
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr97378.c
> @@ -0,0 +1,15 @@
> +// { dg-do compile }
> +// { dg-options "-O2" }
> +
> +int a, b, c;
> +void d() {
> +e : {
> +  long f;
> +  long *g = &f;
> +  if ((a != 0) - (b = 0))
> +;
> +  else
> +a &= (*g %= a *= c) >= (*g || f);
> +  goto e;
> +}
> +}
> --
> 2.26.2
>


[PATCH v2] pass: Run cleanup passes before SLP [PR96789]

2020-10-12 Thread Kewen.Lin via Gcc-patches
Hi!

>> Can you repeat the compile-time measurement there?  I also wonder
>> whether we should worry about compile-time at -O[12] when SLP is not run.
>> Thus, probably rename the cleanup pass to pre_slp_scalar_cleanup and
>> gate it on && flag_slp_vectorize
> 
> Good idea, will evaluate it.
> 

Sorry for the late update.  

I evaluated compilation time on SPEC2017 INT bmks,

for several option sets:
  A1: -Ofast -funroll-loops
  A2: -O1
  A3: -O1 -funroll-loops
  A4: -O2
  A5: -O2 -funroll-loops

and for several guard conditions:
  C1: no loops after cunroll (the previous version)
  C2: any outermost loop unrolled
  C3: C1 + C2
  C4: C1 + C2 + SLP only
  C5: C2 + SLP only (the current version)

Compilation time increase percentages table:
A1  A2  A3  A4  A5
C1: 0.74%   0.07%   -0.25%  0.00%   0.10%
C2: 0.21%   0.00%   -0.19%  0.00%   0.71%
C3: 0.21%   0.00%   -0.06%  0.30%   0.00%
C4: 0.21%   -0.07%  -0.38%  0.20%   -0.19%
C5: 0.08%   0.00%   -0.38%  -0.10%  -0.05%

C2 is a better guard than C1 (C2/A1 better than C1/A1).
SLP guard is good from C2/A5 vs. C5/A5.
btw, the data could have some noise, especially when
the difference is very small.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
-
gcc/ChangeLog:

PR tree-optimization/96789
* passes.c (class pass_pre_slp_scalar_cleanup): New class.
(make_pass_pre_slp_scalar_cleanup): New function.
(pass_data_pre_slp_scalar_cleanup): New pass data.
(execute_one_pass): Add support for
TODO_force_next_scalar_cleanup.
(pending_TODOs): Init.
* passes.def (pass_pre_slp_scalar_cleanup): New pass, add
pass_fre and pass_dse as its children.
* timevar.def (TV_SCALAR_CLEANUP): New timevar.
* tree-pass.h (TODO_force_next_scalar_cleanup): New TODO flag.
(make_pass_pre_slp_scalar_cleanup): New declare.
(pending_TODOs): Likewise.
* tree-ssa-loop-ivcanon.c (tree_unroll_loops_completely_1):
Extend to set father_bbs for outermost loop.
(tree_unroll_loops_completely): Once any outermost loop gets
unrolled, set outermost_unrolled and further flag return value
with TODO_force_next_scalar_cleanup.

gcc/testsuite/ChangeLog:

PR tree-optimization/96789
* gcc.dg/tree-ssa/ssa-dse-28.c: Adjust.
* gcc.dg/tree-ssa/ssa-dse-29.c: Likewise.
* gcc.dg/vect/bb-slp-41.c: Likewise.
* gcc.dg/tree-ssa/pr96789.c: New test.
diff --git a/gcc/passes.c b/gcc/passes.c
index 6ff31ec37d7..eb938d72a42 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -71,6 +71,8 @@ using namespace gcc;
The variable current_pass is also used for statistics and plugins.  */
 opt_pass *current_pass;
 
+unsigned int pending_TODOs = 0;
+
 /* Most passes are single-instance (within their context) and thus don't
need to implement cloning, but passes that support multiple instances
*must* provide their own implementation of the clone method.
@@ -731,7 +733,54 @@ make_pass_late_compilation (gcc::context *ctxt)
   return new pass_late_compilation (ctxt);
 }
 
+/* Pre-SLP scalar cleanup, it has several cleanup passes like FRE, DSE.  */
+
+namespace {
+
+const pass_data pass_data_pre_slp_scalar_cleanup =
+{
+  GIMPLE_PASS, /* type */
+  "*pre_slp_scalar_cleanup", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_SCALAR_CLEANUP, /* tv_id */
+  ( PROP_cfg | PROP_ssa ), /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_pre_slp_scalar_cleanup : public gimple_opt_pass
+{
+public:
+  pass_pre_slp_scalar_cleanup (gcc::context *ctxt)
+: gimple_opt_pass (pass_data_pre_slp_scalar_cleanup, ctxt)
+  {
+  }
+
+  virtual bool
+  gate (function *)
+  {
+return flag_tree_slp_vectorize
+  && (pending_TODOs & TODO_force_next_scalar_cleanup);
+  }
+
+  virtual unsigned int
+  execute (function *)
+  {
+pending_TODOs &= ~TODO_force_next_scalar_cleanup;
+return 0;
+  }
 
+}; // class pass_pre_slp_scalar_cleanup
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt)
+{
+  return new pass_pre_slp_scalar_cleanup (ctxt);
+}
 
 /* Set the static pass number of pass PASS to ID and record that
in the mapping from static pass number to pass.  */
@@ -2538,6 +2587,12 @@ execute_one_pass (opt_pass *pass)
   return true;
 }
 
+  if (todo_after & TODO_force_next_scalar_cleanup)
+{
+  todo_after &= ~TODO_force_next_scalar_cleanup;
+  pending_TODOs |= TODO_force_next_scalar_cleanup;
+}
+
   do_per_function (clear_last_verified, NULL);
 
   do_per_function (update_properties_after_pass, pass);
diff --git a/gcc/passes.def b/gcc/passes.def
index c0098d755bf..c74add75068 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -288,11 +288,16 @@ along with GCC; see the file COPYING3.  If not se