[committed] remove multi-range selftests from range-op.cc

2019-11-14 Thread Aldy Hernandez
Multi-range selftests are unused, as we don't currently have them on 
trunk.  This was leftover from the original ranger branch.  Removing 
them has the added benefit that the selftests no longer need to peek at 
m_max_pairs, so value_range no longer needs to friend the selftests.


This tidies up the selftest.h header file, and allows us to put the 
range tests in the correct namespace.


Committed as obvious.
commit d542175952566dc3c43078d65a5a6e0f638a30e2
Author: Aldy Hernandez 
Date:   Thu Nov 14 08:14:59 2019 +0100

Remove higher precision range tests because they are unused.
This removes the dependency on m_max_pairs from the selftests, which has
the ultimate effect of allowing us to put the tests in the selftest
namespace as was the original plan.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index db5c5e6aeba..cfb88e67c8c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2019-11-14  Aldy Hernandez  
+
+	* range-op.cc (RANGE3): Remove.
+	(range_tests): Remove all selftests that check for multi-ranges.
+	Put tests in namespace selftest.
+	* selftest.h: Move range_tests into namespace selftest.
+	* value-range.h (class value_range): Unfriend range_tests.
+
 2019-11-14  Aldy Hernandez  
 
 	* tree-vrp.c (range_fold_binary_symbolics_p): Adapt for
diff --git a/gcc/range-op.cc b/gcc/range-op.cc
index 5c7ff60b788..ae3025c6eea 100644
--- a/gcc/range-op.cc
+++ b/gcc/range-op.cc
@@ -2803,10 +2803,8 @@ range_cast (value_range &r, tree type)
 #include "selftest.h"
 #include "stor-layout.h"
 
-// Ideally this should go in namespace selftest, but range_tests
-// needs to be a friend of class value_range so it can access
-// value_range::m_max_pairs.
-
+namespace selftest
+{
 #define INT(N) build_int_cst (integer_type_node, (N))
 #define UINT(N) build_int_cstu (unsigned_type_node, (N))
 #define INT16(N) build_int_cst (short_integer_type_node, (N))
@@ -2817,14 +2815,6 @@ range_cast (value_range &r, tree type)
 #define UCHAR(N) build_int_cstu (unsigned_char_type_node, (N))
 #define SCHAR(N) build_int_cst (signed_char_type_node, (N))
 
-#define RANGE3(A,B,C,D,E,F)		\
-( i1 = value_range (INT (A), INT (B)),	\
-  i2 = value_range (INT (C), INT (D)),	\
-  i3 = value_range (INT (E), INT (F)),	\
-  i1.union_ (i2),			\
-  i1.union_ (i3),			\
-  i1 )
-
 // Run all of the selftests within this file.
 
 void
@@ -2893,16 +2883,13 @@ range_tests ()
   ASSERT_TRUE (r0 == r1);
 
   r1 = value_range (INT (5), INT (5));
-  r1.check ();
   value_range r2 (r1);
   ASSERT_TRUE (r1 == r2);
 
   r1 = value_range (INT (5), INT (10));
-  r1.check ();
 
   r1 = value_range (integer_type_node,
 	   wi::to_wide (INT (5)), wi::to_wide (INT (10)));
-  r1.check ();
   ASSERT_TRUE (r1.contains_p (INT (7)));
 
   r1 = value_range (SCHAR (0), SCHAR (20));
@@ -3046,42 +3033,12 @@ range_tests ()
   r1.union_ (r2);
   ASSERT_TRUE (r0 == r1);
 
-  if (value_range::m_max_pairs > 2)
-{
-  // ([10,20] U [5,8]) U [1,3] ==> [1,3][5,8][10,20].
-  r0 = value_range (INT (10), INT (20));
-  r1 = value_range (INT (5), INT (8));
-  r0.union_ (r1);
-  r1 = value_range (INT (1), INT (3));
-  r0.union_ (r1);
-  ASSERT_TRUE (r0 == RANGE3 (1, 3, 5, 8, 10, 20));
-
-  // [1,3][5,8][10,20] U [-5,0] => [-5,3][5,8][10,20].
-  r1 = value_range (INT (-5), INT (0));
-  r0.union_ (r1);
-  ASSERT_TRUE (r0 == RANGE3 (-5, 3, 5, 8, 10, 20));
-}
-
   // [10,20] U [30,40] ==> [10,20][30,40].
   r0 = value_range (INT (10), INT (20));
   r1 = value_range (INT (30), INT (40));
   r0.union_ (r1);
   ASSERT_TRUE (r0 == range_union (value_range (INT (10), INT (20)),
  value_range (INT (30), INT (40))));
-  if (value_range::m_max_pairs > 2)
-{
-  // [10,20][30,40] U [50,60] ==> [10,20][30,40][50,60].
-  r1 = value_range (INT (50), INT (60));
-  r0.union_ (r1);
-  ASSERT_TRUE (r0 == RANGE3 (10, 20, 30, 40, 50, 60));
-  // [10,20][30,40][50,60] U [70, 80] ==> [10,20][30,40][50,60][70,80].
-  r1 = value_range (INT (70), INT (80));
-  r0.union_ (r1);
-
-  r2 = RANGE3 (10, 20, 30, 40, 50, 60);
-  r2.union_ (value_range (INT (70), INT (80)));
-  ASSERT_TRUE (r0 == r2);
-}
 
   // Make sure NULL and non-NULL of pointer types work, and that
   // inverses of them are consistent.
@@ -3092,35 +3049,6 @@ range_tests ()
   r0.invert ();
   ASSERT_TRUE (r0 == r1);
 
-  if (value_range::m_max_pairs > 2)
-{
-  // [10,20][30,40][50,60] U [6,35] => [6,40][50,60].
-  r0 = RANGE3 (10, 20, 30, 40, 50, 60);
-  r1 = value_range (INT (6), INT (35));
-  r0.union_ (r1);
-  ASSERT_TRUE (r0 == range_union (value_range (INT (6), INT (40)),
-  value_range (INT (50), INT (60))));
-
-  // [10,20][30,40][50,60] U [6,60] => [6,60].
-  r0 = RANGE3 (10, 20, 30, 40, 50, 60);
-  r1 = value_range (INT (6), INT (60));
-  r0.union_ (r1);
-  ASSERT_TRUE (r0 == value_range (INT (6), INT (60)));
-
-  // [10,20][30,40][50,60] U [6,70] => [6,7

Re: [PATCH] Relax lto-dump.o dependency.

2019-11-14 Thread Martin Liška

On 11/13/19 11:49 PM, Jan Hubicka wrote:

Hi.

Currently lto-dump.o relies on some FE-generated files as a
prerequisite.  That however delays LTO linking of lto-dump,
so I adjusted the dependency to LTO_OBJS, which works
as well.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/lto/ChangeLog:

2019-11-11  Martin Liska  

* Make-lang.in: Relax dependency of lto-dump.o to
LTO_OBJS which will allow faster linking (mainly with LTO).

Hi,
thanks for looking into this. Unfortunately it seems that even after
your patch lto-dump always links only after all other frontends are
finished :(


You are right :) I've got a patch that removes the unneeded
(and blocking) dependency.

I'm going to install the patch after proper testing.
Martin



Honza



>From f1f48015c12600e03c7da710f0b07185fa0c2572 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 14 Nov 2019 09:16:58 +0100
Subject: [PATCH] Remove wrong lto-dump: lto1 makefile dependency.

gcc/lto/ChangeLog:

2019-11-14  Martin Liska  

	* Make-lang.in: Remove wrong dependency
	of LTO_DUMP_EXE on LTO_EXE.
---
 gcc/lto/Make-lang.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/lto/Make-lang.in b/gcc/lto/Make-lang.in
index faee8899502..46df75ab59b 100644
--- a/gcc/lto/Make-lang.in
+++ b/gcc/lto/Make-lang.in
@@ -88,7 +88,7 @@ $(LTO_EXE): $(LTO_OBJS) $(BACKEND) $(LIBDEPS)
 	+$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
 		$(LTO_OBJS) $(BACKEND) $(BACKENDLIBS) $(LIBS)
 
-$(LTO_DUMP_EXE): $(LTO_EXE) $(LTO_DUMP_OBJS) $(BACKEND) $(LIBDEPS)
+$(LTO_DUMP_EXE): $(LTO_DUMP_OBJS) $(BACKEND) $(LIBDEPS)
 	+$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
 		$(LTO_DUMP_OBJS) $(BACKEND) $(BACKENDLIBS) $(LIBS)
 
-- 
2.24.0



Re: Fix ICE when inlining into function containing polymorphic call

2019-11-14 Thread Jan Hubicka
> On Wed, Nov 13, 2019 at 10:08:23PM +0100, Jan Hubicka wrote:
> > PR c++/92421
> > * ipa-prop.c (update_indirect_edges_after_inlining):
> > Mark parameter as used.
> > * ipa-inline.c (recursive_inlining): Reset node cache
> > after inlining.
> > (inline_small_functions): Remove checking ifdef.
> > * ipa-inline-analysis.c (do_estimate_edge_time): Verify
> > cache consistency.
> > * g++.dg/torture/pr92421.C: New testcase.
> 
> The testcase FAILs everywhere:
> FAIL: g++.dg/torture/pr92421.C   -O0  (test for excess errors)
> FAIL: g++.dg/torture/pr92421.C   -O1  (test for excess errors)
> FAIL: g++.dg/torture/pr92421.C   -O2  (test for excess errors)
> ...
> FAIL: g++.dg/torture/pr92421.C   -O0  (test for excess errors)
> Excess errors:
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:112:3: warning: no return 
> statement in function returning non-void [-Wreturn-type]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:121:3: warning: no return 
> statement in function returning non-void [-Wreturn-type]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:166:10: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:166:37: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:172:10: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:174:1: warning: no return 
> statement in function returning non-void [-Wreturn-type]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:60:10: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:60:34: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:62:10: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:62:34: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:71:8: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:71:32: warning: ISO C++ 
> forbids converting a string constant to 'char*' [-Wwrite-strings]
> /usr/src/gcc/gcc/testsuite/g++.dg/torture/pr92421.C:47:7: warning: 
> 'q::bb(char*, long int, char*, long int)::bf::bf(long int)' used but never 
> defined
> 
> I've fixed all the -Wwrite-strings warnings, all but one
> -Wreturn-type warnings (the one spot left made the ICE go away
> with unfixed g++ at -O3), the undefined bf ctor warning, tested on
> x86_64-linux, verified unfixed g++ still ICEs, committed to trunk
> as obvious:
> 
> 2019-11-14  Jakub Jelinek  
> 
>   PR ipa/92421
>   * g++.dg/torture/pr92421.C: Add -Wno-return-type to
>   dg-additional-options.  Avoid -Wwrite-string warnings, most of
>   -Wreturn-type warnings, define bf ctor.  Use struct instead of class
>   with public: at the start.

Thanks a lot and sorry for the breakage.

Honza


[committed] Add nvidia as valid OpenMP context selector vendor

2019-11-14 Thread Jakub Jelinek
Hi!

NVidia recently requested replacement of pgi with nvidia.  The following
patch just adds nvidia and keeps pgi, so that we don't warn on either of
those, at least for now.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-11-14  Jakub Jelinek  

* c-omp.c (c_omp_check_context_selector): Add nvidia to the list of
valid vendors.

* c-c++-common/gomp/declare-variant-3.c: Add testcase for vendor nvidia.

--- gcc/c-family/c-omp.c.jj 2019-11-13 12:36:30.453265616 +0100
+++ gcc/c-family/c-omp.c2019-11-13 14:02:13.895142143 +0100
@@ -2173,7 +2173,7 @@ c_omp_check_context_selector (location_t
"host", "nohost", "cpu", "gpu", "fpga", "any", NULL };
   static const char *const vendor[] = {
"amd", "arm", "bsc", "cray", "fujitsu", "gnu", "ibm", "intel",
-   "llvm", "pgi", "ti", "unknown", NULL };
+   "llvm", "nvidia", "pgi", "ti", "unknown", NULL };
   static const char *const extension[] = { NULL };
   static const char *const atomic_default_mem_order[] = {
"seq_cst", "relaxed", "acq_rel", NULL };
--- gcc/testsuite/c-c++-common/gomp/declare-variant-3.c.jj  2019-11-13 
12:48:57.234919664 +0100
+++ gcc/testsuite/c-c++-common/gomp/declare-variant-3.c 2019-11-13 
14:03:04.100379844 +0100
@@ -145,3 +145,5 @@ void f75 (void);
 void f76 (void);
 #pragma omp declare variant (f13) match (device={kind("any",any)})
 void f77 (void);
+#pragma omp declare variant (f13) match (implementation={vendor(nvidia)})
+void f78 (void);

Jakub



[committed] Support both identifiers and string literals as properties of declare variant *-name-list selectors

2019-11-14 Thread Jakub Jelinek
Hi!

OpenMP 5.0 is not clear enough here, but on Tuesday the language committee
agreed that selectors like arch/isa/kind/vendor/extension
should accept both identifiers and string literals.
kind(host) and kind("host") are then the same thing, but there can be
properties that are not valid identifiers, and those can only be expressed
as string literals, like isa("sse4.2").
atomic_default_mem_order, being a clause on the requires directive, must
still use only an identifier.

The following patch implements it.  Bootstrapped/regtested on x86_64-linux
and i686-linux, committed to trunk.
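
For illustration, a minimal sketch (not part of the patch; the variant
function names are made up, compile with -fopenmp):

/* kind(host) and kind("host") are equivalent spellings, while "sse4.2"
   can only be written as a string literal because it is not a valid
   identifier.  */
void f_host (void);
void f_sse42 (void);

#pragma omp declare variant (f_host) match (device={kind("host")})
#pragma omp declare variant (f_sse42) match (device={isa("sse4.2")})
void f_base (void);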

2019-11-14  Jakub Jelinek  

* omp-general.c (omp_context_name_list_prop): New function.
(omp_context_selector_matches): Use it.  Return 0 if it returns
NULL.
(omp_context_selector_props_compare): Allow equivalency of an
identifier and a string literal containing no embedded zeros.
c-family/
* c-omp.c (c_omp_check_context_selector): Handle name lists
containing string literals.  Don't diagnose atomic_default_mem_order
with multiple props.
c/
* c-parser.c (c_parser_omp_context_selector): Rename
CTX_PROPERTY_IDLIST to CTX_PROPERTY_NAME_LIST, add CTX_PROPERTY_ID.
Use CTX_PROPERTY_ID for atomic_default_mem_order, only allow a single
identifier in that.  For CTX_PROPERTY_NAME_LIST, allow identifiers
and string literals.
cp/
* parser.c (cp_parser_omp_context_selector): Rename
CTX_PROPERTY_IDLIST to CTX_PROPERTY_NAME_LIST, add CTX_PROPERTY_ID.
Use CTX_PROPERTY_ID for atomic_default_mem_order, only allow a single
identifier in that.  For CTX_PROPERTY_NAME_LIST, allow identifiers
and string literals.
* pt.c (tsubst_attribute): Fix up STRING_CST handling if allow_string.
testsuite/
* c-c++-common/gomp/declare-variant-2.c: Adjust expected diagnostics,
add a test for atomic_default_mem_order with a string literal.
* c-c++-common/gomp/declare-variant-3.c: Use string literal props
in a few random places, add a few string literal prop related tests.
* c-c++-common/gomp/declare-variant-8.c: Likewise.
* c-c++-common/gomp/declare-variant-9.c: Use string literal props
in a few random places.
* c-c++-common/gomp/declare-variant-10.c: Likewise.
* c-c++-common/gomp/declare-variant-11.c: Likewise.
* c-c++-common/gomp/declare-variant-12.c: Likewise.
* g++.dg/gomp/declare-variant-7.C: Likewise.

--- gcc/omp-general.c.jj2019-11-11 21:04:06.675237225 +0100
+++ gcc/omp-general.c   2019-11-13 12:29:34.686583916 +0100
@@ -652,6 +652,23 @@ omp_maybe_offloaded (void)
   return false;
 }
 
+/* Return a name from PROP, a property in selectors accepting
+   name lists.  */
+
+static const char *
+omp_context_name_list_prop (tree prop)
+{
+  if (TREE_PURPOSE (prop))
+return IDENTIFIER_POINTER (TREE_PURPOSE (prop));
+  else
+{
+  const char *ret = TREE_STRING_POINTER (TREE_VALUE (prop));
+  if ((size_t) TREE_STRING_LENGTH (TREE_VALUE (prop)) == strlen (ret) + 1)
+   return ret;
+  return NULL;
+}
+}
+
 /* Return 1 if context selector matches the current OpenMP context, 0
if it does not and -1 if it is unknown and need to be determined later.
Some properties can be checked right away during parsing (this routine),
@@ -701,8 +718,11 @@ omp_context_selector_matches (tree ctx)
  if (set == 'i' && !strcmp (sel, "vendor"))
for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
  {
-   const char *prop = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
-   if (!strcmp (prop, " score") || !strcmp (prop, "gnu"))
+   const char *prop = omp_context_name_list_prop (t3);
+   if (prop == NULL)
+ return 0;
+   if ((!strcmp (prop, " score") && TREE_PURPOSE (t3))
+   || !strcmp (prop, "gnu"))
  continue;
return 0;
  }
@@ -750,7 +770,9 @@ omp_context_selector_matches (tree ctx)
  if (set == 'd' && !strcmp (sel, "arch"))
for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
  {
-   const char *arch = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
+   const char *arch = omp_context_name_list_prop (t3);
+   if (arch == NULL)
+ return 0;
int r = 0;
if (targetm.omp.device_kind_arch_isa != NULL)
  r = targetm.omp.device_kind_arch_isa (omp_device_arch,
@@ -844,7 +866,9 @@ omp_context_selector_matches (tree ctx)
  if (set == 'd' && !strcmp (sel, "kind"))
for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
  {
-   const char *prop = IDENTIFIER_POINTER (TREE_PURPOSE (t

Re: [Patch] PR fortran/92470 Fixes for CFI_address

2019-11-14 Thread Andreas Schwab
On Nov 14 2019, Jakub Jelinek wrote:

> --- gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c.jj 2019-11-13 
> 10:54:37.081172852 +0100
> +++ gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c2019-11-14 
> 01:19:36.704285484 +0100
> @@ -2,7 +2,7 @@
>  
>  #include 
>  #include 
> -#include "ISO_Fortran_binding.h"
> +#include "../../../libgfortran/ISO_Fortran_binding.h"

Shouldn't that be fixed generically, by directing the compiler to use the
uninstalled headers?

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


[committed] Diagnose negative score in declare variant

2019-11-14 Thread Jakub Jelinek
Hi!

We've also agreed that score arguments need to be non-negative.

Implemented thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.
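
A minimal sketch of the score modifier this diagnoses (hypothetical
example, not from the patch; f_gnu is made up, compile with -fopenmp):

void f_gnu (void);

/* Accepted: constant, non-negative score.  */
#pragma omp declare variant (f_gnu) match (implementation={vendor(score(10): gnu)})
void f_ok (void);

/* Now rejected with "score argument must be non-negative":
   #pragma omp declare variant (f_gnu) match (implementation={vendor(score(-1): gnu)})
   void f_bad (void);  */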

2019-11-14  Jakub Jelinek  

* c-parser.c (c_parser_omp_context_selector): Don't require score
argument to fit into shwi, just to be INTEGER_CST.  Diagnose
negative score.

* parser.c (cp_parser_omp_context_selector): Don't require score
argument to fit into shwi, just to be INTEGER_CST.  Diagnose
negative score.
* pt.c (tsubst_attribute): Likewise.

* c-c++-common/gomp/declare-variant-2.c: Add test for non-integral
score and for negative score.
* c-c++-common/gomp/declare-variant-3.c: Add test for zero score.
* g++.dg/gomp/declare-variant-8.C: Add test for negative and zero
scores.

--- gcc/c/c-parser.c.jj 2019-11-13 12:41:14.625947998 +0100
+++ gcc/c/c-parser.c2019-11-13 14:12:45.650550207 +0100
@@ -19594,9 +19594,12 @@ c_parser_omp_context_selector (c_parser
  mark_exp_read (score);
  score = c_fully_fold (score, false, NULL);
  if (!INTEGRAL_TYPE_P (TREE_TYPE (score))
- || !tree_fits_shwi_p (score))
+ || TREE_CODE (score) != INTEGER_CST)
error_at (token->location, "score argument must be "
  "constant integer expression");
+ else if (tree_int_cst_sgn (score) < 0)
+   error_at (token->location, "score argument must be "
+ "non-negative");
  else
properties = tree_cons (get_identifier (" score"),
score, properties);
--- gcc/cp/parser.c.jj  2019-11-13 13:35:50.337188794 +0100
+++ gcc/cp/parser.c 2019-11-13 14:16:48.423864195 +0100
@@ -40565,11 +40565,16 @@ cp_parser_omp_context_selector (cp_parse
  if (score != error_mark_node)
{
  score = fold_non_dependent_expr (score);
- if (!value_dependent_expression_p (score)
- && (!INTEGRAL_TYPE_P (TREE_TYPE (score))
- || !tree_fits_shwi_p (score)))
+ if (value_dependent_expression_p (score))
+   properties = tree_cons (get_identifier (" score"),
+   score, properties);
+ else if (!INTEGRAL_TYPE_P (TREE_TYPE (score))
+  || TREE_CODE (score) != INTEGER_CST)
error_at (token->location, "score argument must be "
  "constant integer expression");
+ else if (tree_int_cst_sgn (score) < 0)
+   error_at (token->location, "score argument must be "
+ "non-negative");
  else
properties = tree_cons (get_identifier (" score"),
score, properties);
--- gcc/cp/pt.c.jj  2019-11-13 13:47:08.181894187 +0100
+++ gcc/cp/pt.c 2019-11-13 14:20:48.433220170 +0100
@@ -11172,7 +11172,9 @@ tsubst_attribute (tree t, tree *decl_p,
v = tsubst_expr (v, args, complain, in_decl, true);
v = fold_non_dependent_expr (v);
if (!INTEGRAL_TYPE_P (TREE_TYPE (v))
-   || !tree_fits_shwi_p (v))
+   || (TREE_PURPOSE (t3) == score
+   ? TREE_CODE (v) != INTEGER_CST
+   : !tree_fits_shwi_p (v)))
  {
location_t loc
  = cp_expr_loc_or_loc (TREE_VALUE (t3),
@@ -11189,6 +11191,16 @@ tsubst_attribute (tree t, tree *decl_p,
 "integer expression");
return NULL_TREE;
  }
+   else if (TREE_PURPOSE (t3) == score
+&& tree_int_cst_sgn (v) < 0)
+ {
+   location_t loc
+ = cp_expr_loc_or_loc (TREE_VALUE (t3),
+   match_loc);
+   error_at (loc, "score argument must be "
+  "non-negative");
+   return NULL_TREE;
+ }
TREE_VALUE (t3) = v;
  }
}
--- gcc/testsuite/c-c++-common/gomp/declare-variant-2.c.jj  2019-11-13 
14:27:37.0 +0100
+++ gcc/testsuite/c-c++-common/gomp/declare-variant-2.c 2019-11-13 
14:58:08.550213556 +0100
@@ -153,3 +153,7 @@ void f74 (void);
 void f75 (void);
 #pragma omp declare variant (f1) 
match(imple

Re: [Patch] PR fortran/92470 Fixes for CFI_address

2019-11-14 Thread Jakub Jelinek
On Thu, Nov 14, 2019 at 09:30:42AM +0100, Andreas Schwab wrote:
> On Nov 14 2019, Jakub Jelinek wrote:
> 
> > --- gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c.jj   2019-11-13 
> > 10:54:37.081172852 +0100
> > +++ gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c  2019-11-14 
> > 01:19:36.704285484 +0100
> > @@ -2,7 +2,7 @@
> >  
> >  #include 
> >  #include 
> > -#include "ISO_Fortran_binding.h"
> > +#include "../../../libgfortran/ISO_Fortran_binding.h"
> 
> Shouldn't that be fixed generically, by directing the compiler to use the
> uninstalled headers?

Maybe it just needs somebody to spend the time and do the header discovery
in gfortran.dg.
The above is what is used in other testcases.

Jakub



[committed] Change sse4_1 to sse4.1 and sse4_2 to sse4.2

2019-11-14 Thread Jakub Jelinek
Hi!

With the support for string literals, there is no need to obfuscate
sse4.2 to sse4_2; not doing it makes it easier to describe, because
the option is -msse4.2, not -msse4_2.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.
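
A hypothetical before/after sketch (f_sse42 is made up, and the old
underscore spelling is assumed from the previous i386-options.c mapping;
compile with -fopenmp):

void f_sse42 (void);

/* Old spelling, with the dot obfuscated to an underscore:
   #pragma omp declare variant (f_sse42) match (device={isa(sse4_2)})  */

/* New spelling, matching the -msse4.2 option name directly: */
#pragma omp declare variant (f_sse42) match (device={isa("sse4.2")})
void f (void);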

2019-11-14  Jakub Jelinek  

* config/i386/i386-options.c (ix86_omp_device_kind_arch_isa): Don't
change sse4.2 to sse4_2 and sse4.1 to sse4_1.
* config/i386/t-omp-device (omp-device-properties-i386): Likewise.

* c-c++-common/gomp/declare-variant-11.c: Add "sse4.2" and "sse4.1"
test.

--- gcc/config/i386/i386-options.c.jj   2019-11-13 10:54:56.641875411 +0100
+++ gcc/config/i386/i386-options.c  2019-11-13 15:24:19.266370999 +0100
@@ -316,19 +316,13 @@ ix86_omp_device_kind_arch_isa (enum omp_
  HOST_WIDE_INT mask = i ? ix86_isa_flags2 : ix86_isa_flags;
  for (size_t n = 0; n < nopts; n++)
{
- const char *option = opts[n].option + 2;
- /* -msse4.2 and -msse4.1 options contain dot, which is not valid
-in identifiers.  Use underscore instead, and handle sse4
-as an alias to sse4_2.  */
+ /* Handle sse4 as an alias to sse4.2.  */
  if (opts[n].mask == OPTION_MASK_ISA_SSE4_2)
{
- option = "sse4_2";
  if (strcmp (name, "sse4") == 0)
return (mask & opts[n].mask) != 0 ? 1 : -1;
}
- else if (opts[n].mask == OPTION_MASK_ISA_SSE4_1)
-   option = "sse4_1";
- if (strcmp (name, option) == 0)
+ if (strcmp (name, opts[n].option + 2) == 0)
return (mask & opts[n].mask) != 0 ? 1 : -1;
}
}
--- gcc/config/i386/t-omp-device.jj 2019-11-02 00:33:48.374547757 +0100
+++ gcc/config/i386/t-omp-device2019-11-13 15:22:01.173467114 +0100
@@ -3,4 +3,4 @@ omp-device-properties-i386: $(srcdir)/co
echo arch: x86 x86_64 i386 i486 i586 i686 ia32 >> $@
echo isa: sse4 `sed -n '/^static struct ix86_target_opts 
isa2\?_opts\[\] =/,/^};/p' \
  $(srcdir)/config/i386/i386-options.c | \
- sed -n 's/",.*$$//;s/\./_/;s/^  { "-m//p'` >> $@
+ sed -n 's/",.*$$//;s/^  { "-m//p'` >> $@
--- gcc/testsuite/c-c++-common/gomp/declare-variant-11.c.jj 2019-11-13 
12:56:27.328081745 +0100
+++ gcc/testsuite/c-c++-common/gomp/declare-variant-11.c2019-11-13 
15:25:40.045144859 +0100
@@ -14,7 +14,7 @@ void f05 (void);
 void f06 (void);
 void f07 (void);
 void f08 (void);
-#pragma omp declare variant (f07) match (device={isa(sse4,sse3,"avx")})
+#pragma omp declare variant (f07) match (device={isa(sse4,"sse4.1","sse4.2",sse3,"avx")})
 #pragma omp declare variant (f08) match (device={isa("avx",sse3)})
 void f09 (void);
 void f10 (void);

Jakub



Re: [PATCH] Enable VPOPCNTDQ for icelake-{client,server} and tigerlake.

2019-11-14 Thread Martin Liška

On 11/13/19 5:14 PM, Uros Bizjak wrote:

On Wed, Nov 13, 2019 at 4:25 PM Martin Liška  wrote:


Hi.

The patch adds a missing feature for PTA_ICELAKE_CLIENT and
inherited CPUs. One can see that:
https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-11-13  Martin Liska  

 PR target/92389
 * config/i386/i386.h: Add PTA_AVX512VPOPCNTDQ to
 	PTA_ICELAKE_CLIENT which is later inherited by
 PTA_ICELAKE_SERVER and PTA_TIGERLAKE.



OK.

Thanks,
Uros.


Thank you for the review.

May I install the patch to all active branches as well?
Martin




---
   gcc/config/i386/i386.h | 2 +-
   1 file changed, 1 insertion(+), 1 deletion(-)






Re: [Patch] PR fortran/92470 Fixes for CFI_address

2019-11-14 Thread Tobias Burnus

On 11/14/19 1:31 AM, Jakub Jelinek wrote:
This broke on hosts where the system compiler doesn't have 
ISO_Fortran_binding.h header installed (e.g. GCC 8 and earlier). 


Aha, that's the reason.


Fixed thusly, tested on x86_64-linux, committed to trunk as obvious.
2019-11-14  Jakub Jelinek  

* gfortran.dg/ISO_Fortran_binding_17.c: Include
../../../libgfortran/ISO_Fortran_binding.h rather than
ISO_Fortran_binding.h.


Thanks, Jakub, for the fix and sorry all for the breakage. I'll try to
remember not to repeat this mistake :-/


Likewise fixed on GCC 9 as attached.

Tobias


commit 64785acc4083f9c18450e1ed37789a59a5572230
Author: burnus 
Date:   Thu Nov 14 08:02:42 2019 +

Fix gfortran.dg/ISO_Fortran_binding_17.c using rel. #include

PR fortran/92470
PR fortran/92500
* gfortran.dg/ISO_Fortran_binding_17.c: Include
ISO_Fortran_binding.h with relative path.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-9-branch@278201 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9ed961d3970..8f97012e5cd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2019-11-13  Tobias Burnus  
+
+	PR fortran/92470
+	PR fortran/92500
+	* gfortran.dg/ISO_Fortran_binding_17.c: Include
+	ISO_Fortran_binding.h with relative path.
+
 2019-11-13  Tobias Burnus  
 
 	Backport from mainline
diff --git a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c
index b0893cc15e8..14dfcc90c4e 100644
--- a/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c
+++ b/gcc/testsuite/gfortran.dg/ISO_Fortran_binding_17.c
@@ -2,7 +2,7 @@
 
 #include 
 #include 
-#include "ISO_Fortran_binding.h"
+#include "../../../libgfortran/ISO_Fortran_binding.h"
 
 void Csub(const CFI_cdesc_t *, size_t, CFI_index_t invalid);
 


[Patch][ARM] backport r266665 to gcc8

2019-11-14 Thread Christophe Lyon
Hi,

Is it OK to backport r266665 to gcc8 (Ensure dotproduct is only
enabled on armv8 neon)?
I've noticed unnecessary failure of gcc.target/arm/simd/vdot-compile.c
after I upgraded to recent binutils.

Thanks,

Christophe
[ARM] Ensure dotproduct is only enabled on armv8 neon

2019-11-14  Christophe Lyon  

Backport r266665 from mainline.
gcc/
2018-11-30  Sam Tebbs  

* config/arm/arm.h (TARGET_DOTPROD): Add TARGET_VFP5 constraint.

gcc/testsuite/
2018-11-30  Sam Tebbs  

* gcc.target/arm/neon-dotprod-restriction.c: New file.
* lib/target-supports.exp
(check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache): Include
stdint.h.

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index b12ae38..febd6b0 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -211,7 +211,7 @@ extern tree arm_fp16_type_node;
 #define TARGET_NEON_RDMA (TARGET_NEON && arm_arch8_1)
 
 /* Supports the Dot Product AdvSIMD extensions.  */
-#define TARGET_DOTPROD (TARGET_NEON\
+#define TARGET_DOTPROD (TARGET_NEON && TARGET_VFP5 \
&& bitmap_bit_p (arm_active_target.isa, \
isa_bit_dotprod)\
&& arm_arch8_2)
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index e6b84041..857884d 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4510,6 +4510,7 @@ proc 
check_effective_target_arm_v8_2a_dotprod_neon_ok_nocache { } {
 foreach flags {"" "-mfloat-abi=softfp -mfpu=neon-fp-armv8" 
"-mfloat-abi=hard -mfpu=neon-fp-armv8"} {
 if { [check_no_compiler_messages_nocache \
   arm_v8_2a_dotprod_neon_ok object {
+#include 
 #if !defined (__ARM_FEATURE_DOTPROD)
 #error "__ARM_FEATURE_DOTPROD not defined"
 #endif


Re: [PATCH] Enable VPOPCNTDQ for icelake-{client,server} and tigerlake.

2019-11-14 Thread Uros Bizjak
On Thu, Nov 14, 2019 at 9:36 AM Martin Liška  wrote:
>
> On 11/13/19 5:14 PM, Uros Bizjak wrote:
> > On Wed, Nov 13, 2019 at 4:25 PM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> The patch adds a missing feature for PTA_ICELAKE_CLIENT and
> >> inherited CPUs. One can see that:
> >> https://en.wikipedia.org/wiki/AVX-512#CPUs_with_AVX-512
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Ready to be installed?
> >> Thanks,
> >> Martin
> >>
> >> gcc/ChangeLog:
> >>
> >> 2019-11-13  Martin Liska  
> >>
> >>  PR target/92389
> >>  * config/i386/i386.h: Add PTA_AVX512VPOPCNTDQ to
> >>  PTA_ICELAKE_CLIENT which is later inherited by
> >>  PTA_ICELAKE_SERVER and PTA_TIGERLAKE.
> >
> >
> > OK.
> >
> > Thanks,
> > Uros.
>
> Thank you for the review.
>
> May I install the patch to all active branches as well?

Yes, it is simple enough and kind of obvious.

Thanks,
Uros.

> Martin
>
> >
> >> ---
> >>gcc/config/i386/i386.h | 2 +-
> >>1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >>
>


Re: [PATCH][DOC] Document -fallocation-dce.

2019-11-14 Thread Martin Liška

On 11/13/19 4:25 PM, Sandra Loosemore wrote:

On 11/13/19 6:45 AM, Martin Liška wrote:

Hi.

The patch is about a documentation entry for a newly added
option in GCC 10.

Ready for trunk?


I thought this would be OK, until I saw:


+Enabled by default.


At the beginning of invoke.texi where it is talking about options having both -foo 
and -fno-foo forms, it says "This manual documents
only one of these two forms, whichever one is not the default."  So I think you 
should be documenting -fno-allocation-dce here instead of the positive form.


Thank you for the review.
I've changed that and I'm going to install the patch.

Martin



-Sandra


>From e1ebd30612d688fb03024a48e38f228d273545c4 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 13 Nov 2019 14:44:41 +0100
Subject: [PATCH] Document -fallocation-dce.

gcc/ChangeLog:

2019-11-13  Martin Liska  

	PR other/92329
	* doc/invoke.texi: Document -fallocation-dce.
---
 gcc/doc/invoke.texi | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 00eb7e77808..10f144ccc39 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -407,7 +407,7 @@ Objective-C and Objective-C++ Dialects}.
 -falign-jumps[=@var{n}[:@var{m}:[@var{n2}[:@var{m2} @gol
 -falign-labels[=@var{n}[:@var{m}:[@var{n2}[:@var{m2} @gol
 -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2} @gol
--fallow-store-data-races @gol
+-fno-allocation-dce -fallow-store-data-races @gol
 -fassociative-math  -fauto-profile  -fauto-profile[=@var{path}] @gol
 -fauto-inc-dec  -fbranch-probabilities @gol
 -fcaller-saves @gol
@@ -10267,6 +10267,10 @@ The maximum allowed @var{n} option value is 65536.
 
 Enabled at levels @option{-O2}, @option{-O3}.
 
+@item -fno-allocation-dce
+@opindex fno-allocation-dce
+Do not remove unused C++ allocations in dead code elimination.
+
 @item -fallow-store-data-races
 @opindex fallow-store-data-races
 Allow the compiler to introduce new data races on stores.
-- 
2.24.0



Re: Ping*2: [PATCH v5] Missed function specialization + partial devirtualization

2019-11-14 Thread Jan Hubicka
>   PR ipa/69678
>   * cgraph.c (symbol_table::create_edge): Init speculative_id.
>   (cgraph_edge::make_speculative): Add param for setting speculative_id.
>   (cgraph_edge::speculative_call_info): Find reference by
>   speculative_id for multiple indirect targets.
>   (cgraph_edge::resolve_speculation): Decrease the speculations
>   for indirect edge, drop it's speculative if not direct target
>   left.
>   (cgraph_edge::redirect_call_stmt_to_callee): Likewise.
>   (cgraph_node::verify_node): Don't report error if speculative
>   edge not include statement.
>   (cgraph_edge::has_multiple_indirect_call_p): New function.
>   (cgraph_edge::has_indirect_call_p): New function.
>   * cgraph.h (struct indirect_target_info): New struct.
>   (indirect_call_targets): New vector variable.
>   (make_speculative): Add param for setting speculative_id.
>   (cgraph_edge::has_multiple_indirect_call_p): New declare.
>   (cgraph_edge::has_indirect_call_p): New declare.
>   (speculative_id): New variable.
>   * cgraphclones.c (cgraph_node::create_clone): Clone speculative_id.
>   * cgraphunit.c: Fix comments typo.
>   * ipa-comdats.c: Fix comments typo.
>   * ipa-inline.c (inline_small_functions): Fix iterator update.
>   * ipa-profile.c (ipa_profile_generate_summary): Add indirect
>   multiple targets logic.
>   (ipa_profile): Likewise.
>   * ipa-ref.h (speculative_id): New variable.
>   * ipa.c (process_references): Fix typo.
>   * lto-cgraph.c (lto_output_edge): Add indirect multiple targets
>   logic.  Stream out speculative_id.
>   (input_edge): Likewise.
>   * predict.c (dump_prediction): Remove edges count assert to be
>   precise.
>   * symtab.c (symtab_node::create_reference): Init speculative_id.
>   (symtab_node::clone_references): Clone speculative_id.
>   (symtab_node::clone_referring): Clone speculative_id.
>   (symtab_node::clone_reference): Clone speculative_id.
>   (symtab_node::clear_stmts_in_references): Clear speculative_id.
>   * tree-inline.c (copy_bb): Duplicate all the speculative edges
>   if indirect call contains multiple speculative targets.
>   * tree-profile.c (gimple_gen_ic_profiler): Use the new variable
>   __gcov_indirect_call.counters and __gcov_indirect_call.callee.
>   (gimple_gen_ic_func_profiler): Likewise.
>   (pass_ipa_tree_profile::gate): Fix comment typos.
>   * value-prof.h  (check_ic_target): Remove.
>   * value-prof.c  (gimple_value_profile_transformations):
>   Use void function gimple_ic_transform.
>   * value-prof.c  (gimple_ic_transform): Handle topn case.
>   Fix comment typos.  Change it to a void function.
> 
> gcc/testsuite/ChangeLog
> 
>   2019-11-14  Xiong Hu Luo  
> 
>   PR ipa/69678
>   * gcc.dg/tree-prof/indir-call-prof-topn.c: New testcase.
>   * gcc.dg/tree-prof/crossmodule-indir-call-topn-1.c: New testcase.
>   * gcc.dg/tree-prof/crossmodule-indir-call-topn-1a.c: New testcase.
>   * gcc.dg/tree-prof/crossmodule-indir-call-topn-2.c: New testcase.
>   * lib/scandump.exp: Dump executable file name.
>   * lib/scanwpaipa.exp: New scan-pgo-wap-ipa-dump.
> @@ -1089,6 +1093,38 @@ cgraph_edge::make_speculative (cgraph_node *n2, 
> profile_count direct_count)
> call) and if one of them exists, all of them must exist.
>  
> Given speculative call edge, return all three components.
> +
> +   For some indirect edge, it may map to multiple direct edges, i.e.  1:N.
> +   check the speculative_id to return all the three components for specified
> +   direct edge or indirect edge.
> +   If input is indirect, caller of this function will get the direct edge 
> one by
> +   one, get_edge will just return one of the direct edge mapped to the 
> indirect
> +   edge, the returned direct edge will be resolved or redirected by the 
> caller,
> +   then the number of indirect calls (speculations) is decreased in each access.
> +   If input is direct, this function will get the indirect edge and reference
> +   with matched speculative_id, the returned edge will also be resolved or
> +   redirected, decrease the speculations accordingly.
> +   Speculations of indirect edge will be dropped only if all direct edges
> +   are handled.
> +
> +   e.g.  for indirect edge E statement "call call_dest":
> +
> +   Redirect N3 after redirected N2:
> +
> +   if (call_dest == N2)
> + n2 ();
> +   else if (call_dest == N3)
> + n3 ();
> +   else
> + call call_dest
> +
> +   Resolve N3 and only redirect N2:
> +
> +   if (call_dest == N2)
> + n2 ();
> +   else
> + call call_dest
> +

I find this comment hard to read.  The reader probably does not know what
speculative edges are, and we only want to describe the speculative_call_info
function, not also the way we resolve calls.  So what about something
like this:

  Speculative calls represent a transformation of indire

Re: [PATCH 0/2] Introduce a new GCC option, --record-gcc-command-line

2019-11-14 Thread Martin Liška

On 11/13/19 8:23 PM, Jeff Law wrote:

On 11/13/19 2:37 AM, Martin Liška wrote:


As Nick also mentioned many times, -grecord-gcc-switches is in DWARF
and this causes a great disadvantage: it gets stripped out.


Well, that's still something I disagree with. I bet Red Hat, similarly to
openSUSE, also builds all packages with debug info, which
is later stripped and put into a foo-devel package. That's why one can
easily read the compile options from these sub-packages.
My motivation is to write an rpm linter check that will verify that all
object files really used the flags that we expect.


Hi.


Right.  We inject -g into the default build flags.  We extract the
resultant debug info into a .debuginfo RPM.


Which means it would be possible for you to run an rpm check on the .debuginfo
RPM packages. Right?



The original motivation behind annobin was to verify how well the
injection mechanism worked.


I thought the original motivation was to provide a sanity check at the RPM level
which verifies that all object files use the proper $Optflags
(mainly security hardening ones like -D_FORTIFY_SOURCE=1, 
-fstack-protector-strong, -fstack-clash-protection, ..)?
And so that you can guarantee that the packages are "safe" :)

Martin


We originally wanted to do something like
what Egeyar has done, but it's been proposed in the past and was highly
controversial.  Rather than fight that problem or have a Red Hat
specific patch, we built annobin/annocheck which (IMHO) handles this
kind of need quite well.


Jeff





[PATCH] Make flag_thread_jumps a gate of pass_jump_after_combine

2019-11-14 Thread Ilya Leoshkevich
Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux and
ppc64le-redhat-linux.  OK for trunk and gcc-9-branch?  I'd like to commit
this and https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00919.html together
to gcc-9-branch.


This is a follow-up to
https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00919.html (r278095).
Dominance info is deleted even if we don't perform jump threading. Since
the whole point of this pass is to perform jump threading (other
cleanups are not valuable at this point), skip it completely when
flag_thread_jumps is not set.

gcc/ChangeLog:

2019-11-13  Ilya Leoshkevich  

PR rtl-optimization/92430
* cfgcleanup.c (pass_jump_after_combine::gate): New function.
(pass_jump_after_combine::execute): Perform jump threading
unconditionally.
---
 gcc/cfgcleanup.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 7f388258e10..f1d421b1cfa 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -3304,6 +3304,7 @@ public:
   {}
 
   /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_thread_jumps; }
   virtual unsigned int execute (function *);
 
 }; // class pass_jump_after_combine
@@ -3313,7 +3314,7 @@ pass_jump_after_combine::execute (function *)
 {
   /* Jump threading does not keep dominators up-to-date.  */
   free_dominance_info (CDI_DOMINATORS);
-  cleanup_cfg (flag_thread_jumps ? CLEANUP_THREADING : 0);
+  cleanup_cfg (CLEANUP_THREADING);
   return 0;
 }
 
-- 
2.23.0



Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-14 Thread Martin Liška

On 11/5/19 1:38 PM, Richard Biener wrote:

On Mon, Nov 4, 2019 at 3:49 PM Jakub Jelinek  wrote:


On Mon, Nov 04, 2019 at 03:23:20PM +0100, Martin Liška wrote:

The patch adds a new pass that identifies a series of if-elseif
statements and transforms them into a GIMPLE switch (if possible).
The pass runs right after the tree-ssa pass and I decided to implement
matching of various forms that are introduced by the folder (fold_range_test):


Not a review, just a few questions:


Hello.



Likewise - please do not name switches -ftree-*, 'tree' doesn't add anything
but confusion to users.  Thus use -fif-to-switch or -fconvert-if-to-switch


Agree with you, I selected the latter option.



+The transformation can help to produce a faster code for
+the switch statement.

produce faster code.


Fixed.



Doesn't it also produce smaller code eventually?


In some situations yes, but generally it leads to more jump tables
(which are bigger when expanded).



Please do not put code transform passes into build_ssa_passes (why did
you choose this place)?


Well, that was my initial pass selection, as I wanted to have GIMPLE
code close to what FEs produce.


 The pass should go into pass_all_early_optimizations
instead, and I'm quite sure you want to run _after_ CSE.  I'd even say
that the pass should run as part of switch-conversion, so we build
a representation of a switch internally and then code-generate the optimal
form directly.  For now just put the pass before switch-conversion.


But yes, the suggested place is a much better place and we can benefit from
VRP (which will kill dead conditions in an if-else chain).



There are functions without comments in the patch and you copied
from DSE which shows in confusing comments left over from the original.

+  mark_virtual_operands_for_renaming (cfun);

if you did nothing renaming all vops is expensive.


This one is needed for situations like:

fn1 ()
{
   a.0_1;

   :
  # VUSE <.MEM_5(D)>
  a.0_1 = a;
  if (a.0_1 == 0)
goto ; [INV]
  else
goto ; [INV]

   :
  if (a.0_1 == 1)
goto ; [INV]
  else
goto ; [INV]

   :
  if (a.0_1 == 2)
goto ; [INV]
  else
goto ; [INV]

   :
  # .MEM_6 = VDEF <.MEM_5(D)>
  fn2 ();

   :
  # .MEM_4 = PHI <.MEM_5(D)(2), .MEM_5(D)(3), .MEM_5(D)(4), .MEM_6(5)>
  # VUSE <.MEM_4>
  return;

}

where without the call, I end up with:

   :
  # VUSE <.MEM_5(D)>
  a.0_1 = a;
  switch (a.0_1)  [INV], case 0:  [INV], case 1:  [INV], case 2: 
 [INV]>

   :
:
  # .MEM_6 = VDEF <.MEM_5(D)>
  fn2 ();

   :
  # .MEM_4 = PHI <.MEM_6(3), (2)>




I'm missing an overall comment - you are using a dominator walk
but do nothing in the after hook which means you are not really
gathering any data?  You're also setting visited bits on BBs which
means you are visiting alternate BBs during the DOM walk.


You are right, I'm cheating a bit with the DOM walk as I also mark visited BBs.
What I want is, for each if-else chain, to visit the first IF in such a
chain. That's why I decided to iterate in DOMINATOR order.
Can I do it more simply?

Thanks,
Martin




1) what does it do if __builtin_expect* has been used, does it preserve
the probabilities and if in the end decides to expand as ifs, are those
probabilities retained through it?
2) for the reassoc-*.c testcases, do you get identical or better code
with the patch?
3) shouldn't it be gimple-if-to-switch.c instead?
4) what code size effect does the patch have say on cc1plus (if you don't
count the code changes of the patch itself, i.e. revert the patch in the
stage3 and rebuild just the stage3)?


+struct case_range
+{
+  /* Default constructor.  */
+  case_range ():
+m_min (NULL_TREE), m_max (NULL_TREE)


I admit I'm never sure about coding conventions for C++,
but shouldn't there be a space before :, or even better :
be on the next line before m_min ?

 Jakub





Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-14 Thread Martin Liška

On 11/6/19 10:02 PM, Bernhard Reutner-Fischer wrote:

Also why do you punt on duplicate conditions like in


+++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-4.c
+int main(int argc, char **argv)
+{
+  if (argc == 1)
+  else if (argc == 2)
+  else if (argc == 3)
+  else if (argc == 4)
+  else if (argc == 1)
+{

This block is dead, isn't it. Why don't you just discard it but punt?



Hello.

After I moved the pass later in the optimization pipeline, such dead conditions
are already gone. What remains are situations like:

if (argc >= 1 && argc <= 10)
...
else if (argc >= 8 && argc <= 15)

which are overlapping intervals. I'm not planning to handle these in the first
iteration of the patch.
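
For context, a minimal hypothetical sketch (not from the patch) of a chain
of the shape the pass targets, next to the overlapping case above that is
punted on for now:

int convertible (int argc)
{
  /* Disjoint equality tests on a single variable.  */
  if (argc == 1)
    return 10;
  else if (argc == 2)
    return 20;
  else if (argc == 3)
    return 30;
  return 0;
  /* Roughly: switch (argc) { case 1: return 10; case 2: return 20;
			      case 3: return 30; default: return 0; }  */
}

int overlapping (int argc)
{
  if (argc >= 1 && argc <= 10)
    return 1;
  else if (argc >= 8 && argc <= 15)   /* overlaps [8,10] with the test above */
    return 2;
  return 0;
}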

Martin


Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-14 Thread Martin Liška

On 11/13/19 4:43 PM, Michael Matz wrote:

Hi,

On Wed, 13 Nov 2019, Martin Liška wrote:


Not a review, just a few questions:


Hello.

Thank you for it.



1) what does it do if __builtin_expect* has been used, does it preserve
 the probabilities and if in the end decides to expand as ifs, are those
 probabilities retained through it?


No, it's currently not supported. I can consider adding that as a follow up.


But given that you apply the transformation even to small if ladders this
loses quite some info then.


Yes. Based on my experiments, the patch does not convert if-else chain
with __builtin_expect* right now.




2) for the reassoc-*.c testcases, do you get identical or better code
 with the patch?


The code is the following:

Before:
Optimizing range tests a_5(D) -[10, 10] and -[26, 26]
  into (a_5(D) & -17) != 10

[local count: 1073741823]:
   _11 = a_5(D) & -17;
   _12 = _11 == 10;
   _1 = a_5(D) == 10;
   _2 = a_5(D) == 12;
   _10 = a_5(D) == 26;
   _9 = _2 | _12;
   if (_9 != 0)
 goto ; [56.44%]
   else
 goto ; [43.56%]

After:

[local count: 1073741824]:
   switch (a_2(D))  [50.00%], case 10:  [50.00%], case 12:
[50.00%], case 26:  [50.00%]>


And here I bet that Jakub was asking for final code, not intermediate
code.  In particular the bit trick with transforming a test for
a \in {10,26} into (a&-17)==10.  The switch still tests for 10,26, and in
the end it should be ensured that the same trickery is employed to
actually implement the switch.


I agree that (a&-17)==10 is a more elegant trick here ...
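
As an illustration only, a quick check that (a & -17) == 10 is the same
test as a == 10 || a == 26: -17 is ~16, so the mask clears bit 4, the
only bit in which 10 (0b01010) and 26 (0b11010) differ.

#include <assert.h>

int main (void)
{
  for (int a = 0; a < 256; a++)
    assert (((a & -17) == 10) == (a == 10 || a == 26));
  return 0;
}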




As seen, reassoc can generate a different range check and then it's about cmp1
| cmp2 | ...
I bet the explicit gswitch is quite an equal representation.


As long as the final code is the same or better, sure.  Otherwise it's a
more canonical representation with suboptimal expansion, and the latter
should be fixed otherwise it's introducing a regression.


... and yes, the gswitch expansion is not capable of such smart interval tests.
We can improve that in the future.

Martin




Ciao,
Michael.





[PATCH v2 0/6] Implement asm flag outputs for arm + aarch64

2019-11-14 Thread Richard Henderson
I've put the implementation into config/arm/aarch-common.c, so
that it can be shared between the two targets.  This required
a little bit of cleanup to the CC modes and constraints to get
the two targets to match up.
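
As a usage illustration, a minimal sketch assuming the AArch64 side and
the "=@cc<cond>" output-constraint spelling documented in the extend.texi
hunk of patch 4/6; the compiler reads the Z flag set by the asm instead
of needing a cset sequence in the template:

static inline int is_equal (long a, long b)
{
  int r;
  __asm__ ("cmp %1, %2" : "=@cceq" (r) : "r" (a), "r" (b));
  return r;
}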

Changes for v2:
  * Document overflow flags.
  * Add "hs" and "lo" as aliases of "cs" and "cc".
  * Add unsigned cmp tests to asm-flag-6.c.

Richard Sandiford has given his ack for the aarch64 side.
I'm still looking for an ack for the arm side.


r~


Richard Henderson (6):
  aarch64: Add "c" constraint
  arm: Fix the "c" constraint
  arm: Rename CC_NOOVmode to CC_NZmode
  arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__
  arm: Add testsuite checks for asm-flag
  aarch64: Add testsuite checks for asm-flag

 gcc/config/arm/aarch-common-protos.h  |   6 +
 gcc/config/aarch64/aarch64-c.c|   2 +
 gcc/config/aarch64/aarch64.c  |   3 +
 gcc/config/arm/aarch-common.c | 136 +
 gcc/config/arm/arm-c.c|   1 +
 gcc/config/arm/arm.c  |  15 +-
 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c |  35 
 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c |  38 
 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c |  62 ++
 gcc/testsuite/gcc.target/arm/asm-flag-1.c |  36 
 gcc/testsuite/gcc.target/arm/asm-flag-3.c |  38 
 gcc/testsuite/gcc.target/arm/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/arm/asm-flag-6.c |  62 ++
 gcc/config/aarch64/constraints.md |   4 +
 gcc/config/arm/arm-modes.def  |   4 +-
 gcc/config/arm/arm.md | 186 +-
 gcc/config/arm/constraints.md |   5 +-
 gcc/config/arm/predicates.md  |   2 +-
 gcc/config/arm/thumb1.md  |   8 +-
 gcc/config/arm/thumb2.md  |  34 ++--
 gcc/doc/extend.texi   |  39 
 22 files changed, 651 insertions(+), 125 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c

-- 
2.17.1



[PATCH v2 2/6] arm: Fix the "c" constraint

2019-11-14 Thread Richard Henderson
The existing definition using register class CC_REG does not
work because CC_REGNUM does not support normal modes, and so
fails to match register_operand.  Use a non-register constraint
and the cc_register predicate instead.

* config/arm/constraints.md (c): Use cc_register predicate.
---
 gcc/config/arm/constraints.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index b76de81b85c..e02b678d26d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -94,8 +94,9 @@
  "@internal
   Thumb only.  The union of the low registers and the stack register.")
 
-(define_register_constraint "c" "CC_REG"
- "@internal The condition code register.")
+(define_constraint "c"
+ "@internal The condition code register."
+ (match_operand 0 "cc_register"))
 
 (define_register_constraint "Cs" "CALLER_SAVE_REGS"
  "@internal The caller save registers.  Useful for sibcalls.")
-- 
2.17.1



[PATCH v2 1/6] aarch64: Add "c" constraint

2019-11-14 Thread Richard Henderson
Mirror arm in letting "c" match the condition code register.

* config/aarch64/constraints.md (c): New constraint.
---
 gcc/config/aarch64/constraints.md | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/config/aarch64/constraints.md 
b/gcc/config/aarch64/constraints.md
index d0c3dd5bc1f..b9e5d13e851 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -39,6 +39,10 @@
 (define_register_constraint "y" "FP_LO8_REGS"
   "Floating point and SIMD vector registers V0 - V7.")
 
+(define_constraint "c"
+ "@internal The condition code register."
+  (match_operand 0 "cc_register"))
+
 (define_constraint "I"
  "A constant that can be used with an ADD operation."
  (and (match_code "const_int")
-- 
2.17.1



[PATCH v2 3/6] arm: Rename CC_NOOVmode to CC_NZmode

2019-11-14 Thread Richard Henderson
CC_NZmode is a more accurate description of what we require
from the mode, and matches up with the definition in aarch64.

Rename noov_comparison_operator to nz_comparison_operator
in order to match.

* config/arm/arm-modes.def (CC_NZ): Rename from CC_NOOV.
* config/arm/predicates.md (nz_comparison_operator): Rename
from noov_comparison_operator.
* config/arm/arm.c (arm_select_cc_mode): Use CC_NZmode name.
(arm_gen_dicompare_reg): Likewise.
(maybe_get_arm_condition_code): Likewise.
(thumb1_final_prescan_insn): Likewise.
(arm_emit_coreregs_64bit_shift): Likewise.
* config/arm/arm.md (addsi3_compare0): Likewise.
(*addsi3_compare0_scratch, subsi3_compare0): Likewise.
(*mulsi3_compare0, *mulsi3_compare0_v6): Likewise.
(*mulsi3_compare0_scratch, *mulsi3_compare0_scratch_v6): Likewise.
(*mulsi3addsi_compare0, *mulsi3addsi_compare0_v6): Likewise.
(*mulsi3addsi_compare0_scratch): Likewise.
(*mulsi3addsi_compare0_scratch_v6): Likewise.
(*andsi3_compare0, *andsi3_compare0_scratch): Likewise.
(*zeroextractsi_compare0_scratch): Likewise.
(*ne_zeroextractsi, *ne_zeroextractsi_shifted): Likewise.
(*ite_ne_zeroextractsi, *ite_ne_zeroextractsi_shifted): Likewise.
(andsi_not_shiftsi_si_scc_no_reuse): Likewise.
(andsi_not_shiftsi_si_scc): Likewise.
(*andsi_notsi_si_compare0, *andsi_notsi_si_compare0_scratch): Likewise.
(*iorsi3_compare0, *iorsi3_compare0_scratch): Likewise.
(*xorsi3_compare0, *xorsi3_compare0_scratch): Likewise.
(*shiftsi3_compare0, *shiftsi3_compare0_scratch): Likewise.
(*not_shiftsi_compare0, *not_shiftsi_compare0_scratch): Likewise.
(*notsi_compare0, *notsi_compare0_scratch): Likewise.
(return_addr_mask, *check_arch2): Likewise.
(*arith_shiftsi_compare0, *arith_shiftsi_compare0_scratch): Likewise.
(*sub_shiftsi_compare0, *sub_shiftsi_compare0_scratch): Likewise.
(compare_scc splitters): Likewise.
(movcond_addsi): Likewise.
* config/arm/thumb2.md (thumb2_addsi3_compare0): Likewise.
(*thumb2_addsi3_compare0_scratch): Likewise.
(*thumb2_mulsi_short_compare0): Likewise.
(*thumb2_mulsi_short_compare0_scratch): Likewise.
(compare peephole2s): Likewise.
* config/arm/thumb1.md (thumb1_cbz): Use CC_NZmode and
nz_comparison_operator names.
(cbranchsi4_insn): Likewise.
---
 gcc/config/arm/arm.c |  12 +--
 gcc/config/arm/arm-modes.def |   4 +-
 gcc/config/arm/arm.md| 186 +--
 gcc/config/arm/predicates.md |   2 +-
 gcc/config/arm/thumb1.md |   8 +-
 gcc/config/arm/thumb2.md |  34 +++
 6 files changed, 123 insertions(+), 123 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9086cf65953..d996207853c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15376,7 +15376,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
  || GET_CODE (x) == ROTATERT
  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
-return CC_NOOVmode;
+return CC_NZmode;
 
   /* A comparison of ~reg with a const is really a special
  canoncialization of compare (~const, reg), which is a reverse
@@ -15492,11 +15492,11 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, 
rtx scratch)
  }
 
rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
-   cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+   cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
 
rtx set
  = gen_rtx_SET (cc_reg,
-gen_rtx_COMPARE (CC_NOOVmode,
+gen_rtx_COMPARE (CC_NZmode,
  gen_rtx_IOR (SImode, x_lo, x_hi),
  const0_rtx));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
@@ -23881,7 +23881,7 @@ maybe_get_arm_condition_code (rtx comparison)
return code;
   return ARM_NV;
 
-case E_CC_NOOVmode:
+case E_CC_NZmode:
   switch (comp_code)
{
case NE: return ARM_NE;
@@ -25304,7 +25304,7 @@ thumb1_final_prescan_insn (rtx_insn *insn)
  cfun->machine->thumb1_cc_insn = insn;
  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
  cfun->machine->thumb1_cc_op1 = const0_rtx;
- cfun->machine->thumb1_cc_mode = CC_NOOVmode;
+ cfun->machine->thumb1_cc_mode = CC_NZmode;
  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
{
  rtx src1 = XEXP (SET_SRC (set), 1);
@@ -30486,7 +30486,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx 
out, rtx in,
   else
 {
   /* We have a shift-by-register.  */
-  rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+  

[PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Henderson
Since all but a couple of lines are shared between the two targets,
enable them both at once.

* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Declare.
* config/arm/aarch-common.c (arm_md_asm_adjust): New.
* config/arm/arm-c.c (arm_cpu_builtins): Define
__GCC_ASM_FLAG_OUTPUTS__.
* config/arm/arm.c (TARGET_MD_ASM_ADJUST): New.
* config/aarch64/aarch64-c.c (aarch64_define_unconditional_macros):
Define __GCC_ASM_FLAG_OUTPUTS__.
* config/aarch64/aarch64.c (TARGET_MD_ASM_ADJUST): New.
* doc/extend.texi (FlagOutputOperands): Add documentation
for ARM and AArch64.
---
 gcc/config/arm/aarch-common-protos.h |   6 ++
 gcc/config/aarch64/aarch64-c.c   |   2 +
 gcc/config/aarch64/aarch64.c |   3 +
 gcc/config/arm/aarch-common.c| 136 +++
 gcc/config/arm/arm-c.c   |   1 +
 gcc/config/arm/arm.c |   3 +
 gcc/doc/extend.texi  |  39 
 7 files changed, 190 insertions(+)

diff --git a/gcc/config/arm/aarch-common-protos.h 
b/gcc/config/arm/aarch-common-protos.h
index 3bf38a104f6..f15cf336e9d 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,8 @@
 #ifndef GCC_AARCH_COMMON_PROTOS_H
 #define GCC_AARCH_COMMON_PROTOS_H
 
+#include "hard-reg-set.h"
+
 extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
 extern bool aarch_rev16_p (rtx);
 extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
@@ -141,5 +143,9 @@ struct cpu_cost_table
   const struct vector_cost_table vect;
 };
 
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+		   vec<const char *> &constraints,
+		   vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
 
 #endif /* GCC_AARCH_COMMON_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 7c322ca0813..0af859f1c14 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -69,6 +69,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
   builtin_define ("__ARM_FEATURE_UNALIGNED");
   builtin_define ("__ARM_PCS_AAPCS64");
   builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8);
+
+  builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
 }
 
 /* Undefine/redefine macros that depend on the current backend state and may
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d2a3c7ef90a..9a5f27fea3a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21933,6 +21933,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_STRICT_ARGUMENT_NAMING
 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
 
+#undef TARGET_MD_ASM_ADJUST
+#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-aarch64.h"
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 965a07a43e3..760ef6c9c0a 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -26,10 +26,16 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "insn-modes.h"
 #include "tm.h"
 #include "rtl.h"
 #include "rtl-iter.h"
 #include "memmodel.h"
+#include "diagnostic.h"
+#include "tree.h"
+#include "expr.h"
+#include "function.h"
+#include "emit-rtl.h"
 
 /* Return TRUE if X is either an arithmetic shift left, or
is a multiplication by a power of two.  */
@@ -520,3 +526,133 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx 
consumer)
   && !reg_overlap_mentioned_p (mul_result, mac_op0)
   && !reg_overlap_mentioned_p (mul_result, mac_op1));
 }
+
+/* Worker function for TARGET_MD_ASM_ADJUST.
+   We implement asm flag outputs.  */
+
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+{
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+{
+  const char *con = constraints[i];
+  if (strncmp (con, "=@cc", 4) != 0)
+   continue;
+  con += 4;
+  if (strchr (con, ',') != NULL)
+   {
+ error ("alternatives not allowed in %<asm%> flag output");
+ continue;
+   }
+
+  machine_mode mode;
+  rtx_code code;
+  int con01 = 0;
+
+#define C(X, Y)  (unsigned char)(X) * 256 + (unsigned char)(Y)
+
+  /* All of the condition codes are two characters.  */
+  if (con[0] != 0 && con[1] != 0 && con[2] == 0)
+   con01 = C(con[0], con[1]);
+
+  switch (con01)
+   {
+   case C('c', 'c'):
+   case C('l', 'o'):
+ mode = CC_Cmode, code = GEU;
+ break;
+   case C('c', 's'):
+   case C('h', 's'):
+ mode = CC_Cmode, code = LTU;
+ break;
+   case C('e', 'q'):
+ mode = CC_NZmode, code = EQ;
+ break;
+ 
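
As a hedged user-side illustration of what these "=@cc<cond>" outputs enable
once __GCC_ASM_FLAG_OUTPUTS__ is defined (not part of the patch; the function
and the unified-syntax "adds" mnemonic are assumptions made for the example):

#ifdef __GCC_ASM_FLAG_OUTPUTS__
/* Return nonzero iff the addition overflowed, reading the V flag
   directly through the "=@ccvs" output instead of a separate compare.  */
int
add_overflows (int a, int b, int *res)
{
  int ovf;
  __asm__ ("adds %0, %2, %3"
           : "=r" (*res), "=@ccvs" (ovf)
           : "r" (a), "r" (b));
  return ovf;
}
#endif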

[PATCH v2 5/6] arm: Add testsuite checks for asm-flag

2019-11-14 Thread Richard Henderson
Inspired by the tests in gcc.target/i386.  Testing code generation,
diagnostics, and execution.

* gcc.target/arm/asm-flag-1.c: New test.
* gcc.target/arm/asm-flag-3.c: New test.
* gcc.target/arm/asm-flag-5.c: New test.
* gcc.target/arm/asm-flag-6.c: New test.
---
 gcc/testsuite/gcc.target/arm/asm-flag-1.c | 36 +
 gcc/testsuite/gcc.target/arm/asm-flag-3.c | 38 ++
 gcc/testsuite/gcc.target/arm/asm-flag-5.c | 30 +++
 gcc/testsuite/gcc.target/arm/asm-flag-6.c | 62 +++
 4 files changed, 166 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c

diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-1.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
new file mode 100644
index 000..9707ebfcebb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
@@ -0,0 +1,36 @@
+/* Test the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#error "missing preprocessor define"
+#endif
+
+void f(char *out)
+{
+  asm(""
+  : "=@ccne"(out[0]), "=@cceq"(out[1]),
+   "=@cccs"(out[2]), "=@cccc"(out[3]),
+   "=@ccmi"(out[4]), "=@ccpl"(out[5]),
+   "=@ccvs"(out[6]), "=@ccvc"(out[7]),
+   "=@cchi"(out[8]), "=@ccls"(out[9]),
+   "=@ccge"(out[10]), "=@cclt"(out[11]),
+   "=@ccgt"(out[12]), "=@ccle"(out[13]),
+   "=@cchs"(out[14]), "=@cclo"(out[15]));
+}
+
+/* There will be at least one of each.  */
+/* { dg-final { scan-assembler "movne" } } */
+/* { dg-final { scan-assembler "moveq" } } */
+/* { dg-final { scan-assembler "movcs" } } */
+/* { dg-final { scan-assembler "movcc" } } */
+/* { dg-final { scan-assembler "movmi" } } */
+/* { dg-final { scan-assembler "movpl" } } */
+/* { dg-final { scan-assembler "movvs" } } */
+/* { dg-final { scan-assembler "movvc" } } */
+/* { dg-final { scan-assembler "movhi" } } */
+/* { dg-final { scan-assembler "movls" } } */
+/* { dg-final { scan-assembler "movge" } } */
/* { dg-final { scan-assembler "movlt" } } */
+/* { dg-final { scan-assembler "movgt" } } */
+/* { dg-final { scan-assembler "movle" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-3.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
new file mode 100644
index 000..e84e3431277
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
@@ -0,0 +1,38 @@
+/* Test some of the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(void) { char x; asm("" : "=@cc"#C(x)); if (!x) asm(""); asm(""); }
+
+DO(ne)
+DO(eq)
+DO(cs)
+DO(cc)
+DO(hs)
+DO(lo)
+DO(mi)
+DO(pl)
+DO(vs)
+DO(vc)
+DO(hi)
+DO(ls)
+DO(ge)
+DO(lt)
+DO(gt)
+DO(le)
+
+/* { dg-final { scan-assembler "bne" } } */
+/* { dg-final { scan-assembler "beq" } } */
+/* { dg-final { scan-assembler "bcs" } } */
+/* { dg-final { scan-assembler "bcc" } } */
+/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bpl" } } */
+/* { dg-final { scan-assembler "bvs" } } */
+/* { dg-final { scan-assembler "bvc" } } */
+/* { dg-final { scan-assembler "bhi" } } */
+/* { dg-final { scan-assembler "bls" } } */
+/* { dg-final { scan-assembler "bge" } } */
+/* { dg-final { scan-assembler "blt" } } */
+/* { dg-final { scan-assembler "bgt" } } */
+/* { dg-final { scan-assembler "ble" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-5.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
new file mode 100644
index 000..4d4394e1478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
@@ -0,0 +1,30 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void f_B(void) { _Bool x; asm("" : "=@cccc"(x)); }
+void f_c(void) { char x; asm("" : "=@cccc"(x)); }
+void f_s(void) { short x; asm("" : "=@cccc"(x)); }
+void f_i(void) { int x; asm("" : "=@cccc"(x)); }
+void f_l(void) { long x; asm("" : "=@cccc"(x)); }
+void f_ll(void) { long long x; asm("" : "=@cccc"(x)); }
+
+void f_f(void)
+{
+  float x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
+
+void f_d(void)
+{
+  double x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
+
+struct S { int x[3]; };
+
+void f_S(void)
+{
+  struct S x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-6.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
new file mode 100644
index 000..09174e04ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
@@ -0,0 +1,62 @@
+/* Executable testcase for 'output flags.'  */
+/* { dg-do run } */
+
+int test_bits (long nzcv)
+{
+  long n, z, c, v;
+
+  __asm__ ("msr APSR_nzcvq, %[in]"
+  : "=@ccmi"(n), "=@cceq"(z), "=@cccs"(c), "=@ccvs"(v)
+  : [in] "r"(nzcv << 28));
+
+ 

[PATCH v2 6/6] aarch64: Add testsuite checks for asm-flag

2019-11-14 Thread Richard Henderson
Inspired by the tests in gcc.target/i386.  Testing code generation,
diagnostics, and execution.

* gcc.target/aarch64/asm-flag-1.c: New test.
* gcc.target/aarch64/asm-flag-3.c: New test.
* gcc.target/aarch64/asm-flag-5.c: New test.
* gcc.target/aarch64/asm-flag-6.c: New test.
---
 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c | 35 +++
 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c | 38 
 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c | 30 +
 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c | 62 +++
 4 files changed, 165 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c

diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
new file mode 100644
index 000..49901e59c38
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
@@ -0,0 +1,35 @@
+/* Test the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#error "missing preprocessor define"
+#endif
+
+void f(char *out)
+{
+  asm(""
+  : "=@ccne"(out[0]), "=@cceq"(out[1]),
+   "=@cccs"(out[2]), "=@cccc"(out[3]),
+   "=@ccmi"(out[4]), "=@ccpl"(out[5]),
+   "=@ccvs"(out[6]), "=@ccvc"(out[7]),
+   "=@cchi"(out[8]), "=@ccls"(out[9]),
+   "=@ccge"(out[10]), "=@cclt"(out[11]),
+   "=@ccgt"(out[12]), "=@ccle"(out[13]),
+   "=@cchs"(out[14]), "=@cclo"(out[15]));
+}
+
+/* { dg-final { scan-assembler "cset.*, ne" } } */
+/* { dg-final { scan-assembler "cset.*, eq" } } */
+/* { dg-final { scan-assembler "cset.*, cs" } } */
+/* { dg-final { scan-assembler "cset.*, cc" } } */
+/* { dg-final { scan-assembler "cset.*, mi" } } */
+/* { dg-final { scan-assembler "cset.*, pl" } } */
+/* { dg-final { scan-assembler "cset.*, vs" } } */
+/* { dg-final { scan-assembler "cset.*, vc" } } */
+/* { dg-final { scan-assembler "cset.*, hi" } } */
+/* { dg-final { scan-assembler "cset.*, ls" } } */
+/* { dg-final { scan-assembler "cset.*, ge" } } */
/* { dg-final { scan-assembler "cset.*, lt" } } */
+/* { dg-final { scan-assembler "cset.*, gt" } } */
+/* { dg-final { scan-assembler "cset.*, le" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
new file mode 100644
index 000..e84e3431277
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
@@ -0,0 +1,38 @@
+/* Test some of the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(void) { char x; asm("" : "=@cc"#C(x)); if (!x) asm(""); asm(""); }
+
+DO(ne)
+DO(eq)
+DO(cs)
+DO(cc)
+DO(hs)
+DO(lo)
+DO(mi)
+DO(pl)
+DO(vs)
+DO(vc)
+DO(hi)
+DO(ls)
+DO(ge)
+DO(lt)
+DO(gt)
+DO(le)
+
+/* { dg-final { scan-assembler "bne" } } */
+/* { dg-final { scan-assembler "beq" } } */
+/* { dg-final { scan-assembler "bcs" } } */
+/* { dg-final { scan-assembler "bcc" } } */
+/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bpl" } } */
+/* { dg-final { scan-assembler "bvs" } } */
+/* { dg-final { scan-assembler "bvc" } } */
+/* { dg-final { scan-assembler "bhi" } } */
+/* { dg-final { scan-assembler "bls" } } */
+/* { dg-final { scan-assembler "bge" } } */
+/* { dg-final { scan-assembler "blt" } } */
+/* { dg-final { scan-assembler "bgt" } } */
+/* { dg-final { scan-assembler "ble" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
new file mode 100644
index 000..4d4394e1478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
@@ -0,0 +1,30 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void f_B(void) { _Bool x; asm("" : "=@cccc"(x)); }
+void f_c(void) { char x; asm("" : "=@cccc"(x)); }
+void f_s(void) { short x; asm("" : "=@cccc"(x)); }
+void f_i(void) { int x; asm("" : "=@cccc"(x)); }
+void f_l(void) { long x; asm("" : "=@cccc"(x)); }
+void f_ll(void) { long long x; asm("" : "=@cccc"(x)); }
+
+void f_f(void)
+{
+  float x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
+
+void f_d(void)
+{
+  double x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
+
+struct S { int x[3]; };
+
+void f_S(void)
+{
+  struct S x;
+  asm("" : "=@cccc"(x)); /* { dg-error invalid type } */
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c 
b/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
new file mode 100644
index 000..963b5a48c70
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
@@ -0,0 +1,62 @@
+/* Executable testcase for 'output flags.'  */
+/* { dg-do run } */
+
+int test_bits (long nzcv)
+{
+  long n, z, c, v;
+
+  __asm__ ("msr nzcv, %[in]

Re: [PATCH] Make flag_thread_jumps a gate of pass_jump_after_combine

2019-11-14 Thread Richard Biener
On Thu, 14 Nov 2019, Ilya Leoshkevich wrote:

> Bootstrapped and regtested on x86_64-redhat-linux, s390x-redhat-linux and
> ppc64le-redhat-linux.  OK for trunk and gcc-9-branch?  I'd like to commit
> this and https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00919.html together
> to gcc-9-branch.

OK.

Richard.

> 
> This is a follow-up to
> https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00919.html (r278095).
> Dominance info is deleted even if we don't perform jump threading. Since
> the whole point of this pass is to perform jump threading (other
> cleanups are not valuable at this point), skip it completely when
> flag_thread_jumps is not set.
> 
> gcc/ChangeLog:
> 
> 2019-11-13  Ilya Leoshkevich  
> 
>   PR rtl-optimization/92430
>   * cfgcleanup.c (pass_jump_after_combine::gate): New function.
>   (pass_jump_after_combine::execute): Perform jump threading
>   unconditionally.
> ---
>  gcc/cfgcleanup.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
> index 7f388258e10..f1d421b1cfa 100644
> --- a/gcc/cfgcleanup.c
> +++ b/gcc/cfgcleanup.c
> @@ -3304,6 +3304,7 @@ public:
>{}
>  
>/* opt_pass methods: */
> +  virtual bool gate (function *) { return flag_thread_jumps; }
>virtual unsigned int execute (function *);
>  
>  }; // class pass_jump_after_combine
> @@ -3313,7 +3314,7 @@ pass_jump_after_combine::execute (function *)
>  {
>/* Jump threading does not keep dominators up-to-date.  */
>free_dominance_info (CDI_DOMINATORS);
> -  cleanup_cfg (flag_thread_jumps ? CLEANUP_THREADING : 0);
> +  cleanup_cfg (CLEANUP_THREADING);
>return 0;
>  }
>  
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-14 Thread Richard Biener
On Thu, Nov 14, 2019 at 10:39 AM Martin Liška  wrote:
>
> On 11/5/19 1:38 PM, Richard Biener wrote:
> > On Mon, Nov 4, 2019 at 3:49 PM Jakub Jelinek  wrote:
> >>
> >> On Mon, Nov 04, 2019 at 03:23:20PM +0100, Martin Liška wrote:
> >>> The patch adds a new pass that identifies a series of if-elseif
> >>> statements and transform them into a GIMPLE switch (if possible).
> >>> The pass runs right after tree-ssa pass and I decided to implement
> >>> matching of various forms that are introduced by folder (fold_range_test):
> >>
> >> Not a review, just a few questions:
>
> Hello.
>
> >
> > Likewise - please do not name switches -ftree-*, 'tree' doesn't add anything
> > but confusion to users.  Thus use -fif-to-switch or -fconvert-if-to-switch
>
> Agree with you, I selected the later option.
>
> >
> > +The transformation can help to produce a faster code for
> > +the switch statement.
> >
> > produce faster code.
>
> Fixed.
>
> >
> > Doesn't it also produce smaller code eventually?
>
> In some situation yes, but generally it leads to more jump tables
> (which are bigger when expanded).
>
> >
> > Please to not put code transform passes into build_ssa_passes (why did
> > you choose this place)?
>
> Well, that was my initial pass selection as I wanted to have a GIMPLE
> code close to what FEs produce.
>
> >  The pass should go into pass_all_early_optimizations
> > instead, and I'm quite sure you want to run _after_ CSE.  I'd even say
> > that the pass should run as part of switch-conversion, so we build
> > a representation of a switch internally and then code-generate the optimal
> > form directly.  For now just put the pass before switch-conversion.
>
> But yes, the suggested place is much better place and we can benefit from
> VRP (that will kill dead conditions in a if-else chain)
>
> >
> > There are functions without comments in the patch and you copied
> > from DSE which shows in confusing comments left over from the original.
> >
> > +  mark_virtual_operands_for_renaming (cfun);
> >
> > if you did nothing, renaming all vops is expensive.
>
> This one is needed for situations like:
>
> fn1 ()
> {
> a.0_1;
>
> :
># VUSE <.MEM_5(D)>
>a.0_1 = a;
>if (a.0_1 == 0)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
>if (a.0_1 == 1)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
>if (a.0_1 == 2)
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :
># .MEM_6 = VDEF <.MEM_5(D)>
>fn2 ();
>
> :
># .MEM_4 = PHI <.MEM_5(D)(2), .MEM_5(D)(3), .MEM_5(D)(4), .MEM_6(5)>
># VUSE <.MEM_4>
>return;
>
> }
>
> where without the call, I end up with:
>
> :
># VUSE <.MEM_5(D)>
>a.0_1 = a;
>switch (a.0_1)  [INV], case 0:  [INV], case 1:  
> [INV], case 2:  [INV]>
>
> :
> :
># .MEM_6 = VDEF <.MEM_5(D)>
>fn2 ();
>
> :
># .MEM_4 = PHI <.MEM_6(3), (2)>

I meant you are doing it unconditionally even if you didn't transform
any if-then-else chain.
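
In other words, something along these lines (a hedged sketch of the point
being made, not actual pass code; the guard variable name is made up):

  /* Only invalidate virtual operands when at least one if-chain was
     actually converted into a switch.  */
  if (any_chain_converted_p)
    mark_virtual_operands_for_renaming (cfun);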

>
> >
> > I'm missing an overall comment - you are using a dominator walk
> > but do nothing in the after hook which means you are not really
> > gathering any data?  You're also setting visited bits on BBs which
> > means you are visiting alternate BBs during the DOM walk.
>
> You are right, I'm a bit cheating with the DOM walk as I also mark visited 
> BBs.
> What I want is, for each if-else chain, to visit the first IF in such a
> chain.  That's why I decided to iterate in DOMINATOR order.
> Can I do it simpler?

Put that in a comment, do away with domwalk and instead start from
ENTRY_BLOCK, using a worklist seeded by {first,next}_dom_son ()
and avoid putting visited blocks on that worklist.
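
A minimal sketch of that shape, assuming dominance info has already been
computed and leaving the actual if-chain recognition as a placeholder:

  auto_vec<basic_block> worklist;
  auto_bitmap visited;
  /* Seed the worklist with the dominator sons of the entry block.  */
  for (basic_block son
         = first_dom_son (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
       son; son = next_dom_son (CDI_DOMINATORS, son))
    worklist.safe_push (son);
  while (!worklist.is_empty ())
    {
      basic_block bb = worklist.pop ();
      /* Skip blocks already consumed as part of a recognized chain.  */
      if (!bitmap_set_bit (visited, bb->index))
        continue;
      /* ... try to recognize an if-chain whose first IF is in BB here,
         marking every block the chain consumes in VISITED ...  */
      for (basic_block son = first_dom_son (CDI_DOMINATORS, bb);
           son; son = next_dom_son (CDI_DOMINATORS, son))
        worklist.safe_push (son);
    }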

Btw, what about if-chains nested in another if-chain?  Don't you want to
transform "inner" chains first or does it really not matter (you're adjusting
the CFG, doing that inside the domwalk is fishy since that also uses
pre-computed RPO order; the simple dom-son walking should work
but you of course might miss some blocks depending on how you set up
things).

Richard.

> Thanks,
> Martin
>
> >
> >> 1) what does it do if __builtin_expect* has been used, does it preserve
> >> the probabilities and if in the end decides to expand as ifs, are those
> >> probabilities retained through it?
> >> 2) for the reassoc-*.c testcases, do you get identical or better code
> >> with the patch?
> >> 3) shouldn't it be gimple-if-to-switch.c instead?
> >> 4) what code size effect does the patch have say on cc1plus (if you don't
> >> count the code changes of the patch itself, i.e. revert the patch in 
> >> the
> >> stage3 and rebuild just the stage3)?
> >>
> >>> +struct case_range
> >>> +{
> >>> +  /* Default constructor.  */
> >>> +  case_range ():
> >>> +m_min (NULL_TREE), m_max (NULL_TREE)
> >>
> >> I admit I'm never sure about coding conventions for C++,
> >> but shouldn't there be a space before :, or even better :
> >> be on the next line before m_min ?
> >>
> >> 

[patch] Common ground work for vxworks7 ports updates

2019-11-14 Thread Olivier Hainque
Hello,

This is the first of a series of patches evolving the VxWorks
support in the compiler.

This particular change prepares the grounds for an evolution of the
VxWorks 7 support towards the more recent major upgrade of the base OS,
from the SR5xx to the SR6xx series in vendor parlance.

The OS upgrade introduces major novelties (additional target platforms,
init/fini array and TLS support everywhere) and was rolled out pretty fast
after the previous SR5xx series.

The general trend is to get the system environment and toolchains as
similar as possible to the Linux environment.

As this is a fast moving target in the process of converging, we
simply map upstream GCC "vxworks7" to the more recent environment. There's
no real point in setting up intricate internal parameterizations to
account for all intermediate releases on mainline.

We know of uses of different triplet names to denote the more recent
release (-wrs-vxworks7r2 for SR6xx, for example), so we will
arrange to recognize the new triplets and let the more generic -vxworks7
one behave as a synonym.

After this change will come a few others,

- some implementing port specific adaptations for existing
  ports, with cleanups along the way,

- some introducing new ports (aarch64 in particular),

- some improving common functionalities such as gthreads
  or crtstuff files for table based unwinding, which we
  will leverage to strengthen the c++ support afterwards.

A number of people contributed to all this, listed as
authors in ChangeLog entries (Jerome Lambourg, Corentin Gay,
Pat Bernardi and Doug Rupp to name a few here).

This was first tested by our internal QA, doing builds and
running a variety of testsuites for powerpc-vxworks6, arm-vxworks7
and aarch64-vxworks7, both for kernel modules and RTPs.

The patches essentially all apply unchanged on mainline, and
I checked that I could build a powerpc-vxworks6 compiler from there.

I will apply to mainline shortly.

With Kind Regards,

Olivier

--

2019-11-06  Pat Bernardi  
Jerome Lambourg  
Olivier Hainque  

* config.gcc: Add comment to introduce the TARGET_VXWORKS
common macro definitions, conveying VXWORKS7 or 64bit general
variations.  Add a block to set gcc_cv_initfini_array
unconditionally to "yes" for VxWorks7.
config/vx-common.h (VXWORKS_CC1_SPEC): New macro, empty string
by default.  Update some comments.
config/vxworks.h (VXWORKS_EXTRA_LIBS_RTP): New macro, empty by
default, to be added at the end of VXWORKS_LIBS_RTP.
(VXWORKS_LIBS_RTP): Replace hardcoded part by VXWORKS_BASE_LIBS_RTP
and append VXWORKS_EXTRA_LIBS_RTP, both of which specific ports may
redefine.
(VXWORKS_NET_LIBS_RTP): Account for VxWorks7 specificities.
(VXWORKS_CC1_SPEC): Common base definition, with VxWorks7 variation
to account for the now available TLS abilities.
(TARGET_LIBC_HAS_FUNCTION): Account for VxWorks7 abilities.
(VXWORKS_HAVE_TLS): Likewise.




0001-Common-ground-work-for-vxworks7-ports-updates.patch
Description: Binary data


Re: Fix ICE when inlining into function containing polymorphic call

2019-11-14 Thread Martin Jambor
Hi,

On Wed, Nov 13 2019, Jan Hubicka wrote:
> Hi,
> the testcase causes inline context cache to go out of sync because I
> forgot to update used flags of parameters in one path of
> update_indirect_edges_after_inlining.
>
> While debugging it I also added better consistency check to
> ipa-inline-analysis and turned ipa-inline test from ifdef to -fchecking.
> This uncovered yet another missed upate in recursive inliner.
>
> Bootstrapped/regtested x86_64-linux, comitted.
>
>   PR c++/92421
>   * ipa-prop.c (update_indirect_edges_after_inlining):
>   Mark parameter as used.
>   * ipa-inline.c (recursive_inlining): Reset node cache
>   after inlining.
>   (inline_small_functions): Remove checking ifdef.
>   * ipa-inline-analysis.c (do_estimate_edge_time): Verify
>   cache consistency.
>   * g++.dg/torture/pr92421.C: New testcase.
> Index: ipa-prop.c
> ===
> --- ipa-prop.c(revision 278151)
> +++ ipa-prop.c(working copy)
> @@ -3537,6 +3537,11 @@ update_indirect_edges_after_inlining (st
> if (ici->polymorphic
> && !ipa_get_jf_ancestor_type_preserved (jfunc))
>   ici->vptr_changed = true;
> +   ipa_set_param_used_by_indirect_call (new_root_info,
> +ici->param_index, true);
> +   if (ici->polymorphic)
> + ipa_set_param_used_by_polymorphic_call (new_root_info,
> + ici->param_index, true);
>   }



Interesting, you have this exact hunk already in the patch introducing
the new param flags (message id
id:20191103224712.ndzyxu6cn3jt3...@kam.mff.cuni.cz or
https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00077.html).  I did
actually check it was there, even if only yesterday evening, but I did :-)

And I can also see the code already in my Monday checkout (r278047).  So
I guess you must have actually removed it by accident in the meantime?

Martin


Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2019-11-14 Thread Thomas Schwinge
Hi!

In context of reviewing Julian's "OpenACC reference count overhaul", I'm
generally reviewing (also known as: trying to understand) the libgomp
OpenMP 'target' "refcount"ing, and I noticed something strange (?):

On 2015-07-30T22:44:33+0300, Ilya Verbin  wrote:
> make check-target-libgomp passed.  ok?

(This eventually got into trunk in r228777 "Merge from gomp-4_1-branch to
trunk".)

> libgomp/

>   * target.c

>   (gomp_offload_image_to_device): Set tgt's refcount to infinity.

> --- a/libgomp/target.c
> +++ b/libgomp/target.c

> @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr 
> *devicep,
>/* Insert host-target address mapping into splay tree.  */
>struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
>tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
> -  tgt->refcount = 1;
> +  tgt->refcount = REFCOUNT_INFINITY;
>tgt->tgt_start = 0;
>tgt->tgt_end = 0;
>tgt->to_free = NULL;

I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
the 'refcount' in 'struct splay_tree_key_s', but here it's used for the
'refcount' in 'struct target_mem_desc'.  However, all the other 'struct
target_mem_desc' 'refcount' handling doesn't seem to take care of the
special 'REFCOUNT_INFINITY' value.

This might not be an actually observable bug (I have not verified, have
not tried to construct a test case), but should this be changed anyway?
(Back to 'tgt->refcount = 1'; not yet tested?)


Grüße
 Thomas




[C++ Patch] Use cp_expr_loc_or_input_loc in a few additional typeck.c places

2019-11-14 Thread Paolo Carlini

Hi,

tested x86_64-linux.

Thanks, Paolo.

///

/cp
2019-11-14  Paolo Carlini  

* typeck.c (cp_build_addr_expr_1): Use cp_expr_loc_or_input_loc
in three places.
(lvalue_or_else): Use it in one place.

/testsuite
2019-11-14  Paolo Carlini  

* g++.dg/cpp0x/addressof2.C: Test locations too.
* g++.dg/cpp0x/rv-lvalue-req.C: Likewise.
* g++.dg/expr/crash2.C: Likewise.
* g++.dg/expr/lval1.C: Likewise.
* g++.dg/expr/unary2.C: Likewise.
* g++.dg/ext/lvaddr.C: Likewise.
* g++.dg/ext/lvalue1.C: Likewise.
* g++.dg/tree-ssa/pr20280.C: Likewise.
* g++.dg/warn/Wplacement-new-size.C: Likewise.
* g++.old-deja/g++.law/temps1.C: Likewise.
Index: cp/typeck.c
===
--- cp/typeck.c (revision 278216)
+++ cp/typeck.c (working copy)
@@ -6126,7 +6126,7 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue
   if (kind == clk_none)
{
  if (complain & tf_error)
-   lvalue_error (input_location, lv_addressof);
+   lvalue_error (cp_expr_loc_or_input_loc (arg), lv_addressof);
  return error_mark_node;
}
   if (strict_lvalue && (kind & (clk_rvalueref|clk_class)))
@@ -6134,7 +6134,8 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue
  if (!(complain & tf_error))
return error_mark_node;
  /* Make this a permerror because we used to accept it.  */
- permerror (input_location, "taking address of rvalue");
+ permerror (cp_expr_loc_or_input_loc (arg),
+"taking address of rvalue");
}
 }
 
@@ -6228,7 +6229,8 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue
   if (bitfield_p (arg))
 {
   if (complain & tf_error)
-   error ("attempt to take address of bit-field");
+   error_at (cp_expr_loc_or_input_loc (arg),
+ "attempt to take address of bit-field");
   return error_mark_node;
 }
 
@@ -10431,7 +10433,7 @@ lvalue_or_else (tree ref, enum lvalue_use use, tsu
   if (kind == clk_none)
 {
   if (complain & tf_error)
-   lvalue_error (input_location, use);
+   lvalue_error (cp_expr_loc_or_input_loc (ref), use);
   return 0;
 }
   else if (kind & (clk_rvalueref|clk_class))
Index: testsuite/g++.dg/cpp0x/addressof2.C
===
--- testsuite/g++.dg/cpp0x/addressof2.C (revision 278216)
+++ testsuite/g++.dg/cpp0x/addressof2.C (working copy)
@@ -8,19 +8,19 @@ addressof (T &x) noexcept
   return __builtin_addressof (x);
 }
 
-auto a = __builtin_addressof (1);  // { dg-error "lvalue required 
as unary" }
-auto b = addressof (1);// { dg-error "cannot 
bind non-const lvalue reference of type" }
+auto a = __builtin_addressof (1);  // { dg-error "31:lvalue 
required as unary" }
+auto b = addressof (1);// { dg-error 
"21:cannot bind non-const lvalue reference of type" }
 
 struct S { int s : 5; int t; void foo (); } s;
 
 auto c = __builtin_addressof (s);
 auto d = addressof (s);
-auto e = __builtin_addressof (s.s);// { dg-error "attempt to take 
address of bit-field" }
-auto f = addressof (s.s);  // { dg-error "cannot bind 
bit-field" }
-auto g = __builtin_addressof (S{});// { dg-error "taking address 
of rvalue" }
-auto h = addressof (S{});  // { dg-error "cannot bind 
non-const lvalue reference of type" }
-auto i = __builtin_addressof (S::t);   // { dg-error "invalid use of 
non-static data member" }
-auto j = __builtin_addressof (S::foo); // { dg-error "invalid use of 
non-static member function" }
+auto e = __builtin_addressof (s.s);// { dg-error "33:attempt to 
take address of bit-field" }
+auto f = addressof (s.s);  // { dg-error "23:cannot bind 
bit-field" }
+auto g = __builtin_addressof (S{});// { dg-error "31:taking 
address of rvalue" }
+auto h = addressof (S{});  // { dg-error "21:cannot bind 
non-const lvalue reference of type" }
+auto i = __builtin_addressof (S::t);   // { dg-error "34:invalid use 
of non-static data member" }
+auto j = __builtin_addressof (S::foo); // { dg-error "34:invalid use 
of non-static member function" }
 
 void
 foo (bool b)
@@ -28,6 +28,6 @@ foo (bool b)
   lab:;
   char c;
   long long int d;
-  auto k = __builtin_addressof (lab);  // { dg-error "was not declared 
in this scope" }
-  auto l = __builtin_addressof (b ? c : d);// { dg-error "lvalue required 
as unary" }
+  auto k = __builtin_addressof (lab);  // { dg-error "33:.lab. was not 
declared in this scope" }
+  auto l = __builtin_addressof (b ? c : d);// { dg-error "35:lvalue 
required as unary" }
 }
Index: testsuite/g++.dg/cpp0x/rv-lvalue-req.C

[patch 2/7] Introduce vxworks specific crtstuff support

2019-11-14 Thread Olivier Hainque
This is the second patch of the first set evolving the
VxWorks support before we move on to more c++ specific
capabilities.

This change generalizes a mechanism we have been using in an
ad-hoc manner for Ada for quite a while now, so C++ can also
benefit from it.

The general idea is to introduce VxWorks specific versions
of the crtstuff facilities dealing with dwarf table registration
and deregistration for EH, accounting for differences in
the underlying OS support across versions and across the kind
of module one is building (kernel vs rtp).
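
As a rough, hedged sketch of the kind of code involved (loosely modeled on
libgcc's generic crtstuff.c and meant for the libgcc build context, not the
actual contents of vxcrtstuff.c):

#include "unwind-dw2-fde.h"  /* struct object, __register_frame_info.  */

/* Zero-length marker giving the start of this module's .eh_frame data.  */
static const char __EH_FRAME_BEGIN__[]
  __attribute__ ((used, section (".eh_frame"), aligned (4))) = { };

static struct object frame_object;

static void __attribute__ ((constructor))
eh_frame_register (void)
{
  /* Hand this module's FDEs over to the unwinder at load time.  */
  __register_frame_info (__EH_FRAME_BEGIN__, &frame_object);
}

static void __attribute__ ((destructor))
eh_frame_deregister (void)
{
  __deregister_frame_info (__EH_FRAME_BEGIN__);
}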

Tested as the other patches in the set, first with gcc-9 based
builds and testsuite runs for kernel and RTP on a few targets, then
with a sanity-check build of a powerpc-vxworks6 toolchain using
mainline sources, where the patch just applies.

Olivier

2019-11-06  Jerome Lambourg  
Olivier Hainque  

libgcc/
* config/vxcrtstuff.c: New file.
* config/t-vxcrtstuff: New Makefile fragment.
* config.host: Append t-vxcrtstuff to the tmake_file list
on all VxWorks ports using dwarf for table based EH.

gcc/
* config/vx-common.h (USE_TM_CLONE_REGISTRY): Remove
definition, pointless with a VxWorks specific version
of crtstuff.
(DWARF2_UNWIND_INFO): Conditionalize on !ARM_UNWIND_INFO.
* config/vxworks.h (VX_CRTBEGIN_SPEC, VX_CRTEND_SPEC):
New local macros, controlling the addition of vxworks specific
crtstuff objects depending on the EH mechanism and kind of
module being linked.
(VXWORKS_STARTFILE_SPEC, VXWORKS_ENDFILE_SPEC): Use them.



0002-Introduce-vxworks-specific-crtstuff-support.patch
Description: Binary data


Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2019-11-14 Thread Jakub Jelinek
On Thu, Nov 14, 2019 at 12:08:45PM +0100, Thomas Schwinge wrote:
> > @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct gomp_device_descr 
> > *devicep,
> >/* Insert host-target address mapping into splay tree.  */
> >struct target_mem_desc *tgt = gomp_malloc (sizeof (*tgt));
> >tgt->array = gomp_malloc ((num_funcs + num_vars) * sizeof (*tgt->array));
> > -  tgt->refcount = 1;
> > +  tgt->refcount = REFCOUNT_INFINITY;
> >tgt->tgt_start = 0;
> >tgt->tgt_end = 0;
> >tgt->to_free = NULL;
> 
> I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
> the 'refcount' in 'struct splay_tree_key_s', but here it's used for the
> 'refcount' in 'struct target_mem_desc'.  However, all the other 'struct
> target_mem_desc' 'refcount' handling doesn't seem to take care of the
> special 'REFCOUNT_INFINITY' value.
> 
> This might not be an actually observable bug (I have not verified, have
> not tried to construct a test case), but should this be changed anyway?
> (Back to 'tgt->refcount = 1'; not yet tested?)

No, we certainly don't want the code to free this unless the image is
unloaded.  So, if anything, REFCOUNT_INFINITY needs to be special cased
even in the target_mem_desc handling.  But, do you actually see any code
path where the current code doesn't work properly?
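
To make that concrete, a hedged sketch (not libgomp code; the helper name is
made up) of how the special case could look on the decrement side:

static inline void
tgt_desc_refcount_dec (struct target_mem_desc *tgt)
{
  /* Descriptors created for offload images live as long as the image
     stays loaded, so an infinite refcount must never be decremented.  */
  if (tgt->refcount == REFCOUNT_INFINITY)
    return;
  if (--tgt->refcount == 0)
    {
      /* ... unmap and free TGT here ... */
    }
}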

Jakub



[patch 3/7] Improve the thread support for VxWorks

2019-11-14 Thread Olivier Hainque
Hello,

This change adds support for the __GTHREAD_HAS_COND and __GTHREADS_CXX0X
facilities on top of what was already there for the base __GTHREADS code
on VxWorks. It also improves the general support to handle VxWorks 7 in
more configurations.
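
As background for the TAS-based __gthread_once mentioned in the ChangeLog
below, a generic hedged sketch of the shape of such an implementation (the
field names and the busy-wait are illustrative simplifications, not the
VxWorks code, which would yield to the scheduler instead of spinning):

typedef struct
{
  unsigned char busy;  /* test-and-set guard */
  unsigned char done;  /* set once FUNC has run */
} once_sketch_t;

static int
gthread_once_sketch (once_sketch_t *once, void (*func) (void))
{
  if (__atomic_load_n (&once->done, __ATOMIC_ACQUIRE))
    return 0;
  if (!__atomic_test_and_set (&once->busy, __ATOMIC_ACQUIRE))
    {
      /* We won the race: run FUNC and publish completion.  */
      func ();
      __atomic_store_n (&once->done, 1, __ATOMIC_RELEASE);
    }
  else
    /* Lost the race: wait until the winner is done.  */
    while (!__atomic_load_n (&once->done, __ATOMIC_ACQUIRE))
      ;
  return 0;
}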

Split the current vxlib.c source into separate files for this, adopting
a naming convention similar to what other ports do.

Extract the parts of t-vxworks* adding those sources to the libgcc
closure into separate files, so we can include the separate fragments
after everything else for all the ports. Useful in particular for arm,
where t-bpabi resets LIB2ADDEH on purpose.

Tested together with the other patches in the series. We have also
had good c++ and libstdc++ test results on gcc-8 with this, associated
with additional changes that we haven't yet ported to more recent
versions of gcc (porting in progress).

Olivier

2019-11-12  Corentin Gay  
Jerome Lambourg  
Olivier Hainque  

libgcc/

* config/t-gthr-vxworks: New file, add all the gthr-vxworks
sources to LIB2ADDEH.
* config/t-vxworks: Remove adjustments to LIB2ADDEH.
* config/t-vxworks7: Likewise.

* config.host: Append a block at the end of the file to add the
t-gthr files to the tmake_file list for VxWorks after everything
else.

* config/vxlib.c: Rename as gthr-vxworks.c.
* config/vxlib-tls.c: Rename as gthr-vxworks-tls.c.

* config/gthr-vxworks.h: Simplify a few comments.  Expose a TAS
API and a basic error checking API, both internal.  Simplify the
__gthread_once_t type definition and initializers.  Add sections
for condition variables support and for the C++0x thread support,
conditioned against Vx653 for the latter.

* config/gthr-vxworks.c (__gthread_once): Simplify comments and
implementation, leveraging the TAS internal API.
* config/gthr-vxworks-tls.c: Introduce an internal TLS data access
API, leveraging the general availability of TLS services in VxWorks7
post SR6xxx.
(__gthread_getspecific, __gthread_setspecific): Use it.
(tls_delete_hook): Likewise, and simplify the enter/leave dtor logic.
* config/gthr-vxworks-cond.c: New file.  GTHREAD_COND variable
support based on VxWorks primitives.
* config/gthr-vxworks-thread.c: New file.  GTHREAD_CXX0X support
based on VxWorks primitives.



0003-Improve-the-thread-support-for-VxWorks.patch
Description: Binary data




[patch 4/7] Update the libgcc support for VxWorks AE/653

2019-11-14 Thread Olivier Hainque
Hello,

This change first fixes a glitch in the VxWorks AE/653 configuration
settings, missing a Makefile fragment to pass the expected compilation
flags when building libgcc.

It also adds a t-gthr-vxworksae fragment for the
gthreads support, specialized from other variants as we
haven't implemented the cxx0x part for 653 yet.

We have been using this with gcc-8 based toolchains for AE
for about a year now. I have verified that a few gcc-9 based
ports for regular VxWorks still operate properly with this,
and I checked that I could build a powerpc-vxworks6 toolchain
with this patch on top of mainline sources.

Olivier

2019-11-12  Olivier Hainque  

libgcc/

* config/t-gthr-vxworksae: New file, add all the gthr-vxworks
sources except the cxx0x support to LIB2ADDEH.  We don't support
cxx0x on AE/653.
* config/t-vxworksae: New file.
* config.host: Handle *-*-vxworksae: Add the two aforementioned
Makefile fragment files at their expected position in the tmake_file
list, in accordance with what is done for other VxWorks variants.



0004-Update-the-libgcc-support-for-VxWorks-AE-653.patch
Description: Binary data


Re: Fix ICE when inlining into function containing polymorphic call

2019-11-14 Thread Jan Hubicka
> Hi,
> 
> On Wed, Nov 13 2019, Jan Hubicka wrote:
> > Hi,
> > the testcase causes inline context cache to go out of sync because I
> > forgot to update used flags of parameters in one path of
> > update_indirect_edges_after_inlining.
> >
> > While debugging it I also added better consistency check to
> > ipa-inline-analysis and turned ipa-inline test from ifdef to -fchecking.
> > This uncovered yet another missed upate in recursive inliner.
> >
> > Bootstrapped/regtested x86_64-linux, comitted.
> >
> > PR c++/92421
> > * ipa-prop.c (update_indirect_edges_after_inlining):
> > Mark parameter as used.
> > * ipa-inline.c (recursive_inlining): Reset node cache
> > after inlining.
> > (inline_small_functions): Remove checking ifdef.
> > * ipa-inline-analysis.c (do_estimate_edge_time): Verify
> > cache consistency.
> > * g++.dg/torture/pr92421.C: New testcase.
> > Index: ipa-prop.c
> > ===
> > --- ipa-prop.c  (revision 278151)
> > +++ ipa-prop.c  (working copy)
> > @@ -3537,6 +3537,11 @@ update_indirect_edges_after_inlining (st
> >   if (ici->polymorphic
> >   && !ipa_get_jf_ancestor_type_preserved (jfunc))
> > ici->vptr_changed = true;
> > + ipa_set_param_used_by_indirect_call (new_root_info,
> > +  ici->param_index, true);
> > + if (ici->polymorphic)
> > +   ipa_set_param_used_by_polymorphic_call (new_root_info,
> > +   ici->param_index, true);
> > }
> 
> 
> 
> Interesting, you have this exact hunk already in the patch introducing
> the new param flags (message id
> id:20191103224712.ndzyxu6cn3jt3...@kam.mff.cuni.cz or
> https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00077.html).  I did
> actually check it was there, even if only yesterday evening, but I did :-)
> 
> And I can also see the code already in my Monday checkout (r278047).  So
> I guess you must have actually removed it by accident in the meantime?

Well, it is there twice - once for PASS_THROUGH and here for ANCESTOR.
But it is quite possible that I had both (since I also had verification
at the place when originally doing the patch) and lost it on way to
mainline.

Honza

> 
> Martin


Re: [PATCH 2/2] gdbinit.in: fix wrong reference to function argument

2019-11-14 Thread Segher Boessenkool
On Thu, Nov 14, 2019 at 09:45:28AM +0300, Konstantin Kharlamov wrote:
> On Wed, Nov 13, 2019 at 15:23, Jason Merrill  
> wrote:
> >On Wed, Nov 13, 2019 at 6:39 AM Segher Boessenkool 
> > wrote:
> >> There are users.  There are users who have been used to this 
> >>behaviour
> >> for many many many years.
> >>
> >> People just do (say I have an "rtx insn"):
> >>   p insn
> >>   pr
> >
> >Indeed.  I use this constantly.
> 
> Thanks everyone for answers. No, you don't have to type parentheses. 
> Gdb has it like in Haskell, i.e. arguments are separated by just 
> whitespace. So you type `pr insn`
> 
> You know what, I came up with an alternative solution that won't break 
> anyone's workflow nor confuse newbies: I can add a check for the number 
> of arguments, and to branch on that to use either $ or $arg0.
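
A hedged sketch of that idea in gdbinit command syntax, assuming (as in the
current gdbinit.in) that pr wraps debug_rtx; this is an illustration, not the
actual follow-up patch:

define pr
  if $argc == 0
    call debug_rtx ($)
  else
    call debug_rtx ($arg0)
  end
end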

That sounds great!  Thank you :-)


Segher


Re: [gomp4.1] Support #pragma omp target {enter,exit} data

2019-11-14 Thread Julian Brown
On Thu, 14 Nov 2019 12:08:45 +0100
Thomas Schwinge  wrote:

> Hi!
> 
> In context of reviewing Julian's "OpenACC reference count overhaul",
> I'm generally reviewing (also known as: trying to understand) the
> libgomp OpenMP 'target' "refcount"ing , and I noticed something
> strange (?):
> 
> On 2015-07-30T22:44:33+0300, Ilya Verbin  wrote:
> > make check-target-libgomp passed.  ok?  
> 
> (This eventually got into trunk in r228777 "Merge from
> gomp-4_1-branch to trunk".)
> 
> > libgomp/  
> 
> > * target.c  
> 
> > (gomp_offload_image_to_device): Set tgt's refcount to
> > infinity.  
> 
> > --- a/libgomp/target.c
> > +++ b/libgomp/target.c  
> 
> > @@ -794,7 +807,7 @@ gomp_offload_image_to_device (struct
> > gomp_device_descr *devicep, /* Insert host-target address mapping
> > into splay tree.  */ struct target_mem_desc *tgt = gomp_malloc
> > (sizeof (*tgt)); tgt->array = gomp_malloc ((num_funcs + num_vars) *
> > sizeof (*tgt->array));
> > -  tgt->refcount = 1;
> > +  tgt->refcount = REFCOUNT_INFINITY;
> >tgt->tgt_start = 0;
> >tgt->tgt_end = 0;
> >tgt->to_free = NULL;  
> 
> I had understood that 'REFCOUNT_INFINITY' is only meant to be used for
> the 'refcount' in 'struct splay_tree_key_s', but here it's used for
> the 'refcount' in 'struct target_mem_desc'.  However, all the other
> 'struct target_mem_desc' 'refcount' handling doesn't seem to take
> care of the special 'REFCOUNT_INFINITY' value.
> 
> This might not be an actually observable bug (I have not verified,
> have not tried to construct a test case), but should this be changed
> anyway? (Back to 'tgt->refcount = 1'; not yet tested?)

These function- or variable-mapping blocks will never interact with the
rest of the reference-counting machinery, I don't think, so it's
possibly a bit weird but it's unlikely to cause a problem in practice.
Just IMO.

Julian


[PATCH] Add Optimization keyword for param_max_inline_insns_auto param.

2019-11-14 Thread Martin Liška

On 11/13/19 2:36 PM, Richard Biener wrote:

Hmm, can you please - as exercise - add Optimization only for
the "formerly" _o2 params you remove in the other patch to see
if with this you indeed get at a NOP effect?


Sure, there's a patch that removed max-inline-insns-auto-O2.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
I see the same backtrace change in libsanitizer as in the original
patch series.

Ready to be installed?
Thanks,
Martin
From 7a46cb4d5629e556cb2a17af6b83069dfc90 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 14 Nov 2019 10:56:57 +0100
Subject: [PATCH] Add Optimization keyword for param_max_inline_insns_auto
 param.

gcc/ChangeLog:

2019-11-14  Martin Liska  

	* ipa-cp.c (devirtualization_time_bonus): Use opt_for_fn
	of a callee to get value of the param.
	* ipa-inline.c (inline_insns_auto): Use proper
	opt_for_fn.
	* opts.c (maybe_default_option): Do not overwrite param
	value if optimization level does not match.  Note that
	params usually have default value set via Init() keyword.
	* params.opt: Remove -param=max-inline-insns-auto-O2.
	* cif-code.def (MAX_INLINE_INSNS_AUTO_O2_LIMIT): Remove.
	* doc/invoke.texi: Remove documentation of
	max-inline-insns-auto-O2.

gcc/testsuite/ChangeLog:

2019-11-14  Martin Liska  

	* c-c++-common/asan/memcmp-1.c: Update expected backtrace.
---
 gcc/cif-code.def   |  2 --
 gcc/doc/invoke.texi|  7 +--
 gcc/ipa-cp.c   |  8 +---
 gcc/ipa-inline.c   | 18 --
 gcc/opts.c |  6 +-
 gcc/params.opt |  6 +-
 gcc/testsuite/c-c++-common/asan/memcmp-1.c |  4 ++--
 7 files changed, 18 insertions(+), 33 deletions(-)

diff --git a/gcc/cif-code.def b/gcc/cif-code.def
index a154f24f13d..b4403c96247 100644
--- a/gcc/cif-code.def
+++ b/gcc/cif-code.def
@@ -74,8 +74,6 @@ DEFCIFCODE(MAX_INLINE_INSNS_SINGLE_O2_LIMIT, CIF_FINAL_NORMAL,
 	   N_("--param max-inline-insns-single-O2 limit reached"))
 DEFCIFCODE(MAX_INLINE_INSNS_AUTO_LIMIT, CIF_FINAL_NORMAL,
 	   N_("--param max-inline-insns-auto limit reached"))
-DEFCIFCODE(MAX_INLINE_INSNS_AUTO_O2_LIMIT, CIF_FINAL_NORMAL,
-	   N_("--param max-inline-insns-auto-O2 limit reached"))
 DEFCIFCODE(INLINE_UNIT_GROWTH_LIMIT, CIF_FINAL_NORMAL,
 	   N_("--param inline-unit-growth limit reached"))
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 69f057e7a12..e5cb0a3242b 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -11283,16 +11283,11 @@ applied. In other cases @option{max-inline-insns-single-O2} is applied.
 
 
 @item max-inline-insns-auto
-@item max-inline-insns-auto-O2
 When you use @option{-finline-functions} (included in @option{-O3}),
 a lot of functions that would otherwise not be considered for inlining
 by the compiler are investigated.  To those functions, a different
 (more restrictive) limit compared to functions declared inline can
-be applied.
-
-For functions compiled with optimization levels
-@option{-O3} and @option{-Ofast} parameter @option{max-inline-insns-auto} is
-applied. In other cases @option{max-inline-insns-auto-O2} is applied.
+be applied (@option{--param max-inline-insns-auto}).
 
 @item max-inline-insns-small
 This is bound applied to calls which are considered relevant with
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index f0d354b3704..86c625355b6 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -2936,11 +2936,13 @@ devirtualization_time_bonus (struct cgraph_node *node,
   int size = ipa_size_summaries->get (callee)->size;
   /* FIXME: The values below need re-considering and perhaps also
 	 integrating into the cost metrics, at lest in some very basic way.  */
-  if (size <= param_max_inline_insns_auto / 4)
+  int max_inline_insns_auto
+	= opt_for_fn (callee->decl, param_max_inline_insns_auto);
+  if (size <= max_inline_insns_auto / 4)
 	res += 31 / ((int)speculative + 1);
-  else if (size <= param_max_inline_insns_auto / 2)
+  else if (size <= max_inline_insns_auto / 2)
 	res += 15 / ((int)speculative + 1);
-  else if (size <= param_max_inline_insns_auto
+  else if (size <= max_inline_insns_auto
 	   || DECL_DECLARED_INLINE_P (callee->decl))
 	res += 7 / ((int)speculative + 1);
 }
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 78ec0ec685f..effb59784a3 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -417,20 +417,10 @@ inline_insns_single (cgraph_node *n, bool hint)
 static int
 inline_insns_auto (cgraph_node *n, bool hint)
 {
-  if (opt_for_fn (n->decl, optimize) >= 3)
-{
-  if (hint)
-	return param_max_inline_insns_auto
-	   * param_inline_heuristics_hint_percent / 100;
-  return param_max_inline_insns_auto;
-}
-  else
-{
-  if (hint)
-	return param_max_inline_insns_auto_o2
-	   * param_inline_heuristics_hint_percent_o2 / 100;
-  return param_max_inline_insns_au

[PATCH] Remove dead code in switch conv pass.

2019-11-14 Thread Martin Liška

Hi.

The patch is a clean up of unused code.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
I'm going to install the patch.

Thanks,
Martin

gcc/ChangeLog:

2019-11-14  Martin Liska  

* tree-switch-conversion.c (switch_conversion::switch_conversion):
Do not initialize m_other_count.
(switch_conversion::collect): Do not count m_default_count and
m_other_count as we use frequencies for edges.
* tree-switch-conversion.h: Remove m_default_count and m_other_count.
---
 gcc/tree-switch-conversion.c | 6 +-
 gcc/tree-switch-conversion.h | 6 --
 2 files changed, 1 insertion(+), 11 deletions(-)


diff --git a/gcc/tree-switch-conversion.c b/gcc/tree-switch-conversion.c
index af4fd5e33bd..e741f56b520 100644
--- a/gcc/tree-switch-conversion.c
+++ b/gcc/tree-switch-conversion.c
@@ -61,7 +61,7 @@ using namespace tree_switch_conversion;
 
 /* Constructor.  */
 
-switch_conversion::switch_conversion (): m_final_bb (NULL), m_other_count (),
+switch_conversion::switch_conversion (): m_final_bb (NULL),
   m_constructors (NULL), m_default_values (NULL),
   m_arr_ref_first (NULL), m_arr_ref_last (NULL),
   m_reason (NULL), m_default_case_nonstandard (false), m_cfg_altered (false)
@@ -89,10 +89,6 @@ switch_conversion::collect (gswitch *swtch)
   e_default = gimple_switch_default_edge (cfun, swtch);
   m_default_bb = e_default->dest;
   m_default_prob = e_default->probability;
-  m_default_count = e_default->count ();
-  FOR_EACH_EDGE (e, ei, m_switch_bb->succs)
-if (e != e_default)
-  m_other_count += e->count ();
 
   /* Get upper and lower bounds of case values, and the covered range.  */
   min_case = gimple_switch_label (swtch, 1);
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index c58bccea7f1..a0639fc5477 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -819,12 +819,6 @@ public:
   /* The probability of the default edge in the replaced switch.  */
   profile_probability m_default_prob;
 
-  /* The count of the default edge in the replaced switch.  */
-  profile_count m_default_count;
-
-  /* Combined count of all other (non-default) edges in the replaced switch.  */
-  profile_count m_other_count;
-
   /* Number of phi nodes in the final bb (that we'll be replacing).  */
   int m_phi_count;
 



Re: [PATCH] Add Optimization keyword for param_max_inline_insns_auto param.

2019-11-14 Thread Richard Biener
On Thu, Nov 14, 2019 at 1:00 PM Martin Liška  wrote:
>
> On 11/13/19 2:36 PM, Richard Biener wrote:
> > Hmm, can you please - as exercise - add Optimization only for
> > the "formerly" _o2 params you remove in the other patch to see
> > if with this you indeed get at a NOP effect?
>
> Sure, there's a patch that removed max-inline-insns-auto-O2.
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> I see the same backtrace change in libsanitizer as in the original
> patch series.
>
> Ready to be installed?

OK.

Richard.

> Thanks,
> Martin


[PATCH] Add one more pass_convert_switch late.

2019-11-14 Thread Martin Liška

Hi.

As mentioned in the PR, the patch adds one more late pass_convert_switch
just before switch lowering.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-11-13  Martin Liska  

PR tree-optimization/92005
* passes.def: Add pass_convert_switch late.
* tree-switch-conversion.c: Define clone
method.

gcc/testsuite/ChangeLog:

2019-11-13  Martin Liska  

PR tree-optimization/92005
* g++.dg/tree-ssa/pr92005.C: New test.
* gcc.dg/tree-ssa/cswtch-2.c: Update dump file name.
* gcc.dg/tree-ssa/cswtch-3.c: Likewise.
* gcc.dg/tree-ssa/cswtch-4.c: Likewise.
* gcc.dg/tree-ssa/cswtch-5.c: Likewise.
* gcc.dg/tree-ssa/cswtch.c: Likewise.
* gcc.dg/tree-ssa/pr36881.c: Likewise.
* gcc.dg/tree-ssa/pr84436-1.c: Likewise.
* gcc.dg/tree-ssa/pr84436-2.c: Likewise.
* gcc.dg/tree-ssa/pr84436-3.c: Likewise.
* gcc.dg/tree-ssa/pr84436-4.c: Likewise.
* gcc.dg/tree-ssa/pr84436-5.c: Likewise.
* gcc.dg/tree-ssa/pr88753.c: Likewise.
* gcc.target/i386/pr45830.c: Likewise.
---
 gcc/passes.def|  1 +
 gcc/testsuite/g++.dg/tree-ssa/pr92005.C   | 50 +++
 gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c  |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c  |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/cswtch-4.c  |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/cswtch-5.c  |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/cswtch.c|  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr36881.c   |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c |  4 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr88753.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr45830.c   |  2 +-
 gcc/tree-switch-conversion.c  |  1 +
 16 files changed, 69 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr92005.C


diff --git a/gcc/passes.def b/gcc/passes.def
index 798a391bd35..48204af1009 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -305,6 +305,7 @@ along with GCC; see the file COPYING3.  If not see
   POP_INSERT_PASSES ()
   NEXT_PASS (pass_simduid_cleanup);
   NEXT_PASS (pass_lower_vector_ssa);
+  NEXT_PASS (pass_convert_switch);
   NEXT_PASS (pass_lower_switch);
   NEXT_PASS (pass_cse_reciprocals);
   NEXT_PASS (pass_reassoc, false /* insert_powi_p */);
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr92005.C b/gcc/testsuite/g++.dg/tree-ssa/pr92005.C
new file mode 100644
index 000..ce228b0a20c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr92005.C
@@ -0,0 +1,50 @@
+/* PR tree-optimization/92005 */
+/* { dg-options "-O2 -fdump-tree-optimized -std=c++17" } */
+
+template<class... Ts> struct overloaded : Ts... { using Ts::operator()...; };
+template<class... Ts> overloaded(Ts...) -> overloaded<Ts...>;
+
+struct T0 {};
+struct T1 {};
+struct T2 {};
+struct T3 {};
+struct T4 {};
+
+struct variant
+{
+unsigned index_;
+
+union
+{
+T0 t0_;
+T1 t1_;
+T2 t2_;
+T3 t3_;
+T4 t4_;
+};
+};
+
+template<class F> int visit( F f, variant const& v )
+{
+switch( v.index_ )
+{
+case 0: return f( v.t0_ );
+case 1: return f( v.t1_ );
+case 2: return f( v.t2_ );
+case 3: return f( v.t3_ );
+case 4: return f( v.t4_ );
+default: __builtin_unreachable();
+}
+}
+
+int do_visit(variant const& v) {
+ return visit(overloaded{
+[](T0 val) { return 3; },
+[](T1 val) { return 5; },
+[](T2 val) { return 8; },
+[](T3 val) { return 9; },
+[](T4 val) { return 10; }
+}, v);
+}
+
+/* { dg-final { scan-tree-dump "CSWTCH" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c
index 87ed7bba517..44e25ebbac1 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c
@@ -17,4 +17,4 @@ int h1 (X x)
 }
 }
 
-/* { dg-final { scan-tree-dump-times "CSWTCH" 0 "switchconv" } } */
+/* { dg-final { scan-tree-dump-times "CSWTCH" 0 "switchconv1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c
index b983c8fbe92..25c2e75c5a2 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c
@@ -326,5 +326,5 @@ main ()
   T (f5 (231, 0), 2, { 80, 0 });
 }
 
-/* { dg-final { scan-tree-dump-times "Switch converted" 5 "switchconv" } } */
-/* { dg-final { scan-tree-dump-times "= CSWTCH" 8 "switchconv" } } */
+/* { dg-final { scan-tree-dump-times "Switch converted" 5 "switchconv1" } } */
+/* { dg-final { scan-tree-dump-times "= CSWTCH" 8 "switchconv1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/cswtch-4.c b/gcc/

Re: [PATCH] Add one more pass_convert_switch late.

2019-11-14 Thread Richard Biener
On Thu, Nov 14, 2019 at 1:06 PM Martin Liška  wrote:
>
> Hi.
>
> As mentioned in the PR, the patch adds one more late pass_convert_switch
> just before switch lowering.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

Hmm.  I was thinking of moving the pass instead of adding another one.
What's the reason to run switch-conversion during early optimization again?

But then it's probably not too bad... (and somehow I'd still like to see
switch-conversion, switch lowering and if-to-switch be "integrated"
somehow, analyzing the IL and then outputting optimized if/switch
according to the same cost metric).

Richard.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-11-13  Martin Liska  
>
> PR tree-optimization/92005
> * passes.def: Add pass_convert_switch late.
> * tree-switch-conversion.c: Define clone
> method.
>
> gcc/testsuite/ChangeLog:
>
> 2019-11-13  Martin Liska  
>
> PR tree-optimization/92005
> * g++.dg/tree-ssa/pr92005.C: New test.
> * gcc.dg/tree-ssa/cswtch-2.c: Update dump file name.
> * gcc.dg/tree-ssa/cswtch-3.c: Likewise.
> * gcc.dg/tree-ssa/cswtch-4.c: Likewise.
> * gcc.dg/tree-ssa/cswtch-5.c: Likewise.
> * gcc.dg/tree-ssa/cswtch.c: Likewise.
> * gcc.dg/tree-ssa/pr36881.c: Likewise.
> * gcc.dg/tree-ssa/pr84436-1.c: Likewise.
> * gcc.dg/tree-ssa/pr84436-2.c: Likewise.
> * gcc.dg/tree-ssa/pr84436-3.c: Likewise.
> * gcc.dg/tree-ssa/pr84436-4.c: Likewise.
> * gcc.dg/tree-ssa/pr84436-5.c: Likewise.
> * gcc.dg/tree-ssa/pr88753.c: Likewise.
> * gcc.target/i386/pr45830.c: Likewise.
> ---
>   gcc/passes.def|  1 +
>   gcc/testsuite/g++.dg/tree-ssa/pr92005.C   | 50 +++
>   gcc/testsuite/gcc.dg/tree-ssa/cswtch-2.c  |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/cswtch-3.c  |  4 +-
>   gcc/testsuite/gcc.dg/tree-ssa/cswtch-4.c  |  4 +-
>   gcc/testsuite/gcc.dg/tree-ssa/cswtch-5.c  |  4 +-
>   gcc/testsuite/gcc.dg/tree-ssa/cswtch.c|  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr36881.c   |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr84436-1.c |  4 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr84436-2.c |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr84436-3.c |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr84436-4.c |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr84436-5.c |  2 +-
>   gcc/testsuite/gcc.dg/tree-ssa/pr88753.c   |  2 +-
>   gcc/testsuite/gcc.target/i386/pr45830.c   |  2 +-
>   gcc/tree-switch-conversion.c  |  1 +
>   16 files changed, 69 insertions(+), 17 deletions(-)
>   create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr92005.C
>
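For readers skimming the thread: the new pr92005.C test reduces to a dense dispatch on the variant index, like the do_visit chain at the top of this message.  A minimal C sketch of what the late switch-conversion pass is expected to do with such code (illustrative only; neither the names nor the lowered form are taken from the actual dump files):

/* A dense dispatch on a small index, as left behind by earlier cleanups.  */
int dispatch (int index)
{
  switch (index)
    {
    case 0: return 3;
    case 1: return 5;
    case 2: return 8;
    case 3: return 9;
    case 4: return 10;
    default: return -1;
    }
}

/* After switch conversion the body is effectively a table lookup, which is
   what the "CSWTCH" scan in the dg-final directive above checks for:

     static const int CSWTCH[5] = { 3, 5, 8, 9, 10 };
     return (unsigned) index <= 4 ? CSWTCH[index] : -1;  */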
>


Re: [PATCH] Split X86_TUNE_AVX128_OPTIMAL into X86_TUNE_AVX256_SPLIT_REGS and X86_TUNE_AVX128_OPTIMAL

2019-11-14 Thread Richard Biener
On Tue, Nov 12, 2019 at 11:35 AM Hongtao Liu  wrote:
>
> Hi:
>   As mentioned in https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00832.html
> > So yes, it's poorly named.  A preparatory patch to clean this up
> > (and maybe split it into TARGET_AVX256_SPLIT_REGS and TARGET_AVX128_OPTIMAL)
> > would be nice.
>
>   Bootstrap and regression test for i386 backend is ok.
>   Ok for trunk?

It looks OK to me; please give the x86 maintainers a day to comment, otherwise OK.

Thanks,
Richard.

> Changelog
> gcc/
> PR target/92448
> * config/i386/i386-expand.c (ix86_expand_set_or_cpymem):
> Replace TARGET_AVX128_OPTIMAL with TARGET_AVX256_SPLIT_REGS.
> * config/i386/i386-option.c (ix86_vec_cost): Ditto.
> (ix86_reassociation_width): Ditto.
> * config/i386/i386-options.c (ix86_option_override_internal):
> Replace TARGET_AVX128_OPTIMAL with
> ix86_tune_features[X86_TUNE_AVX128_OPTIMAL]
> * config/i386/i386.h (TARGET_AVX256_SPLIT_REGS): New macro.
> (TARGET_AVX128_OPTIMAL): Deleted.
> * config/i386/x86-tune.def (X86_TUNE_AVX256_SPLIT_REGS): New
> DEF_TUNE.
>
> --
> BR,
> Hongtao
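For context, the new i386.h macro named in the ChangeLog presumably follows the usual ix86_tune_features[] accessor pattern already visible in the entry above; a sketch of its assumed shape, not a copy of the committed definition:

#define TARGET_AVX256_SPLIT_REGS \
  ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]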


Re: [16/n] Apply maximum nunits for BB SLP

2019-11-14 Thread Richard Biener
On Tue, Nov 5, 2019 at 3:09 PM Richard Sandiford
 wrote:
>
> Richard Biener  writes:
> > On Tue, Oct 29, 2019 at 6:05 PM Richard Sandiford
> >  wrote:
> >>
> >> The BB vectoriser picked vector types in the same way as the loop
> >> vectoriser: it picked a vector mode/size for the region and then
> >> based all the vector types off that choice.  This meant we could
> >> end up trying to use vector types that had too many elements for
> >> the group size.
> >>
> >> The main part of this patch is therefore about passing the SLP
> >> group size down to routines like get_vectype_for_scalar_type and
> >> ensuring that each vector type in the SLP tree is chosen wrt the
> >> group size.  That part in itself is pretty easy and mechanical.
> >>
> >> The main warts are:
> >>
> >> (1) We normally pick a STMT_VINFO_VECTYPE for data references at an
> >> early stage (vect_analyze_data_refs).  However, nothing in the
> >> BB vectoriser relied on this, or on the min_vf calculated from it.
> >> I couldn't see anything other than vect_recog_bool_pattern that
> >> tried to access the vector type before the SLP tree is built.
> >
> > So can you not set STMT_VINFO_VECTYPE for data refs with BB vectorization
> > then?
>
> Yeah, the patch stops us from setting it during vect_analyze_data_refs.
> We still need to set it later when building the SLP tree, just like
> we do for other statements.
>
> >> (2) It's possible for the same statement to be used in the groups of
> >> different sizes.  Taking the group size into account meant that
> >> we could try to pick different vector types for the same statement.
> >
> > That only happens when we have multiple SLP instances though
> > (entries into the shared SLP graph).
>
> Yeah.
>
> > It probably makes sense to keep handling SLP instances sharing stmts
> > together for costing reasons but one issue is that for disjunct pieces
> > (in the same BB) disqualifying one cost-wise disqualifies all.  So at
> > some point during analysis (which should eventually cover more than a
> > single BB) we want to split the graph.  It probably doesn't help the
> > above case.
>
> Yeah, sounds like there are two issues: one with sharing stmt_vec_infos
> between multiple SLP nodes, and one with sharing SLP child nodes between
> multiple parent nodes.  (2) comes from the first, but I guess failing
> based on costs is more about the second.
>
> >> This problem should go away with the move to doing everything on
> >> SLP trees, where presumably we would attach the vector type to the
> >> SLP node rather than the stmt_vec_info.  Until then, the patch just
> >> uses a first-come, first-served approach.
> >
> > Yeah, I ran into not having vectype on SLP trees with invariants/externals
> > as well.  I suppose you didn't try simply adding that to the SLP tree
> > and pushing/popping it like we push/pop the def type?
>
> No, didn't try that.  Maybe it would be worth a go, but it seems like it
> could be a rabbit hole.
>
> > Assigning the vector types should really happen in vectorizable_*
> > and not during SLP build itself btw.
>
> Agree we need to improve the way this is handled, but delaying it
> to vectorizable_* sounds quite late.  Maybe it should be a more global
> decision, since the vector types for each vectorizable_* have to be
> compatible and it's not obvious which routine should get first choice.
>
> > Your update-all-shared-vectypes thing looks quadratic to me :/
>
> Should be amortised linear.  The statements in a DR group always
> have the same vectype.  When we want to change the vector type
> of one statement, we change it for all statements if possible
> or fail if we can't.

OK, let's go for it.

Thanks,
Richard.

> Thanks,
> Richard
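For readers outside the thread, a plain-C illustration of the group-size problem being discussed (a hypothetical example, not taken from the patch or its testcases):

/* The store group below has only two elements, so the BB vectoriser wants a
   2-lane vector type here even if the region's preferred vector mode could
   hold more lanes; choosing the type per-region can over-size it.  */
void
f (long *restrict out, long *restrict in)
{
  out[0] = in[0] + 1;   /* SLP group of size 2 */
  out[1] = in[1] + 2;
}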


[patch 5/7] Base support for vxworks 7 on aarch64

2019-11-14 Thread Olivier Hainque
Hello,

This change introduces VxWorks support for aarch64, which
we have tested in-house through a number of C/Ada testsuite runs
for a mix of kernel and RTP configurations, with both gcc-8 and
gcc-9 based toolchains.

VxWorks uses r18 to hold a pointer to the current task TCB, which
conflicts with the current selection of r18 as the static chain register
in the common back-end part.

I have proposed

  https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00523.html

as a possible way to circumvent such conflicts and currently just
#define TARGET_OS_USES_R18 1 in the main VxWorks configuration file.

I'll fall back to an entirely VxWorks-specific workaround if necessary.

The patches apply unchanged on mainline and I have verified that we
can still build a VxWorks configuration there with this patch included,
in addition to the testing mentioned above.

Olivier

2019-11-14  Doug Rupp  
Olivier Hainque  
Jerome Lambourg  

gcc/
* config.gcc: Handle aarch64*-wrs-vxworks7*.
* config/aarch64/aarch64-vxworks.h: New file.
* config/aarch64/t-aarch64-vxworks: New file.

libgcc/
* config.host: Handle aarch64*-wrs-vxworks7*.




0005-Base-support-for-vxworks-7-on-aarch64.patch
Description: Binary data





Re: [17/17] Extend can_duplicate_and_interleave_p to mixed-size vectors

2019-11-14 Thread Richard Biener
On Tue, Nov 5, 2019 at 9:45 PM Richard Sandiford
 wrote:
>
> This patch makes can_duplicate_and_interleave_p cope with mixtures of
> vector sizes, by using queries based on get_vectype_for_scalar_type
> instead of directly querying GET_MODE_SIZE (vinfo->vector_mode).
>
> int_mode_for_size is now the first check we do for a candidate mode,
> so it seemed better to restrict it to MAX_FIXED_MODE_SIZE.  This avoids
> unnecessary work and avoids trying to create scalar types that the
> target might not support.
>
> This is the final patch in the series.  As before, each patch was tested individually
> on aarch64-linux-gnu and the series as a whole on x86_64-linux-gnu.

OK.

Thanks,
Richard.

>
> 2019-11-04  Richard Sandiford  
>
> gcc/
> * tree-vectorizer.h (can_duplicate_and_interleave_p): Take an
> element type rather than an element mode.
> * tree-vect-slp.c (can_duplicate_and_interleave_p): Likewise.
> Use get_vectype_for_scalar_type to query the natural types
> for a given element type rather than basing everything on
> GET_MODE_SIZE (vinfo->vector_mode).  Limit int_mode_for_size
> query to MAX_FIXED_MODE_SIZE.
> (duplicate_and_interleave): Update call accordingly.
> * tree-vect-loop.c (vectorizable_reduction): Likewise.
>
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h   2019-11-05 11:08:12.521631453 +
> +++ gcc/tree-vectorizer.h   2019-11-05 11:14:42.786884473 +
> @@ -1779,8 +1779,7 @@ extern void vect_get_slp_defs (slp_tree,
>  extern bool vect_slp_bb (basic_block);
>  extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
>  extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
> -extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int,
> -   machine_mode,
> +extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, tree,
> unsigned int * = NULL,
> tree * = NULL, tree * = NULL);
>  extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
> Index: gcc/tree-vect-slp.c
> ===
> --- gcc/tree-vect-slp.c 2019-11-05 11:08:12.517631481 +
> +++ gcc/tree-vect-slp.c 2019-11-05 11:14:42.786884473 +
> @@ -265,7 +265,7 @@ vect_get_place_in_interleaving_chain (st
>return -1;
>  }
>
> -/* Check whether it is possible to load COUNT elements of type ELT_MODE
> +/* Check whether it is possible to load COUNT elements of type ELT_TYPE
> using the method implemented by duplicate_and_interleave.  Return true
> if so, returning the number of intermediate vectors in *NVECTORS_OUT
> (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
> @@ -273,26 +273,37 @@ vect_get_place_in_interleaving_chain (st
>
>  bool
>  can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
> -   machine_mode elt_mode,
> -   unsigned int *nvectors_out,
> +   tree elt_type, unsigned int *nvectors_out,
> tree *vector_type_out,
> tree *permutes)
>  {
> -  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
> -  poly_int64 nelts;
> +  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, 
> count);
> +  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
> +return false;
> +
> +  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
> +  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
>unsigned int nvectors = 1;
>for (;;)
>  {
>scalar_int_mode int_mode;
>poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
> -  if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
> - && int_mode_for_size (elt_bits, 0).exists (&int_mode))
> +  if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
> {
> + /* Get the natural vector type for this SLP group size.  */
>   tree int_type = build_nonstandard_integer_type
> (GET_MODE_BITSIZE (int_mode), 1);
> - tree vector_type = build_vector_type (int_type, nelts);
> - if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
> -   {
> + tree vector_type
> +   = get_vectype_for_scalar_type (vinfo, int_type, count);
> + if (vector_type
> + && VECTOR_MODE_P (TYPE_MODE (vector_type))
> + && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
> +  GET_MODE_SIZE (base_vector_mode)))
> +   {
> + /* Try fusing consecutive sequences of COUNT / NVECTORS elements
> +together into elements of type INT_TYPE and using the result
> + 

Re: [PATCH 1/4] Preliminary m68k patches

2019-11-14 Thread Bernd Schmidt
On 11/13/19 9:03 PM, Jeff Law wrote:
> OK.  I'd actually recommend this go ahead and get installed.  My tester
> will bootstrap it overnight.

Alright, let me know how that turns out. What kind of machine do you
have for that?


Bernd


Re: Fix ICE when inlining into function containing polymorphic call

2019-11-14 Thread Martin Jambor
On Thu, Nov 14 2019, Jan Hubicka wrote:
>> On Wed, Nov 13 2019, Jan Hubicka wrote:
>> > Hi,
>> > the testcase causes inline context cache to go out of sync because I
>> > forgot to update used flags of parameters in one path of
>> > update_indirect_edges_after_inlining.
>> >
>> > While debugging it I also added better consistency check to
>> > ipa-inline-analysis and turned ipa-inline test from ifdef to -fchecking.
>> > This uncovered yet another missed update in the recursive inliner.
>> >
>> > Bootstrapped/regtested x86_64-linux, comitted.
>> >
>> >PR c++/92421
>> >* ipa-prop.c (update_indirect_edges_after_inlining):
>> >Mark parameter as used.
>> >* ipa-inline.c (recursive_inlining): Reset node cache
>> >after inlining.
>> >(inline_small_functions): Remove checking ifdef.
>> >* ipa-inline-analysis.c (do_estimate_edge_time): Verify
>> >cache consistency.
>> >* g++.dg/torture/pr92421.C: New testcase.
>> > Index: ipa-prop.c
>> > ===
>> > --- ipa-prop.c (revision 278151)
>> > +++ ipa-prop.c (working copy)
>> > @@ -3537,6 +3537,11 @@ update_indirect_edges_after_inlining (st
>> >  if (ici->polymorphic
>> >  && !ipa_get_jf_ancestor_type_preserved (jfunc))
>> >ici->vptr_changed = true;
>> > +ipa_set_param_used_by_indirect_call (new_root_info,
>> > + ici->param_index, true);
>> > +if (ici->polymorphic)
>> > +  ipa_set_param_used_by_polymorphic_call (new_root_info,
>> > +  ici->param_index, true);
>> >}
>> 
>> 
>> 
>> Interesting, you have this exact hunk already in the patch introducing
>> the new param flags (message id
>> id:20191103224712.ndzyxu6cn3jt3...@kam.mff.cuni.cz or
>> https://gcc.gnu.org/ml/gcc-patches/2019-11/msg00077.html).  I did
>> actually check it was there, even if only yesterday evening, but I did :-)
>> 
>> And I can also see the code already in my Monday checkout (r278047).  So
>> I guess you must have actually removed it by accident in the meantime?
>
> Well, it is there twice - once for PASS_THROUGH and here for ANCESTOR.
> But it is quite possible that I had both (since I also had verification
> at the place when originally doing the patch) and lost it on way to
> mainline.
>

Oh, I see.  That explains it then.

Thanks,

Martin


Re: [PR47785] COLLECT_AS_OPTIONS

2019-11-14 Thread Richard Biener
On Fri, Nov 8, 2019 at 3:35 AM Kugan Vivekanandarajah
 wrote:
>
> Hi Richard,
> Thanks for the review.
>
> On Tue, 5 Nov 2019 at 23:08, Richard Biener  
> wrote:
> >
> > On Tue, Nov 5, 2019 at 12:17 AM Kugan Vivekanandarajah
> >  wrote:
> > >
> > > Hi,
> > > Thanks for the review.
> > >
> > > On Tue, 5 Nov 2019 at 03:57, H.J. Lu  wrote:
> > > >
> > > > On Sun, Nov 3, 2019 at 6:45 PM Kugan Vivekanandarajah
> > > >  wrote:
> > > > >
> > > > > Thanks for the reviews.
> > > > >
> > > > >
> > > > > On Sat, 2 Nov 2019 at 02:49, H.J. Lu  wrote:
> > > > > >
> > > > > > On Thu, Oct 31, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Wed, 30 Oct 2019 at 03:11, H.J. Lu  wrote:
> > > > > > > >
> > > > > > > > On Sun, Oct 27, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > Hi Richard,
> > > > > > > > >
> > > > > > > > > Thanks for the review.
> > > > > > > > >
> > > > > > > > > On Wed, 23 Oct 2019 at 23:07, Richard Biener 
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > On Mon, Oct 21, 2019 at 10:04 AM Kugan Vivekanandarajah
> > > > > > > > > >  wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi Richard,
> > > > > > > > > > >
> > > > > > > > > > > Thanks for the pointers.
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > >
> > > > > > > > > > > On Fri, 11 Oct 2019 at 22:33, Richard Biener 
> > > > > > > > > > >  wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > On Fri, Oct 11, 2019 at 6:15 AM Kugan Vivekanandarajah
> > > > > > > > > > > >  wrote:
> > > > > > > > > > > > >
> > > > > > > > > > > > > Hi Richard,
> > > > > > > > > > > > > Thanks for the review.
> > > > > > > > > > > > >
> > > > > > > > > > > > > On Wed, 2 Oct 2019 at 20:41, Richard Biener 
> > > > > > > > > > > > >  wrote:
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > On Wed, Oct 2, 2019 at 10:39 AM Kugan 
> > > > > > > > > > > > > > Vivekanandarajah
> > > > > > > > > > > > > >  wrote:
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Hi,
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > As mentioned in the PR, attached patch adds 
> > > > > > > > > > > > > > > COLLECT_AS_OPTIONS for
> > > > > > > > > > > > > > > passing assembler options specified with -Wa, to 
> > > > > > > > > > > > > > > the link-time driver.
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > The proposed solution only works for uniform -Wa 
> > > > > > > > > > > > > > > options across all
> > > > > > > > > > > > > > > TUs. As mentioned by Richard Biener, supporting 
> > > > > > > > > > > > > > > non-uniform -Wa flags
> > > > > > > > > > > > > > > would require either adjusting partitioning 
> > > > > > > > > > > > > > > according to flags or
> > > > > > > > > > > > > > > emitting multiple object files  from a single 
> > > > > > > > > > > > > > > LTRANS CU. We could
> > > > > > > > > > > > > > > consider this as a follow up.
> > > > > > > > > > > > > > >
> > > > > > > > > > > > > > > Bootstrapped and regression tests on  
> > > > > > > > > > > > > > > arm-linux-gcc. Is this OK for trunk?
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > While it works for your simple cases it is unlikely 
> > > > > > > > > > > > > > to work in practice since
> > > > > > > > > > > > > > your implementation needs the assembler options be 
> > > > > > > > > > > > > > present at the link
> > > > > > > > > > > > > > command line.  I agree that this might be the way 
> > > > > > > > > > > > > > for people to go when
> > > > > > > > > > > > > > they face the issue but then it needs to be 
> > > > > > > > > > > > > > documented somewhere
> > > > > > > > > > > > > > in the manual.
> > > > > > > > > > > > > >
> > > > > > > > > > > > > > That is, with COLLECT_AS_OPTION (why singular?  I'd 
> > > > > > > > > > > > > > expected
> > > > > > > > > > > > > > COLLECT_AS_OPTIONS) available to cc1 we could 
> > > > > > > > > > > > > > stream this string
> > > > > > > > > > > > > > to lto_options and re-materialize it at link time 
> > > > > > > > > > > > > > (and diagnose mismatches
> > > > > > > > > > > > > > even if we like).
> > > > > > > > > > > > > OK. I will try to implement this. So the idea is if 
> > > > > > > > > > > > > we provide
> > > > > > > > > > > > > -Wa,options as part of the lto compile, this should 
> > > > > > > > > > > > > be available
> > > > > > > > > > > > > during link time. Like in:
> > > > > > > > > > > > >
> > > > > > > > > > > > > arm-linux-gnueabihf-gcc -march=armv7-a -mthumb -O2 
> > > > > > > > > > > > > -flto
> > > > > > > > > > > > > -Wa,-mimplicit-it=always,-mthumb -c test.c
> > > > > > > > > > > > > arm-linux-gnueabihf-gcc  -flto  test.o
> > > > > > > > > > > > >
> > > > > > > > > > > > > I am not sure where should we stream this. Currently, 
> > > > > > > > > > > > > cl_optimization
> > > > > > > > > > > > > has all the optimization flag provided for compiler 
> > > > > > > > > > > > > and it is
> > > > > > 

[patch 6/7] Housekeeping on TARGET_OS_CPP_BUILTINS for arm-vxworks

2019-11-14 Thread Olivier Hainque
Hello,

This change replaces uses of CPU by uses of _VX_CPU in
TARGET_OS_CPP_BUILTINS for arm-vxworks, as the latter form is
more widely recognized across VxWorks versions.

It also adds a case for armv8, supported by at least
recent versions of VxWorks 7.

Tested in house with builds and various testsuite runs
for both gcc-8 and gcc-9 based toolchains. Also sanity-checked
that I could build a VxWorks toolchain with this change on top
of mainline sources, where the patch applies unchanged.

Olivier

2019-11-14  Jerome Lambourg  

* config/arm/vxworks.h (TARGET_OS_CPP_BUILTINS): Use
_VX_CPU instead of CPU and handle arm_arch8.
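Since the patch itself is attached as binary data, here is a rough sketch of the shape of the change; the _VX_CPU value strings and the exact structure are assumptions, not copied from the attached vxworks.h:

/* Sketch only: switch from CPU=... to _VX_CPU=... and add an ARMv8 case.  */
#define TARGET_OS_CPP_BUILTINS()                        \
  do                                                    \
    {                                                   \
      if (arm_arch8)                                    \
        builtin_define ("_VX_CPU=ARMARCH8A");           \
      else                                              \
        builtin_define ("_VX_CPU=ARMARCH7");            \
      VXWORKS_OS_CPP_BUILTINS ();                       \
    }                                                   \
  while (0)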



0006-Housekeeping-on-TARGET_OS_CPP_BUILTINS-for-arm-vxwor.patch
Description: Binary data







Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-14 Thread Bernhard Reutner-Fischer
On Thu, 14 Nov 2019 10:41:25 +0100
Martin Liška  wrote:

> On 11/6/19 10:02 PM, Bernhard Reutner-Fischer wrote:
> > Also why do you punt on duplicate conditions like in
> >   
> >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/if-to-switch-4.c
> >> +int main(int argc, char **argv)
> >> +{
> >> +  if (argc == 1)
> >> +  else if (argc == 2)
> >> +  else if (argc == 3)
> >> +  else if (argc == 4)
> >> +  else if (argc == 1)
> >> +{  
> > This block is dead, isn't it. Why don't you just discard it but punt?
> >   
> 
> Hello.
> 
> After I moved the pass later in optimization pipeline, such dead conditions

nice

> are already gone. What's remaining are situations like:
> 
> if (argc >= 1 && argc <= 10)
> ...
> else if (argc >= 8 && argc <= 15)
> 
> which are overlapping intervals. I'm not planning to handle these in the first
> iteration of the patch.

yea. Later when we see a new interval overlapping old, we can adjust
the new to clamp to the intersection of old and new. We'd need to
generate one or more artificial intervals covering the delta interval
with the body of new if we find intervening previous intervals, which
complicates things more than a first attempt of a patch should
supposedly do, agree. One step at a time :)
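To make the overlap concrete, a plain-C sketch (not taken from the testsuite); because the first arm wins for the shared values, only the remainder of the second range can reach the second body, which is what a switch form has to preserve:

int classify (int argc)
{
  if (argc >= 1 && argc <= 10)
    return 1;
  else if (argc >= 8 && argc <= 15)   /* only 11..15 can reach this arm */
    return 2;
  return 0;
}

/* What an if-to-switch conversion would have to produce (GNU case ranges): */
int classify_as_switch (int argc)
{
  switch (argc)
    {
    case 1 ... 10:  return 1;
    case 11 ... 15: return 2;
    default:        return 0;
    }
}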

thanks for moving it later in the pipeline!
cheers,


[PATCH] Check suitability of spill register for mode

2019-11-14 Thread Kwok Cheung Yeung

Hello

Currently, when choosing a spill register, GCC just picks the first 
available register in the register class returned by the 
TARGET_SPILL_CLASS hook that doesn't conflict.


On AMD GCN this can cause problems as DImode values stored in SGPRs must 
start on an even register number and TImode values on a multiple of 4. 
This is enforced by defining TARGET_HARD_REGNO_MODE_OK to be false when 
this condition is not satisfied. However, assign_spill_hard_regs does 
not check TARGET_HARD_REGNO_MODE_OK, so it can assign an unsuitable hard 
register for the mode. I have fixed this by rejecting spill registers 
that do not satisfy TARGET_HARD_REGNO_MODE_OK for the largest mode of 
the spilt register.
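As an illustration of the constraint being enforced, a sketch of the alignment rule described above; this is not the actual gcn.c hook, and the SGPR_REGNO_P helper name (and ignoring the SGPR register-number base) are simplifications made for the sketch:

static bool
sketch_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  if (SGPR_REGNO_P (regno))
    {
      if (mode == DImode)
        return (regno & 1) == 0;   /* must start on an even SGPR */
      if (mode == TImode)
        return (regno & 3) == 0;   /* must start on a multiple of four */
    }
  return true;
}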


Built and tested for an AMD GCN target. This fixes failures in:

gcc.dg/vect/no-scevccp-outer-9.c
gcc.dg/vect/no-scevccp-outer-10.c

I have also ensured that the code bootstraps on x86_64, though it 
currently does not use spill registers.


Okay for trunk?

Kwok


2019-11-14  Kwok Cheung Yeung  

gcc/
* lra-spills.c (assign_spill_hard_regs): Check that the spill
register is suitable for the mode.
---
 gcc/lra-spills.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 54f76cc..8fbd3a8 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -283,7 +283,8 @@ assign_spill_hard_regs (int *pseudo_regnos, int n)
   for (k = 0; k < spill_class_size; k++)
{
  hard_regno = ira_class_hard_regs[spill_class][k];
- if (TEST_HARD_REG_BIT (eliminable_regset, hard_regno))
+ if (TEST_HARD_REG_BIT (eliminable_regset, hard_regno)
+ || !targetm.hard_regno_mode_ok (hard_regno, mode))
continue;
  if (! overlaps_hard_reg_set_p (conflict_hard_regs, mode, hard_regno))
break;
--
2.8.1


[PATCH v2 0/2] gdbinit.in fixes

2019-11-14 Thread Konstantin Kharlamov
A follow-up to the previous post. The first patch was approved by Jeff Law,
given that the changelog is fixed. The second one is a compromise that came up
in the discussion of the previous patchset.

Please add me to CC when replying, I'm not subscribed to the list.

Konstantin Kharlamov (2):
  gdbinit.in: call a function with "call", not "set"
  gdbinit.in: allow to pass function argument explicitly

 gcc/gdbinit.in | 116 -
 1 file changed, 96 insertions(+), 20 deletions(-)

-- 
2.24.0



[PATCH v2 1/2] gdbinit.in: call a function with "call", not "set"

2019-11-14 Thread Konstantin Kharlamov
Calling a function foo in gdb as "set foo()" results in a warning.
Warning aside, it looks wrong to call a function with "set".  Let's use
"call" instead.

2019-11-14  Konstantin Kharlamov  

* gdbinit.in (pr, prl, pt, pct, pgg, pgq, pgs, pge, pmz, pdd, pbs, pbm):
Use "call" instead of "set".
---
 gcc/gdbinit.in | 24 
 1 file changed, 12 insertions(+), 12 deletions(-)

v2: fixed changelog in commit message

diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in
index 42302aecfe3..a933ddc6141 100644
--- a/gcc/gdbinit.in
+++ b/gcc/gdbinit.in
@@ -26,7 +26,7 @@ Works only when an inferior is executing.
 end
 
 define pr
-set debug_rtx ($)
+call debug_rtx ($)
 end
 
 document pr
@@ -35,7 +35,7 @@ Works only when an inferior is executing.
 end
 
 define prl
-set debug_rtx_list ($, debug_rtx_count)
+call debug_rtx_list ($, debug_rtx_count)
 end
 
 document prl
@@ -50,7 +50,7 @@ it using debug_rtx_list. Usage example: set 
$foo=debug_rtx_find(first, 42)
 end
 
 define pt
-set debug_tree ($)
+call debug_tree ($)
 end
 
 document pt
@@ -59,7 +59,7 @@ Works only when an inferior is executing.
 end
 
 define pct
-set debug_c_tree ($)
+call debug_c_tree ($)
 end
 
 document pct
@@ -68,7 +68,7 @@ Works only when an inferior is executing.
 end
 
 define pgg
-set debug_gimple_stmt ($)
+call debug_gimple_stmt ($)
 end
 
 document pgg
@@ -77,7 +77,7 @@ Works only when an inferior is executing.
 end
 
 define pgq
-set debug_gimple_seq ($)
+call debug_gimple_seq ($)
 end
 
 document pgq
@@ -86,7 +86,7 @@ Works only when an inferior is executing.
 end
 
 define pgs
-set debug_generic_stmt ($)
+call debug_generic_stmt ($)
 end
 
 document pgs
@@ -95,7 +95,7 @@ Works only when an inferior is executing.
 end
 
 define pge
-set debug_generic_expr ($)
+call debug_generic_expr ($)
 end
 
 document pge
@@ -104,7 +104,7 @@ Works only when an inferior is executing.
 end
 
 define pmz
-set mpz_out_str(stderr, 10, $)
+call mpz_out_str(stderr, 10, $)
 end
 
 document pmz
@@ -140,7 +140,7 @@ Print the name of the type-node that is $.
 end
 
 define pdd
-set debug_dwarf_die ($)
+call debug_dwarf_die ($)
 end
 
 document pdd
@@ -167,7 +167,7 @@ Print the fields of an instruction that is $.
 end
 
 define pbs
-set print_binding_stack ()
+call print_binding_stack ()
 end
 
 document pbs
@@ -176,7 +176,7 @@ including the global binding level.
 end
 
 define pbm
-set bitmap_print (stderr, $, "", "\n")
+call bitmap_print (stderr, $, "", "\n")
 end
 
 document pbm
-- 
2.24.0



[PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Konstantin Kharlamov
Generally, people expect functions to accept arguments directly. But
ones defined in gdbinit did not use the argument, which may be confusing
for newcomers. But we can't change behavior to use the argument without
breaking existing users of the gdbinit. Let's fix this by adding a check
for whether a user passed an argument, and either use it or go with
older behavior.
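With the patch applied, both calling conventions work; for example, assuming a tree variable t is in scope in the inferior:

(gdb) pt t        # new style: pass the argument explicitly
(gdb) p t
(gdb) pt          # old style: operate on the value history ($)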

2019-11-14  Konstantin Kharlamov  

* gdbinit.in (pp, pr, prl, pt, pct, pgg, pgq, pgs, pge, pmz, ptc, pdn,
ptn, pdd, prc, pi, pbm, pel, trt):
Make use of $arg0 if a user passed it
---
 gcc/gdbinit.in | 114 -
 1 file changed, 95 insertions(+), 19 deletions(-)

v2: instead of unconditionally using $arg0, check whether a user passed it.

diff --git a/gcc/gdbinit.in b/gcc/gdbinit.in
index a933ddc6141..043dfc909df 100644
--- a/gcc/gdbinit.in
+++ b/gcc/gdbinit.in
@@ -17,7 +17,11 @@
 # .
 
 define pp
-call debug ($)
+  if ($argc == 0)
+call debug ($)
+  else
+call debug ($arg0)
+  end
 end
 
 document pp
@@ -26,7 +30,11 @@ Works only when an inferior is executing.
 end
 
 define pr
-call debug_rtx ($)
+  if ($argc == 0)
+call debug_rtx ($)
+  else
+call debug_rtx ($arg0)
+  end
 end
 
 document pr
@@ -35,7 +43,11 @@ Works only when an inferior is executing.
 end
 
 define prl
-call debug_rtx_list ($, debug_rtx_count)
+  if ($argc == 0)
+call debug_rtx_list ($, debug_rtx_count)
+  else
+call debug_rtx_list ($arg0, debug_rtx_count)
+  end
 end
 
 document prl
@@ -50,7 +62,11 @@ it using debug_rtx_list. Usage example: set 
$foo=debug_rtx_find(first, 42)
 end
 
 define pt
-call debug_tree ($)
+  if ($argc == 0)
+call debug_tree ($)
+  else
+call debug_tree ($arg0)
+  end
 end
 
 document pt
@@ -59,7 +75,11 @@ Works only when an inferior is executing.
 end
 
 define pct
-call debug_c_tree ($)
+  if ($argc == 0)
+call debug_c_tree ($)
+  else
+call debug_c_tree ($arg0)
+  end
 end
 
 document pct
@@ -68,7 +88,11 @@ Works only when an inferior is executing.
 end
 
 define pgg
-call debug_gimple_stmt ($)
+  if ($argc == 0)
+call debug_gimple_stmt ($)
+  else
+call debug_gimple_stmt ($arg0)
+  end
 end
 
 document pgg
@@ -77,7 +101,11 @@ Works only when an inferior is executing.
 end
 
 define pgq
-call debug_gimple_seq ($)
+  if ($argc == 0)
+call debug_gimple_seq ($)
+  else
+call debug_gimple_seq ($arg0)
+  end
 end
 
 document pgq
@@ -86,7 +114,11 @@ Works only when an inferior is executing.
 end
 
 define pgs
-call debug_generic_stmt ($)
+  if ($argc == 0)
+call debug_generic_stmt ($)
+  else
+call debug_generic_stmt ($arg0)
+  end
 end
 
 document pgs
@@ -95,7 +127,11 @@ Works only when an inferior is executing.
 end
 
 define pge
-call debug_generic_expr ($)
+  if ($argc == 0)
+call debug_generic_expr ($)
+  else
+call debug_generic_expr ($arg0)
+  end
 end
 
 document pge
@@ -104,7 +140,11 @@ Works only when an inferior is executing.
 end
 
 define pmz
-call mpz_out_str(stderr, 10, $)
+  if ($argc == 0)
+call mpz_out_str(stderr, 10, $)
+  else
+call mpz_out_str(stderr, 10, $arg0)
+  end
 end
 
 document pmz
@@ -113,7 +153,11 @@ Works only when an inferior is executing.
 end
 
 define ptc
-output (enum tree_code) $.base.code
+  if ($argc == 0)
+output (enum tree_code) $.base.code
+  else
+output (enum tree_code) $arg0.base.code
+  end
 echo \n
 end
 
@@ -122,7 +166,11 @@ Print the tree-code of the tree node that is $.
 end
 
 define pdn
-output $.decl_minimal.name->identifier.id.str
+  if ($argc == 0)
+output $.decl_minimal.name->identifier.id.str
+  else
+output $arg0.decl_minimal.name->identifier.id.str
+  end
 echo \n
 end
 
@@ -131,7 +179,11 @@ Print the name of the decl-node that is $.
 end
 
 define ptn
-output $.type.name->decl_minimal.name->identifier.id.str
+  if ($argc == 0)
+output $.type.name->decl_minimal.name->identifier.id.str
+  else
+output $arg0.type.name->decl_minimal.name->identifier.id.str
+  end
 echo \n
 end
 
@@ -140,7 +192,11 @@ Print the name of the type-node that is $.
 end
 
 define pdd
-call debug_dwarf_die ($)
+  if ($argc == 0)
+call debug_dwarf_die ($)
+  else
+call debug_dwarf_die ($arg0)
+  end
 end
 
 document pdd
@@ -148,7 +204,11 @@ Print the dw_die_ref that is in $.
 end
 
 define prc
-output (enum rtx_code) $.code
+  if ($argc == 0)
+output (enum rtx_code) $.code
+  else
+output (enum rtx_code) $arg0.code
+  end
 echo \ (
 output $.mode
 echo )\n
@@ -159,7 +219,11 @@ Print the rtx-code and machine mode of the rtx that is $.
 end
 
 define pi
-print $.u.fld[0].rt_rtx@7
+  if ($argc == 0)
+print $.u.fld[0].rt_rtx@7
+  else
+print $arg0.u.fld[0].rt_rtx@7
+  end
 end
 
 document pi
@@ -176,7 +240,11 @@ including the global binding level.
 end
 
 define pbm
-call bitmap_print (stderr, $, "", "\n")
+  if ($argc == 0)
+call bitmap_print (stderr, $, "", "\n")
+  else
+call bitmap_print 

Set inline-insns-single-O2 to 70

2019-11-14 Thread Jan Hubicka
Hi,
this patch bumps inline-insns-single-O2 from 30 to 70.  I originally reduced
it from 120 to 50 when forking the -O2 and -O3 parameters which has
quite significant code size benefits.

This parameter controls how large functions declared inline by the user can be
and still get inlined (sadly we really can't inline them all).

However, while this transform is mostly SPEC neutral, it has turned out to cause
performance regressions for tramp3d, botan and some of the Firefox benchmarks
with LTO.

I re-measured everything with 30, 50, 70 and 90 values as seen here:

https://lnt.opensuse.org/db_default/v4/SPEC/latest_runs_report?younger_in_days=14&older_in_days=0&min_percentage_change=0.02&revisions=ead4ea7bb1b1b531c2d8ba72fc5c1f1b14ddc454%2Ced81e91c55436bb949fab8556c138488b598af9e%2C44f7fe6bc09fc2365c4ec9ec7aea2863593d87fc%2C6f4da220ebfa0c3a3db02109dcb371da27516a3b%2C7aa+fd3ce8a81b45e040dae74e38a6849c65883ba

Ignore the benzen results since it ran only part of the tests.
Also ignore everything which is not with -Ofast. It is noise.

The off noise observations are:

 - 6% regression for Povray at 50, 70, 90.  This is a bit of an independent
   problem which I will treat separately
 - 4% improvement for gcc for 90
 - 4% improvement for xalancbmk for 90
 - 2% improvement for parest for 70, 90
 - 12% improvement for deepsjeng for 50, 70, 90

Most sensitive code size wise is xalanc, about 15% growth for 50+

To see code size one needs to click "Display all ELF stats", set minimum
threshold to 0.001 and click generate. Once page is fully loaded add 
total.*text to Filter.

The overall outcome is growth

                 50      70      90
 spec 2006      0.51%   0.89%   1.12%
 spec 2006 LTO  0.34%   0.60%   0.79%
 spec 2017      2.06%   2.48%   2.57%

https://lnt.opensuse.org/db_default/v4/CPP/latest_runs_report?younger_in_days=14&older_in_days=0&min_percentage_change=0.02&revisions=ead4ea7bb1b1b531c2d8ba72fc5c1f1b14ddc454%2Ced81e91c55436bb949fab8556c138488b598af9e%2C44f7fe6bc09fc2365c4ec9ec7aea2863593d87fc%2C6f4da220ebfa0c3a3db02109dcb371da27516a3b%2C7aa+fd3ce8a81b45e040dae74e38a6849c65883ba

Short story  
 - many of the botan tests like bumping the limit up, about 1/3 of them all the
   way to 90 (there was no improvement for 120).
 - nbench likes an increase to 50 or more
 - polyhedron ttf2 likes 50 or more
 - tramp3d likes 90 

I also ran Firefox LTO benchmarks:

30 
https://treeherder.mozilla.org/#/jobs?repo=try&revision=90a908c19de521482cad5ff864f8f67fec6dbc75
50 
https://treeherder.mozilla.org/#/jobs?repo=try&revision=7efe0bfd2f5acb55b1bcf0ba4a162e59b1a3be99
70 
https://treeherder.mozilla.org/#/jobs?repo=try&revision=6a7cf9728e4a952eff5190abeb72a4a95571d95d
90 
https://treeherder.mozilla.org/#/jobs?repo=try&revision=5157552ce80419ed5bd0594668a53a13a04786d2

In all cases I used --param inline-unit-growth=12000 since this limit otherwise
blocks the inliner before it gets to the function sizes in question.  Code size
is as follows:

libxul.so size:

30 103798151
50 108490103 (+4%)
70 114372911 (+10%)
90 116104639 (+11%)

Compares:
30 to 50: 
https://treeherder.mozilla.org/perf.html#/compare?originalProject=try&originalRevision=5157552ce80419ed5bd0594668a53a13a04786d2&newProject=try&newRevision=7efe0bfd2f5acb55b1bcf0ba4a162e59b1a3be99&framework=1
 (this shows almost nothing)
30 to 70: 
https://treeherder.mozilla.org/perf.html#/compare?originalProject=try&originalRevision=90a908c19de521482cad5ff864f8f67fec6dbc75&newProject=try&newRevision=6a7cf9728e4a952eff5190abeb72a4a95571d95d&framework=1
 (here there is a 14% improvement for the dromaeo benchmark and 5% in overall
 responsiveness; there is a regression in tsvgx/tresize which can be
 tracked down to quite low-level hand-optimized code in the SKIA graphics
 rendering library, which does not define ALWAY_INLINE to always_inline
 for GCC (only for clang))
30 to 90: 
https://treeherder.mozilla.org/perf.html#/compare?originalProject=try&originalRevision=90a908c19de521482cad5ff864f8f67fec6dbc75&newProject=try&newRevision=5157552ce80419ed5bd0594668a53a13a04786d2&framework=1
 (generally similar to previous one)

So most improvements show up with 70, and 50 seems not to be enough to get
performance for Firefox.  We still lose on tramp3d and some of botan, but I
think this is generally -O3/-Ofast type of code so I hope it is acceptable.

The SPEC code sizes are not very realistic, since a lot of codebases are
Fortran or old C which do not use the inline keyword at all.  On the other hand
the Firefox sizes are not realistic either (in the other direction) since I disabled
the inline-unit-growth parameter.

I hope that once Martin gets Tumbleweed builds with the GCC 10 branch working, we
can verify how much difference this makes at a larger scale.

Bootstrapped/regtested x86_64-linux, will commit it shortly.

* params.opt (inline-insns-single-O2): Bump from 30 to 70.

Index: params.opt
===
--- params.opt  (revision 278216)
+++ params.opt  (working copy)
@@ -487,7 +487,7 @@ Common Joined UInteger Var(pa

[patch 7/7] Update the arm-*-vxworks* support

2019-11-14 Thread Olivier Hainque
Hello,

The pre-VxWorks 7 ports are now obsolete, as pre-VxWorks 7
on ARM relies on the long-deprecated APCS ABI. VxWorks 7,
post SR600, is now llvm-based and always uses ARM unwinding.

This change updates the GCC support accordingly, tightening
the set of accepted triplets from config.gcc to vx7 variants
only, always bpabi based.

We also add an implementation of __gnu_Unwind_Find_exidx for kernel
modules, as it is used by some paths in the common unwinding code and
is not provided by VxWorks any more.

Finally, we remove the fPIC multilib, as the option has been
dysfunctional for a while now and, to our knowledge, is unused anyway.

We have tested this with in-house builds and runs of various C+Ada
testsuites on a couple of architecture variations, with mixed kernel
and RTP modules, with both gcc-8 and gcc-9 based toolchains.

The patch applies untouched on mainline and I have verified that I
could successfully build a VxWorks compiler with this source base
as well.

Olivier

2019-11-13  Jerome Lambourg  
Doug Rupp 
Olivier Hainque  

gcc/
* config.gcc: Collapse the arm-vxworks entries into
a single arm-wrs-vxworks7* one, bpabi based.  Update
the default cpu from arm8 to armv7-a
* config/arm/vxworks.h (CC1_SPEC): Simplify, knowing that
we always use ARM_UNWIND_INFO.
(DWARF2_UNWIND_INFO): Remove redefinition.
(ARM_TARGET2_DWARF_FORMAT): Likewise.
(VXWORKS_PERSONALITY): Define, to "llvm".
(VXWORKS_EXTRA_LIBS_RTP): Define, to "-lllvm".

libgcc/
* config.host: Collapse the arm-vxworks entries into
a single arm-wrs-vxworks7* one.
* config/arm/unwind-arm-vxworks.c: Update comments.  Provide
__gnu_Unwind_Find_exidx and a weak dummy __cxa_type_match for
kernel modules, to be overridden by libstdc++ when we link with
it.  Rely on externally provided __exidx_start/end.



0007-Update-the-arm-vxworks-support.patch
Description: Binary data




[PATCH] [GCN] Fix handling of VCC_CONDITIONAL_REG

2019-11-14 Thread Kwok Cheung Yeung

Hello

This patch fixes an issue seen in the following test cases on AMD GCN:

libgomp.oacc-fortran/gemm.f90
libgomp.oacc-fortran/gemm-2.f90
libgomp.c/for-5-test_ttdpfs_ds128_auto.c
libgomp.c/for-5-test_ttdpfs_ds128_guided32.c
libgomp.c/for-5-test_ttdpfs_ds128_runtime.c
libgomp.c/for-5-test_ttdpfs_ds128_static.c
libgomp.c/for-5-test_ttdpfs_ds128_static32.c
libgomp.c/for-6-test_tdpfs_ds128_auto.c
libgomp.c/for-6-test_tdpfs_ds128_guided32.c
libgomp.c/for-6-test_tdpfs_ds128_runtime.c
libgomp.c/for-6-test_tdpfs_ds128_static.c
libgomp.c/for-6-test_tdpfs_ds128_static32.c
libgomp.c-c++-common/for-5.c
libgomp.c-c++-common/for-6.c

The compiler is generating code like this:

    v_cmp_gt_i32       vcc, s3, v1
    v_writelane_b32    v3, vcc_lo, 0    ; Move VCC to v[3:4]
    v_writelane_b32    v4, vcc_hi, 0
    ...
    v_cmp_eq_f32       vcc, v20, v0     ; Clobber VCC
    ...
    ; Move old VCC_LO into s62 - this is okay as v3 has not
    ; been clobbered.
    v_readlane_b32     s62, v3, 0

    ; Move old VCC_HI into s63 - this is _not_ okay as VCC_HI
    ; has been clobbered by the intervening v_cmp_eq_f32 instruction.
    s_mov_b32          s63, vcc_hi

GCC is failing to notice that the v_cmp_eq_f32 instruction clobbers both 
vcc_lo and vcc_hi. This is because gcn_hard_regno_nregs uses 
REGNO_REG_CLASS to determine the register class of the queried hard reg, 
but vcc_lo and vcc_hi are classified as GENERAL_REGS by 
gcn_regno_reg_class. gcn_hard_regno_nregs therefore returns 1 for the 
BImode store into vcc, so vcc_hi is regarded as untouched by the operation.


This is fixed by making gcn_regno_reg_class classify vcc_lo and vcc_hi 
into VCC_CONDITIONAL_REG (REGNO_REG_CLASS is supposed to return the 
smallest class anyway). I have also added a spill class for 
VCC_CONDITIONAL_REG (into SGPR_REGS) to avoid expensive spills into memory.


Built for and tested on the AMD GCN target with no regressions.

Okay for trunk?

Kwok


2019-11-14  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.c (gcn_regno_reg_class): Return VCC_CONDITIONAL_REG
register class for VCC_LO and VCC_HI.
(gcn_spill_class): Use SGPR_REGS to spill registers in
VCC_CONDITIONAL_REG.
---
 gcc/config/gcn/gcn.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 976843b..dd89c26 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -470,6 +470,9 @@ gcn_regno_reg_class (int regno)
 {
 case SCC_REG:
   return SCC_CONDITIONAL_REG;
+case VCC_LO_REG:
+case VCC_HI_REG:
+  return VCC_CONDITIONAL_REG;
 case VCCZ_REG:
   return VCCZ_CONDITIONAL_REG;
 case EXECZ_REG:
@@ -637,7 +640,8 @@ gcn_can_split_p (machine_mode, rtx op)
 static reg_class_t
 gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
 {
-  if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c))
+  if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
+  || c == VCC_CONDITIONAL_REG)
 return SGPR_REGS;
   else
 return NO_REGS;
--
2.8.1



Re: [C++] Fix interaction between aka changes and DR1558 (PR92206)

2019-11-14 Thread Richard Sandiford
Jakub Jelinek  writes:
> On Wed, Nov 13, 2019 at 08:44:31AM +, Richard Sandiford wrote:
>>  PR c++/92206
>>  * g++.dg/cpp0x/alias-decl-pr92206-1.C: New test.
>>  * g++.dg/cpp0x/alias-decl-pr92206-2.C: Likewise.
>>  * g++.dg/cpp0x/alias-decl-pr92206-3.C: Likewise.
>
> This last test FAILs with -std=c++11:
> /usr/src/gcc/gcc/testsuite/g++.dg/cpp0x/alias-decl-pr92206-3.C:8:37: error: 
> variable templates only available with '-std=c++14' or '-std=gnu++14'
>
> Fixed thusly, committed as obvious:
>
> 2019-11-14  Jakub Jelinek  
>
>   * g++.dg/cpp0x/alias-decl-pr92206-3.C: Require effective target c++14
>   rather than c++11.

Thanks, and sorry for the breakage.  I obviously messed something up
during the testing.

Richard


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

> Generally, people expect functions to accept arguments directly. But
> ones defined in gdbinit did not use the argument, which may be confusing
> for newcomers. But we can't change behavior to use the argument without
> breaking existing users of the gdbinit. Let's fix this by adding a check
> for whether a user passed an argument, and either use it or go with
> older behavior.

Thank you for working on this. I think it's possible to avoid code duplication,
see below.

> 2019-11-14  Konstantin Kharlamov  
> 
> * gdbinit.in (pp, pr, prl, pt, pct, pgg, pgq, pgs, pge, pmz, ptc, pdn,
> ptn, pdd, prc, pi, pbm, pel, trt):
> Make use of $arg0 if a user passed it

(note: no need to start a new line, "Make use ..." can go immediately after the 
colon)

> --- a/gcc/gdbinit.in
> +++ b/gcc/gdbinit.in
> @@ -17,7 +17,11 @@
>  # .
>  
>  define pp
> -call debug ($)
> +  if ($argc == 0)
> +call debug ($)
> +  else
> +call debug ($arg0)
> +  end
>  end

I think here you can simply use

call debug ($argc ? $arg0 : $)

and likewise in other instances. Where it would make the line too complicated, I
think you can introduce a convenience variable, e.g. instead of

> +  if ($argc == 0)
> +output $.decl_minimal.name->identifier.id.str
> +  else
> +output $arg0.decl_minimal.name->identifier.id.str
> +  end

have something like

set $dbgarg = $argc ? $arg0 : $
output $dbgarg.decl_minimal.name->identifier.id.str

WDYT?

Alexander


Re: [C++] Fix interaction between aka changes and DR1558 (PR92206)

2019-11-14 Thread Jakub Jelinek
On Thu, Nov 14, 2019 at 12:50:09PM +, Richard Sandiford wrote:
> > 2019-11-14  Jakub Jelinek  
> >
> > * g++.dg/cpp0x/alias-decl-pr92206-3.C: Require effective target c++14
> > rather than c++11.
> 
> Thanks, and sorry for the breakage.  I obviously messed something up
> during the testing.

Not necessarily, seems the current default is just iterating over
C++98, 14, 17 and 2a.  check-c++-all additionally iterates over 11 and
also -fconcepts together with 17, in my testing I'm using
GXX_TESTSUITE_STDS=98,11,14,17,2a
and thus do test 11, but not 17 with -fconcepts.

Jakub



Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Konstantin Kharlamov




On Thu, Nov 14, 2019 at 15:55, Alexander Monakov  wrote:

On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:


 Generally, people expect functions to accept arguments directly. But
 ones defined in gdbinit did not use the argument, which may be 
confusing
 for newcomers. But we can't change behavior to use the argument 
without
 breaking existing users of the gdbinit. Let's fix this by adding a 
check

 for whether a user passed an argument, and either use it or go with
 older behavior.


Thank you for working on this. I think it's possible to avoid code 
duplication,

see below.


 2019-11-14  Konstantin Kharlamov  

 * gdbinit.in (pp, pr, prl, pt, pct, pgg, pgq, pgs, pge, 
pmz, ptc, pdn,

 ptn, pdd, prc, pi, pbm, pel, trt):
 Make use of $arg0 if a user passed it


(note: no need to start a new line, "Make use ..." can go immediately 
after the colon)



 --- a/gcc/gdbinit.in
 +++ b/gcc/gdbinit.in
 @@ -17,7 +17,11 @@
  # .

  define pp
 -call debug ($)
 +  if ($argc == 0)
 +call debug ($)
 +  else
 +call debug ($arg0)
 +  end
  end


I think here you can simply use

call debug ($argc ? $arg0 : $)

and likewise in other instances. Where it would make the line too 
complicated, I

think you can introduce a convenience variable, e.g. instead of


 +  if ($argc == 0)
 +output $.decl_minimal.name->identifier.id.str
 +  else
 +output $arg0.decl_minimal.name->identifier.id.str
 +  end


have something like

set $dbgarg = $argc ? $arg0 : $
output $dbgarg.decl_minimal.name->identifier.id.str

WDYT?


Thanks! Unfortunately AFAIK ternary expressions are broken in gdb 
https://sourceware.org/bugzilla/show_bug.cgi?id=22466 :c





Re: [PATCH v2 2/6] arm: Fix the "c" constraint

2019-11-14 Thread Kyrill Tkachov



On 11/14/19 10:07 AM, Richard Henderson wrote:

The existing definition using register class CC_REG does not
work because CC_REGNUM does not support normal modes, and so
fails to match register_operand.  Use a non-register constraint
and the cc_register predicate instead.

    * config/arm/constraints.md (c): Use cc_register predicate.



Ok.

Does this need a backport to the branches?

Thanks,

Kyrill



---
 gcc/config/arm/constraints.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index b76de81b85c..e02b678d26d 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -94,8 +94,9 @@
  "@internal
   Thumb only.  The union of the low registers and the stack register.")

-(define_register_constraint "c" "CC_REG"
- "@internal The condition code register.")
+(define_constraint "c"
+ "@internal The condition code register."
+ (match_operand 0 "cc_register"))

 (define_register_constraint "Cs" "CALLER_SAVE_REGS"
  "@internal The caller save registers.  Useful for sibcalls.")
--
2.17.1



Re: [PATCH v2 0/6] Implement asm flag outputs for arm + aarch64

2019-11-14 Thread Kyrill Tkachov

Hi Richard,

On 11/14/19 10:07 AM, Richard Henderson wrote:

I've put the implementation into config/arm/aarch-common.c, so
that it can be shared between the two targets.  This required
a little bit of cleanup to the CC modes and constraints to get
the two targets to match up.

Changes for v2:
  * Document overflow flags.
  * Add "hs" and "lo" as aliases of "cs" and "cc".
  * Add unsigned cmp tests to asm-flag-6.c.

Richard Sandiford has given his ack for the aarch64 side.
I'm still looking for an ack for the arm side.

The arm parts look good to me, there's not too much arm-specific stuff 
that's not shared with aarch64 thankfully.


Thanks,

Kyrill




r~


Richard Henderson (6):
  aarch64: Add "c" constraint
  arm: Fix the "c" constraint
  arm: Rename CC_NOOVmode to CC_NZmode
  arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__
  arm: Add testsuite checks for asm-flag
  aarch64: Add testsuite checks for asm-flag

 gcc/config/arm/aarch-common-protos.h  |   6 +
 gcc/config/aarch64/aarch64-c.c    |   2 +
 gcc/config/aarch64/aarch64.c  |   3 +
 gcc/config/arm/aarch-common.c | 136 +
 gcc/config/arm/arm-c.c    |   1 +
 gcc/config/arm/arm.c  |  15 +-
 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c |  35 
 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c |  38 
 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c |  62 ++
 gcc/testsuite/gcc.target/arm/asm-flag-1.c |  36 
 gcc/testsuite/gcc.target/arm/asm-flag-3.c |  38 
 gcc/testsuite/gcc.target/arm/asm-flag-5.c |  30 +++
 gcc/testsuite/gcc.target/arm/asm-flag-6.c |  62 ++
 gcc/config/aarch64/constraints.md |   4 +
 gcc/config/arm/arm-modes.def  |   4 +-
 gcc/config/arm/arm.md | 186 +-
 gcc/config/arm/constraints.md |   5 +-
 gcc/config/arm/predicates.md  |   2 +-
 gcc/config/arm/thumb1.md  |   8 +-
 gcc/config/arm/thumb2.md  |  34 ++--
 gcc/doc/extend.texi   |  39 
 22 files changed, 651 insertions(+), 125 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/asm-flag-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c

--
2.17.1
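For readers unfamiliar with the feature, a minimal sketch of the flag-output syntax the series enables, in AArch64 flavour; this is a hedged illustration, not code copied from the new asm-flag-*.c tests:

/* With the series applied, __GCC_ASM_FLAG_OUTPUTS__ is defined and a
   "=@cc<cond>" output reads the condition flags left behind by the asm.  */
int same (long a, long b)
{
  int eq;
  asm ("cmp %1, %2" : "=@cceq" (eq) : "r" (a), "r" (b));
  return eq;   /* 1 if the cmp set the Z flag, 0 otherwise */
}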



Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

> Thanks! Unfortunately AFAIK ternary expressions are broken in gdb
> https://sourceware.org/bugzilla/show_bug.cgi?id=22466 :c

Indeed, I didn't notice that. But it still would be nice to avoid duplicating
the commands over and over again. Can we use something like

  define pp
  if $argc
   p $arg0
  end
  call debug ($)
  end

this way

  pp something

is simply equivalent to what people already use:

  p something
  pp

Alexander


Re: Optimize handling of inline summaries

2019-11-14 Thread Martin Jambor
Hi,

On Mon, Nov 04 2019, Jan Hubicka wrote:
>> On 11/4/19 3:12 PM, Jan Hubicka wrote:
>> > Martin, do you know why this flag was introduced?
>> 
>> Hi.
>> 
>> The flag is used in IPA CP:
>> 
>> call_summary 
>> 
>> class edge_clone_summary
>> {
>> ...
>>   cgraph_edge *prev_clone;
>>   cgraph_edge *next_clone;
>> }
>
> I see, so it is there to collect chains of duplications. I suppose it
> makes sense even though it is a bit of an unexpected use of summaries (I suppose
> I approved it :)

Well, it was originally a duplication hook but Martin converted it to a
summary too.  It just creates a linked list of edges created during IPA-CP,
but these specific "summaries" live only during the IPA-CP WPA stage and
are promptly deleted afterwards, so they hopefully should not pose any
issues.

>
> In this case we mainly want to know that something was duplicated and to
> trigger creation.

I am afraid I don't understand what "this case" refers to in this context, but
yeah, the behavior should not normally be required.

> There are other cases where we do not want to
> duplicate in all situations (like when an inline clone is created).

> I was wondering about adding a duplicate_p function which will by default
> return true if the source summary exists and which one can override with
> different behaviour.  What do you think?
>

Well, you'll have to override it for edge_clone_summary_t :-)  But
otherwise it makes sense, I guess.

Martin


Re: [PATCH] [GCN] Fix handling of VCC_CONDITIONAL_REG

2019-11-14 Thread Andrew Stubbs

On 14/11/2019 12:43, Kwok Cheung Yeung wrote:

Hello

This patch fixes an issue seen in the following test cases on AMD GCN:

libgomp.oacc-fortran/gemm.f90
libgomp.oacc-fortran/gemm-2.f90
libgomp.c/for-5-test_ttdpfs_ds128_auto.c
libgomp.c/for-5-test_ttdpfs_ds128_guided32.c
libgomp.c/for-5-test_ttdpfs_ds128_runtime.c
libgomp.c/for-5-test_ttdpfs_ds128_static.c
libgomp.c/for-5-test_ttdpfs_ds128_static32.c
libgomp.c/for-6-test_tdpfs_ds128_auto.c
libgomp.c/for-6-test_tdpfs_ds128_guided32.c
libgomp.c/for-6-test_tdpfs_ds128_runtime.c
libgomp.c/for-6-test_tdpfs_ds128_static.c
libgomp.c/for-6-test_tdpfs_ds128_static32.c
libgomp.c-c++-common/for-5.c
libgomp.c-c++-common/for-6.c

The compiler is generating code like this:

     v_cmp_gt_i32    vcc, s3, v1
     v_writelane_b32    v3, vcc_lo, 0    ; Move VCC to v[3:4]
     v_writelane_b32    v4, vcc_hi, 0
     ...
     v_cmp_eq_f32    vcc, v20, v0    ; Clobber VCC
     ...
     ; Move old VCC_LO into s62 - this is okay as v3 has not
     ; been clobbered.
     v_readlane_b32    s62, v3, 0

     ; Move old VCC_HI into s63 - this is _not_ okay as VCC_HI
     ; has been clobbered by the intervening v_cmp_eq_f32 instruction.
     s_mov_b32    s63, vcc_hi

GCC is failing to notice that the v_cmp_eq_f32 instruction clobbers both 
vcc_lo and vcc_hi. This is because gcn_hard_regno_nregs uses 
REGNO_REG_CLASS to determine the register class of the queried hard reg, 
but vcc_lo and vcc_hi are classified as GENERAL_REGS by 
gcn_regno_reg_class. gcn_hard_regno_nregs therefore returns 1 for the 
BImode store into vcc, so vcc_hi is regarded as untouched by the operation.


This is fixed by making gcn_regno_reg_class classify vcc_lo and vcc_hi 
into VCC_CONDITIONAL_REG (REGNO_REG_CLASS is supposed to return the 
smallest class anyway). I have also added a spill class for 
VCC_CONDITIONAL_REG (into SGPR_REGS) to avoid expensive spills into memory.


Built for and tested on the AMD GCN target with no regressions.

Okay for trunk?

Kwok


2019-11-14  Kwok Cheung Yeung  

 gcc/
 * config/gcn/gcn.c (gcn_regno_reg_class): Return VCC_CONDITIONAL_REG
 register class for VCC_LO and VCC_HI.
 (gcn_spill_class): Use SGPR_REGS to spill registers in
 VCC_CONDITIONAL_REG.
---
  gcc/config/gcn/gcn.c | 6 +-
  1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 976843b..dd89c26 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -470,6 +470,9 @@ gcn_regno_reg_class (int regno)
  {
  case SCC_REG:
    return SCC_CONDITIONAL_REG;
+    case VCC_LO_REG:
+    case VCC_HI_REG:
+  return VCC_CONDITIONAL_REG;
  case VCCZ_REG:
    return VCCZ_CONDITIONAL_REG;
  case EXECZ_REG:
@@ -637,7 +640,8 @@ gcn_can_split_p (machine_mode, rtx op)
  static reg_class_t
  gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
  {
-  if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c))
+  if (reg_classes_intersect_p (ALL_CONDITIONAL_REGS, c)
+  || c == VCC_CONDITIONAL_REG)
  return SGPR_REGS;
    else
  return NO_REGS;


OK.

Andrew


Re: [PATCH] Support multi-versioning on self-recursive function (ipa/92133)

2019-11-14 Thread Jan Hubicka
Hi,
I think the patch generally looks reasonable

+2019-11-13  Feng Xue 
+
+   PR ipa/92133
+   * doc/invoke.texi (ipa-cp-max-recursion-depth): Document new option.
+   * params.opt (ipa-cp-max-recursion-depth): New.
+   * ipa-cp.c (ipcp_lattice::add_value): Add two new parameters
+   val_pos_p and unlimited.
+   (self_recursively_generated_p): New function.
+   (get_val_across_arith_op): Likewise.
+   (propagate_vals_across_arith_jfunc): Add constant propagation for
+   self-recursive function.
+   (incorporate_penalties): Do not penalize pure self-recursive function.
+   (good_cloning_opportunity_p): Dump node_is_self_scc flag.
+   (propagate_constants_topo): Set node_is_self_scc flag for cgraph node.
+   (get_info_about_necessary_edges): Relax hotness check for edge to
+   self-recursive function.
+   * ipa-prop.h (ipa_node_params): Add new field node_is_self_scc.
+

In general the patch looks good to me, but I would like Martin Jambor to
comment on the ipa-prop/cp interfaces. However...
 
+@item ipa-cp-max-recursion-depth
+Maximum depth of recursive cloning for self-recursive function.
+

... I believe we will need a more careful cost model for this.  I think
we want to limit the overall growth for all the clones and also probably
enable this only when ipa-predicates thinks the individual clones will
actually be faster by some non-trivial percentage.  For the recursive inliner
we have:

--param max-inline-recursive-depth which has similar meaning to your parameter
  (so perhaps similar name would be good)
--param min-inline-recursive-probability
  which requires the inlining to happen only across edges which are
  known to be taken with reasonable chance
--param max-inline-insns-recursive
  which specifies overall size after all the recursive inlining

Those parameters are not particularly well thought out or tested, but
they may be a good start.

Do you have some data on code size/performance effects of this change?

Honza
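For readers outside the thread, a plain-C sketch of the self-recursive pattern the series targets (a hypothetical example, not taken from PR ipa/92133):

/* Each level of recursive cloning can specialize on a known value, because
   the recursive argument is an arithmetic function of the incoming one.  */
static int
sum_to (int n)
{
  if (n <= 0)
    return 0;
  return n + sum_to (n - 1);   /* self-recursive edge, jump function n - 1 */
}

int
use (void)
{
  return sum_to (8);   /* constant actual argument seeds the propagation */
}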


Re: [PATCH v2 2/6] arm: Fix the "c" constraint

2019-11-14 Thread Richard Henderson
On 11/14/19 2:07 PM, Kyrill Tkachov wrote:
> 
> On 11/14/19 10:07 AM, Richard Henderson wrote:
>> The existing definition using register class CC_REG does not
>> work because CC_REGNUM does not support normal modes, and so
>> fails to match register_operand.  Use a non-register constraint
>> and the cc_register predicate instead.
>>
>>     * config/arm/constraints.md (c): Use cc_register predicate.
> 
> 
> Ok.
> 
> Does this need a backport to the branches?

I don't think so, because it is currently unused.

I tried to track down if it was *ever* used and did not succeed.
The first reference I see is

commit cffb2a26c44c682185b6bb405d48fcbe1fbc0b37
Author: rearnsha 
Date:   Sat Apr 8 14:29:53 2000 +

Merge changes from merged-arm-thumb-backend-branch onto trunk.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@33028 \
138bc75d-0d04-0410-961f-82ee72b054a4

within REG_CLASS_FROM_LETTER.  But I cannot find a user of the constraint
within a checkout of that revision.

Unless I'm missing something, it seems to have been garbage for a very long time.


r~


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Konstantin Kharlamov




On Thu, Nov 14, 2019 at 16:13, Alexander Monakov wrote:

On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:


 Thanks! Unfortunately AFAIK ternary expressions are broken in gdb
 https://sourceware.org/bugzilla/show_bug.cgi?id=22466 :c


Indeed, I didn't notice that. But it still would be nice to avoid
duplicating the commands over and over again. Can we use something like

  define pp
  if $argc
   p $arg0
  end
  call debug ($)
  end

this way

  pp something

is simply equivalent to what people already use:

  p something
  pp


Though, this wouldn't be equivalent to what new people, who just want 
to call `debug()` with the arg, would expect :) If you want to 
deduplicate the function call, I can reorganize the code to be like 
this:


define pp
 if ($argc == 0)
   set $arg = $
 else
   set $arg = $arg0
 end
 call debug ($arg)
end





Re: Avoid trashing of polymorphic call cache during inlining

2019-11-14 Thread Martin Jambor
Hi,

On Mon, Nov 04 2019, Jan Hubicka wrote:
> Hi,
> I am not really proud of this implementation (and will think of a better
> interface), but this patch saves about 10% of WPA time by avoiding
> unnecessary invalidations of the polymorphic call target hash during
> inlining.
>
> ipa-devirt registers a node removal hook to invalidate the cache when one of
> the functions in it gets removed.  Now the inliner often decides to inline
> into a thunk. In order to get costs right it turns the thunk into a gimple
> function and re-inserts it into the summaries (so the summaries get
> computed for the actual thunk body).
>
> Bootstrapped/regtested x86_64-linux, committed.
>
>   * ipa-inline-transform.c: Include ipa-utils.h
>   (inline_call): Set thunk_expansion flag.
>   * ipa-utils.h (thunk_expansion): Declare.
>   * ipa-devirt.c (thunk_expansion): New global var.
>   (devirt_node_removal_hook): Do not invalidate cache while
>   doing thunk expansion.

...

> Index: ipa-utils.h
> ===
> --- ipa-utils.h   (revision 277780)
> +++ ipa-utils.h   (working copy)
> @@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n
>struct cgraph_node *src, bool preserve_body = false);
>  bool recursive_call_p (tree, tree);
>  
> +/* In ipa-prop.c  */
> +void ipa_remove_useless_jump_functions ();
> +

This is probably an unintended change?  Can I remove it?

Martin



Re: Avoid trashing of polymorphic call cache during inlining

2019-11-14 Thread Jan Hubicka
> Hi,
> 
> On Mon, Nov 04 2019, Jan Hubicka wrote:
> > Hi,
> > I am not really proud of this implementation (and will think of a better
> > interface), but this patch saves about 10% of WPA time by avoiding
> > unnecessary invalidations of the polymorphic call target hash during
> > inlining.
> >
> > ipa-devirt registers a node removal hook to invalidate the cache when one of
> > the functions in it gets removed.  Now the inliner often decides to inline
> > into a thunk. In order to get costs right it turns the thunk into a gimple
> > function and re-inserts it into the summaries (so the summaries get
> > computed for the actual thunk body).
> >
> > Bootstrapped/regtested x86_64-linux, committed.
> >
> > * ipa-inline-transform.c: Include ipa-utils.h
> > (inline_call): Set thunk_expansion flag.
> > * ipa-utils.h (thunk_expansion): Declare.
> > * ipa-devirt.c (thunk_expansion): New global var.
> > (devirt_node_removal_hook): Do not invalidate cache while
> > doing thunk expansion.
> 
> ...
> 
> > Index: ipa-utils.h
> > ===
> > --- ipa-utils.h (revision 277780)
> > +++ ipa-utils.h (working copy)
> > @@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n
> >  struct cgraph_node *src, bool preserve_body = false);
> >  bool recursive_call_p (tree, tree);
> >  
> > +/* In ipa-prop.c  */
> > +void ipa_remove_useless_jump_functions ();
> > +
> 
> This is probably an unintended change?  Can I remove it?

Indeed, it is an unrelated change.
Thanks for noticing it!
Honza
> 
> Martin
> 


Re: [PATCH v2 0/6] Implement asm flag outputs for arm + aarch64

2019-11-14 Thread Richard Henderson
On 11/14/19 2:08 PM, Kyrill Tkachov wrote:
> Hi Richard,
> 
> On 11/14/19 10:07 AM, Richard Henderson wrote:
>> I've put the implementation into config/arm/aarch-common.c, so
>> that it can be shared between the two targets.  This required
>> a little bit of cleanup to the CC modes and constraints to get
>> the two targets to match up.
>>
>> Changes for v2:
>>   * Document overflow flags.
>>   * Add "hs" and "lo" as aliases of "cs" and "cc".
>>   * Add unsigned cmp tests to asm-flag-6.c.
>>
>> Richard Sandiford has given his ack for the aarch64 side.
>> I'm still looking for an ack for the arm side.
>>
> The arm parts look good to me, there's not too much arm-specific stuff that's
> not shared with aarch64 thankfully.

Yes indeed.

Committed series ending in r278228.

Thanks,

r~


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

> Though, this wouldn't be equivalent to what new people, who just want to call
> `debug()` with the arg, would expect :) If you want to deduplicate the
> function call, I can reorganize the code to be like this:
> 
> define pp
>  if ($argc == 0)
>    set $arg = $
>  else
>    set $arg = $arg0
>  end
>  call debug ($arg)
> end

I wish there was a less verbose way :)  I don't insist on this deduplication, so
at this point I guess it's up to the reviewers.  Your suggestion seems slightly
preferable to duplicating commands, but please wait for feedback from others.

Alexander


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Konstantin Kharlamov




On Thu, Nov 14, 2019 at 16:57, Alexander Monakov wrote:

On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

 Though, this wouldn't be equivalent to what new people, who just want to
 call `debug()` with the arg, would expect :) If you want to deduplicate
 the function call, I can reorganize the code to be like this:

 define pp
  if ($argc == 0)
    set $arg = $
  else
    set $arg = $arg0
  end
  call debug ($arg)
 end

I wish there was a less verbose way :)  I don't insist on this
deduplication, so at this point I guess it's up to the reviewers.  Your
suggestion seems slightly preferable to duplicating commands, but please
wait for feedback from others.


Sure; you know, actually I just found out how we can deduplicate the
code! Whether we can use this depends on whether we can tolerate a
dependency on python (I dunno, I can ¯\_(ツ)_/¯). The code:


   py
   def choose_arg0_into_gdbarg():
     if gdb.execute('output $argc', to_string=True) == '1':
       gdb.execute('set debug_arg = $arg0', to_string=True)
     else:
       gdb.execute('set debug_arg = $', to_string=True)
   end

   define pp
     py choose_arg0_into_gdbarg()
     call debug (debug_arg)
   end

I also renamed `dbgarg` to `debug_arg` because I figured I'd confuse
whether it was `dbgarg` or `gdbarg` :D





Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Andreas Schwab
On Nov 14 2019, Konstantin Kharlamov wrote:

> python (I dunno, I can ¯\_(ツ)_/¯). The code:

Python support is optional.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


[PATCH, GCC/ARM, 0/2] Add support for Armv8.1-M Mainline scalar shifts

2019-11-14 Thread Mihail Ionescu
Hi,

This is a patch series to introduce the Armv8.1-M Mainline scalar shift
instructions to the arm backend.


Mihail Ionescu (2)
[PATCH, GCC/ARM, 1/2] Add support for ASRL(reg) and LSLL(reg) instructions for 
Armv8.1-M Mainline
[PATCH, GCC/ARM, 2/2] Add support for ASRL(imm), LSLL(imm) and LSRL(imm) 
instructions for Armv8.1-M Mainline


Regards,
Mihail


Entire patch series attached to cover letter.

all-patches.tar.gz
Description: application/gzip


[PATCH, GCC/ARM, 1/2] Add support for ASRL(reg) and LSLL(reg) instructions for Armv8.1-M Mainline

2019-11-14 Thread Mihail Ionescu
Hi,

This patch adds the new scalar shift instructions for Armv8.1-M
Mainline to the arm backend.
It adds the following instructions:

ASRL (reg)
LSLL (reg)
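
For illustration, the kind of C source these expanders will now handle looks
roughly like this (a sketch only, not the attached test; the function names
are made up):

long long
shift_left (long long x, int n)
{
  return x << n;   /* DImode shift by a register amount: ashldi3, which can
                      now emit thumb2_lsll when MVE is available.  */
}

long long
shift_right (long long x, int n)
{
  return x >> n;   /* arithmetic DImode shift: ashrdi3 -> thumb2_asrl.  */
}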


ChangeLog entries are as follows:

*** gcc/ChangeLog ***


2019-11-14  Mihail-Calin Ionescu  
2019-11-14  Sudakshina Das  

* config/arm/arm.h (TARGET_MVE): New macro for MVE support.
* config/arm/arm.md (ashldi3): Generate thumb2_lsll for TARGET_MVE.
(ashrdi3): Generate thumb2_asrl for TARGET_MVE.
* config/arm/arm.c (arm_hard_regno_mode_ok): Allocate even odd
register pairs for doubleword quantities for ARMv8.1M-Mainline.
* config/arm/thumb2.md (thumb2_asrl): New.
(thumb2_lsll): Likewise.

*** gcc/testsuite/ChangeLog ***

2019-11-14  Mihail-Calin Ionescu  
2019-11-14  Sudakshina Das  

* gcc.target/arm/armv8_1m-shift-reg_1.c: New test.

Testsuite shows no regression when run for arm-none-eabi targets.

Is this ok for trunk?

Thanks
Mihail


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 
be51df7d14738bc1addeab8ac5a3806778106bce..bf788087a30343269b30cf7054ec29212ad9c572
 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -24454,14 +24454,15 @@ arm_hard_regno_mode_ok (unsigned int regno, 
machine_mode mode)
 
   /* We allow almost any value to be stored in the general registers.
  Restrict doubleword quantities to even register pairs in ARM state
- so that we can use ldrd.  Do not allow very large Neon structure
- opaque modes in general registers; they would use too many.  */
+ so that we can use ldrd and Armv8.1-M Mainline instructions.
+ Do not allow very large Neon structure  opaque modes in general
+ registers; they would use too many.  */
   if (regno <= LAST_ARM_REGNUM)
 {
   if (ARM_NUM_REGS (mode) > 4)
return false;
 
-  if (TARGET_THUMB2)
+  if (TARGET_THUMB2 && !TARGET_HAVE_MVE)
return true;
 
   return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 
a91a4b941c3f9d2c3d443f9f4639069ae953fb3b..b735f858a6a5c94d02a6765c1b349cdcb5e77ee3
 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3503,6 +3503,22 @@
(match_operand:SI 2 "reg_or_int_operand")))]
   "TARGET_32BIT"
   "
+  if (TARGET_HAVE_MVE)
+{
+  if (!reg_or_int_operand (operands[2], SImode))
+operands[2] = force_reg (SImode, operands[2]);
+
+  /* Armv8.1-M Mainline double shifts are not expanded.  */
+  if (REG_P (operands[2]))
+   {
+ if (!reg_overlap_mentioned_p(operands[0], operands[1]))
+   emit_insn (gen_movdi (operands[0], operands[1]));
+
+ emit_insn (gen_thumb2_lsll (operands[0], operands[2]));
+ DONE;
+   }
+}
+
   arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
 operands[2], gen_reg_rtx (SImode),
 gen_reg_rtx (SImode));
@@ -3530,6 +3546,16 @@
  (match_operand:SI 2 "reg_or_int_operand")))]
   "TARGET_32BIT"
   "
+  /* Armv8.1-M Mainline double shifts are not expanded.  */
+  if (TARGET_HAVE_MVE && REG_P (operands[2]))
+{
+  if (!reg_overlap_mentioned_p(operands[0], operands[1]))
+   emit_insn (gen_movdi (operands[0], operands[1]));
+
+  emit_insn (gen_thumb2_asrl (operands[0], operands[2]));
+  DONE;
+}
+
   arm_emit_coreregs_64bit_shift (ASHIFTRT, operands[0], operands[1],
 operands[2], gen_reg_rtx (SImode),
 gen_reg_rtx (SImode));
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 
c08dab233784bd1cbaae147ece795058d2ef234f..3a716ea954ac55b2081121248b930b7f11520ffa
 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1645,3 +1645,19 @@
   }
   [(set_attr "predicable" "yes")]
 )
+
+(define_insn "thumb2_asrl"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "+r")
+   (ashiftrt:DI (match_dup 0)
+(match_operand:SI 1 "arm_general_register_operand" "r")))]
+  "TARGET_HAVE_MVE"
+  "asrl%?\\t%Q0, %R0, %1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "thumb2_lsll"
+  [(set (match_operand:DI 0 "arm_general_register_operand" "+r")
+   (ashift:DI (match_dup 0)
+  (match_operand:SI 1 "arm_general_register_operand" "r")))]
+  "TARGET_HAVE_MVE"
+  "lsll%?\\t%Q0, %R0, %1"
+  [(set_attr "predicable" "yes")])
diff --git a/gcc/testsuite/gcc.target/arm/armv8_1m-shift-reg-1.c 
b/gcc/testsuite/gcc.target/arm/armv8_1m-shift-reg-1.c
new file mode 100644
index 
..a97e9d687ef66e9642dd1d735125c8ee941fb151
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/armv8_1m-shift-reg-1.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv8.1-m.m

[PATCH, GCC/ARM, 2/2] Add support for ASRL(imm), LSLL(imm) and LSRL(imm) instructions for Armv8.1-M Mainline

2019-11-14 Thread Mihail Ionescu
Hi,

This is part of a series of patches where I am trying to add new
instructions for Armv8.1-M Mainline to the arm backend.
This patch adds the following instructions:

ASRL (imm)
LSLL (imm)
LSRL (imm)
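
As with the register forms, a rough sketch (again, not the attached test) of
source that exercises the immediate forms, with shift counts in the 1-32
range accepted by the new Pg constraint:

long long
asrl_imm (long long x)
{
  return x >> 5;   /* ashrdi3 with a small immediate -> ASRL (imm) */
}

long long
lsll_imm (long long x)
{
  return x << 5;   /* ashldi3 -> LSLL (imm) */
}

unsigned long long
lsrl_imm (unsigned long long x)
{
  return x >> 5;   /* logical shift: lshrdi3 -> LSRL (imm) */
}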


ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2019-11-14  Mihail-Calin Ionescu  
2019-11-14  Sudakshina Das  

* config/arm/arm.md (ashldi3): Generate thumb2_lsll for both reg
and valid immediate.
(ashrdi3): Generate thumb2_asrl for both reg and valid immediate.
(lshrdi3): Generate thumb2_lsrl for valid immediates.
* config/arm/constraints.md (Pg): New.
* config/arm/predicates.md (long_shift_imm): New.
(arm_reg_or_long_shift_imm): Likewise.
* config/arm/thumb2.md (thumb2_asrl): New immediate alternative.
(thumb2_lsll): Likewise.
(thumb2_lsrl): New.

*** gcc/testsuite/ChangeLog ***

2019-11-14  Mihail-Calin Ionescu  
2019-11-14  Sudakshina Das  

* gcc.target/arm/armv8_1m-shift-imm_1.c: New test.

Testsuite shows no regression when run for arm-none-eabi targets.

Is this ok for trunk?

Thanks
Mihail


### Attachment also inlined for ease of reply###


diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 
b735f858a6a5c94d02a6765c1b349cdcb5e77ee3..82f4a5573d43925fb7638b9078a06699df38f88c
 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3509,8 +3509,8 @@
 operands[2] = force_reg (SImode, operands[2]);
 
   /* Armv8.1-M Mainline double shifts are not expanded.  */
-  if (REG_P (operands[2]))
-   {
+  if (arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2])))
+{
  if (!reg_overlap_mentioned_p(operands[0], operands[1]))
emit_insn (gen_movdi (operands[0], operands[1]));
 
@@ -3547,7 +3547,8 @@
   "TARGET_32BIT"
   "
   /* Armv8.1-M Mainline double shifts are not expanded.  */
-  if (TARGET_HAVE_MVE && REG_P (operands[2]))
+  if (TARGET_HAVE_MVE
+  && arm_reg_or_long_shift_imm (operands[2], GET_MODE (operands[2])))
 {
   if (!reg_overlap_mentioned_p(operands[0], operands[1]))
emit_insn (gen_movdi (operands[0], operands[1]));
@@ -3580,6 +3581,17 @@
  (match_operand:SI 2 "reg_or_int_operand")))]
   "TARGET_32BIT"
   "
+  /* Armv8.1-M Mainline double shifts are not expanded.  */
+  if (TARGET_HAVE_MVE
+&& long_shift_imm (operands[2], GET_MODE (operands[2])))
+{
+  if (!reg_overlap_mentioned_p(operands[0], operands[1]))
+emit_insn (gen_movdi (operands[0], operands[1]));
+
+  emit_insn (gen_thumb2_lsrl (operands[0], operands[2]));
+  DONE;
+}
+
   arm_emit_coreregs_64bit_shift (LSHIFTRT, operands[0], operands[1],
 operands[2], gen_reg_rtx (SImode),
 gen_reg_rtx (SImode));
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 
b76de81b85c8ce7a2ca484a750b908b7ca64600a..d807818c8499a6a65837f1ed0487e45947f68199
 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -35,7 +35,7 @@
 ;;  Dt, Dp, Dz, Tu
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Ha, Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py, Pz
-;; in all states: Pf
+;; in all states: Pf, Pg
 
 ;; The following memory constraints have been used:
 ;; in ARM/Thumb-2 state: Uh, Ut, Uv, Uy, Un, Um, Us
@@ -187,6 +187,11 @@
&& !is_mm_consume (memmodel_from_int (ival))
&& !is_mm_release (memmodel_from_int (ival))")))
 
+(define_constraint "Pg"
+  "@internal In Thumb-2 state a constant in range 1 to 32"
+  (and (match_code "const_int")
+   (match_test "TARGET_THUMB2 && ival >= 1 && ival <= 32")))
+
 (define_constraint "Ps"
   "@internal In Thumb-2 state a constant in the range -255 to +255"
   (and (match_code "const_int")
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 
69c10c06ff405e19efa172217a08a512c66cb902..ef5b0303d4424981347287865efb3cca85e56f36
 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -322,6 +322,15 @@
  && (UINTVAL (XEXP (op, 1)) < 32)")))
(match_test "mode == GET_MODE (op)")))
 
+;; True for Armv8.1-M Mainline long shift instructions.
+(define_predicate "long_shift_imm"
+  (match_test "satisfies_constraint_Pg (op)"))
+
+(define_predicate "arm_reg_or_long_shift_imm"
+  (ior (match_test "TARGET_THUMB2
+   && arm_general_register_operand (op, GET_MODE (op))")
+   (match_test "satisfies_constraint_Pg (op)")))
+
 ;; True for MULT, to identify which variant of shift_operator is in use.
 (define_special_predicate "mult_operator"
   (match_code "mult"))
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 
3a716ea954ac55b2081121248b930b7f11520ffa..af486d07f428030257855381ff72c32a885b506f
 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.

Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

> I also renamed `dbgarg` to `debug_arg` because I figured I'd confuse whether
> it was `dbgarg` or `gdbarg` :D

It should begin with a dollar ($debug_arg), otherwise GDB will attempt to locate
and use a variable named 'debug_arg' in the program being debugged.

In any case, I feel we should avoid adding a dependency on GDB-Python here.

Alexander


Re: [PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Earnshaw (lists)

Not had a chance to look at this in detail, but I don't see any support for

1) Thumb1 where we do not expose the condition codes at all
2) Thumb2 where we need IT instructions alongside the conditional
instructions themselves.


How have you tested this for those targets?

R.

On 14/11/2019 10:07, Richard Henderson wrote:

Since all but a couple of lines is shared between the two targets,
enable them both at once.

* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Declare.
* config/arm/aarch-common.c (arm_md_asm_adjust): New.
* config/arm/arm-c.c (arm_cpu_builtins): Define
__GCC_ASM_FLAG_OUTPUTS__.
* config/arm/arm.c (TARGET_MD_ASM_ADJUST): New.
* config/aarch64/aarch64-c.c (aarch64_define_unconditional_macros):
Define __GCC_ASM_FLAG_OUTPUTS__.
* config/aarch64/aarch64.c (TARGET_MD_ASM_ADJUST): New.
* doc/extend.texi (FlagOutputOperands): Add documentation
for ARM and AArch64.
---
  gcc/config/arm/aarch-common-protos.h |   6 ++
  gcc/config/aarch64/aarch64-c.c   |   2 +
  gcc/config/aarch64/aarch64.c |   3 +
  gcc/config/arm/aarch-common.c| 136 +++
  gcc/config/arm/arm-c.c   |   1 +
  gcc/config/arm/arm.c |   3 +
  gcc/doc/extend.texi  |  39 
  7 files changed, 190 insertions(+)

diff --git a/gcc/config/arm/aarch-common-protos.h 
b/gcc/config/arm/aarch-common-protos.h
index 3bf38a104f6..f15cf336e9d 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,8 @@
  #ifndef GCC_AARCH_COMMON_PROTOS_H
  #define GCC_AARCH_COMMON_PROTOS_H
  
+#include "hard-reg-set.h"

+
  extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
  extern bool aarch_rev16_p (rtx);
  extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
@@ -141,5 +143,9 @@ struct cpu_cost_table
const struct vector_cost_table vect;
  };
  
+rtx_insn *

+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
  
  #endif /* GCC_AARCH_COMMON_PROTOS_H */

diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 7c322ca0813..0af859f1c14 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -69,6 +69,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
builtin_define ("__ARM_FEATURE_UNALIGNED");
builtin_define ("__ARM_PCS_AAPCS64");
builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8);
+
+  builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
  }
  
  /* Undefine/redefine macros that depend on the current backend state and may

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d2a3c7ef90a..9a5f27fea3a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21933,6 +21933,9 @@ aarch64_libgcc_floating_mode_supported_p
  #undef TARGET_STRICT_ARGUMENT_NAMING
  #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
  
+#undef TARGET_MD_ASM_ADJUST

+#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-aarch64.h"

diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 965a07a43e3..760ef6c9c0a 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -26,10 +26,16 @@
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
+#include "insn-modes.h"
  #include "tm.h"
  #include "rtl.h"
  #include "rtl-iter.h"
  #include "memmodel.h"
+#include "diagnostic.h"
+#include "tree.h"
+#include "expr.h"
+#include "function.h"
+#include "emit-rtl.h"
  
  /* Return TRUE if X is either an arithmetic shift left, or

 is a multiplication by a power of two.  */
@@ -520,3 +526,133 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx 
consumer)
&& !reg_overlap_mentioned_p (mul_result, mac_op0)
&& !reg_overlap_mentioned_p (mul_result, mac_op1));
  }
+
+/* Worker function for TARGET_MD_ASM_ADJUST.
+   We implement asm flag outputs.  */
+
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+{
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+{
+  const char *con = constraints[i];
+  if (strncmp (con, "=@cc", 4) != 0)
+   continue;
+  con += 4;
+  if (strchr (con, ',') != NULL)
+   {
+ error ("alternatives not allowed in % flag output");
+ continue;
+   }
+
+  machine_mode mode;
+  rtx_code code;
+  int con01 = 0;
+
+#define C(X, Y)  (unsigned char)(X) * 256 + (unsigned char)(Y)
+
+  /* All of the condition codes are two characters.  */
+  if (con[0] != 0 && con[1] != 0 && con[2]

Re: [PATCH v2 3/6] arm: Rename CC_NOOVmode to CC_NZmode

2019-11-14 Thread Richard Earnshaw (lists)

On 14/11/2019 10:07, Richard Henderson wrote:

CC_NZmode is a more accurate description of what we require
from the mode, and matches up with the definition in aarch64.

Rename noov_comparison_operator to nz_comparison_operator
in order to match.

* config/arm/arm-modes.def (CC_NZ): Rename from CC_NOOV.
* config/arm/predicates.md (nz_comparison_operator): Rename
from noov_comparison_operator.
* config/arm/arm.c (arm_select_cc_mode): Use CC_NZmode name.
(arm_gen_dicompare_reg): Likewise.
(maybe_get_arm_condition_code): Likewise.
(thumb1_final_prescan_insn): Likewise.
(arm_emit_coreregs_64bit_shift): Likewise.
* config/arm/arm.md (addsi3_compare0): Likewise.
(*addsi3_compare0_scratch, subsi3_compare0): Likewise.
(*mulsi3_compare0, *mulsi3_compare0_v6): Likewise.
(*mulsi3_compare0_scratch, *mulsi3_compare0_scratch_v6): Likewise.
(*mulsi3addsi_compare0, *mulsi3addsi_compare0_v6): Likewise.
(*mulsi3addsi_compare0_scratch): Likewise.
(*mulsi3addsi_compare0_scratch_v6): Likewise.
(*andsi3_compare0, *andsi3_compare0_scratch): Likewise.
(*zeroextractsi_compare0_scratch): Likewise.
(*ne_zeroextractsi, *ne_zeroextractsi_shifted): Likewise.
(*ite_ne_zeroextractsi, *ite_ne_zeroextractsi_shifted): Likewise.
(andsi_not_shiftsi_si_scc_no_reuse): Likewise.
(andsi_not_shiftsi_si_scc): Likewise.
(*andsi_notsi_si_compare0, *andsi_notsi_si_compare0_scratch): Likewise.
(*iorsi3_compare0, *iorsi3_compare0_scratch): Likewise.
(*xorsi3_compare0, *xorsi3_compare0_scratch): Likewise.
(*shiftsi3_compare0, *shiftsi3_compare0_scratch): Likewise.
(*not_shiftsi_compare0, *not_shiftsi_compare0_scratch): Likewise.
(*notsi_compare0, *notsi_compare0_scratch): Likewise.
(return_addr_mask, *check_arch2): Likewise.
(*arith_shiftsi_compare0, *arith_shiftsi_compare0_scratch): Likewise.
(*sub_shiftsi_compare0, *sub_shiftsi_compare0_scratch): Likewise.
(compare_scc splitters): Likewise.
(movcond_addsi): Likewise.
* config/arm/thumb2.md (thumb2_addsi3_compare0): Likewise.
(*thumb2_addsi3_compare0_scratch): Likewise.
(*thumb2_mulsi_short_compare0): Likewise.
(*thumb2_mulsi_short_compare0_scratch): Likewise.
(compare peephole2s): Likewise.
* config/arm/thumb1.md (thumb1_cbz): Use CC_NZmode and
nz_comparison_operator names.
(cbranchsi4_insn): Likewise.


I take it this is a rototill change.  OK.

R.

---
  gcc/config/arm/arm.c |  12 +--
  gcc/config/arm/arm-modes.def |   4 +-
  gcc/config/arm/arm.md| 186 +--
  gcc/config/arm/predicates.md |   2 +-
  gcc/config/arm/thumb1.md |   8 +-
  gcc/config/arm/thumb2.md |  34 +++
  6 files changed, 123 insertions(+), 123 deletions(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 9086cf65953..d996207853c 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -15376,7 +15376,7 @@ arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
  || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
  || GET_CODE (x) == ROTATERT
  || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
-return CC_NOOVmode;
+return CC_NZmode;
  
/* A comparison of ~reg with a const is really a special

   canoncialization of compare (~const, reg), which is a reverse
@@ -15492,11 +15492,11 @@ arm_gen_dicompare_reg (rtx_code code, rtx x, rtx y, 
rtx scratch)
  }
  
  	rtx clobber = gen_rtx_CLOBBER (VOIDmode, scratch);

-   cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
+   cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
  
  	rtx set

  = gen_rtx_SET (cc_reg,
-gen_rtx_COMPARE (CC_NOOVmode,
+gen_rtx_COMPARE (CC_NZmode,
  gen_rtx_IOR (SImode, x_lo, x_hi),
  const0_rtx));
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set,
@@ -23881,7 +23881,7 @@ maybe_get_arm_condition_code (rtx comparison)
return code;
return ARM_NV;
  
-case E_CC_NOOVmode:

+case E_CC_NZmode:
switch (comp_code)
{
case NE: return ARM_NE;
@@ -25304,7 +25304,7 @@ thumb1_final_prescan_insn (rtx_insn *insn)
  cfun->machine->thumb1_cc_insn = insn;
  cfun->machine->thumb1_cc_op0 = SET_DEST (set);
  cfun->machine->thumb1_cc_op1 = const0_rtx;
- cfun->machine->thumb1_cc_mode = CC_NOOVmode;
+ cfun->machine->thumb1_cc_mode = CC_NZmode;
  if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
{
  rtx src1 = XEXP (SET_SRC (set), 1);
@@ -30486,7 +30486,7 @@ arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx 
out, rtx in,
else
  {

Re: [PATCH v2 5/6] arm: Add testsuite checks for asm-flag

2019-11-14 Thread Richard Earnshaw (lists)

On 14/11/2019 10:07, Richard Henderson wrote:

Inspired by the tests in gcc.target/i386.  Testing code generation,
diagnostics, and execution.

* gcc.target/arm/asm-flag-1.c: New test.
* gcc.target/arm/asm-flag-3.c: New test.
* gcc.target/arm/asm-flag-5.c: New test.
* gcc.target/arm/asm-flag-6.c: New test.
---
  gcc/testsuite/gcc.target/arm/asm-flag-1.c | 36 +
  gcc/testsuite/gcc.target/arm/asm-flag-3.c | 38 ++
  gcc/testsuite/gcc.target/arm/asm-flag-5.c | 30 +++
  gcc/testsuite/gcc.target/arm/asm-flag-6.c | 62 +++
  4 files changed, 166 insertions(+)
  create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-1.c
  create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-3.c
  create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-5.c
  create mode 100644 gcc/testsuite/gcc.target/arm/asm-flag-6.c


These will/should clearly not work on thumb1 targets where the condition 
register is not exposed.  On thumb2 I'd expect to see some testing for 
the correct IT instruction.


R.



diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-1.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
new file mode 100644
index 000..9707ebfcebb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-1.c
@@ -0,0 +1,36 @@
+/* Test the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#ifndef __GCC_ASM_FLAG_OUTPUTS__
+#error "missing preprocessor define"
+#endif
+
+void f(char *out)
+{
+  asm(""
+  : "=@ccne"(out[0]), "=@cceq"(out[1]),
+   "=@cccs"(out[2]), "=@"(out[3]),
+   "=@ccmi"(out[4]), "=@ccpl"(out[5]),
+   "=@ccvs"(out[6]), "=@ccvc"(out[7]),
+   "=@cchi"(out[8]), "=@ccls"(out[9]),
+   "=@ccge"(out[10]), "=@cclt"(out[11]),
+   "=@ccgt"(out[12]), "=@ccle"(out[13]),
+   "=@cchs"(out[14]), "=@cclo"(out[15]));
+}
+
+/* There will be at least one of each.  */
+/* { dg-final { scan-assembler "movne" } } */
+/* { dg-final { scan-assembler "moveq" } } */
+/* { dg-final { scan-assembler "movcs" } } */
+/* { dg-final { scan-assembler "movcc" } } */
+/* { dg-final { scan-assembler "movmi" } } */
+/* { dg-final { scan-assembler "movpl" } } */
+/* { dg-final { scan-assembler "movvs" } } */
+/* { dg-final { scan-assembler "movvc" } } */
+/* { dg-final { scan-assembler "movhi" } } */
+/* { dg-final { scan-assembler "movls" } } */
+/* { dg-final { scan-assembler "movge" } } */
+/* { dg-final { scan-assembler "movls" } } */
+/* { dg-final { scan-assembler "movgt" } } */
+/* { dg-final { scan-assembler "movle" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-3.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
new file mode 100644
index 000..e84e3431277
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-3.c
@@ -0,0 +1,38 @@
+/* Test some of the valid @cc asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#define DO(C) \
+void f##C(void) { char x; asm("" : "=@cc"#C(x)); if (!x) asm(""); asm(""); }
+
+DO(ne)
+DO(eq)
+DO(cs)
+DO(cc)
+DO(hs)
+DO(lo)
+DO(mi)
+DO(pl)
+DO(vs)
+DO(vc)
+DO(hi)
+DO(ls)
+DO(ge)
+DO(lt)
+DO(gt)
+DO(le)
+
+/* { dg-final { scan-assembler "bne" } } */
+/* { dg-final { scan-assembler "beq" } } */
+/* { dg-final { scan-assembler "bcs" } } */
+/* { dg-final { scan-assembler "bcc" } } */
+/* { dg-final { scan-assembler "bmi" } } */
+/* { dg-final { scan-assembler "bpl" } } */
+/* { dg-final { scan-assembler "bvs" } } */
+/* { dg-final { scan-assembler "bvc" } } */
+/* { dg-final { scan-assembler "bhi" } } */
+/* { dg-final { scan-assembler "bls" } } */
+/* { dg-final { scan-assembler "bge" } } */
+/* { dg-final { scan-assembler "blt" } } */
+/* { dg-final { scan-assembler "bgt" } } */
+/* { dg-final { scan-assembler "ble" } } */
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-5.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
new file mode 100644
index 000..4d4394e1478
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-5.c
@@ -0,0 +1,30 @@
+/* Test error conditions of asm flag outputs.  */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void f_B(void) { _Bool x; asm("" : "=@cccc"(x)); }
+void f_c(void) { char x; asm("" : "=@cccc"(x)); }
+void f_s(void) { short x; asm("" : "=@cccc"(x)); }
+void f_i(void) { int x; asm("" : "=@cccc"(x)); }
+void f_l(void) { long x; asm("" : "=@cccc"(x)); }
+void f_ll(void) { long long x; asm("" : "=@cccc"(x)); }
+
+void f_f(void)
+{
+  float x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+void f_d(void)
+{
+  double x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
+
+struct S { int x[3]; };
+
+void f_S(void)
+{
+  struct S x;
+  asm("" : "=@"(x)); /* { dg-error invalid type } */
+}
diff --git a/gcc/testsuite/gcc.target/arm/asm-flag-6.c 
b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
new file mode 100644
index 000..09174e04ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/asm-flag-6.c
@@ -0,0 +1,62 @@
+/* Executable testcase

Re: [PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Earnshaw (lists)

On 14/11/2019 10:07, Richard Henderson wrote:

Since all but a couple of lines is shared between the two targets,
enable them both at once.

* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Declare.
* config/arm/aarch-common.c (arm_md_asm_adjust): New.
* config/arm/arm-c.c (arm_cpu_builtins): Define
__GCC_ASM_FLAG_OUTPUTS__.
* config/arm/arm.c (TARGET_MD_ASM_ADJUST): New.
* config/aarch64/aarch64-c.c (aarch64_define_unconditional_macros):
Define __GCC_ASM_FLAG_OUTPUTS__.
* config/aarch64/aarch64.c (TARGET_MD_ASM_ADJUST): New.
* doc/extend.texi (FlagOutputOperands): Add documentation
for ARM and AArch64.


In AArch64 when SVE is enabled, there are some additional condition 
names which are more suited for describing the way conditions are set by 
the SVE instructions.  Do you plan to support those as well?


R.


---
  gcc/config/arm/aarch-common-protos.h |   6 ++
  gcc/config/aarch64/aarch64-c.c   |   2 +
  gcc/config/aarch64/aarch64.c |   3 +
  gcc/config/arm/aarch-common.c| 136 +++
  gcc/config/arm/arm-c.c   |   1 +
  gcc/config/arm/arm.c |   3 +
  gcc/doc/extend.texi  |  39 
  7 files changed, 190 insertions(+)

diff --git a/gcc/config/arm/aarch-common-protos.h 
b/gcc/config/arm/aarch-common-protos.h
index 3bf38a104f6..f15cf336e9d 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -23,6 +23,8 @@
  #ifndef GCC_AARCH_COMMON_PROTOS_H
  #define GCC_AARCH_COMMON_PROTOS_H
  
+#include "hard-reg-set.h"

+
  extern int aarch_accumulator_forwarding (rtx_insn *, rtx_insn *);
  extern bool aarch_rev16_p (rtx);
  extern bool aarch_rev16_shleft_mask_imm_p (rtx, machine_mode);
@@ -141,5 +143,9 @@ struct cpu_cost_table
const struct vector_cost_table vect;
  };
  
+rtx_insn *

+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs);
  
  #endif /* GCC_AARCH_COMMON_PROTOS_H */

diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 7c322ca0813..0af859f1c14 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -69,6 +69,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
builtin_define ("__ARM_FEATURE_UNALIGNED");
builtin_define ("__ARM_PCS_AAPCS64");
builtin_define_with_int_value ("__ARM_SIZEOF_WCHAR_T", WCHAR_TYPE_SIZE / 8);
+
+  builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
  }
  
  /* Undefine/redefine macros that depend on the current backend state and may

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d2a3c7ef90a..9a5f27fea3a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -21933,6 +21933,9 @@ aarch64_libgcc_floating_mode_supported_p
  #undef TARGET_STRICT_ARGUMENT_NAMING
  #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
  
+#undef TARGET_MD_ASM_ADJUST

+#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
+
  struct gcc_target targetm = TARGET_INITIALIZER;
  
  #include "gt-aarch64.h"

diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 965a07a43e3..760ef6c9c0a 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -26,10 +26,16 @@
  #include "config.h"
  #include "system.h"
  #include "coretypes.h"
+#include "insn-modes.h"
  #include "tm.h"
  #include "rtl.h"
  #include "rtl-iter.h"
  #include "memmodel.h"
+#include "diagnostic.h"
+#include "tree.h"
+#include "expr.h"
+#include "function.h"
+#include "emit-rtl.h"
  
  /* Return TRUE if X is either an arithmetic shift left, or

 is a multiplication by a power of two.  */
@@ -520,3 +526,133 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx 
consumer)
&& !reg_overlap_mentioned_p (mul_result, mac_op0)
&& !reg_overlap_mentioned_p (mul_result, mac_op1));
  }
+
+/* Worker function for TARGET_MD_ASM_ADJUST.
+   We implement asm flag outputs.  */
+
+rtx_insn *
+arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
+   vec<const char *> &constraints,
+   vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
+{
+  bool saw_asm_flag = false;
+
+  start_sequence ();
+  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
+{
+  const char *con = constraints[i];
+  if (strncmp (con, "=@cc", 4) != 0)
+   continue;
+  con += 4;
+  if (strchr (con, ',') != NULL)
+   {
+ error ("alternatives not allowed in % flag output");
+ continue;
+   }
+
+  machine_mode mode;
+  rtx_code code;
+  int con01 = 0;
+
+#define C(X, Y)  (unsigned char)(X) * 256 + (unsigned char)(Y)
+
+  /* All of the condition codes are two characters.  */
+  if (con[0] != 0 && con[1] != 0 && con[2] == 0)
+   con01 = C(con[0], con[1]);
+
+  switch (con01)
+ 

Re: [PATCH 0/4] Eliminate cc0 from m68k

2019-11-14 Thread Richard Henderson
On 11/13/19 8:35 PM, Jeff Law wrote:
> On 11/13/19 6:04 AM, Bernd Schmidt wrote:
>> The cc0 machinery allows for eliminating unnecessary comparisons by
>> examining the effect instructions have on the flags registers. I have
>> replicated that mechanism with a relatively modest amount of code based
>> on a final_postscan_insn hook, but it is now opt-in: an instruction
>> pattern can set the "flags_valid" attribute to a number of possible
>> values to indicate what effect it has. That should be more reliable (try
>> git log m68k.md to see recent sprinkling of CC_STATUS_INIT to squash
>> bugs with the previous mechanism).
> Yea, sounds like a reimplementation of the tst elimination bits, but
> buried in the backend.  Given the choice of dropping the port or burying
> this kind of stuff in there, I'd lean towards accepting the latter.

Indeed.  Even if we wanted an eventual transition to the tst elimination bits,
this is a better starting place than from cc0.


r~


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Alexander Monakov wrote:

> On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:
> 
> > I also renamed `dbgarg` to `debug_arg` because I figured I'd confuse
> > whether it was `dbgarg` or `gdbarg` :D
> 
> It should begin with a dollar ($debug_arg), otherwise GDB will attempt to
> locate and use a variable named 'debug_arg' in the program being debugged.
> 
> In any case, I feel we should avoid adding a dependency on GDB-Python here.

Here's a one-liner that uses eval instead:

eval "set $debug_arg = $%s", $argc ? "arg0" : ""
call debug ($debug_arg)

(but oddly with '$argc ? "$arg0" : ""' it doesn't work).  Can you rework your
patch to use this approach?

Alexander


Re: [PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Henderson
On 11/14/19 3:39 PM, Richard Earnshaw (lists) wrote:
> Not had a chance to look at this in detail, but I don't see any support for
> 
> 1) Thumb1 where we do not expose the condition codes at all
> 2) Thumb2 where we need IT instructions alongside the conditional
> instructions themselves.
> 
> How have you tested this for those targets?

I tested aarch64-linux and arm-elf-eabi (I'm currently 8 time zones away from
my arm-linux-eabihf box, so using sim).

I didn't know about the thumb1 restriction.  I had assumed somehow that we'd
just use branch insns to form whatever cstore* is required.  I suppose it's
easy enough to generate an error/sorry for asm-flags in thumb1 mode.

As for thumb2, correct behaviour comes from the existing cstore* patterns, and
the testsuite need not check for IT specifically because unified asm syntax
says that the insns that are conditional under the IT should still bear the
conditions themselves.
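
For reference, a minimal sketch of the user-facing side of the feature (the
function and the cmp instruction are invented for illustration and are not
taken from the series or its tests):

int
is_nonzero (int x)
{
  int r;
  /* "=@ccne" requests the NE condition as an output; the compiler
     materializes it after the asm via the cstore patterns, which on
     Thumb-2 means conditional instructions under an IT block.  */
  asm ("cmp %1, #0" : "=@ccne" (r) : "r" (x));
  return r;
}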

I presume I can test both of these cases with arm-elf-eabi + -mthumb{1,2}?


r~


Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Konstantin Kharlamov




On Thu, Nov 14, 2019 at 18:00, Alexander Monakov wrote:

On Thu, 14 Nov 2019, Alexander Monakov wrote:

 On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

 > I also renamed `dbgarg` to `debug_arg` because I figured I'd confuse
 > whether it was `dbgarg` or `gdbarg` :D

 It should begin with a dollar ($debug_arg), otherwise GDB will attempt to
 locate and use a variable named 'debug_arg' in the program being debugged.

 In any case, I feel we should avoid adding a dependency on GDB-Python here.

Here's a one-liner that uses eval instead:

eval "set $debug_arg = $%s", $argc ? "arg0" : ""
call debug ($debug_arg)

(but oddly with '$argc ? "$arg0" : ""' it doesn't work).  Can you rework
your patch to use this approach?


Haha, this is amazing! Will do. A newbish question: shall I send the 
updated patch "in reply" here, or should I resend the patchset?





Re: [PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Henderson
On 11/14/19 3:48 PM, Richard Earnshaw (lists) wrote:
> On 14/11/2019 10:07, Richard Henderson wrote:
>> Since all but a couple of lines is shared between the two targets,
>> enable them both at once.
>>
>> * config/arm/aarch-common-protos.h (arm_md_asm_adjust): Declare.
>> * config/arm/aarch-common.c (arm_md_asm_adjust): New.
>> * config/arm/arm-c.c (arm_cpu_builtins): Define
>> __GCC_ASM_FLAG_OUTPUTS__.
>> * config/arm/arm.c (TARGET_MD_ASM_ADJUST): New.
>> * config/aarch64/aarch64-c.c (aarch64_define_unconditional_macros):
>> Define __GCC_ASM_FLAG_OUTPUTS__.
>> * config/aarch64/aarch64.c (TARGET_MD_ASM_ADJUST): New.
>> * doc/extend.texi (FlagOutputOperands): Add documentation
>> for ARM and AArch64.
> 
> In AArch64 when SVE is enabled, there are some additional condition names 
> which
> are more suited for describing the way conditions are set by the SVE
> instructions.  Do you plan to support those as well?

I did not, no.

I read the ACLE spec once at the beginning of the year, and vaguely recall that
it already covers pretty much all one wants to do.  I haven't given much
thought to SVE in inline asm since.

I suppose I can add them if they're thought important.


r~


Re: [PATCH] Support multi-versioning on self-recursive function (ipa/92133)

2019-11-14 Thread Feng Xue OS
Thanks for your review.

> In general the patch looks good to me, but I would like Martin Jambor to
> comment on the ipa-prop/cp interfaces. However...

> +@item ipa-cp-max-recursion-depth
> +Maximum depth of recursive cloning for self-recursive function.
> +

> ... I believe we will need a more careful cost model for this.  I think
> we want to limit the overall growth for all the clones and also probably
> enable this only when ipa-predicates thinks the individual clones will
> actually be faster by some non-trivial percentage. For the recursive inliner
> we have:

The cost model used by self-recursive cloning is mainly based on the existing
machinery in ipa-cp cloning; size growth and time benefit are both considered.
But since recursive cloning is a more aggressive form of cloning, we actually
have the opposite problem to the one you are concerned about.  By default, the
parameter set used to control ipa-cp and the recursive inliner gives priority
to code size rather than performance. This makes ipa-cp behave somewhat
conservatively, and as a result it cannot trigger the expected recursive
cloning for the SPEC2017 exchange2 case with the default settings, blocked by
both ipa-cp-eval-threshold and ipcp-unit-growth. The former is due to improper
static profile estimation, and the latter conflicts with the aggressiveness of
recursive cloning. Thus, we have to explicitly lower the threshold and raise
the unit-growth percentage.
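
To make this concrete, here is a sketch (not taken from the patch or from
exchange2 itself) of the shape of self-recursive function the new code
targets: the recursive argument only changes by a simple arithmetic step, so
propagate_vals_across_arith_jfunc can derive a known constant for each clone,
up to ipa-cp-max-recursion-depth clones deep.

#define MAX_DEPTH 9

static int
evaluate (const int *state)
{
  return state[0];
}

static int
solve (int depth, int *state)
{
  if (depth == MAX_DEPTH)
    return evaluate (state);
  int sum = 0;
  for (int i = 0; i < 3; i++)
    sum += solve (depth + 1, state);   /* arithmetic jump function: depth + 1 */
  return sum;
}

int
entry (int *state)
{
  return solve (1, state);   /* recursion starts from a known constant */
}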

We might not reach the destination in one leap. This patch is just the first
step towards enabling recursive function versioning; further, more elaborate
tuning will still be needed.

> --param max-inline-recursive-depth which has similar meaning to your parameter
>  (so perhaps similar name would be good)
> --param min-inline-recursive-probability
>  which requires the inlining to happen only across edges which are
>  known to be taken with reasonable chance
> --param max-inline-insns-recursive
>  which specifies overall size after all the recursive inlining

> Those parameters are not particularly well thought out or tested, but
> they may be a good start.

> Do you have some data on code size/performance effects of this change?
For SPEC2017, there is no obvious code size or performance change with the
default settings.  Specifically, for exchange2, with ipa-cp-eval-threshold=1
and ipcp-unit-growth=80, performance is +31% and size +7% on aarch64.

Feng

Re: [PATCH v2 4/6] arm, aarch64: Add support for __GCC_ASM_FLAG_OUTPUTS__

2019-11-14 Thread Richard Earnshaw (lists)

On 14/11/2019 15:06, Richard Henderson wrote:

On 11/14/19 3:39 PM, Richard Earnshaw (lists) wrote:

Not had a chance to look at this in detail, but I don't see any support for

1) Thumb1 where we do not expose the condition codes at all
2) Thumb2 where we need IT instructions alongside the conditional instructions
themselves.

How have you tested this for those targets?


I tested aarch64-linux and arm-elf-eabi (I'm currently 8 time zones away from
my arm-linux-eabihf box, so using sim).

I didn't know about the thumb1 restriction.  I had assumed somehow that we'd
just use branch insns to form whatever cstore* is required.  I suppose it's
easy enough to generate an error/sorry for asm-flags in thumb1 mode.


I suggest we just suppress the __GCC_ASM_FLAG_OUTPUTS__ define if
targeting thumb1.  In the tests, we then just make the code PASS in
that case.


As for thumb2, correct behaviour comes from the existing cstore* patterns, and
the testsuite need not check for IT specifically because unified asm syntax
says that the insns that are conditional under the IT should still bear the
conditions themselves.

I presume I can test both of these cases with arm-elf-eabi + -mthumb{1,2}?


Not quite: selection of thumb1/thumb2 is controlled by the architecture
(or the cpu that implies the architecture chosen).  There are several
helpers in target-supports.exp to help with testing whether the selected
architecture has thumb1/thumb2/neither as appropriate.


R.




r~





Re: [PATCH] Support multi-versioning on self-recursive function (ipa/92133)

2019-11-14 Thread Jan Hubicka
> Thanks for your review.
> 
> > In general the patch looks good to me, but I would like Martin Jambor to
> > comment on the ipa-prop/cp interfaces. However...
> 
> > +@item ipa-cp-max-recursion-depth
> > +Maximum depth of recursive cloning for self-recursive function.
> > +
> 
> > ... I believe we will need a more careful cost model for this.  I think
> > we want to limit the overall growth for all the clones and also probably
> > enable this only when ipa-predicates thinks the individual clones will
> > actually be faster by some non-trivial percentage. For the recursive inliner
> > we have:
> 
> The cost model used by self-recursive cloning is mainly based on the existing
> machinery in ipa-cp cloning; size growth and time benefit are both considered.
> But since recursive cloning is a more aggressive form of cloning, we actually
> have the opposite problem to the one you are concerned about.  By default, the
> parameter set used to control ipa-cp and the recursive inliner gives priority
> to code size rather than performance. This makes ipa-cp behave somewhat

Yes, for a while the cost model has been quite off.  On Firefox it does just a
few clonings where code size increases, so it desperately needs retuning.

But since recursive cloning is quite a different case from the normal one,
perhaps having an independent set of limits would help, in particular ...
> conservatively, and as a result it cannot trigger the expected recursive
> cloning for the SPEC2017 exchange2 case with the default settings, blocked by
> both ipa-cp-eval-threshold and ipcp-unit-growth. The former is due to improper
> static profile estimation, and the latter conflicts with the aggressiveness of
> recursive cloning. Thus, we have to explicitly lower the threshold and raise
> the unit-growth percentage.
> 
> We might not reach the destination in one leap. This patch is just the first
> step towards enabling recursive function versioning; further, more elaborate
> tuning will still be needed.
> 
> > --param max-inline-recursive-depth which has similar meaning to your 
> > parameter
> >  (so perhaps similar name would be good)
> > --param min-inline-recursive-probability
> >  which requires the inlining to happen only across edges which are
> >  known to be taken with reasonable chance
> > --param max-inline-insns-recursive
> >  which specifies overall size after all the recursive inlining
> 
> > Those parameters are not particularly well thought out or tested, but
> > they may be a good start.
> 
> > Do you have some data on code size/performance effects of this change?
> For SPEC2017, there is no obvious code size or performance change with the
> default settings.  Specifically, for exchange2, with ipa-cp-eval-threshold=1
> and ipcp-unit-growth=80, performance is +31% and size +7% on aarch64.

... it will help here, since the ipa-cp-eval-threshold value needed is quite
far off from what we use by default.

I wonder about the 80% unit growth, which is also more than we can
enable by default.  How come the overall size change is only 7%?

Honza
> 
> Feng


[PATCH 0/5] [amdgcn] Reduce register usage on AMD GCN

2019-11-14 Thread Kwok Cheung Yeung

Hello

Although GCN has a large register file, these registers are distributed 
among the threads (wavefronts) running on the same compute unit, so (up 
to a point) the fewer registers used in a kernel, the more kernels can 
run concurrently. While this is of limited use in trunk at the moment 
with only single-worker offloading, hopefully it will be of more use in 
the future.


These patches free up some of the registers that were previously fixed, 
and restrict the number of registers used in non-kernel functions to 64 
SGPRs and 24 VGPRs, as opposed to 102 SGPRs and 64 VGPRs before. Kernels 
can still use however many they need, but the minimum limit on the 
number of registers needed is now reduced to that of the non-kernel 
functions (since kernels cannot in general know how many registers are 
used by the functions they call, they need to reserve the maximum number 
of registers usable by the callees).


These patches need the patch 'Stash reent marker in upper bits of s1 on 
AMD GCN' in newlib to free up s[2:3] (recently committed as commit 
d14714c690c0b11b0aa7e6d09c930a321eeac7f9).


Tested in standalone configuration on a gfx900 target. I have not yet 
tested the offload configuration with trunk sources as testsuite support 
has not been committed yet - I will retest when this is done.
Internal offload testing (based on a branch of OG9) revealed a number of 
regressions, but they are due to latent bugs exposed by the changes 
rather than issues with this patchset. I have already posted fixes for 
these in the following patches:


[PATCH] Support multiple registers for the frame pointer
[PATCH] [LRA] Do not use eliminable registers for spilling
[PATCH] Check suitability of spill register for mode
[PATCH] [GCN] Fix handling of VCC_CONDITIONAL_REG

Kwok


[PATCH 1/5] [amdgcn] Use first lane of v1 for zero constant

2019-11-14 Thread Kwok Cheung Yeung
GCN 5 has commonly-used global memory instructions that specify the 
address as [SGPR address] + [VGPR offset] + [constant offset], and we 
often want the VGPR offset to be zero, so v0 is currently reserved for 
that purpose.


However, v1 contains [0, 1, 2..., 63], and as we only use the first lane 
of the VGPR for the offset (the instructions actually work on vectors of 
addresses, but we only employ them in single-lane mode for all memory 
accesses except for explicit scatter-gather instructions), v1 can be 
used in place of v0, freeing v0 for other purposes.


Okay for trunk?

Kwok


2019-11-14  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.c (gcn_expand_prologue): Remove initialization and
prologue use of v0.
(print_operand_address): Use v1 for zero vector offset.
---
 gcc/config/gcn/gcn.c | 17 +++--
 1 file changed, 3 insertions(+), 14 deletions(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 1a69737..2c08771 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2799,15 +2799,6 @@ gcn_expand_prologue ()
 cfun->machine->args.
 reg[PRIVATE_SEGMENT_WAVE_OFFSET_ARG]);

-  if (TARGET_GCN5_PLUS)
-   {
- /* v0 is reserved for constant zero so that "global"
-memory instructions can have a nul-offset without
-causing reloads.  */
- emit_insn (gen_vec_duplicatev64si
-(gen_rtx_REG (V64SImode, VGPR_REGNO (0)), const0_rtx));
-   }
-
   if (cfun->machine->args.requested & (1 << FLAT_SCRATCH_INIT_ARG))
{
  rtx fs_init_lo =
@@ -2866,8 +2857,6 @@ gcn_expand_prologue ()
  gen_int_mode (LDS_SIZE, SImode));

   emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
-  if (TARGET_GCN5_PLUS)
-    emit_insn (gen_prologue_use (gen_rtx_REG (SImode, VGPR_REGNO (0))));

   if (cfun && cfun->machine && !cfun->machine->normal_function && 
flag_openmp)

 {
@@ -5324,9 +5313,9 @@ print_operand_address (FILE *file, rtx mem)
  /* The assembler requires a 64-bit VGPR pair here, even though
 the offset should be only 32-bit.  */
  if (vgpr_offset == NULL_RTX)
-   /* In this case, the vector offset is zero, so we use v0,
-  which is initialized by the kernel prologue to zero.  */
-   fprintf (file, "v[0:1]");
+   /* In this case, the vector offset is zero, so we use the first
+  lane of v1, which is initialized to zero.  */
+   fprintf (file, "v[1:2]");
  else if (REG_P (vgpr_offset)
   && VGPR_REGNO_P (REGNO (vgpr_offset)))
{
--
2.8.1




[PATCH 2/5] [amdgcn] Reinitialize registers for every function

2019-11-14 Thread Kwok Cheung Yeung
The set of fixed registers is adjusted by the 
TARGET_CONDITIONAL_REGISTER_USAGE hook, but this needs to be done on a 
per-function basis, whereas the hook is normally called once during GCC 
initialization before any functions have been processed (which means the 
majority of the current implementation is actually dead code!). I have 
added a call to reinit_regs in gcn_init_cumulative_args to set up the
available registers for each function.


Okay for trunk?

Kwok

2019-11-14  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.c (gcn_init_cumulative_args): Call reinit_regs.
---
 gcc/config/gcn/gcn.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 2c08771..09dfabb 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2434,6 +2434,8 @@ gcn_init_cumulative_args (CUMULATIVE_ARGS *cum /* 
Argument info to init */ ,

   cfun->machine->args = cum->args;
   if (!caller && cfun->machine->normal_function)
 gcn_detect_incoming_pointer_arg (fndecl);
+
+  reinit_regs ();
 }

 static bool
--
2.8.1



Re: [PATCH v2 2/2] gdbinit.in: allow to pass function argument explicitly

2019-11-14 Thread Alexander Monakov
On Thu, 14 Nov 2019, Konstantin Kharlamov wrote:

> Haha, this is amazing! Will do. A newbish question: shall I send the updated
> patch "in reply" here, or should I resend the patchset?

Your choice, GCC doesn't have a hard rule for this.  Personally I feel it's
more appropriate to send patches "in reply" when new revisions are coming fast,
and start a new thread when it's been some time since the previous discussion.

Alexander


[PATCH 3/5] [amdgcn] Restrict register usage in non-kernel functions

2019-11-14 Thread Kwok Cheung Yeung
This patch restricts non-kernel functions to using a maximum of 64 SGPRs 
and 24 VGPRs.


Kernels can request various pieces of information from the HSA runtime, 
and these will be loaded into the registers consecutively before the 
kernel executes. These registers are normally fixed. Since non-kernel 
functions cannot make these requests, they have to assume that the 
default set of information has been requested. If a non-leaf kernel 
requests information not in the defaults, a warning is now emitted as 
pieces of info needed by callees may have shifted locations. A leaf 
kernel can do whatever it wants.


I have set up FIXED_REGISTERS for the default case now - if a different 
set of startup info is requested (which should be rare), then the set of 
fixed registers will be adjusted accordingly by 
gcn_conditional_register_usage. Compared to before, v0, s2 and s3 are 
now unfixed (due to the newlib patch 'Stash reent marker in upper bits 
of s1 on AMD GCN' and the first patch in this series).
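
As a rough standalone illustration of that split (invented names again,
and simplified to one register per argument, unlike the real ABI): the
default request costs nothing because the static table is already
correct, and only a non-default request triggers a recomputation of the
fixed registers.

#include <stdbool.h>
#include <string.h>

#define NUM_ARG_REGS 16

/* Compile-time default layout, standing in for FIXED_REGISTERS.  */
static bool fixed_arg_regs[NUM_ARG_REGS] = { true, true, true, true };

static void
apply_register_layout (long requested, long default_requested)
{
  if (requested == default_requested)
    return;                            /* static default already matches */

  /* Rare non-default case: recompute which argument registers are fixed,
     assuming (only for this sketch) one register per requested argument.  */
  memset (fixed_arg_regs, 0, sizeof fixed_arg_regs);
  int regno = 0;
  for (int arg = 0; arg < NUM_ARG_REGS; arg++)
    if (requested & (1L << arg))
      fixed_arg_regs[regno++] = true;
}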


Okay to commit?

Kwok


2019-11-14  Kwok Cheung Yeung  

gcc/
* config/gcn/gcn.c (default_requested_args): New.
(gcn_parse_amdgpu_hsa_kernel_attribute): Initialize requested args
set with default_requested_args.
(gcn_conditional_register_usage): Limit register usage of non-kernel
functions.  Reassign fixed registers if a non-standard set of args is
requested.
* config/gcn/gcn.h (FIXED_REGISTERS): Fix registers according to ABI.
---
 gcc/config/gcn/gcn.c | 63 ++--
 gcc/config/gcn/gcn.h |  6 ++---
 2 files changed, 39 insertions(+), 30 deletions(-)

diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index 09dfabb..8a2f7d7 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -191,6 +191,17 @@ static const struct gcn_kernel_arg_type
   {"work_item_id_Z", NULL, V64SImode, FIRST_VGPR_REG + 2}
 };

+static const long default_requested_args
+   = (1 << PRIVATE_SEGMENT_BUFFER_ARG)
+ | (1 << DISPATCH_PTR_ARG)
+ | (1 << QUEUE_PTR_ARG)
+ | (1 << KERNARG_SEGMENT_PTR_ARG)
+ | (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG)
+ | (1 << WORKGROUP_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_X_ARG)
+ | (1 << WORK_ITEM_ID_Y_ARG)
+ | (1 << WORK_ITEM_ID_Z_ARG);
+
 /* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
This function also sets the default values for some arguments.

@@ -201,10 +212,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
   tree list)
 {
   bool err = false;
-  args->requested = ((1 << PRIVATE_SEGMENT_BUFFER_ARG)
-| (1 << QUEUE_PTR_ARG)
-| (1 << KERNARG_SEGMENT_PTR_ARG)
-| (1 << PRIVATE_SEGMENT_WAVE_OFFSET_ARG));
+  args->requested = default_requested_args;
   args->nargs = 0;

   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -242,8 +250,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
   args->requested |= (1 << a);
   args->order[args->nargs++] = a;
 }
-  args->requested |= (1 << WORKGROUP_ID_X_ARG);
-  args->requested |= (1 << WORK_ITEM_ID_Z_ARG);

   /* Requesting WORK_ITEM_ID_Z_ARG implies requesting WORK_ITEM_ID_X_ARG and
  WORK_ITEM_ID_Y_ARG.  Similarly, requesting WORK_ITEM_ID_Y_ARG implies
@@ -253,10 +259,6 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
   if (args->requested & (1 << WORK_ITEM_ID_Y_ARG))
 args->requested |= (1 << WORK_ITEM_ID_X_ARG);

-  /* Always enable this so that kernargs is in a predictable place for
- gomp_print, etc.  */
-  args->requested |= (1 << DISPATCH_PTR_ARG);
-
   int sgpr_regno = FIRST_SGPR_REG;
   args->nsgprs = 0;
   for (int a = 0; a < GCN_KERNEL_ARG_TYPES; a++)
@@ -2041,27 +2043,34 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
 static void
 gcn_conditional_register_usage (void)
 {
-  int i;
+  if (!cfun || !cfun->machine)
+return;

-  /* FIXME: Do we need to reset fixed_regs?  */
+  if (cfun->machine->normal_function)
+{
+  /* Restrict the set of SGPRs and VGPRs used by non-kernel functions.  */
+  for (int i = SGPR_REGNO (62); i <= LAST_SGPR_REG; i++)
+   fixed_regs[i] = 1, call_used_regs[i] = 1;

-/* Limit ourselves to 1/16 the register file for maximimum sized workgroups.
-   There are enough SGPRs not to limit those.
-   TODO: Adjust this more dynamically.  */
-  for (i = FIRST_VGPR_REG + 64; i <= LAST_VGPR_REG; i++)
-fixed_regs[i] = 1, call_used_regs[i] = 1;
+  for (int i = VGPR_REGNO (24); i <= LAST_VGPR_REG; i++)
+   fixed_regs[i] = 1, call_used_regs[i] = 1;

-  if (!cfun || !cfun->machine || cfun->machine->normal_function)
-{
-  /* Normal functions can't know what kernel argument registers are
- live, so just fix the bottom 16 SGPRs, and bottom 3 VGPRs.  */
-  for (i = 0; i < 16; i++)
-
