[PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Jakub Jelinek
Hi!

I've noticed that lots of g++ struct-layout-1.exp tests now fail
on ppc*.  The problem is the psABI warning:
the layout of aggregates containing vectors with N-byte alignment
For various targets we are using -Wno-abi (and, in C
struct-layout-1.exp also by default) for this, so the patch just
changes the default for C++ testing too.

Regtested on x86_64-linux and powerpc*-linux, ok for trunk/4.9/4.8?

2014-09-25  Jakub Jelinek  

* g++.dg/compat/struct-layout-1_generate.c: Add -Wno-abi
to default options.

--- gcc/testsuite/g++.dg/compat/struct-layout-1_generate.c.jj   2012-03-14 
09:39:38.0 +0100
+++ gcc/testsuite/g++.dg/compat/struct-layout-1_generate.c  2014-09-24 
07:59:10.086096164 +0200
@@ -1,5 +1,5 @@
 /* Structure layout test generator.
-   Copyright (C) 2004, 2005, 2007, 2008, 2009, 2011, 2012
+   Copyright (C) 2004-2014
Free Software Foundation, Inc.
Contributed by Jakub Jelinek .
 
@@ -44,7 +44,7 @@ along with GCC; see the file COPYING3.
 #endif
 
 const char *dg_options[] = {
-"/* { dg-options \"%s-I%s\" } */\n",
+"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
 "/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-* x86_64-*-* } } 
*/\n",
 "/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux* 
powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
 "/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target 
i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32* 
i?86-*-cygwin* } } */\n",

Jakub


ipa-devirt TLC 6 - reorg of query cache

2014-09-25 Thread Jan Hubicka
Hi,
this patch makes polymorphic call targets cache more effective.  With more 
aggressive
speculation the code now runs into a problem that it has many different 
speculative
lists.  Those are typically short in number of calls speculatively considered 
likely
but they do have long list of unlikely targets, too.

This patch reorganizes possible_polymorphic_call_targets to give speculative and
non-speculative lists separately.  This way the speculative lists are many and 
short
while non-speculative are few but long.

The code still spends more time than I would like to - next reasonable 
optimization to
do is to kill the recursive type walks looking for BINFO.  Instead I can 
preprocess
ODR types and store table of offsets where BINFOs are located and types 
associated
with them.  This will also avoid the ugly details of C++ ABI.
I would like to do this change after retiring get_binfo_at_offset from ipa-prop.

The firefox WPA is now as follows:
 phase opt and generate  :  77.55 (64%) usr   1.52 (16%) sys  79.06 (56%) wall  
720726 kB (16%) ggc
 phase stream in :  36.84 (30%) usr   2.13 (22%) sys  38.97 (28%) wall 
3650329 kB (83%) ggc
 phase stream out:   6.67 ( 6%) usr   6.12 (63%) sys  23.00 (16%) wall  
 0 kB ( 0%) ggc
 callgraph optimization  :   0.87 ( 1%) usr   0.00 ( 0%) sys   0.87 ( 1%) wall  
34 kB ( 0%) ggc
 ipa dead code removal   :   9.56 ( 8%) usr   0.10 ( 1%) sys   9.79 ( 7%) wall  
 0 kB ( 0%) ggc
 ipa virtual call target :   8.11 ( 7%) usr   0.09 ( 1%) sys   8.09 ( 6%) wall  
 0 kB ( 0%) ggc
 ipa cp  :   2.68 ( 2%) usr   0.18 ( 2%) sys   2.86 ( 2%) wall  
234548 kB ( 5%) ggc
 ipa inlining heuristics :  35.32 (29%) usr   1.14 (12%) sys  36.46 (26%) wall  
960324 kB (22%) ggc
 ipa lto decl in :  25.61 (21%) usr   1.32 (14%) sys  26.95 (19%) wall 
2629518 kB (60%) ggc
 ipa lto decl out:   5.84 ( 5%) usr   0.31 ( 3%) sys   6.15 ( 4%) wall  
 0 kB ( 0%) ggc
 ipa lto cgraph I/O  :   1.48 ( 1%) usr   0.26 ( 3%) sys   1.73 ( 1%) wall  
487517 kB (11%) ggc
 ipa lto decl merge  :   3.03 ( 3%) usr   0.01 ( 0%) sys   3.04 ( 2%) wall  
 16412 kB ( 0%) ggc
 ipa lto cgraph merge:   2.84 ( 2%) usr   0.00 ( 0%) sys   2.84 ( 2%) wall  
 12531 kB ( 0%) ggc
 whopr wpa   :   2.51 ( 2%) usr   0.00 ( 0%) sys   2.50 ( 2%) wall  
 1 kB ( 0%) ggc
 whopr partitioning  :   8.78 ( 7%) usr   0.02 ( 0%) sys   8.81 ( 6%) wall  
  5082 kB ( 0%) ggc
 ipa reference   :   4.95 ( 4%) usr   0.08 ( 1%) sys   5.03 ( 4%) wall  
 0 kB ( 0%) ggc
 ipa pure const  :   5.64 ( 5%) usr   0.03 ( 0%) sys   5.66 ( 4%) wall  
 0 kB ( 0%) ggc
 TOTAL : 121.06 9.77   141.04
4372468 kB

Compared to 4.9.1:

Execution times (seconds)
 phase setup :   0.01 ( 0%) usr   0.01 ( 0%) sys   0.05 ( 0%) wall  
  1534 kB ( 0%) ggc
 phase opt and generate  :  71.55 (61%) usr   1.76 (18%) sys  73.44 (48%) wall  
822835 kB (18%) ggc
 phase stream in :  40.27 (34%) usr   2.59 (26%) sys  52.26 (34%) wall 
3647578 kB (82%) ggc
 phase stream out:   5.83 ( 5%) usr   5.51 (56%) sys  28.17 (18%) wall  
 0 kB ( 0%) ggc
 garbage collection  :   2.43 ( 2%) usr   0.00 ( 0%) sys   2.48 ( 2%) wall  
 0 kB ( 0%) ggc
 ipa dead code removal   :   8.31 ( 7%) usr   0.29 ( 3%) sys   8.64 ( 6%) wall  
87 kB ( 0%) ggc
 ipa virtual call target :   7.62 ( 6%) usr   0.05 ( 1%) sys   7.59 ( 5%) wall  
 0 kB ( 0%) ggc
 ipa cp  :   2.42 ( 2%) usr   0.26 ( 3%) sys   2.75 ( 2%) wall  
238617 kB ( 5%) ggc
 ipa inlining heuristics :  34.58 (29%) usr   1.02 (10%) sys  35.67 (23%) wall  
980366 kB (22%) ggc
 ipa lto decl in :  28.95 (25%) usr   1.91 (19%) sys  39.90 (26%) wall 
2755921 kB (62%) ggc
 ipa lto decl out:   5.22 ( 4%) usr   0.68 ( 7%) sys   5.90 ( 4%) wall  
 0 kB ( 0%) ggc
 ipa lto cgraph I/O  :   1.27 ( 1%) usr   0.34 ( 3%) sys   1.81 ( 1%) wall  
452790 kB (10%) ggc
 ipa lto decl merge  :   3.12 ( 3%) usr   0.00 ( 0%) sys   3.12 ( 2%) wall  
 16413 kB ( 0%) ggc
 ipa lto cgraph merge:   2.98 ( 3%) usr   0.00 ( 0%) sys   2.99 ( 2%) wall  
 11878 kB ( 0%) ggc
 whopr wpa   :   1.45 ( 1%) usr   0.00 ( 0%) sys   1.44 ( 1%) wall  
 2 kB ( 0%) ggc
 whopr partitioning  :   6.34 ( 5%) usr   0.07 ( 1%) sys   6.41 ( 4%) wall  
  3860 kB ( 0%) ggc
 ipa reference   :   5.11 ( 4%) usr   0.10 ( 1%) sys   5.20 ( 3%) wall  
 0 kB ( 0%) ggc
 ipa pure const  :   5.31 ( 5%) usr   0.02 ( 0%) sys   5.34 ( 3%) wall  
 0 kB ( 0%) ggc
 TOTAL : 117.66 9.87   153.92
4471948 kB

So we are back from regression land, but it would be nice to see some actual 
improvements soon.

Bootstrapped/regtested x86_64-linux, will commit it tomorrow.

Honza

* ipa-devirt.c (polymorphic_call_target_d): Add SPECULATIVE; reorder
for better storage.
(po

[PATCH] Fix dr_explicit_realign_optimized handling in the vectorizer (PR tree-optimization/63341)

2014-09-25 Thread Jakub Jelinek
Hi!

As the testcases show, dr_explicit_realign_optimized (used on PowerPC/SPU
only) misbehaves if the base_address is in between 1 and vector element size - 1
modulo vector size.
The problem is that it wants to add a bias to base_addr such that
base_addr & ~vector_size
(base_addr + bias) & ~vector_size
are adjacent vector_size memory slots, but vect_create_data_ref_ptr
takes offset counted in vector elements and in the end multiplies that
by vector element size, so we end up actually with:
(base_addr + ((vector_size / vector_element_size - 1) * vector_element_size)) & 
~vector_size
which unfortunately is not enough, e.g. in the testcase
base_addr is 1 modulo vector_size, vector_size 16 and vector_element_size 2,
so we have
base_addr & ~16
(base_addr + 14) & ~16
instead of the desired
(base_addr + 15) & ~16
and 1 & ~16 and (1 + 14) & ~16 is the same address.

Fixed by passing another offset, measured in bytes (for the negative case
which are the only 3 other cases which pass any offset down we want it to be
as is), bootstrapped/regtested on x86_64-linux and i686-linux and on
{x86_64,i686,powerpc{,64},s390{,x}}-linux on the 4.8 branch.

Ok for trunk/4.9/4.8?

2014-09-25  Jakub Jelinek  

PR tree-optimization/63341
* tree-vectorizer.h (vect_create_data_ref_ptr,
vect_create_addr_base_for_vector_ref): Add another tree argument
defaulting to NULL_TREE.
* tree-vect-data-refs.c (vect_create_data_ref_ptr): Add byte_offset
argument, pass it down to vect_create_addr_base_for_vector_ref.
(vect_create_addr_base_for_vector_ref): Add byte_offset argument,
add that to base_offset too if non-NULL.
* tree-vect-stmts.c (vectorizable_load): Add byte_offset variable,
for dr_explicit_realign_optimized set it to vector byte size
- 1 instead of setting offset, pass byte_offset down to
vect_create_data_ref_ptr.

* gcc.dg/vect/pr63341-1.c: New test.
* gcc.dg/vect/pr63341-2.c: New test.

--- gcc/tree-vectorizer.h.jj2014-09-01 09:43:56.0 +0200
+++ gcc/tree-vectorizer.h   2014-09-23 15:19:28.302484227 +0200
@@ -1061,7 +1061,8 @@ extern bool vect_analyze_data_refs (loop
unsigned *);
 extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
  tree *, gimple_stmt_iterator *,
- gimple *, bool, bool *);
+ gimple *, bool, bool *,
+ tree = NULL_TREE);
 extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, 
tree);
 extern tree vect_create_destination_var (tree, tree);
 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
@@ -1078,7 +1079,8 @@ extern void vect_transform_grouped_load
 extern void vect_record_grouped_load_vectors (gimple, vec );
 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
 extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
-  tree, struct loop *);
+ tree, struct loop *,
+ tree = NULL_TREE);
 
 /* In tree-vect-loop.c.  */
 /* FORNOW: Used in tree-parloops.c.  */
--- gcc/tree-vect-data-refs.c.jj2014-09-18 15:48:22.0 +0200
+++ gcc/tree-vect-data-refs.c   2014-09-23 15:11:06.163061112 +0200
@@ -3860,6 +3860,9 @@ vect_get_new_vect_var (tree type, enum v
is as follows:
if LOOP=i_loop: &in (relative to i_loop)
if LOOP=j_loop: &in+i*2B(relative to j_loop)
+   BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
+   initial address.  Unlike OFFSET, which is number of elements to
+   be added, BYTE_OFFSET is measured in bytes.
 
Output:
1. Return an SSA_NAME whose value is the address of the memory location of
@@ -3873,7 +3876,8 @@ tree
 vect_create_addr_base_for_vector_ref (gimple stmt,
  gimple_seq *new_stmt_list,
  tree offset,
- struct loop *loop)
+ struct loop *loop,
+ tree byte_offset)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
@@ -3926,6 +3930,12 @@ vect_create_addr_base_for_vector_ref (gi
   base_offset = fold_build2 (PLUS_EXPR, sizetype,
 base_offset, offset);
 }
+  if (byte_offset)
+{
+  byte_offset = fold_convert (sizetype, byte_offset);
+  base_offset = fold_build2 (PLUS_EXPR, sizetype,
+base_offset, byte_offset);
+}
 
   /* base + base_offset */
   if (loop_vinfo)
@@ -3983,6 +3993,10 @@ vect_create_addr_base_for_vecto

Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Uros Bizjak
Hello!

> I've noticed that lots of g++ struct-layout-1.exp tests now fail
> on ppc*.  The problem is the psABI warning:
> the layout of aggregates containing vectors with N-byte alignment
> For various targets we are using -Wno-abi (and, in C
> struct-layout-1.exp also by default) for this, so the patch just
> changes the default for C++ testing too.

 const char *dg_options[] = {
-"/* { dg-options \"%s-I%s\" } */\n",
+"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
 "/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-*
x86_64-*-* } } */\n",
 "/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux*
powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
 "/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target
i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32*
i?86-*-cygwin* } } */\n",

IMO, these should be converted to dg-additional-options.

Uros.


Re: [PATCH] Fix dr_explicit_realign_optimized handling in the vectorizer (PR tree-optimization/63341)

2014-09-25 Thread Richard Biener
On Thu, 25 Sep 2014, Jakub Jelinek wrote:

> Hi!
> 
> As the testcases show, dr_explicit_realign_optimized (used on PowerPC/SPU
> only) misbehaves if the base_address is in between 1 and vector element size 
> - 1
> modulo vector size.
> The problem is that it wants to add a bias to base_addr such that
> base_addr & ~vector_size
> (base_addr + bias) & ~vector_size
> are adjacent vector_size memory slots, but vect_create_data_ref_ptr
> takes offset counted in vector elements and in the end multiplies that
> by vector element size, so we end up actually with:
> (base_addr + ((vector_size / vector_element_size) * vector_element_size) & 
> ~vector_size
> which unfortunately is not enough, e.g. in the testcase
> base_addr is 1 moduo vector_size, vector_size 16 and vector_element_size 2,
> so we have
> base_addr & ~16
> (base_addr + 14) & ~16
> instead of the desired
> (base_addr + 15) & ~16
> and 1 & ~16 and (1 + 14) & ~16 is the same address.
> 
> Fixed by passing another offset, measured in bytes (for the negative case
> which are the only 3 other cases which pass any offset down we want it to be
> as is), bootstrapped/regtested on x86_64-linux and i686-linux and on
> {x86_64,i686,powerpc{,64},s390{,x}}-linux on the 4.8 branch.
> 
> Ok for trunk/4.9/4.8?

Ok.

Thanks,
Richard.

> 2014-09-25  Jakub Jelinek  
> 
>   PR tree-optimization/63341
>   * tree-vectorizer.h (vect_create_data_ref_ptr,
>   vect_create_addr_base_for_vector_ref): Add another tree argument
>   defaulting to NULL_TREE.
>   * tree-vect-data-refs.c (vect_create_data_ref_ptr): Add byte_offset
>   argument, pass it down to vect_create_addr_base_for_vector_ref.
>   (vect_create_addr_base_for_vector_ref): Add byte_offset argument,
>   add that to base_offset too if non-NULL.
>   * tree-vect-stmts.c (vectorizable_load): Add byte_offset variable,
>   for dr_explicit_realign_optimized set it to vector byte size
>   - 1 instead of setting offset, pass byte_offset down to
>   vect_create_data_ref_ptr.
> 
>   * gcc.dg/vect/pr63341-1.c: New test.
>   * gcc.dg/vect/pr63341-2.c: New test.
> 
> --- gcc/tree-vectorizer.h.jj  2014-09-01 09:43:56.0 +0200
> +++ gcc/tree-vectorizer.h 2014-09-23 15:19:28.302484227 +0200
> @@ -1061,7 +1061,8 @@ extern bool vect_analyze_data_refs (loop
>   unsigned *);
>  extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
> tree *, gimple_stmt_iterator *,
> -   gimple *, bool, bool *);
> +   gimple *, bool, bool *,
> +   tree = NULL_TREE);
>  extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, 
> tree);
>  extern tree vect_create_destination_var (tree, tree);
>  extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
> @@ -1078,7 +1079,8 @@ extern void vect_transform_grouped_load
>  extern void vect_record_grouped_load_vectors (gimple, vec );
>  extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
>  extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
> -  tree, struct loop *);
> +   tree, struct loop *,
> +   tree = NULL_TREE);
>  
>  /* In tree-vect-loop.c.  */
>  /* FORNOW: Used in tree-parloops.c.  */
> --- gcc/tree-vect-data-refs.c.jj  2014-09-18 15:48:22.0 +0200
> +++ gcc/tree-vect-data-refs.c 2014-09-23 15:11:06.163061112 +0200
> @@ -3860,6 +3860,9 @@ vect_get_new_vect_var (tree type, enum v
>   is as follows:
>   if LOOP=i_loop: &in (relative to i_loop)
>   if LOOP=j_loop: &in+i*2B(relative to j_loop)
> +   BYTE_OFFSET: Optional, defaulted to NULL.  If supplied, it is added to the
> + initial address.  Unlike OFFSET, which is number of elements to
> + be added, BYTE_OFFSET is measured in bytes.
>  
> Output:
> 1. Return an SSA_NAME whose value is the address of the memory location of
> @@ -3873,7 +3876,8 @@ tree
>  vect_create_addr_base_for_vector_ref (gimple stmt,
> gimple_seq *new_stmt_list,
> tree offset,
> -   struct loop *loop)
> +   struct loop *loop,
> +   tree byte_offset)
>  {
>stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
>struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
> @@ -3926,6 +3930,12 @@ vect_create_addr_base_for_vector_ref (gi
>base_offset = fold_build2 (PLUS_EXPR, sizetype,
>base_offset, offset);
>  }
> +  if (byte_offset)
> +{
> +  byte_offset = fold_convert (sizetype, byte_offset);
> +  base_offset = fold_buil

Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Jakub Jelinek
On Thu, Sep 25, 2014 at 09:29:52AM +0200, Uros Bizjak wrote:
> Hello!
> 
> > I've noticed that lots of g++ struct-layout-1.exp tests now fail
> > on ppc*.  The problem is the psABI warning:
> > the layout of aggregates containing vectors with N-byte alignment
> > For various targets we are using -Wno-abi (and, in C
> > struct-layout-1.exp also by default) for this, so the patch just
> > changes the default for C++ testing too.
> 
>  const char *dg_options[] = {
> -"/* { dg-options \"%s-I%s\" } */\n",
> +"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
>  "/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-*
> x86_64-*-* } } */\n",
>  "/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux*
> powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
>  "/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target
> i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32*
> i?86-*-cygwin* } } */\n",
> 
> IMO, these should be converted to dg-additional-options.

That would be something like the following, except that the compat framework doesn't
support dg-additional-options:
WARNING: compat.exp does not support dg-additional-options

--- gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c.jj   2012-03-14 
09:39:37.0 +0100
+++ gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c  2014-09-25 
09:37:07.430959359 +0200
@@ -43,16 +43,12 @@ along with GCC; see the file COPYING3.
 #define COMPAT_PRLL "ll"
 #endif
 
-const char *dg_options[] = {
-"/* { dg-options \"%s-I%s\" } */\n",
-"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
-"/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-* x86_64-*-* } } 
*/\n",
-"/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux* 
powerpc*-*-darwin* } } */\n",
-"/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target 
i?86-*-darwin* x86_64-*-darwin* } } */\n",
-"/* { dg-options \"%s-I%s -mno-base-addresses\" { target mmix-*-* } } */\n",
-"/* { dg-options \"%s-I%s -mlongcalls -mtext-section-literals\" { target 
xtensa*-*-* } } */\n"
-#define NDG_OPTIONS (sizeof (dg_options) / sizeof (dg_options[0]))
-};
+const char dg_options[] =
+"/* { dg-options \"%s-I%s -Wno-abi\" } */\n"
+"/* { dg-additional-options \"-mno-mmx\" { target i?86-*-* x86_64-*-* } } */\n"
+"/* { dg-additional-options \"-fno-common\" { target hppa*-*-hpux* 
powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n"
+"/* { dg-additional-options \"-mno-base-addresses\" { target mmix-*-* } } */\n"
+"/* { dg-additional-options \"-mlongcalls -mtext-section-literals\" { target 
xtensa*-*-* } } */\n";
 
 typedef unsigned int hashval_t;
 
@@ -759,7 +755,6 @@ switchfiles (int fields)
 {
   static int filecnt;
   static char *destbuf, *destptr;
-  int i;
 
   ++filecnt;
   if (outfile)
@@ -789,8 +784,7 @@ switchfiles (int fields)
   exit (1);
 }
   fprintf (outfile, "/* { dg-require-effective-target int32plus } */\n");
-  for (i = 0; i < NDG_OPTIONS; i++)
-fprintf (outfile, dg_options[i], "", srcdir_safe);
+  fprintf (outfile, dg_options, "", srcdir_safe);
   fprintf (outfile, "/* { dg-prune-output \".*-Wno-abi.*\" } */\n");
   fprintf (outfile, "/* { dg-prune-output \".*Offset of packed bit-field.*\" } 
*/\n");
   fprintf (outfile, "\
@@ -817,8 +811,7 @@ int main (void)\n\
   outfile = fopen (destbuf, "w");
   if (outfile == NULL)
 goto fail;
-  for (i = 0; i < NDG_OPTIONS; i++)
-fprintf (outfile, dg_options[i], "-w ", srcdir_safe);
+  fprintf (outfile, dg_options, "-w ", srcdir_safe);
   fprintf (outfile, "\
 #include \"struct-layout-1_x1.h\"\n\
 #include \"t%03d_test.h\"\n\
@@ -829,8 +822,7 @@ int main (void)\n\
   outfile = fopen (destbuf, "w");
   if (outfile == NULL)
 goto fail;
-  for (i = 0; i < NDG_OPTIONS; i++)
-fprintf (outfile, dg_options[i], "-w ", srcdir_safe);
+  fprintf (outfile, dg_options, "-w ", srcdir_safe);
   fprintf (outfile, "\
 #include \"struct-layout-1_y1.h\"\n\
 #include \"t%03d_test.h\"\n\
--- gcc/testsuite/g++.dg/compat/struct-layout-1_generate.c.jj   2014-09-24 
11:07:50.0 +0200
+++ gcc/testsuite/g++.dg/compat/struct-layout-1_generate.c  2014-09-25 
09:36:00.371312897 +0200
@@ -43,15 +43,12 @@ along with GCC; see the file COPYING3.
 #define COMPAT_PRLL "ll"
 #endif
 
-const char *dg_options[] = {
-"/* { dg-options \"%s-I%s\" } */\n",
-"/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-* x86_64-*-* } } 
*/\n",
-"/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux* 
powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
-"/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target 
i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32* 
i?86-*-cygwin* } } */\n",
-"/* { dg-options \"%s-I%s -mno-base-addresses\" { target mmix-*-* } } */\n",
-"/* { dg-options \"%s-I%s -mlongcalls -mtext-section-literals\" { target 
xtensa*-*-* } } */\n"
-#define NDG_OPTIONS (sizeof (dg_options) / sizeof (dg_options[0]))
-};
+const char dg_options[] =
+"/* { dg-options \"%s-I%s -Wno-abi\" } */

Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 9:43 AM, Jakub Jelinek  wrote:

>> > I've noticed that lots of g++ struct-layout-1.exp tests now fail
>> > on ppc*.  The problem is the psABI warning:
>> > the layout of aggregates containing vectors with N-byte alignment
>> > For various targets we are using -Wno-abi (and, in C
>> > struct-layout-1.exp also by default) for this, so the patch just
>> > changes the default for C++ testing too.
>>
>>  const char *dg_options[] = {
>> -"/* { dg-options \"%s-I%s\" } */\n",
>> +"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
>>  "/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-*
>> x86_64-*-* } } */\n",
>>  "/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux*
>> powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
>>  "/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target
>> i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32*
>> i?86-*-cygwin* } } */\n",
>>
>> IMO, these should be converted to dg-additional-options.
>
> That would be something like following, except that compat framework doesn't
> support dg-additional-options:
> WARNING: compat.exp does not support dg-additional-options

Huh ...

> -const char *dg_options[] = {
> -"/* { dg-options \"%s-I%s\" } */\n",
> -"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
> -"/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-* x86_64-*-* } 
> } */\n",
> -"/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux* 
> powerpc*-*-darwin* } } */\n",
> -"/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target 
> i?86-*-darwin* x86_64-*-darwin* } } */\n",
> -"/* { dg-options \"%s-I%s -mno-base-addresses\" { target mmix-*-* } } */\n",
> -"/* { dg-options \"%s-I%s -mlongcalls -mtext-section-literals\" { target 
> xtensa*-*-* } } */\n"
> -#define NDG_OPTIONS (sizeof (dg_options) / sizeof (dg_options[0]))
> -};
> +const char dg_options[] =
> +"/* { dg-options \"%s-I%s -Wno-abi\" } */\n"
> +"/* { dg-additional-options \"-mno-mmx\" { target i?86-*-* x86_64-*-* } } 
> */\n"
> +"/* { dg-additional-options \"-fno-common\" { target hppa*-*-hpux* 
> powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n"

*-*-darwin*

> +const char dg_options[] =
> +"/* { dg-options \"%s-I%s -Wno-abi\" } */\n"
> +"/* { dg-additional-options \"-mno-mmx\" { target i?86-*-* x86_64-*-* } } 
> */\n"
> +"/* { dg-additional-options \"-fno-common\" { target hppa*-*-hpux* 
> powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n"

Also here.

Uros.


Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 9:43 AM, Jakub Jelinek  wrote:

>> > I've noticed that lots of g++ struct-layout-1.exp tests now fail
>> > on ppc*.  The problem is the psABI warning:
>> > the layout of aggregates containing vectors with N-byte alignment
>> > For various targets we are using -Wno-abi (and, in C
>> > struct-layout-1.exp also by default) for this, so the patch just
>> > changes the default for C++ testing too.
>>
>>  const char *dg_options[] = {
>> -"/* { dg-options \"%s-I%s\" } */\n",
>> +"/* { dg-options \"%s-I%s -Wno-abi\" } */\n",
>>  "/* { dg-options \"%s-I%s -mno-mmx -Wno-abi\" { target i?86-*-*
>> x86_64-*-* } } */\n",
>>  "/* { dg-options \"%s-I%s -fno-common\" { target hppa*-*-hpux*
>> powerpc*-*-darwin* *-*-mingw32* *-*-cygwin* } } */\n",
>>  "/* { dg-options \"%s-I%s -mno-mmx -fno-common -Wno-abi\" { target
>> i?86-*-darwin* x86_64-*-darwin* i?86-*-mingw32* x86_64-*-mingw32*
>> i?86-*-cygwin* } } */\n",
>>
>> IMO, these should be converted to dg-additional-options.
>
> That would be something like following, except that compat framework doesn't
> support dg-additional-options:
> WARNING: compat.exp does not support dg-additional-options

Rainer, Mike - any hope here?

Thanks,
Uros.


Re: [PATCH] Add diagnostic to require that virtual methods be tagged with C++11 'override'

2014-09-25 Thread Marek Polacek
Hi!

On Wed, Sep 24, 2014 at 02:08:01PM -0700, Josh Gao wrote:
> commit b4d15d3cf660708dca088361801ebd56e018f986
> Author: Josh Gao 
> Date:   Wed Sep 24 14:01:16 2014 -0700
> 
> Add diagnostic to require virtual methods to be tagged override.
> 
> gcc/Changelog
> * doc/invoke.texi (-Wmissing-virtual-override): Add.
> 
> gcc/c-family
> * c.opt: Add -Wmissing-virtual-override.

A nit: this should probably be
* c.opt (Wmissing-virtual-override): New option.
 
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 666f1a6..9392e7b 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,6 @@
> +2014-09-24  Josh Gao  
> +* doc/invoke.texi (-Wmissing-virtual-override): Add.
> +
>  2014-09-24  Jan Hubicka  
>  
>   * ipa-utils.h (polymorphic_call_context): Add
> diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
> index 2278e77..56ec987 100644
> --- a/gcc/c-family/ChangeLog
> +++ b/gcc/c-family/ChangeLog
> @@ -1,3 +1,6 @@
> +2014-09-24  Josh Gao  
> + * c.opt: Add -Wmissing-virtual-override.
> +
>  2014-09-24  Marek Polacek  
>  
>   PR c/61405

Please don't include ChangeLogs in the patch itself, it makes the
patch hard to apply.  Instead, just include the ChangeLog entry before
the patch, as you did above.

> --- a/gcc/cp/class.c
> +++ b/gcc/cp/class.c
> @@ -2773,6 +2773,9 @@ check_for_override (tree decl, tree ctype)
>  error ("%q+#D marked %, but is not virtual", decl);
>if (DECL_OVERRIDE_P (decl) && !overrides_found)
>  error ("%q+#D marked %, but does not override", decl);
> +  if (!DECL_OVERRIDE_P (decl) && overrides_found && !DECL_DESTRUCTOR_P 
> (decl))
> +warning (OPT_Wmissing_virtual_override,
> +  "%q+#D overrides, but is not marked %", decl);

Shouldn't this new if be better 'else if'?

> --- a/gcc/testsuite/ChangeLog
> +++ b/gcc/testsuite/ChangeLog
> @@ -1,3 +1,6 @@
> +2014-09-24  Josh Gao  
> + * g++.dg/cpp0x/override5.C: New test.

This test seems to be missing in the patch; forgot to git add?

Thanks,

Marek


RE: [PATCH, 2/2] shrink wrap a function with a single loop: split live_edge

2014-09-25 Thread Zhenqiang Chen


> -Original Message-
> From: Jiong Wang [mailto:jiong.w...@arm.com]
> Sent: Thursday, September 25, 2014 2:13 AM
> To: Jeff Law; Zhenqiang Chen
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
> live_edge
> 
> 
> On 22/09/14 18:51, Jeff Law wrote:
> > On 09/22/14 04:24, Jiong Wang wrote:
> >>> Great.  Can you send an updated patchkit for review.
> >> patch attached.
> >>
> >> please review, thanks.
> >>
> >> gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
> >> live-in of new created BB as the intersection of live-in from
> >> "old_dest" and live-out from "bb".
> > Looks good.  However, before committing we need a couple things.
> >
> > 1. Bootstrap & regression test this variant of the patch.  I know you
> > tested an earlier one, but please test this one just to be sure.
> >
> > 2. Testcase.  I think you could test for either the reduction in the
> > live-in set of the newly created block or that you're shrink wrapping
> > one or more functions you didn't previously shrink-wrap.  I think it's
> > fine if this test is target specific.
> 
>   bootstrap ok based on revision 215515.
> 
>   while the x86 regression result is interesting. there is no regression on
> check-g++, while there is four regression on check-gcc:
> 
> FAIL: gcc.dg/tree-ssa/loadpre10.c (internal compiler error)
> FAIL: gcc.dg/tree-ssa/loadpre10.c (test for excess errors)
> FAIL: gcc.dg/tree-ssa/pr21417.c (internal compiler error)
> FAIL: gcc.dg/tree-ssa/pr21417.c (test for excess errors)
> 
>this is caused by our improving the accuracy of live-in for new created 
> basic
> block. Now we will split
>more than one edge for the above two testcase. thus trigger the following
> assert in move_insn_for_shrink_wrap:
> 
>/* We should not split more than once for a function.  */
>gcc_assert (!(*split_p));

According to the algorithm, it is impossible to split one edge twice. It's 
possible to split two different edges. But for such cases, the control flow is 
too complex to perform shrink-wrapping.

Anyway, your patch improves the accuracy. You can replace the "gcc_assert" with 
"return"; or change "split_p" to "splitted_edge" so that you can check that one edge is 
not split twice.

Thanks!
-Zhenqiang
 
>   take pr21417.c for example, after the patch, two edges will be split,
> 
> before this patch
> =
> .L2:
>  movq%rdi, %rax
>  cmpl$142, (%rdi)
>  jne .L13
> .L4:
>  all insns sinked here  <-- the only split
>  ...
>  ...
> 
>  popq%rbx
>  popq%rbp
> .L13:
>  ret
> 
> after this patch
> 
> .L2:
> 
>  cmpl$142, (%rdi)
>  jne .L13
> .L4:
>  part of insns sinked into here  <-- first split
>  
>  
> 
>  popq%rbx
>  popq%rbp
>  ret
> 
> .L13:
>  movq%rdi, %rax  <-- second split and one instruction moved here
>  ret
> 
> I don't know why there is a assert to prevent multi split.
> 
> after I remove that assert, pass bootstrap and no regression.
> 
> and for pr21417.c, the multi split more cause one extra "ret" instruction, but
> the performance is better, because there
> is no need to execute "movq%rdi, %rax" if we go down to L4.
> 
> any comments?
> 
> BTW: I updated the patch with testcase which could not be shrink-wrapped
> before this patch.
> 
> thanks.
> 
> -- Jiong
> 
> >
> > Jeff
> >
> >






[PATCH][match-and-simplify] Apply TLC

2014-09-25 Thread Richard Biener

This shuffles routines around, grouping them properly and adding
function comments.  Applies minor code TLC.

Applied.

Richard.

2014-09-25  Richard Biener  

* genmatch.c: TLC.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215554)
+++ gcc/genmatch.c  (working copy)
@@ -31,6 +31,7 @@ along with GCC; see the file COPYING3.
 #include "vec.h"
 #include "is-a.h"
 
+
 /* libccp helpers.  */
 
 static struct line_maps *line_table;
@@ -112,26 +113,8 @@ output_line_directive (FILE *f, source_l
 }
 
 
-/* Grammar
-
- capture = '@' number
- op = predicate | expr [capture]
- c_expr = '{' ... '}'
- genexpr = '(' code genop... ')'
- genop = capture | genexpr | c_expr
- transform = 'match_and_transform' name expr genop
-
- Match and simplify (A + B) - B -> A
- (simplify foo
-   (PLUS_EXPR (MINUS_EXPR integral_op_p@0 @1) @1)
-   @0)
-
- Match and simplify (CST + A) + CST to CST' + A
- (simplify bar
-   (PLUS_EXPR INTEGER_CST_P@0 (PLUS_EXPR @1 INTEGER_CST_P@2))
-   (PLUS_EXPR { int_const_binop (PLUS_EXPR, captures[0], captures[2]); } 
@1))
-*/
-
+/* Pull in tree codes and builtin function codes from their
+   definition files.  */
 
 #define DEFTREECODE(SYM, STRING, TYPE, NARGS)   SYM,
 enum tree_code {
@@ -150,12 +133,12 @@ END_BUILTINS
 };
 #undef DEF_BUILTIN
 
-/* Hashtable of known pattern operators.  This is pre-seeded from
-   all known tree codes and all known builtin function ids.  */
+
+/* Base class for all identifiers the parser knows.  */
 
 struct id_base : typed_noop_remove
 {
-  enum id_kind { CODE, FN, PREDICATE, USER_DEFINED } kind;
+  enum id_kind { CODE, FN, PREDICATE, USER } kind;
 
   id_base (id_kind, const char *, int = -1);
 
@@ -184,6 +167,8 @@ id_base::equal (const value_type *op1,
  && strcmp (op1->id, op2->id) == 0);
 }
 
+/* Hashtable of known pattern operators.  This is pre-seeded from
+   all known tree codes and all known builtin function ids.  */
 static hash_table *operators;
 
 id_base::id_base (id_kind kind_, const char *id_, int nargs_)
@@ -194,6 +179,8 @@ id_base::id_base (id_kind kind_, const c
   hashval = htab_hash_string (id);
 }
 
+/* Identifier that maps to a tree code.  */
+
 struct operator_id : public id_base
 {
   operator_id (enum tree_code code_, const char *id_, unsigned nargs_,
@@ -203,6 +190,8 @@ struct operator_id : public id_base
   const char *tcc;
 };
 
+/* Identifier that maps to a builtin function code.  */
+
 struct fn_id : public id_base
 {
   fn_id (enum built_in_function fn_, const char *id_)
@@ -212,6 +201,8 @@ struct fn_id : public id_base
 
 struct simplify;
 
+/* Identifier that maps to a user-defined predicate.  */
+
 struct predicate_id : public id_base
 {
   predicate_id (const char *id_)
@@ -219,10 +210,12 @@ struct predicate_id : public id_base
   vec matchers;
 };
 
+/* Identifier that maps to a operator defined by a 'for' directive.  */
+
 struct user_id : public id_base
 {
   user_id (const char *id_)
-: id_base (id_base::USER_DEFINED, id_), substitutes (vNULL) {}
+: id_base (id_base::USER, id_), substitutes (vNULL) {}
   vec substitutes;
 };
 
@@ -250,6 +243,16 @@ is_a_helper ::test (id_b
   return id->kind == id_base::PREDICATE;
 }
 
+template<>
+template<>
+inline bool
+is_a_helper ::test (id_base *id)
+{
+  return id->kind == id_base::USER;
+}
+
+/* Add a predicate identifier to the hash.  */
+
 static predicate_id * 
 add_predicate (const char *id)
 {
@@ -261,6 +264,8 @@ add_predicate (const char *id)
   return p;
 }
 
+/* Add a tree code identifier to the hash.  */
+
 static void
 add_operator (enum tree_code code, const char *id,
  const char *tcc, unsigned nargs)
@@ -281,6 +286,8 @@ add_operator (enum tree_code code, const
   *slot = op;
 }
 
+/* Add a builtin identifier to the hash.  */
+
 static void
 add_builtin (enum built_in_function code, const char *id)
 {
@@ -291,35 +298,80 @@ add_builtin (enum built_in_function code
   *slot = fn;
 }
 
+/* Helper for easy comparing ID with tree code CODE.  */
+
 static bool
 operator==(id_base &id, enum tree_code code)
 {
-  if (id.kind != id_base::CODE)
-return false;
-  return static_cast (id).code == code;
+  if (operator_id *oid = dyn_cast  (&id))
+return oid->code == code;
+  return false;
+}
+
+/* Lookup the identifier ID.  */
+
+id_base *
+get_operator (const char *id)
+{
+  id_base tem (id_base::CODE, id);
+
+  id_base *op = operators->find_with_hash (&tem, tem.hashval);
+  if (op)
+return op; 
+
+  /* Try all-uppercase.  */
+  char *id2 = xstrdup (id);
+  for (unsigned i = 0; i < strlen (id2); ++i)
+id2[i] = TOUPPER (id2[i]);
+  new (&tem) id_base (id_base::CODE, id2);
+  op = operators->find_with_hash (&tem, tem.hashval);
+  if (op)
+{
+  free (id2);
+  return op;
+}
+
+  /* Try _EXPR appended.  */
+  id2 = (char *)xrealloc (id2, strlen (id2) + sizeof ("_EXPR") + 1);
+  strcat (id2, 

Re: [patch] Implement move semantics for iostreams

2014-09-25 Thread Jonathan Wakely

On 25/09/14 07:11 +0200, Marc Glisse wrote:

On Wed, 24 Sep 2014, Jonathan Wakely wrote:


I'm tempted to tidy up the GLIBCXX_3.4 patterns in the linker script
quite considerably,


Paolo has done something like that this summer (motivated by -O0 
builds) and reverted it for a detail, but it could be a good starting 
point.


Oh yes, https://gcc.gnu.org/r211355  - I forgot about that, thanks.


Re: [RFC/PATCH] Fix-it hints

2014-09-25 Thread Dodji Seketeli
Hello Manuel,

Sorry for taking so long to reply to this.

FWIW, I like the direction of this.  I find fix-it hints cool in
general.  So thank you for working on this.

Manuel López-Ibáñez  a écrit:

> This patch implements fix-it hints. See https://gcc.gnu.org/PR62314
>
> When the caret line is active (which is the default), this adds an
> additional source-line indicating how to fix the code:
>
> gcc/testsuite/g++.dg/template/crash83.C:5:21: error: an explicit
> specialization must be preceded by 'template <>'
>  template: > struct B {}; // { dg-error
> "explicit specialization|expected" }
>  ^
>  template<>

It looks like your mail user agent wrapped a line above, making it hard
to read.  I suspect it should have been:

  template > struct B {}; // { dg-error "explicit 
specialization|expected" }
  ^
  template<>

> When the caret line is disabled with -fno-diagnostics-show-caret, the
> fix-it hint is printed as:
>
> gcc/testsuite/g++.dg/template/crash83.C:5:21: error: an explicit
> specialization must be preceded by 'template <>'
> gcc/testsuite/g++.dg/template/crash83.C:5:21: fixit: template<>
>
> The latter form may allow an IDE (such as emacs) to automatically
> apply the fix.

Nice.  Is the "fixit:" prefix used by other compilers too?  Or are there
variations from compiler to compiler?

> Currently, fix-it hints are limited to insertions at one single
> location, whereas Clang allows insertions, deletions, and replacements
> at arbitrary location ranges.

Do you have an example of each of these kinds of fix-it hints (deletions,
replacements at location ranges)?  I think it'd be nice to have an idea
of what needs to be done, even if we are not doing it "in extenso" right
now.

> Opinions? Is the proposed interface/implementation acceptable?

Please read my comments below.

> Any other diagnostics that could use a fix-it hint? In principle, we
> should only give them when we are sure that the proposed fix will fix
> the error or silence a warning.  For example, the C++ parser often
> says 'x' expected before 'y' but adding x before y rarely fixes
> anything.

I am thinking that maybe the diagnostic about the missing ";" after a
struct/class declaration might be a candidate for this fix-it hint
feature.

It's emitted by cp_parser_class_specifier_1() at:

if (CLASSTYPE_DECLARED_CLASS (type))
  error_at (loc, "expected %<;%> after class definition");
else if (TREE_CODE (type) == RECORD_TYPE)
  error_at (loc, "expected %<;%> after struct definition");
else if (TREE_CODE (type) == UNION_TYPE)
  error_at (loc, "expected %<;%> after union definition");
else
  gcc_unreachable ();


[...]

> +
> +static int
> +adjust_column (int line_width, int max_width, int column)

Missing comments for this function.

[...]

> +static const char *
> +get_source_line_and_column (location_t loc, int *line_width, int *column)
> +{

Likewise.

[...]


>  /* Print the physical source line corresponding to the location of
> this diagnostic, and a caret indicating the precise column.  */
>  void
>  diagnostic_show_locus (diagnostic_context * context,
>  const diagnostic_info *diagnostic)
>  {

[...]

>context->last_location = diagnostic->location;
> -  s = expand_location_to_spelling_point (diagnostic->location);
> -  line = location_get_source_line (s, &line_width);
> -  if (line == NULL || s.column > line_width)
> +  line = get_source_line_and_column (diagnostic->location,
> +  &line_width, &column);
> +  if (line == NULL)
>  return;
>  
>max_width = context->caret_max_width;
> -  line = adjust_line (line, line_width, max_width, &(s.column));
> +  line = adjust_line (line, line_width, max_width, &column);

Apparently, each time we call get_source_line_and_column, we also call
adjust_line on it.  So maybe we want to have a
get_adjusted_source_line_and_column (or something like that) that does
it all?

[...]

> @@ -325,13 +345,13 @@ diagnostic_show_locus (diagnostic_contex
>pp_newline (context->printer);
>caret_cs = colorize_start (pp_show_color (context->printer), "caret");
>caret_ce = colorize_stop (pp_show_color (context->printer));
>  
>/* pp_printf does not implement %*c.  */
> -  size_t len = s.column + 3 + strlen (caret_cs) + strlen (caret_ce);
> +  size_t len = column + 3 + strlen (caret_cs) + strlen (caret_ce);
>buffer = XALLOCAVEC (char, len);
> -  snprintf (buffer, len, "%s %*c%s", caret_cs, s.column, context->caret_char,
> +  snprintf (buffer, len, "%s %*c%s", caret_cs, column, context->caret_char,
>   caret_ce);

Maybe you should factor out the printing of a colored line starting
at a given column, rather than copy-pasting this in fixit_hint() later?

[...]

>diagnostic_set_info (&diagnostic, gmsgid, &ap, location, DK_NOTE);
>report_diagnostic (&diagnostic);
>va_end (ap

Re: [PATCH i386 AVX512] [52.1/n] Add vec2mask and mask2vec insn patterns.

2014-09-25 Thread Kirill Yukhin
Hello,
As suggested, this is the split-out part of the [52/n] patch,
which introduces new vec2mask and mask2vec insn patterns.

As suggested, I've got rid of the use of the UNSPEC_CVTINT2MASK
unspec.  Unfortunately, only partially.
I suppose that a generic RTX for vec2mask would be too complex:
get the most significant bit of each vec elt & compare it
with zero, setting the destination mask register accordingly.
If this approach is preferable, I am ready to do that.

Testing in progress.
Is it ok for trunk if it passes?

gcc/
* config/i386/predicates.md (define_predicate "constm1_operand"): New.
* config/i386/sse.md
(define_c_enum "unspec"): Add UNSPEC_CVTINT2MASK.
(define_insn "_cvt2mask"): 
New.
(define_insn "_cvt2mask"): 
Ditto.
(define_expand "_cvtmask2"): 
Ditto.
(define_insn "*_cvtmask2"): 
Ditto.
(define_expand "_cvtmask2"): 
Ditto.
(define_insn "*_cvtmask2"): 
Ditto.

--
Thanks, K
commit 6cece2e60da5777b6223025365295a555a25f285
Author: Kirill Yukhin 
Date:   Thu Sep 25 12:01:15 2014 +0400

AVX-512. 52.1. D2M and M2D patterns.

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 690bed5..9566884 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -618,6 +618,15 @@
   return op == CONST0_RTX (mode);
 })
 
+;; Match -1.
+(define_predicate "constm1_operand"
+  (match_code "const_int,const_double,const_vector")
+{
+  if (mode == VOIDmode)
+mode = GET_MODE (op);
+  return op == CONSTM1_RTX (mode);
+})
+
 ;; Match one or vector filled with ones.
 (define_predicate "const1_operand"
   (match_code "const_int,const_double,const_vector")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e7646d7..de775f2 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -132,6 +132,7 @@
   ;; For AVX512BW support
   UNSPEC_PSHUFHW
   UNSPEC_PSHUFLW
+  UNSPEC_CVTINT2MASK
 
   ;; For AVX512DQ support
   UNSPEC_REDUCE
@@ -4868,6 +4869,72 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
 
+(define_insn "_cvt2mask"
+  [(set (match_operand: 0 "register_operand" "=Yk")
+   (unspec:
+[(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+UNSPEC_CVTINT2MASK))]
+  "TARGET_AVX512BW"
+  "vpmov2m\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn "_cvt2mask"
+  [(set (match_operand: 0 "register_operand" "=Yk")
+   (unspec:
+[(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
+UNSPEC_CVTINT2MASK))]
+  "TARGET_AVX512DQ"
+  "vpmov2m\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_expand "_cvtmask2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+   (vec_merge:VI12_AVX512VL
+ (match_dup 1)
+ (match_dup 2)
+ (match_operand: 3 "register_operand")))]
+  "TARGET_AVX512BW"
+  {
+operands[1] = CONSTM1_RTX (mode);
+operands[2] = CONST0_RTX (mode);
+  })
+
+(define_insn "*_cvtmask2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "constm1_operand")
+ (match_operand:VI12_AVX512VL 2 "const0_operand")
+ (match_operand: 3 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vpmovm2\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_expand "_cvtmask2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+   (vec_merge:VI48_AVX512VL
+ (match_dup 1)
+ (match_dup 2)
+ (match_operand: 3 "register_operand")))]
+  "TARGET_AVX512DQ"
+  "{
+operands[1] = CONSTM1_RTX (mode);
+operands[2] = CONST0_RTX (mode);
+  }")
+
+(define_insn "*_cvtmask2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "constm1_operand")
+ (match_operand:VI48_AVX512VL 2 "const0_operand")
+ (match_operand: 3 "register_operand" "Yk")))]
+  "TARGET_AVX512DQ"
+  "vpmovm2\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF


Re: [PATCH] Put all constants last in tree_swap_operands_p, remove odd -Os check

2014-09-25 Thread Alan Lawrence
s/tiny/small/ and I think we might be getting close. So the 403.gcc test being 
compiled contains:


if (regno == PIC_OFFSET_TABLE_REGNUM
  && fixed_regs[PIC_OFFSET_TABLE_REGNUM])
return pic_offset_table_rtx;

PIC_OFFSET_TABLE_REGNUM is an expression testing machine flags; prior to 
Richie's patch, it looks like constant propagation/CSE/forward propagation 
wasn't really figuring this out, so was computing PIC_OFFSET_TABLE_REGNUM into a 
register, and adding this to (fixed_regs + 0, supplied by the linker).


Following Richie's patch, PIC_OFFSET_TABLE_REGNUM resolves to INVALID_REGNUM =
(~(unsigned int) 0), i.e. the 0xffffffff addend in the relocations. We then ask the
linker for (fixed_regs + 0xffffffff), but of course, if (fixed_regs > 0) then that
doesn't fit into the 32 bits allowed by the small memory model.


In neither case is the index into fixed_regs actually ever executed, of course!

So yes, my workaround is wrong, we are working on a proper fix...

--Alan


Andrew Pinski wrote:

On Mon, Sep 22, 2014 at 4:10 AM, Alan Lawrence  wrote:

Well, I haven't looked into this in detail: I've gone only as far as
  * swapping emit-rtl.o between 'good' compiles (svn r214042) and 'bad'
compiles (r214043), finding that the critical difference is in the
emit-rtl.o generated by r214043;
  *looking at the relocations in the 'bad' emit_rtl.o, seeing new entries
'fixed_regs + ', and that Richard Biener's changelog specifically
mentions stripping signedness changes (and introduces the SIGN_NOPS).

However, I applied your patch (minus the hunk adding the (set_attr "type"
"load1"), which appears to have gone in already), and still see the same error
message:

emit-rtl.o: In function `gen_rtx_REG':
emit-rtl.c:(.text+0x12f8): relocation truncated to fit:
R_AARCH64_ADR_PREL_PG_HI21 against symbol `fixed_regs' defined in COMMON
section in regclass.o
emit-rtl.o: In function `gen_rtx':
emit-rtl.c:(.text+0x1824): relocation truncated to fit:
R_AARCH64_ADR_PREL_PG_HI21 against symbol `fixed_regs' defined in COMMON
section in regclass.o
collect2: error: ld returned 1 exit status

and still see the same (suspicious-looking, although perhaps not convicted)
relocations:

$ readelf --relocs
benchspec/CPU2006/403.gcc/build/build_base_test./emit-rtl.o | grep
fixed_regs
12a8  005d0113 R_AARCH64_ADR_PRE  fixed_regs + 0
12ac  005d0115 R_AARCH64_ADD_ABS  fixed_regs + 0
12f8  005d0113 R_AARCH64_ADR_PRE  fixed_regs +

12fc  005d0116 R_AARCH64_LDST8_A  fixed_regs +

1824  005d0113 R_AARCH64_ADR_PRE  fixed_regs +

1828  005d0116 R_AARCH64_LDST8_A  fixed_regs +

186c  005d0113 R_AARCH64_ADR_PRE  fixed_regs + 0
1870  005d0115 R_AARCH64_ADD_ABS  fixed_regs + 0

I've also now bootstrapped my patch (STRIP_NOPS -> STRIP_SIGN_NOPS * 2) on
aarch64-none-linux-gnu and x86_64-none-linux-gnu, and check-gcc with no
regressions, so would like to propose that patch for trunk...?



You need to track down where R_AARCH64_ADR_PREL_PG_HI21 reloc is being
created in the assembly and then track down why GCC is using tiny
model here.  Note my fix was for a similar issue; not necessarily the
exact same one in that there could be another pattern which needs to
use the new constraint too.

Thanks,
Andrew


--Alan




Andrew Pinski wrote:

On Thu, Sep 18, 2014 at 9:44 AM, Alan Lawrence 
wrote:

We've been seeing errors using aarch64-none-linux-gnu gcc to build the
403.gcc benchmark from spec2k6, that we've traced back to this patch. The
error looks like:

/home/alalaw01/bootstrap_richie/gcc/xgcc
-B/home/alalaw01/bootstrap_richie/gcc -O3 -mcpu=cortex-a57.cortex-a53
-DSPEC_CPU_LP64alloca.o asprintf.o vasprintf.o c-parse.o c-lang.o
attribs.o c-errors.o c-lex.o c-pragma.o c-decl.o c-typeck.o c-convert.o
c-aux-info.o c-common.o c-format.o c-semantics.o c-objc-common.o main.o
cpplib.o cpplex.o cppmacro.o cppexp.o cppfiles.o cpphash.o cpperror.o
cppinit.o cppdefault.o line-map.o mkdeps.o prefix.o version.o mbchar.o
alias.o bb-reorder.o bitmap.o builtins.o caller-save.o calls.o cfg.o
cfganal.o cfgbuild.o cfgcleanup.o cfglayout.o cfgloop.o cfgrtl.o
combine.o
conflict.o convert.o cse.o cselib.o dbxout.o debug.o dependence.o df.o
diagnostic.o doloop.o dominance.o dwarf2asm.o dwarf2out.o dwarfout.o
emit-rtl.o except.o explow.o expmed.o expr.o final.o flow.o fold-const.o
function.o gcse.o genrtl.o ggc-common.o global.o graph.o haifa-sched.o
hash.o hashtable.o hooks.o ifcvt.o insn-attrtab.o insn-emit.o
insn-extract.o
insn-opinit.o insn-output.o insn-peep.o insn-recog.o integrate.o intl.o
jump.o langhooks.o lcm.o lists.o local-alloc.o loop.o obstack.o optabs.o
params.o predict.o print-rtl.o print-tree.o profile.o real.o recog.o
reg-stack.o regclass.o regmove.o regrename.o reload.o reload1.o reorg.o

Re: [AArch64] Fix predicate and constraint mismatch in logical atomic operations

2014-09-25 Thread Segher Boessenkool
On Wed, Sep 24, 2014 at 09:17:23PM -0700, Andrew Pinski wrote:
> On Wed, Sep 24, 2014 at 9:13 PM, Michael Collison
>  wrote:
> >
> > I have that attached to the bug report at the URL provided. I will work on a
> > testcase if you think it is warranted.
> 
> Yes it is almost always warranted.
> 
> https://gcc.gnu.org/contribute.html#patches
> 
> Testcases   If you cannot follow the recommendations of the GCC coding
> conventions about testcases, you should include a justification for
> why adequate testcases cannot be added.
> 
> See the last part of that sentence.  You don't have any justification
> on why you are not including testcases.

It is very hard to make a reliable testcase for such problems, because
they only happen when register allocation is under pressure.

The problem is not that "n" allows more than your predicate does.  The
predicate allows registers too, so the compiler happily made a register
contain some big const.  Now RA comes along, is out of registers but hey,
there is this "n", let's just put the big constant there!  Carnage.

So this is hard to test for; you can add some (big) code that exposed the
problem, but in a few months time that won't trigger the problem anymore
because earlier stages in the compiler will have generated slightly
different code.

It also does nothing to catch similar problems in other patterns.


Segher


Re: [patch] libstdc++/56193 re-add basic_ios::operator bool()

2014-09-25 Thread Jonathan Wakely

On 24/09/14 23:18 +0100, Jonathan Wakely wrote:

This changes operator void*() to operator bool(), and ensures we
export both from the library.

I have a new test for this, but will commit that tomorrow.


Here are the tests.

Tested x86_64-linux, committed to trunk.

commit 209087345926aaeb5510cb5fd31afd30a345b16d
Author: Jonathan Wakely 
Date:   Wed Sep 24 23:37:31 2014 +0100

	* testsuite/27_io/basic_ios/conv/bool_neg.cc: New.
	* testsuite/27_io/basic_ios/conv/bool.cc: New.
	* testsuite/27_io/basic_ios/conv/voidptr.cc: New.

diff --git a/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool.cc b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool.cc
new file mode 100644
index 000..bb92092
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++11" }
+
+#include 
+#include 
+
+void
+test01()
+{
+  const std::basic_ios x(nullptr);
+  bool test = static_cast(x);
+  VERIFY( !test );
+  if (x)
+  {
+VERIFY( false );
+  }
+}
+
+int
+main()
+{
+  test01();
+}
diff --git a/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc
new file mode 100644
index 000..816f851
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc
@@ -0,0 +1,26 @@
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++11" }
+
+#include 
+
+void
+test01(const std::basic_ios& x)
+{
+  bool b = x; // { dg-error "cannot convert" }
+}
diff --git a/libstdc++-v3/testsuite/27_io/basic_ios/conv/voidptr.cc b/libstdc++-v3/testsuite/27_io/basic_ios/conv/voidptr.cc
new file mode 100644
index 000..6691982
--- /dev/null
+++ b/libstdc++-v3/testsuite/27_io/basic_ios/conv/voidptr.cc
@@ -0,0 +1,39 @@
+// Copyright (C) 2014 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++98" }
+
+#include 
+#include 
+
+void
+test01()
+{
+  const std::basic_ios x(0);
+  void* p = x;
+  VERIFY( !p );
+  if (x)
+  {
+VERIFY( false );
+  }
+}
+
+int
+main()
+{
+  test01();
+}


Re: [PATCH i386 AVX512] [52.1/n] Add vec2mask and mask2vec insn patterns.

2014-09-25 Thread Kirill Yukhin
On 25 Sep 13:42, Kirill Yukhin wrote:
> Hello,
> As suggested, this is splitted out part of [52/n] patch,
> which introduces new vec2mask and mask2vec insn patterns.
> 
> As suggested, I've got rid off use of UNSPEC_CVTINT2MASK
> unspec. Unfortunatelly, only partially.
> I suppose, that vec2mask generic RTX will be too complex:
> get most significant bit of each vec elt & compare it
> with zero setting destination mask register accordingly.
> If this approach is preferrable: I ready to do that.
> 
> Testing in progress.
> Is it ok for trunk if pass?
Patch updated (I didn't know that internal operands in define_expand
should appear last).

Testing passes.

--
Thanks, K
commit 88cb02d69d28b8a11162db57f092a2560dd5a867
Author: Kirill Yukhin 
Date:   Thu Sep 25 12:01:15 2014 +0400

AVX-512. 52.1. D2M and M2D patterns.

diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 690bed5..9566884 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -618,6 +618,15 @@
   return op == CONST0_RTX (mode);
 })
 
+;; Match -1.
+(define_predicate "constm1_operand"
+  (match_code "const_int,const_double,const_vector")
+{
+  if (mode == VOIDmode)
+mode = GET_MODE (op);
+  return op == CONSTM1_RTX (mode);
+})
+
 ;; Match one or vector filled with ones.
 (define_predicate "const1_operand"
   (match_code "const_int,const_double,const_vector")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e7646d7..ffc831f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -132,6 +132,7 @@
   ;; For AVX512BW support
   UNSPEC_PSHUFHW
   UNSPEC_PSHUFLW
+  UNSPEC_CVTINT2MASK
 
   ;; For AVX512DQ support
   UNSPEC_REDUCE
@@ -4868,6 +4869,72 @@
(set_attr "prefix" "evex")
(set_attr "mode" "V8DF")])
 
+(define_insn "_cvt2mask"
+  [(set (match_operand: 0 "register_operand" "=Yk")
+   (unspec:
+[(match_operand:VI12_AVX512VL 1 "register_operand" "v")]
+UNSPEC_CVTINT2MASK))]
+  "TARGET_AVX512BW"
+  "vpmov2m\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn "_cvt2mask"
+  [(set (match_operand: 0 "register_operand" "=Yk")
+   (unspec:
+[(match_operand:VI48_AVX512VL 1 "register_operand" "v")]
+UNSPEC_CVTINT2MASK))]
+  "TARGET_AVX512DQ"
+  "vpmov2m\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_expand "_cvtmask2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+   (vec_merge:VI12_AVX512VL
+ (match_dup 2)
+ (match_dup 3)
+ (match_operand: 1 "register_operand")))]
+  "TARGET_AVX512BW"
+  {
+operands[2] = CONSTM1_RTX (mode);
+operands[3] = CONST0_RTX (mode);
+  })
+
+(define_insn "*_cvtmask2"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 2 "constm1_operand")
+ (match_operand:VI12_AVX512VL 3 "const0_operand")
+ (match_operand: 1 "register_operand" "Yk")))]
+  "TARGET_AVX512BW"
+  "vpmovm2\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_expand "_cvtmask2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+   (vec_merge:VI48_AVX512VL
+ (match_dup 2)
+ (match_dup 3)
+ (match_operand: 1 "register_operand")))]
+  "TARGET_AVX512DQ"
+  "{
+operands[2] = CONSTM1_RTX (mode);
+operands[3] = CONST0_RTX (mode);
+  }")
+
+(define_insn "*_cvtmask2"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (vec_merge:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 2 "constm1_operand")
+ (match_operand:VI48_AVX512VL 3 "const0_operand")
+ (match_operand: 1 "register_operand" "Yk")))]
+  "TARGET_AVX512DQ"
+  "vpmovm2\t{%1, %0|%0, %1}"
+  [(set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF


Re: [patch] libstdc++/56193 re-add basic_ios::operator bool()

2014-09-25 Thread Jonathan Wakely

On 25/09/14 11:24 +0100, Jonathan Wakely wrote:

On 24/09/14 23:18 +0100, Jonathan Wakely wrote:

This changes operator void*() to operator bool(), and ensures we
export both from the library.

I have a new test for this, but will commit that tomorrow.


Here are the tests.


Argh, I took the { dg-do compile } out then forgot to put it back.
Here it is, tested x86_64-linux and committed to trunk.

I hope I'm done with iostreams for now, at least until next week when
I want to apply the hexfloat patch from earlier this year.

commit 9d4bf29883f25ea2dca1cf270c480c68ece7a736
Author: Jonathan Wakely 
Date:   Thu Sep 25 11:29:24 2014 +0100

	* testsuite/27_io/basic_ios/conv/bool_neg.cc: Add dg-do compile.

diff --git a/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc
index 816f851..d4b8cac 100644
--- a/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_ios/conv/bool_neg.cc
@@ -16,6 +16,7 @@
 // .
 
 // { dg-options "-std=gnu++11" }
+// { dg-do compile }
 
 #include 
 


[PATCH][match-and-simplify] Parser TLC, 2nd

2014-09-25 Thread Richard Biener

The following wraps lexing and parsing into a parser class, simplifying
state management.  It also removes the need to forward declare
matchers as we now keep a vector of them filled in declaration order.

Committed.

Richard.

2014-09-25  Richard Biener  

* genmatch.c: Put all AST parsing into a class parser
simplifying state management.  Keep a vector of user
predicates in order of definition.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215591)
+++ gcc/genmatch.c  (working copy)
@@ -2210,17 +2210,6 @@ decision_tree::gen_generic (FILE *f)
 }
 }
 
-/* Write a prototype for the function defined by the predicate P.  */
-
-void
-write_predicate_prototype (FILE *f, predicate_id *p, bool gimple)
-{
-  fprintf (f, "bool %s%s (tree t%s%s);\n",
-  gimple ? "gimple_" : "tree_", p->id,
-  p->nargs > 0 ? ", tree *res_ops" : "",
-  gimple ? ", tree (*valueize)(tree) = NULL" : "");
-}
-
 /* Output code to implement the predicate P from the decision tree DT.  */
 
 void
@@ -2262,12 +2251,51 @@ write_header (FILE *f, const char *head)
 
 
 
+/* AST parsing.  */
+
+class parser
+{
+public:
+  parser (cpp_reader *);
+
+private:
+  const cpp_token *next ();
+  const cpp_token *peek ();
+  const cpp_token *peek_ident (const char * = NULL);
+  const cpp_token *expect (enum cpp_ttype);
+  void eat_token (enum cpp_ttype);
+  const char *get_string ();
+  const char *get_ident ();
+  void eat_ident (const char *);
+  const char *get_number ();
+
+  id_base *parse_operation ();
+  operand *parse_capture (operand *);
+  operand *parse_expr ();
+  c_expr *parse_c_expr (cpp_ttype);
+  operand *parse_op ();
+
+  void parse_pattern ();
+  void parse_simplify (source_location, vec&, predicate_id *);
+  void parse_for (source_location);
+  void parse_if (source_location);
+  void parse_predicates (source_location);
+
+  cpp_reader *r;
+  vec active_ifs;
+  vec > active_fors;
+
+public:
+  vec simplifiers;
+  vec user_predicates;
+};
+
 /* Lexing helpers.  */
 
 /* Read the next non-whitespace token from R.  */
 
-static const cpp_token *
-next (cpp_reader *r)
+const cpp_token *
+parser::next ()
 {
   const cpp_token *token;
   do
@@ -2281,8 +2309,8 @@ next (cpp_reader *r)
 
 /* Peek at the next non-whitespace token from R.  */
 
-static const cpp_token *
-peek (cpp_reader *r)
+const cpp_token *
+parser::peek ()
 {
   const cpp_token *token;
   unsigned i = 0;
@@ -2303,10 +2331,10 @@ peek (cpp_reader *r)
 /* Peek at the next identifier token (or return NULL if the next
token is not an identifier or equal to ID if supplied).  */
 
-static const cpp_token *
-peek_ident (cpp_reader *r, const char *id = 0)
+const cpp_token *
+parser::peek_ident (const char *id)
 {
-  const cpp_token *token = peek (r);
+  const cpp_token *token = peek ();
   if (token->type != CPP_NAME)
 return 0;
 
@@ -2322,10 +2350,10 @@ peek_ident (cpp_reader *r, const char *i
 
 /* Read the next token from R and assert it is of type TK.  */
 
-static const cpp_token *
-expect (cpp_reader *r, enum cpp_ttype tk)
+const cpp_token *
+parser::expect (enum cpp_ttype tk)
 {
-  const cpp_token *token = next (r);
+  const cpp_token *token = next ();
   if (token->type != tk)
 fatal_at (token, "expected %s, got %s",
  cpp_type2name (tk, 0), cpp_type2name (token->type, 0));
@@ -2335,19 +2363,19 @@ expect (cpp_reader *r, enum cpp_ttype tk
 
 /* Consume the next token from R and assert it is of type TK.  */
 
-static void
-eat_token (cpp_reader *r, enum cpp_ttype tk)
+void
+parser::eat_token (enum cpp_ttype tk)
 {
-  expect (r, tk);
+  expect (tk);
 }
 
 /* Read the next token from R and assert it is of type CPP_STRING and
return its value.  */
 
 const char *
-get_string (cpp_reader *r)
+parser::get_string ()
 {
-  const cpp_token *token = expect (r, CPP_STRING);
+  const cpp_token *token = expect (CPP_STRING);
   return (const char *)token->val.str.text;
 }
 
@@ -2355,19 +2383,19 @@ get_string (cpp_reader *r)
return its value.  */
 
 const char *
-get_ident (cpp_reader *r)
+parser::get_ident ()
 {
-  const cpp_token *token = expect (r, CPP_NAME);
+  const cpp_token *token = expect (CPP_NAME);
   return (const char *)CPP_HASHNODE (token->val.node.node)->ident.str;
 }
 
 /* Eat an identifier token with value S from R.  */
 
-static void
-eat_ident (cpp_reader *r, const char *s)
+void
+parser::eat_ident (const char *s)
 {
-  const cpp_token *token = peek (r);
-  const char *t = get_ident (r);
+  const cpp_token *token = peek ();
+  const char *t = get_ident ();
   if (strcmp (s, t) != 0) 
 fatal_at (token, "expected '%s' got '%s'\n", s, t);
 }
@@ -2376,26 +2404,22 @@ eat_ident (cpp_reader *r, const char *s)
return its value.  */
 
 const char *
-get_number (cpp_reader *r)
+parser::get_number ()
 {
-  const cpp_token *token = expect (r, CPP_NUMBER);
+  const cpp_token *token = expect (CPP_NUMBER);
   return (co

[PATCH] Fix ICE with -Woverloaded-virtual (PR c++/61945)

2014-09-25 Thread Marek Polacek
We ICE on the following testcase because DECL_VINDEX expects
a FUNCTION_DECL, but we didn't check that.  The new testcase
shows that with this patch we give a proper warning.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2014-09-25  Marek Polacek  

PR c++/61945
* class.c (warn_hidden): Check for FUNCTION_DECL.

* g++.dg/warn/pr61945.C: New test.

diff --git gcc/cp/class.c gcc/cp/class.c
index c4ac61b..acf5768 100644
--- gcc/cp/class.c
+++ gcc/cp/class.c
@@ -2821,7 +2821,8 @@ warn_hidden (tree t)
   for (fn = fns; fn; fn = OVL_NEXT (fn))
{
  fndecl = OVL_CURRENT (fn);
- if (DECL_VINDEX (fndecl))
+ if (TREE_CODE (fndecl) == FUNCTION_DECL
+ && DECL_VINDEX (fndecl))
{
  tree *prev = &base_fndecls;
 
diff --git gcc/testsuite/g++.dg/warn/pr61945.C 
gcc/testsuite/g++.dg/warn/pr61945.C
index e69de29..5584d84 100644
--- gcc/testsuite/g++.dg/warn/pr61945.C
+++ gcc/testsuite/g++.dg/warn/pr61945.C
@@ -0,0 +1,11 @@
+// PR c++/61945
+// { dg-do compile }
+// { dg-options "-Woverloaded-virtual" }
+
+class A {
+  virtual int foo ();  // { dg-warning "was hidden" }
+};
+class B : A {
+  template 
+  void foo (); // { dg-warning "by .B::foo\\(\\)." }
+};

Marek


Re: [patch] Implement move semantics for iostreams

2014-09-25 Thread Rainer Orth
Hi Jonathan,

>>almost there: now I only get
>>
>>ld: fatal: libstdc++-symbols.ver-sun: 4622: symbol 
>>'std::basic_ostream 
>>>::basic_ostream(std::basic_iostream >&)': 
>>symbol version conflict
>>ld: fatal: libstdc++-symbols.ver-sun: 4623: symbol 
>>'std::basic_ostream 
>>>::basic_ostream(std::basic_iostream >&)': 
>>symbol version conflict
>>
>>from
>>
>>  GLIBCXX_3.4:
>>
>>##_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]E[RP]* (glob)
>>_ZNSt13basic_ostreamIwSt11char_traitsIwEEC1ERSt14basic_iostreamIwS1_E;
>>_ZNSt13basic_ostreamIwSt11char_traitsIwEEC2ERSt14basic_iostreamIwS1_E;
>>
>>  GLIBCXX_3.4.21:
>>
>>
>> ##_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]ERSt14basic_iostreamIwS1_E 
>> (glob)
>>_ZNSt13basic_ostreamIwSt11char_traitsIwEEC1ERSt14basic_iostreamIwS1_E;
>>_ZNSt13basic_ostreamIwSt11char_traitsIwEEC2ERSt14basic_iostreamIwS1_E;
>
> Doh, yes, this additional tweak should solve that:
>
> index f736240..95fc3c7 100644
> --- a/libstdc++-v3/config/abi/pre/gnu.ver
> +++ b/libstdc++-v3/config/abi/pre/gnu.ver
> @@ -460,7 +460,7 @@ GLIBCXX_3.4 {
>
> # std::basic_ostream
> _ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]Ev;
> -_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]E[RP]*;
> +_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]EP*;
> _ZNSt13basic_ostreamIwSt11char_traitsIwEED*;
> _ZNKSt13basic_ostreamIwSt11char_traitsIwEE[0-9][a-z]*;
> _ZNSt13basic_ostreamIwSt11char_traitsIwEE3putEw;

it does: {i386-pc, sparc-sun}-solaris2.1[01] bootstraps completed
successfully with it.

Thanks for the quick fix.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [patch] Implement move semantics for iostreams

2014-09-25 Thread Jonathan Wakely

On 25/09/14 13:05 +0200, Rainer Orth wrote:

Hi Jonathan,


almost there: now I only get

ld: fatal: libstdc++-symbols.ver-sun: 4622: symbol 'std::basic_ostream >::basic_ostream(std::basic_iostream >&)': symbol version conflict
ld: fatal: libstdc++-symbols.ver-sun: 4623: symbol 'std::basic_ostream >::basic_ostream(std::basic_iostream >&)': symbol version conflict

from

 GLIBCXX_3.4:

   ##_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]E[RP]* (glob)
   _ZNSt13basic_ostreamIwSt11char_traitsIwEEC1ERSt14basic_iostreamIwS1_E;
   _ZNSt13basic_ostreamIwSt11char_traitsIwEEC2ERSt14basic_iostreamIwS1_E;

 GLIBCXX_3.4.21:

   ##_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]ERSt14basic_iostreamIwS1_E 
(glob)
   _ZNSt13basic_ostreamIwSt11char_traitsIwEEC1ERSt14basic_iostreamIwS1_E;
   _ZNSt13basic_ostreamIwSt11char_traitsIwEEC2ERSt14basic_iostreamIwS1_E;


Doh, yes, this additional tweak should solve that:

index f736240..95fc3c7 100644
--- a/libstdc++-v3/config/abi/pre/gnu.ver
+++ b/libstdc++-v3/config/abi/pre/gnu.ver
@@ -460,7 +460,7 @@ GLIBCXX_3.4 {

# std::basic_ostream
_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]Ev;
-_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]E[RP]*;
+_ZNSt13basic_ostreamIwSt11char_traitsIwEEC[12]EP*;
_ZNSt13basic_ostreamIwSt11char_traitsIwEED*;
_ZNKSt13basic_ostreamIwSt11char_traitsIwEE[0-9][a-z]*;
_ZNSt13basic_ostreamIwSt11char_traitsIwEE3putEw;


it does: {i386-pc, sparc-sun}-solaris2.1[01] bootstraps completed
successfully with it.


Great. I hope the slightly-modified version I eventually checked in
still works too :)



Re: [COMMITTED][PATCH] Improve prepare_shrink_wrap to sink more instructions

2014-09-25 Thread Christophe Lyon
On 24 September 2014 20:32, Jiong Wang  wrote:
>
> On 22/09/14 19:01, Jeff Law wrote:
>>
>> On 09/22/14 04:29, Jiong Wang wrote:
>>>
>>> On 19/09/14 21:43, Jeff Law wrote:
>>>
>>> patch attached.
>>>
>>> please review, thanks.
>>>
>>> gcc/
>>> * shrink-wrap.c (move_insn_for_shrink_wrap): Add further check when
>>> !REG_P (src) to
>>> release more instruction sink opportunities.
>>>
>>> gcc/testsuite/
>>> * gcc.target/aarch64/shrink_wrap_symbol_ref_1.c: New testcase.
>>
>> Thanks.  Please verify this version passes a bootstrap & regression
>> test.  Assuming it does it is OK for the trunk.
>
>
> pass bootstrap and on regression on x86 based on revision 215515.
>
> committed as revision 215563.
>
> -- Jiong
>

I have observed regressions in the g++ testsuite: pr49847 now FAILs
after this patch.

Here is what I have in my logs:
/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/gcc/testsuite/g++/../../xg++
-B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/gcc/testsuite/g++/../../
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C
-fno-diagnostics-show-caret -fdiagnostics-color=never  -nostdinc++
-I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/arm-none-linux-gnueabihf/libstdc++-v3/include/arm-none-linux-gnueabihf
-I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/arm-none-linux-gnueabihf/libstdc++-v3/include
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/libsupc++
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/include/backward
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/testsuite/util
-fmessage-length=0  -std=gnu++98 -O -fnon-call-exceptions  -S -o
pr49847.s(timeout = 800)
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C: In
function 'int f(float)':
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C:7:1:
error: missing REG_EH_REGION note at the end of bb 2
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C:7:1:
internal compiler error: verify_flow_info failed
0x82f8ba verify_flow_info()
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfghooks.c:260

0x840cd3 commit_edge_insertions()
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgrtl.c:2068
0x9bf243 thread_prologue_and_epilogue_insns
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:5852
0x9bfa52 rest_of_handle_thread_prologue_and_epilogue
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:6245
0x9bfa52 execute
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:6283

As per
http://cbuild.validation.linaro.org/build/cross-validation/gcc/trunk/215563/report-build-info.html
I've noticed this on targets:
arm-none-linux-gnueabihf
armeb-none-linux-gnueabihf
aarch64-none-elf
aarch64_be-none-elf
aarch64-none-linux-gnu
but NOT on
arm-none-eabi
arm-none-linux-gnueabi

Christophe.


[PATCH][match-and-simplify] Get rid of static limit on captures

2014-09-25 Thread Richard Biener

$subject.

Still not the full-blown idea of dynamically assigning the (internal)
IDs from any number or identifier.  Somebody said it would be nice
to write (plus @a @b) instead of (plus @1 @2).

Applied.

Richard.

2014-09-25  Richard Biener  

* genmatch.c: Keep track of the maximum capture index used
and get rid of the static constant dt_simplify::capture_max.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215594)
+++ gcc/genmatch.c  (working copy)
@@ -483,10 +483,12 @@ struct simplify
 {
   simplify (operand *match_, source_location match_location_,
struct operand *result_, source_location result_location_,
-   vec ifexpr_vec_, vec > for_vec_)
+   vec ifexpr_vec_, vec > for_vec_,
+   unsigned capture_max_)
   : match (match_), match_location (match_location_),
   result (result_), result_location (result_location_),
-  ifexpr_vec (ifexpr_vec_), for_vec (for_vec_) {}
+  ifexpr_vec (ifexpr_vec_), for_vec (for_vec_),
+  capture_max (capture_max_) {}
 
   /* The expression that is matched against the GENERIC or GIMPLE IL.  */
   operand *match; 
@@ -502,6 +504,8 @@ struct simplify
   /* Collected 'for' expression operators that have to be replaced
  in the lowering phase.  */
   vec > for_vec;
+  /* The maximum capture index seen.  */
+  unsigned capture_max;
 };
 
 /* Debugging routines for dumping the AST.  */
@@ -650,7 +654,7 @@ lower_commutative (simplify *s, vecmatch_location,
   s->result, s->result_location, s->ifexpr_vec,
-  s->for_vec);
+  s->for_vec, s->capture_max);
   simplifiers.safe_push (ns);
 }
 }
@@ -780,7 +784,7 @@ lower_opt_convert (simplify *s, vecmatch_location,
   s->result, s->result_location, s->ifexpr_vec,
-  s->for_vec);
+  s->for_vec, s->capture_max);
   simplifiers.safe_push (ns);
 }
 }
@@ -865,7 +869,7 @@ lower_for (simplify *sin, vecmatch_location,
   result_op, s->result_location,
-  ifexpr_vec, vNULL);
+  ifexpr_vec, vNULL, s->capture_max);
  worklist.safe_push (ns);
}
}
@@ -951,17 +955,13 @@ struct dt_operand : public dt_node
 
 struct dt_simplify : public dt_node
 {
-  static const unsigned capture_max = 6;
   simplify *s; 
   unsigned pattern_no;
-  dt_operand *indexes[capture_max]; 
+  dt_operand **indexes;
   
   dt_simplify (simplify *s_, unsigned pattern_no_, dt_operand **indexes_)
-   : dt_node (DT_SIMPLIFY), s (s_), pattern_no (pattern_no_)
-  {
-for (unsigned i = 0; i < capture_max; ++i)
-  indexes[i] = indexes_[i];
-  }
+   : dt_node (DT_SIMPLIFY), s (s_), pattern_no (pattern_no_),
+ indexes (indexes_)  {}
 
   void gen (FILE *f, bool);
   virtual void gen_gimple (FILE *f) { gen (f, true); }
@@ -1190,14 +1190,10 @@ at_assert_elm:
 void
 decision_tree::insert (struct simplify *s, unsigned pattern_no)
 {
-  dt_operand *indexes[dt_simplify::capture_max];
-
   if (s->match->type != operand::OP_EXPR)
 return; 
 
-  for (unsigned j = 0; j < dt_simplify::capture_max; ++j)
-indexes[j] = 0; 
-
+  dt_operand **indexes = XCNEWVEC (dt_operand *, s->capture_max + 1);
   dt_node *p = decision_tree::insert_operand (root, s->match, indexes);
   p->append_simplify (s, pattern_no, indexes);
 }
@@ -1228,7 +1224,7 @@ decision_tree::print_node (dt_node *p, F
{
  dt_simplify *s = static_cast (p);
  fprintf (f, "simplify_%u { ", s->pattern_no); 
- for (unsigned i = 0; i < dt_simplify::capture_max; ++i)
+ for (unsigned i = 0; i <= s->s->capture_max; ++i)
fprintf (f, "%p, ", (void *) s->indexes[i]);
  fprintf (f, " } "); 
}
@@ -1945,9 +1941,9 @@ dt_simplify::gen (FILE *f, bool gimple)
   fprintf (f, "{\n");
   output_line_directive (f, s->result_location);
   fprintf (f, "tree captures[%u] ATTRIBUTE_UNUSED = {};\n",
-  dt_simplify::capture_max);
+  s->capture_max + 1);
 
-  for (unsigned i = 0; i < dt_simplify::capture_max; ++i)
+  for (unsigned i = 0; i <= s->capture_max; ++i)
 if (indexes[i])
   {
char opname[20];
@@ -2284,6 +2280,7 @@ private:
   cpp_reader *r;
   vec active_ifs;
   vec > active_fors;
+  unsigned capture_max;
 
 public:
   vec simplifiers;
@@ -2451,7 +2448,12 @@ struct operand *
 parser::parse_capture (operand *op)
 {
   eat_token (CPP_ATSIGN);
-  return new capture (get_number (), op);
+  /* ???  Ideally we'd accept any identifier or number here
+ and dynamically assign an index to them.  */
+  const char *id = get_number ();
+  if ((unsigned) atoi (id) > capture_max)
+capture_max = atoi (id);
+  return new capture (id, o

Re: [RFC/PATCH] More precise diagnostic locations: dynamic locations for columns vs explicit offset

2014-09-25 Thread Dodji Seketeli
Manuel López-Ibáñez  a écrit:

> In some situations, we would like to point to a location which was not
> encoded when tokenizing. This happens, for example, in two prominent
> cases:
>
> 1) To get precise locations within strings
> (https://gcc.gnu.org/PR52952) for example, for Wformat warnings.

This feature would be very welcome indeed.

>
> 2) In the Fortran FE, which gives quite precise location information
> by tracking the characters that it wants to warn about instead of
> relying on the line-map machinery.

So with this feature, the Fortran FE would then use the then more
"generic" diagnostics machinery, right?

> The most straightforward way to implement this is by adding variants
> of diagnostic functions that take an explicit "offset" argument and
> pass this offset through the whole diagnostics machinery. This is what
> I implemented in the patch format_offset.diff attached. The downside
> is that we would need to add even more variants (with/without offset)
> of various diagnostic functions and track the offset/no-offset cases
> explicitly.

I would be inclined to go for this route at first sight because of its
conceptual simplicity, even if it might be heavy in terms of the
number of entry points to maintain for users of the diagnostics
sub-system but then ...

> The nicer/cleaner alternative is to somehow (re)compute a single
> location value from a given location plus the new offset.

... I agree with this.  It's more elegant and maintainable to go this
way.  But it might involve some hair splitting.


> This is what I implemented in patch fortran-diagnostics-part3.diff
> in linemap_redo_position_for_column(). As far as I understand, this
> method only works reliably if the location+offset does not jump to a
> different line map, that is, if to_column < (1u <<
> map->d.ordinary.column_bits). Otherwise, we may need to recompute
> all successive line-maps to accommodate the new location. The best
> way to do the latter (or to work-around that issue) is not clear to
> me at the moment.

I think it might be more involved than that.

There are two kinds of locations:

 1/ spelling locations.  They represent a real point in the source
 code.  For now, the beginning of a token.

 2/ virtual locations.  They are an abstract number, calculated in a
 convoluted way to encode the fact that a given token (rather, the
 location of that token) was e.g, possibly passed to a function-like
 macro that used it in its expansion, and that macro was expanded
 somewhere.  And from that number, we can get back to the macro into
 which it was used, expanded, where the macro was expanded and we can
 also get the original spelling location of the token we are looking
 at.

I might be maybe missing something, but if the location is not virtual
(case 1/), I *think* that in practice we are not likely to see that
location + column jumps to the "next" map, unless we are running low
on line maps space -- in which case, either columns tracking or even
line maps are turned off -- or the token we are looking at
is *huge*.  In the latter case, when we start tracking the location of
the *end* of tokens (as said in the roadmap), I think that latter issue
is going to vanish because a given line map is going to be allocated big
enough to contain locations until at least the end of the last token
it "contains".

If the location is virtual (case 2/), then the "location + offset"
value you are referring to is meaningless, unfortunately.  You must
get back to the spelling location of that token first; that is, you
have to consider "spelling_location(location) + offset", and we are
back to the first case (case 1/).


> Thus, I am putting forward these two alternative implementations and
> seeking comments/advice/help in deciding what would be the best way to
> fix this key missing piece of GCC diagnostics.

Thanks.

> Related to this, perhaps I should make a more general call for help.
> Despite the heroic, constant torrent of diagnostic fixes by Paolo,
> Marek and others, I have not seen much progress on the key
> infrastructure issues in the roadmap
> (https://gcc.gnu.org/wiki/Better_Diagnostics). We have had at least
> one major item per release since GCC 4.5, but I don't see any
> particular item being tackled for GCC 5.0. Are you planning to tackle
> any of them?

Unfortunately, it's unlikely that I'll have time to tackle any of
this.  I am quite busy on libabigail
(https://sourceware.org/libabigail/) in this cycle.  And it's
also important for us.  So I'd rather shoot for the next cycle.

But that shouldn't prevent interested hackers from jumping in :-)

> I have a simple patch to implement Fix-it hints but it needs more
> work. Unfortunately, I have very little free time to dedicate to GCC
> nowadays, so I'm afraid I might not even be able to finish this in
> time. Any item in that list would be a nice major feature for GCC
> 5.0.

Thank you for the effort you are putting in this, despite your tight
schedule.  This is re

Re: [PATCH 1/9] Gccgo port to s390[x] -- part I

2014-09-25 Thread Ulrich Weigand
Lynn Boger wrote:

> I modified the patch for statements.cc and rebuilt and that eliminates 
> the regressions and fixes the original problem it was intended to fix 
> for both ppc64 BE & LE.  The ABIs are different between BE & LE, so that 
> make_func_code_reference on ppc64 BE is not returning the function's 
> code address but the function pointer from the .opd.  The first 8 bytes 
> of the entry in the .opd is the function's code address.  Here is the 
> change to statements.cc that made it work:

> +#if defined(__powerpc64__) && _CALL_ELF != 2
> +  Expression* pfn =
> +Expression::make_func_code_reference(function, location);
> +  Type* pfntype =
> +  Type::make_pointer_type(
> +  Type::make_pointer_type(Type::make_void_type()));
> +  Expression* fn = Expression::make_unsafe_cast(pfntype, pfn, 
> location);
> +  Expression* fn_code_addr = Expression::make_unary(OPERATOR_MULT, fn,
> +location);
> +#else
> +  Expression* fn_code_addr =
> +Expression::make_func_code_reference(function, location);
> +#endif
> +  Expression* call = Runtime::make_call(Runtime::SET_DEFERING_FN,
> +location, 1, fn_code_addr);
> +  Statement* s = Statement::make_statement(call, true);

This looks wrong when using gcc-go as a cross-compiler.  The #if is
evaluated in the context of the *host*, but you'd need to check the
processor architecture and ABI of the *target*.  This seems difficult
since you'd have to take into account -mabi= options, which are not
readily available to the front end.

It seems more straightforward to keep the front end as is, i.e. generate
code to pass a plain function pointer (as defined by the target ABI) to
the runtime, and have the *runtime* do whatever target-specific fiddling
is required to get from a function pointer to a code address.

For example, you could add something like:

#if defined(__powerpc64__) && _CALL_ELF != 2
  defering_fn = *(void **)defering_fn;
#endif

to __go_set_defering_fn (or possibly __go_can_recover).

[ Since the runtime is compiled for the target with the appropriate
ABI setting, the #if works as intended when in runtime code.  ]

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Jeff Law

On 09/25/14 01:01, Jakub Jelinek wrote:

Hi!

I've noticed that lots of g++ struct-layout-1.exp tests now fail
on ppc*.  The problem is the psABI warning:
the layout of aggregates containing vectors with N-byte alignment
For various targets we are using -Wno-abi (and, in C
struct-layout-1.exp also by default) for this, so the patch just
changes the default for C++ testing too.

Regtested on x86_64-linux and powerpc*-linux, ok for trunk/4.9/4.8?

2014-09-25  Jakub Jelinek  

* g++.dg/compat/struct-layout-1_generate.c: Add -Wno-abi
to default options.

OK.
jeff



[patch] std::vector::assign should not call std::vector::swap

2014-09-25 Thread Jonathan Wakely

With C++11 allocator semantics the swap() member might also replace
the allocator, which is only allowed in specific circumstances.

Even though the worst that could happen is we replace the allocator
with an equal one, we should avoid using swap and use the internal
_M_swap_data function instead.

I'm not adding a test, as I don't think this is likely to regress and
to do it properly we'd need to test every member function and ensure
only the correct ones replace the allocator. I searched, and we don't
use swap() inappropriately elsewhere in std::vector.

Tested x86_64-linux, committed to trunk.

commit 6a3d7b12c8879a0431e8aa9ffb521f18063debc6
Author: Jonathan Wakely 
Date:   Thu Sep 25 13:03:40 2014 +0100

	* include/bits/vector.tcc (vector::_M_fill_assign): Use _M_swap_data.

diff --git a/libstdc++-v3/include/bits/vector.tcc b/libstdc++-v3/include/bits/vector.tcc
index 5c3dfae..4eacec3 100644
--- a/libstdc++-v3/include/bits/vector.tcc
+++ b/libstdc++-v3/include/bits/vector.tcc
@@ -228,7 +228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   if (__n > capacity())
 	{
 	  vector __tmp(__n, __val, _M_get_Tp_allocator());
-	  __tmp.swap(*this);
+	  __tmp._M_impl._M_swap_data(this->_M_impl);
 	}
   else if (__n > size())
 	{


Re: parallel check output changes?

2014-09-25 Thread Andrew MacLeod

On 09/24/2014 01:58 PM, Andrew MacLeod wrote:

On 09/24/2014 12:29 PM, Andrew MacLeod wrote:




AH. interesting.

The third run has a gcc.sum that is exactly the same as the first run. 
so only the second run differs, and it seems to be from an 
alphabetical sort.  So run 3 and 1 match.
the gfortran.sum from the third run is identical to the *second* run, 
but it is different from the *first* run.  so run 2 and 3 match.


the two runs that match (2nd and 3rd run) look like:
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=single  -O2 (test 
for excess errors)
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=single  -O2 
execution test
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=lib  -O2 
-lcaf_single (test for excess errors)
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=lib  -O2 
-lcaf_single execution test
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=single  -O2 (test 
for excess errors)
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=single  -O2 
execution test
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=lib  -O2 
-lcaf_single (test for excess errors)
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=lib  -O2 
-lcaf_single execution test


and the odd one out (first run):
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=lib  -O2 
-lcaf_single (test for excess errors)
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=lib  -O2 
-lcaf_single execution test
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=single  -O2 (test 
for excess errors)
PASS: gfortran.dg/coarray/this_image_1.f90 -fcoarray=single  -O2 
execution test
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=lib  -O2 
-lcaf_single (test for excess errors)
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=lib  -O2 
-lcaf_single execution test
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=single  -O2 (test 
for excess errors)
PASS: gfortran.dg/coarray/this_image_2.f90 -fcoarray=single  -O2 
execution test


looks like the first run was sorted, and the other 2 weren't.

There must be some condition under which we don't sort the results? or 
another place which needs to be tweaked to do the sort as well...?


Andrew

So to be fair, I could use test_summary, but I think the concern is 
warranted because if this inconsistent ordering can happen to PASS, I 
would expect the same non-deterministic behaviour if those tests happen 
to FAIL.  we just have far less FAILS so we aren't seeing it with 
test_summary at the moment...


Aggregating all my .sum files,  I see a sampling of about 257,000 PASSs, 
whereas I see a total of 141 FAILs.  FAILs only account for < 0.06% of 
the output. ( I'm getting an average of about 510 mis-ordered PASSs, so 
it only affects a small portion of them as well.)


I would think the output of .sum needs to be consistent from one run to 
the next in order for test_summary to consistently report its results as 
well.


Andrew


Re: [PATCH] Fix ICE with -Woverloaded-virtual (PR c++/61945)

2014-09-25 Thread Jason Merrill

OK.


[PATCH][match-and-simplify] Allow @foo captures

2014-09-25 Thread Richard Biener

The following makes us assign capture indexes dynamically which
easily allows handling of identifiers.  I've used std::map
for this.

Applied.

Note that this may make debugging a little bit harder as
@0 now no longer necessarily corresponds to captures[0]
(I don't think it necessarily did before btw, not 100% sure).

Richard.

2014-09-25  Richard Biener  

* genmatch.c: Include ,  and .
Allow identifiers for captures, assign capture indexes
dynamically.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215595)
+++ gcc/genmatch.c  (working copy)
@@ -22,6 +22,9 @@ along with GCC; see the file COPYING3.
 
 #include "bconfig.h"
 #include 
+#include 
+#include 
+#include 
 #include "system.h"
 #include "coretypes.h"
 #include 
@@ -422,10 +425,10 @@ struct c_expr : public operand
 
 struct capture : public operand
 {
-  capture (const char *where_, operand *what_)
+  capture (unsigned where_, operand *what_)
   : operand (OP_CAPTURE), where (where_), what (what_) {}
-  /* Identifier for the value.  */
-  const char *where;
+  /* Identifier index for the value.  */
+  unsigned where;
   /* The captured value.  */
   operand *what;
   virtual void gen_transform (FILE *f, const char *, bool, int, const char *, 
dt_operand ** = 0);
@@ -484,7 +487,7 @@ struct simplify
   simplify (operand *match_, source_location match_location_,
struct operand *result_, source_location result_location_,
vec ifexpr_vec_, vec > for_vec_,
-   unsigned capture_max_)
+   int capture_max_)
   : match (match_), match_location (match_location_),
   result (result_), result_location (result_location_),
   ifexpr_vec (ifexpr_vec_), for_vec (for_vec_),
@@ -505,7 +508,7 @@ struct simplify
  in the lowering phase.  */
   vec > for_vec;
   /* The maximum capture index seen.  */
-  unsigned capture_max;
+  int capture_max;
 };
 
 /* Debugging routines for dumping the AST.  */
@@ -515,7 +518,7 @@ print_operand (operand *o, FILE *f = std
 {
   if (capture *c = dyn_cast (o))
 {
-  fprintf (f, "@%s", c->where);
+  fprintf (f, "@%u", c->where);
   if (c->what && flattened == false) 
{
  putc (':', f);
@@ -1122,7 +1125,7 @@ decision_tree::insert_operand (dt_node *
 
   if (capture *c = dyn_cast (o))
 {
-  unsigned capt_index = atoi (c->where);
+  unsigned capt_index = c->where;
 
   if (indexes[capt_index] == 0)
{
@@ -1141,7 +1144,7 @@ decision_tree::insert_operand (dt_node *
 
  if (!c->what)
{
- unsigned cc_index = atoi (c->where);
+ unsigned cc_index = c->where;
  dt_operand *match_op = indexes[cc_index];
 
  dt_operand temp (dt_node::DT_TRUE, 0, 0);
@@ -1224,7 +1227,7 @@ decision_tree::print_node (dt_node *p, F
{
  dt_simplify *s = static_cast (p);
  fprintf (f, "simplify_%u { ", s->pattern_no); 
- for (unsigned i = 0; i <= s->s->capture_max; ++i)
+ for (int i = 0; i <= s->s->capture_max; ++i)
fprintf (f, "%p, ", (void *) s->indexes[i]);
  fprintf (f, " } "); 
}
@@ -1446,16 +1449,15 @@ capture::gen_transform (FILE *f, const c
 {
   if (what && is_a (what))
 {
-  int index = atoi (where);
-  if (indexes[index] == 0)
+  if (indexes[where] == 0)
{
  char buf[20];
- sprintf (buf, "captures[%s]", where);
+ sprintf (buf, "captures[%u]", where);
  what->gen_transform (f, buf, gimple, depth, in_type, NULL);
}
 }
   
-  fprintf (f, "%s = captures[%s];\n", dest, where); 
+  fprintf (f, "%s = captures[%u];\n", dest, where); 
 }
 
 char *
@@ -1940,10 +1942,11 @@ dt_simplify::gen (FILE *f, bool gimple)
 {
   fprintf (f, "{\n");
   output_line_directive (f, s->result_location);
-  fprintf (f, "tree captures[%u] ATTRIBUTE_UNUSED = {};\n",
-  s->capture_max + 1);
+  if (s->capture_max >= 0)
+fprintf (f, "tree captures[%u] ATTRIBUTE_UNUSED = {};\n",
+s->capture_max + 1);
 
-  for (unsigned i = 0; i <= s->capture_max; ++i)
+  for (int i = 0; i <= s->capture_max; ++i)
 if (indexes[i])
   {
char opname[20];
@@ -2280,7 +2283,9 @@ private:
   cpp_reader *r;
   vec active_ifs;
   vec > active_fors;
-  unsigned capture_max;
+
+  int capture_max;
+  std::map *capture_ids;
 
 public:
   vec simplifiers;
@@ -2448,12 +2453,20 @@ struct operand *
 parser::parse_capture (operand *op)
 {
   eat_token (CPP_ATSIGN);
-  /* ???  Ideally we'd accept any identifier or number here
- and dynamically assign an index to them.  */
-  const char *id = get_number ();
-  if ((unsigned) atoi (id) > capture_max)
-capture_max = atoi (id);
-  return new capture (id, op);
+  const cpp_token *token = peek ();
+  const char *id;
+  if (token->type == CPP_NUMBER)
+id = get_number ();
+  else if (token->type == CPP_NAME)
+

Re: [COMMITTED][PATCH] Improve prepare_shrink_wrap to sink more instructions

2014-09-25 Thread Jiong Wang


On 25/09/14 12:25, Christophe Lyon wrote:

On 24 September 2014 20:32, Jiong Wang  wrote:

On 22/09/14 19:01, Jeff Law wrote:

On 09/22/14 04:29, Jiong Wang wrote:

On 19/09/14 21:43, Jeff Law wrote:

patch attached.

please review, thanks.

gcc/
 * shrink-wrap.c (move_insn_for_shrink_wrap): Add further check when
!REG_P (src) to
 release more instruction sink opportunities.

gcc/testsuite/
 * gcc.target/aarch64/shrink_wrap_symbol_ref_1.c: New testcase.

Thanks.  Please verify this version passes a bootstrap & regression
test.  Assuming it does it is OK for the trunk.


pass bootstrap and on regression on x86 based on revision 215515.

committed as revision 215563.

-- Jiong


I have observed regressions in the g++ testsuite: pr49847 now FAILs
after this patch.

no.

Even without my patch, the regression still happens.

Or you could specify -fno-shrink-wrap; gcc still crashes.

So this regression should be caused by other commits whose effects were not
exposed by the x86 regression test.


-- Jiong



Here is what I have in my logs:
/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/gcc/testsuite/g++/../../xg++
-B/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/gcc/testsuite/g++/../../
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C
-fno-diagnostics-show-caret -fdiagnostics-color=never  -nostdinc++
-I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/arm-none-linux-gnueabihf/libstdc++-v3/include/arm-none-linux-gnueabihf
-I/aci-gcc-fsf/builds/gcc-fsf-gccsrc/obj-arm-none-linux-gnueabihf/gcc3/arm-none-linux-gnueabihf/libstdc++-v3/include
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/libsupc++
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/include/backward
-I/aci-gcc-fsf/sources/gcc-fsf/gccsrc/libstdc++-v3/testsuite/util
-fmessage-length=0  -std=gnu++98 -O -fnon-call-exceptions  -S -o
pr49847.s(timeout = 800)
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C: In
function 'int f(float)':
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C:7:1:
error: missing REG_EH_REGION note at the end of bb 2
/aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/testsuite/g++.dg/pr49847.C:7:1:
internal compiler error: verify_flow_info failed
0x82f8ba verify_flow_info()
 /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfghooks.c:260

0x840cd3 commit_edge_insertions()
 /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/cfgrtl.c:2068
0x9bf243 thread_prologue_and_epilogue_insns
 /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:5852
0x9bfa52 rest_of_handle_thread_prologue_and_epilogue
 /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:6245
0x9bfa52 execute
 /aci-gcc-fsf/sources/gcc-fsf/gccsrc/gcc/function.c:6283

As per
http://cbuild.validation.linaro.org/build/cross-validation/gcc/trunk/215563/report-build-info.html
I've noticed this on targets:
arm-none-linux-gnueabihf
armeb-none-linux-gnueabihf
aarch64-none-elf
aarch64_be-none-elf
aarch64-none-linux-gnu
but NOT on
arm-none-eabi
arm-none-linux-gnueabi

Christophe.






[AArch64] Make gentune.sh also generate "generic_sched" attribute

2014-09-25 Thread James Greenhalgh

Hi,

This patch fixes an annoying gotcha when adding new cores or pipeline
models in builds for AArch64. The "generic_sched" attribute also needs
updating in addition to aarch64-tune.md.

I see no good reason for this, we can generate that attribute in
gentune.sh quite easily.

For testing, I built an aarch64-none-elf toolchain with no issues.

OK?

Thanks,
James

---
2014-09-25  James Greenhalgh  

* config/aarch64/aarch64.md (generic_sched): Don't define here.
* config/aarch64/gentune.sh: Also generate "generic_sched" attribute.
* config/aarch64/aarch64-tune.md: Regenerate.
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index b7e40e0b5d13842ba5db02b41c9d17a2e626d916..38c4b30addc87d80fa374148e4a1752a297e9932 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -3,3 +3,11 @@
 (define_attr "tune"
 	"cortexa53,cortexa15,cortexa57cortexa53"
 	(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
+;; True if the generic scheduling description should be used.
+(define_attr "generic_sched" "yes,no"
+  (const (if_then_else
+(eq_attr "tune"
+	"cortexa15,cortexa53"
+)
+(const_string "no")
+(const_string "yes"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 74b554ec4df78a963de6572c0175d0304d2bdf15..71bd131a5a32c2692c217f7189def6d77e02ecd6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -181,14 +181,6 @@ (define_attr "enabled" "no,yes"
 ;; Processor types.
 (include "aarch64-tune.md")
 
-;; True if the generic scheduling description should be used.
-
-(define_attr "generic_sched" "yes,no"
-  (const (if_then_else
-  (eq_attr "tune" "cortexa53,cortexa15")
-  (const_string "no")
-  (const_string "yes"
-
 ;; Scheduling
 (include "../arm/cortex-a53.md")
 (include "../arm/cortex-a15.md")
diff --git a/gcc/config/aarch64/gentune.sh b/gcc/config/aarch64/gentune.sh
index c0f2e79..7a8a976 100644
--- a/gcc/config/aarch64/gentune.sh
+++ b/gcc/config/aarch64/gentune.sh
@@ -19,8 +19,11 @@
 # along with GCC; see the file COPYING3.  If not see
 # .
 
-# Generate aarch64-tune.md, a file containing the tune attribute from the list of 
-# CPUs in aarch64-cores.def
+# Generate aarch64-tune.md, a file containing the tune attribute and the
+# generic_sched attribute from the list of CPUs in aarch64-cores.def
+#
+# The two attributes require different awk patterns as the tune attribute
+# is sensitive to the ordering of the values it uses.
 
 echo ";; -*- buffer-read-only: t -*-"
 echo ";; Generated automatically by gentune.sh from aarch64-cores.def"
@@ -30,3 +33,15 @@ allcores=`awk -F'[(, 	]+' '/^AARCH64_CORE/ { cores = cores$3"," } END { print co
 echo "(define_attr \"tune\""
 echo "	\"$allcores\"" | sed -e 's/,"$/"/'
 echo "	(const (symbol_ref \"((enum attr_tune) aarch64_tune)\")))"
+
+allcores=`awk -F'[(, 	]+' '/^AARCH64_CORE/ { if ($4 != "genericv8") print $4 }' $1 \
+	  | sort -u | tr '\n' ','`
+
+echo ";; True if the generic scheduling description should be used."
+echo "(define_attr \"generic_sched\" \"yes,no\""
+echo "  (const (if_then_else"
+echo "(eq_attr \"tune\""
+echo "	\"$allcores\"" | sed -e 's/,"$/"/'
+echo ")"
+echo "(const_string \"no\")"
+echo "(const_string \"yes\""

Re: [PATCH, rs6000] Fix PR63335 (vec_any/all_nge/nle)

2014-09-25 Thread David Edelsohn
On Wed, Sep 24, 2014 at 6:51 PM, Bill Schmidt
 wrote:
> Hi,
>
> PR63335 reports that the builtins vec_any_nge, vec_all_nge, vec_any_nle,
> and vec_all_nle produce incorrect results for vector double.  There is
> some special handling for these intrinsics for the various vector
> integral types.  There is a test that excludes vector float from this
> handling, but when vector double was added with VSX, the required
> similar test was apparently not added.  This patch fixes that.  I've
> added a test based on the attachment in the bugzilla.
>
> Bootstrapped and tested on powerpc64le-unknown-linux-gnu.  Is this ok
> for trunk?  I expect we should backport the fix to 4.8 and 4.9 as well.
>
> Thanks,
> Bill
>
>
> [gcc]
>
> 2014-09-24  Bill Schmidt  
>
> PR target/63335
> * config/rs6000/rs6000-c.c (altivec_build_resolved_builtin):
> Exclude VSX_BUILTIN_XVCMPGEDP_P from special handling.
>
> [gcc/testsuite]
>
> 2014-09-24  Bill Schmidt  
>
> PR target/63335
> * gcc.target/powerpc/pr63335.c: New test.

Okay.

Thanks, David


[PATCH] microblaze: microblaze.md: Use VOID instead of SI to fix "((void (*)(void)) 0)()" issue

2014-09-25 Thread Chen Gang
We need to use VOID instead of SI; otherwise, when a real VOIDmode comes, it
does not match SImode and causes an issue. This patch fixes this issue and
passes the testsuite.

The related test code ('void' will cause CALL instead of SET):

  typedef void (*T)(void);
  f1 ()
  {
((T) 0)();
  }

The related error:

  [root@localhost gcc]# ./cc1 /tmp/calls.c -o /tmp/1.s
   f1
  Analyzing compilation unit
  Performing interprocedural optimizations
   <*free_lang_data>
 Assembling functions:
   f1
  /tmp/calls.c: In function 'f1':
  /tmp/calls.c:5:1: error: unrecognizable insn:
   }
   ^
  (call_insn 5 2 8 2 (parallel [
  (call (mem:SI (const_int 0 [0]) [0 MEM[(void (*) (void))0B] 
S4 A32])
  (const_int 24 [0x18]))
  (clobber (reg:SI 15 r15))
  ]) /tmp/calls.c:4 -1
   (nil)
  (nil))
  /tmp/calls.c:5:1: internal compiler error: in extract_insn, at recog.c:2204
  0xb0e71b _fatal_insn(char const*, rtx_def const*, char const*, int, char 
const*)
../../gcc/gcc/rtl-error.c:109
  0xb0e75c _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
../../gcc/gcc/rtl-error.c:117
  0xac552b extract_insn(rtx_def*)
../../gcc/gcc/recog.c:2204
  0x8b919e instantiate_virtual_regs_in_insn
../../gcc/gcc/function.c:1614
  0x8ba347 instantiate_virtual_regs
../../gcc/gcc/function.c:1934
  0x8ba452 execute
../../gcc/gcc/function.c:1983
  Please submit a full bug report,
  with preprocessed source if appropriate.
  Please include the complete backtrace with any bug report.
  See  for instructions.


2014-09-25  Chen Gang  

* config/microblaze/microblaze.md (call_internal1): Use VOID
instead of SI to fix "((void (*)(void)) 0)()" issue

---
 gcc/config/microblaze/microblaze.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/microblaze/microblaze.md 
b/gcc/config/microblaze/microblaze.md
index b971737..3b4faf4 100644
--- a/gcc/config/microblaze/microblaze.md
+++ b/gcc/config/microblaze/microblaze.md
@@ -2062,7 +2062,7 @@
   (set_attr "length"   "4")])
 
 (define_insn "call_internal1"
-  [(call (mem (match_operand:SI 0 "call_insn_simple_operand" "ri"))
+  [(call (mem (match_operand:VOID 0 "call_insn_simple_operand" "ri"))
 (match_operand:SI 1 "" "i"))
   (clobber (reg:SI R_SR))]
   ""


[PATCH] Fix typo in comments

2014-09-25 Thread Felix Yang
Please apply this patch if OK for trunk. Thanks.

Index: gcc/lra.c
===
--- gcc/lra.c(revision 215598)
+++ gcc/lra.c(working copy)
@@ -933,7 +933,7 @@ lra_set_insn_recog_data (rtx_insn *insn)
   nalt = 1;
   if (nop < 0)
 {
-  /* Its is a special insn like USE or CLOBBER.  We should
+  /* It is a special insn like USE or CLOBBER.  We should
  recognize any regular insn otherwise LRA can do nothing
  with this insn.  */
   gcc_assert (GET_CODE (PATTERN (insn)) == USE
Index: gcc/genautomata.c
===
--- gcc/genautomata.c(revision 215598)
+++ gcc/genautomata.c(working copy)
@@ -6178,7 +6178,7 @@ merge_states (automaton_t automaton, vec
 alt_states = new_alt_state;
   }
 }
-  /* Its is important that alt states were sorted before and
+  /* It is important that alt states were sorted before and
  after merging to have the same querying results.  */
   new_state->component_states = uniq_sort_alt_states (alt_states);
 }
Index: gcc/ChangeLog
===
--- gcc/ChangeLog(revision 215598)
+++ gcc/ChangeLog(working copy)
@@ -1,3 +1,8 @@
+2014-09-25  Felix Yang  
+
+* lra.c (lra_set_insn_recog_data): Fix typo in comment.
+* genautomata.c (merge_states): Ditto.
+
 2014-09-25  Alexander Ivchenko  
 Maxim Kuznetsov  
 Anna Tikhonova  


Cheers,
Felix
Index: gcc/lra.c
===
--- gcc/lra.c   (revision 215598)
+++ gcc/lra.c   (working copy)
@@ -933,7 +933,7 @@ lra_set_insn_recog_data (rtx_insn *insn)
   nalt = 1;
   if (nop < 0)
{
- /* Its is a special insn like USE or CLOBBER.  We should
+ /* It is a special insn like USE or CLOBBER.  We should
 recognize any regular insn otherwise LRA can do nothing
 with this insn.  */
  gcc_assert (GET_CODE (PATTERN (insn)) == USE
Index: gcc/genautomata.c
===
--- gcc/genautomata.c   (revision 215598)
+++ gcc/genautomata.c   (working copy)
@@ -6178,7 +6178,7 @@ merge_states (automaton_t automaton, vec
alt_states = new_alt_state;
  }
}
- /* Its is important that alt states were sorted before and
+ /* It is important that alt states were sorted before and
 after merging to have the same querying results.  */
  new_state->component_states = uniq_sort_alt_states (alt_states);
}
Index: gcc/ChangeLog
===
--- gcc/ChangeLog   (revision 215598)
+++ gcc/ChangeLog   (working copy)
@@ -1,3 +1,8 @@
+2014-09-25  Felix Yang  
+
+   * lra.c (lra_set_insn_recog_data): Fix typo in comment.
+   * genautomata.c (merge_states): Ditto.
+
 2014-09-25  Alexander Ivchenko  
Maxim Kuznetsov  
Anna Tikhonova  


Re: [PATCH, Pointer Bounds Checker 23/x] Function split

2014-09-25 Thread Ilya Enkovich
On 23 Sep 09:55, Jeff Law wrote:
> On 09/22/14 00:40, Ilya Enkovich wrote:
> >
> >Bounds don't have to vary for different pointers.  E.g. p and p + 1
> >always have equal bounds.  In this particular case we have function
> >pointers and all of them have default bounds.
> OK.  It looked a bit odd and I wanted to make sure there wasn't
> something fundamentally wrong.
> 
> >>>I attach a dump I got from Chrome compilation with no additional
> >>>checks restrictions in split.  Original function returns value defined
> >>>by phi node in return_bb and bounds defined in BB2.  Split part
> >>>contains BB3, BB4 and BB5 and resulting function part has usage of
> >>>returned bounds but no producer for it.
> >>
> >>Right, but my question is whether or not the bounds from BB2 were really the
> >>correct bounds to be using in the first place!  I would have expected a PHI
> >>in BB6 to select the bounds based on the path leading to BB6, much like we
> >>select a different return value.
> >
> >Consider we have pointer computation and then
> >
> >return __bnd_init_ptr_bounds (res);
> >
> >In such case you would never have a PHI node for bounds.  Also do not
> >forget that we may have no PHI nodes for both return value and return
> >bounds.  In such case we could also easily fall into undefined value
> >as in dump.
> This code (visit_bb, find_return_bb, consider_split) is a bit of a
> mess, but I do see what you're trying to do now.  Thanks for being
> patient with my questions.
> 
> If I were to look at this at a high level, the core issue seems to
> me that we're really not prepared to handle functions with multiple
> return values.  This shows up in your MPX work, but IIRC there's
> cases in the atomics where we have multiple return values as well.
> I wouldn't be surprised if there's latent bugs with splitting &
> atomics lurking to bite us one day.
> 
> So if I'm reading all this code correctly, given a return block
> which returns a (pointer,bounds) pair, if the bounds are set  by a
> normal statement (ie, not a PHI), then we won't use that block for
> RETURN_BB.  So there's nothing to worry about in that case.
> Similarly if the bounds are  set by a PHI in the return block,
> consider_split will reject that split point as well.  So really the
> only case here is when the bounds are set in another dominating
> block.  Right?
> 
> I can see how you're using the relevant part of the same test we
> need for the retval.  My gut tells me we want to commonize that test
> so that they don't get out-of-sync.  Specifically, can we pull the
> code which sets split_part_set_retbnd into a little function, then
> use it for the retval here too:
> 
>   else if (TREE_CODE (retval) == SSA_NAME)
> current->split_part_set_retval
>   = (!SSA_NAME_IS_DEFAULT_DEF (retval)
>  && (bitmap_bit_p (current->split_bbs,
>   gimple_bb (SSA_NAME_DEF_STMT (retval))->index)
>  || gimple_bb (SSA_NAME_DEF_STMT (retval)) == return_bb));
> 
> 
> 
> Iteration through the statements in find_retbnd should start at the
> end of the block and walk backwards.  It probably doesn't matter in
> practice all that much, but might as well be sensible since the
> GIMPLE_RETURN is almost always going to be the last statement in the
> block.
> 
> Similarly for the statement walk in split_function when you want to
> replace retbnd with new one.
> 
> It seems like the code to build the bndret call to obtain bounds is
> repeated.  Can you refactor that into its own little function and
> just use that.  It's not a huge amount of code, but it does make
> things a bit easier to follow.
> 
> With those changes this will be OK.
> 
> Jeff
> 
> 

Here is a version with modifications you proposed.  Thanks for review!

Ilya
--
2014-09-25  Ilya Enkovich  

* ipa-split.c: Include tree-chkp.h.
(find_retbnd): New.
(split_part_set_ssa_name_p): New.
(consider_split): Do not split retbnd and retval
producers.
(insert_bndret_call_after): New.
(split_function): Propagate Pointer Bounds Checker
instrumentation marks and handle returned bounds.


diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c
index 2af3a93..7a1b75e 100644
--- a/gcc/ipa-split.c
+++ b/gcc/ipa-split.c
@@ -110,6 +110,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-pretty-print.h"
 #include "ipa-inline.h"
 #include "cfgloop.h"
+#include "tree-chkp.h"
 
 /* Per basic block info.  */
 
@@ -151,6 +152,7 @@ struct split_point best_split_point;
 static bitmap forbidden_dominators;
 
 static tree find_retval (basic_block return_bb);
+static tree find_retbnd (basic_block return_bb);
 
 /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
variable, check it if it is present in bitmap passed via DATA.  */
@@ -370,6 +372,21 @@ dominated_by_forbidden (basic_block bb)
   return false;
 }
 
+/* For give split point CURRENT and return block RETURN_BB return 1
+   if ssa name VA

Re: [PATCH] Fix PR63266: Keep track of impact of sign extension in bswap

2014-09-25 Thread Christophe Lyon
On 25 September 2014 08:39, Thomas Preud'homme
 wrote:
>> From: Christophe Lyon [mailto:christophe.l...@linaro.org]
>> Sent: Thursday, September 25, 2014 4:28 AM
>
>>
>> Hi Thomas,
>
> Hi Christophe,
>
>>
>> Although I could notice the improvement:
>> Pass disappears   [PASS => ]:
>>   gcc.dg/optimize-bswapsi-1.c scan-tree-dump-times bswap "32 bit
>> bswap
>> implementation found at" 3
>> New pass  [ => PASS]:
>>   gcc.dg/optimize-bswapsi-1.c scan-tree-dump-times bswap "32 bit
>> bswap
>> implementation found at" 4
>>
>> for arm-*, armeb-* and aarch64-* targets, there is no change for
>> aarch64_be: is this expected?
>
> No, but neither is this:
>
> @@ -1905,11 +1913,10 @@ find_bswap_or_nop_1 (gimple stmt, struct 
> symbolic_number *n, int limit)
>
> /* Sign extension: result is dependent on the value.  */
> old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
> -   if (!TYPE_UNSIGNED (n->type)
> -   && type_size > old_type_size
> -   && n->n & ((uint64_t) 0xff << ((old_type_size - 1)
> -  * BITS_PER_MARKER)))
> - return NULL;
> +   if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
> +   && HEAD_MARKER (n->n, old_type_size))
> + for (i = 0; i < type_size - old_type_size; i++)
> +   n->n |= MARKER_BYTE_UNKNOWN << (type_size - 1 - i);
>
> if (type_size < 64 / BITS_PER_MARKER)
>   {
>
> type_size - 1 - i gives a number of marker bytes to shift. I forgot to 
> multiply by the number of bits in a marker. Can you do the change locally and 
> tell me if the test now succeeds for aarch64_be?
>

While attempting to try this, I noticed that more precisely the test
is currently UNSUPPORTED on aarch64_be,
which is because check_effective_target_bswap only accepts istarget aarch64-*-*.

I didn't try yet to change it into istarget aarch64*-*-*.


> Best regards,
>
> Thomas
>
>
>
>


[PATCH i386 AVX512] [56/n] Add plus/minus/abs/neg/andnot insn patterns.

2014-09-25 Thread Kirill Yukhin
Hello,
The patch at the bottom extends plus/minus/abs/andnot patterns
to support AVX-512.
I've used a questionable hack in the patterns.
Instead of writing a dozen similar patterns with masking
I've simply substed them, prohibiting the non-mask variant in
the pattern condition. E.g.:
(define_expand "3"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
   (plusminus:VI12_AVX512VL
  (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
  (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")))]
  "TARGET_AVX512BW && "
  "ix86_fixup_binary_operands_no_copy (, mode, operands);")

If this is not acceptable, I'll rewrite it to something like:
(define_expand "3"
  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
(vec_merge: VI12_AVX512VL
  (plusminus:VI12_AVX512VL
(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
(match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
  (match_operand:SUBST_V 2 "vector_move_operand" "0C")
  (match_operand: 3 "register_operand" "Yk")))]
  "TARGET_AVX512BW && "
  "ix86_fixup_binary_operands_no_copy (, mode, operands);")

Testing is in progress. Is it ok for trunk if it passes?

Also we might want to rename VI_AVX2, but I didn't do that
since new (generic) name would be too long. Say: VI_AVX2_AVX512BW_AVX512F.

gcc/
* config/i386/sse.md (define_mode_iterator VI_AVX2): Extend
to support AVX-512BW.
(define_mode_iterator VI124_AVX2_48_AVX512F): Remove.
(define_expand "3"): Remove masking support.
(define_insn "*3"): Ditto.
(define_expand 
"3"):
New.
(define_expand 
"3"):
Ditto.
(define_insn "*3"): 
Ditto.
(define_insn "*3"): 
Ditto.
(define_expand "_andnot3"): Remove masking support.
(define_insn "*andnot3"): Ditto.
(define_expand 
"_andnot3"): New.
(define_expand 
"_andnot3"): Ditto.
(define_insn "*andnot3"): Ditto.
(define_insn "*andnot3"): Ditto.
(define_insn "*abs2"): Remove masking support.
(define_insn "abs2"): New.
(define_insn "abs2"): 
Ditto.
(define_expand "abs2"): Use VI_AVX2 mode iterator.

--
Thanks, K

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ffc831f..d6861e5 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -268,8 +268,8 @@
(V4DI "TARGET_AVX") V2DI])
 
 (define_mode_iterator VI_AVX2
-  [(V32QI "TARGET_AVX2") V16QI
-   (V16HI "TARGET_AVX2") V8HI
+  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
+   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
(V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
(V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
 
@@ -359,12 +359,6 @@
   [(V16HI "TARGET_AVX2") V8HI
(V8SI "TARGET_AVX2") V4SI])
 
-(define_mode_iterator VI124_AVX2_48_AVX512F
-  [(V32QI "TARGET_AVX2") V16QI
-   (V16HI "TARGET_AVX2") V8HI
-   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
-   (V8DI "TARGET_AVX512F")])
-
 (define_mode_iterator VI124_AVX512F
   [(V32QI "TARGET_AVX2") V16QI
(V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
@@ -9051,20 +9045,37 @@
   "TARGET_SSE2"
   "operands[2] = force_reg (mode, CONST0_RTX (mode));")
 
-(define_expand "3"
+(define_expand "3"
   [(set (match_operand:VI_AVX2 0 "register_operand")
(plusminus:VI_AVX2
  (match_operand:VI_AVX2 1 "nonimmediate_operand")
  (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
-  "TARGET_SSE2 && "
+  "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
-(define_insn "*3"
+(define_expand "3"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
+   (plusminus:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")))]
+  "TARGET_AVX512F && "
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_expand "3"
+  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
+   (plusminus:VI12_AVX512VL
+ (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")))]
+  "TARGET_AVX512BW && "
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
+(define_insn "*3"
   [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
(plusminus:VI_AVX2
  (match_operand:VI_AVX2 1 "nonimmediate_operand" "0,v")
  (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
-  "TARGET_SSE2 && ix86_binary_operator_ok (, mode, operands) && 
"
+  "TARGET_SSE2
+   && ix86_binary_operator_ok (, mode, operands)"
   "@
p\t{%2, %0|%0, %2}
vp\t{%2, %1, 
%0|%0, %1, %2}"
@@ -9074,6 +9085,30 @@
(set_attr "prefix" "")
(set_attr "mode" "")])
 
+(define_insn "*3"
+  [(set (match_operand:VI48_AVX512VL 0 "register_operand" "=v")
+   (plusminus:VI48_AVX512VL
+ (match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v")
+ (matc

[PATCH][match-and-simplify] Fix capture change

2014-09-25 Thread Richard Biener

Of course I missed c-exprs...  The following patch fixes that.

Now bootstrapped (building stage3 target libs now).

Applied.

Richard.

2014-09-25  Richard Biener  

* genmatch.c (struct c_expr): Also record capture identifier
to index map.
(c_expr::gen_transform): Use it.

Index: gcc/genmatch.c
===
--- gcc/genmatch.c  (revision 215598)
+++ gcc/genmatch.c  (working copy)
@@ -408,12 +408,13 @@ struct c_expr : public operand
   };
 
   c_expr (cpp_reader *r_, vec code_, unsigned nr_stmts_,
- vec ids_ = vNULL)
-: operand (OP_C_EXPR), r (r_), code (code_),
+ vec ids_, std::map *capture_ids_)
+: operand (OP_C_EXPR), r (r_), code (code_), capture_ids (capture_ids_),
   nr_stmts (nr_stmts_), ids (ids_) {}
   /* cpplib tokens and state to transform this back to source.  */
   cpp_reader *r;
   vec code;
+  std::map *capture_ids;
   /* The number of statements parsed (well, the number of ';'s).  */
   unsigned nr_stmts;
   /* The identifier replacement vector.  */
@@ -487,11 +488,11 @@ struct simplify
   simplify (operand *match_, source_location match_location_,
struct operand *result_, source_location result_location_,
vec ifexpr_vec_, vec > for_vec_,
-   int capture_max_)
+   std::map *capture_ids_)
   : match (match_), match_location (match_location_),
   result (result_), result_location (result_location_),
   ifexpr_vec (ifexpr_vec_), for_vec (for_vec_),
-  capture_max (capture_max_) {}
+  capture_ids (capture_ids_), capture_max (capture_ids_->size ()) {}
 
   /* The expression that is matched against the GENERIC or GIMPLE IL.  */
   operand *match; 
@@ -507,7 +508,8 @@ struct simplify
   /* Collected 'for' expression operators that have to be replaced
  in the lowering phase.  */
   vec > for_vec;
-  /* The maximum capture index seen.  */
+  /* A map of capture identifiers to indexes.  */
+  std::map *capture_ids;
   int capture_max;
 };
 
@@ -657,7 +659,7 @@ lower_commutative (simplify *s, vecmatch_location,
   s->result, s->result_location, s->ifexpr_vec,
-  s->for_vec, s->capture_max);
+  s->for_vec, s->capture_ids);
   simplifiers.safe_push (ns);
 }
 }
@@ -787,7 +789,7 @@ lower_opt_convert (simplify *s, vecmatch_location,
   s->result, s->result_location, s->ifexpr_vec,
-  s->for_vec, s->capture_max);
+  s->for_vec, s->capture_ids);
   simplifiers.safe_push (ns);
 }
 }
@@ -820,7 +822,7 @@ replace_id (operand *o, user_id *id, id_
 {
   vec ids = ce->ids.copy ();
   ids.safe_push (c_expr::id_tab (id->id, with->id));
-  return new c_expr (ce->r, ce->code, ce->nr_stmts, ids);
+  return new c_expr (ce->r, ce->code, ce->nr_stmts, ids, ce->capture_ids);
 }
 
   return o;
@@ -872,7 +874,7 @@ lower_for (simplify *sin, vecmatch_location,
   result_op, s->result_location,
-  ifexpr_vec, vNULL, s->capture_max);
+  ifexpr_vec, vNULL, s->capture_ids);
  worklist.safe_push (ns);
}
}
@@ -1399,12 +1401,18 @@ c_expr::gen_transform (FILE *f, const ch
   if (token->type == CPP_ATSIGN)
{
  const cpp_token *n = &code[i+1];
- if (n->type == CPP_NUMBER
+ if ((n->type == CPP_NUMBER
+  || n->type == CPP_NAME)
  && !(n->flags & PREV_WHITE))
{
  if (token->flags & PREV_WHITE)
fputc (' ', f);
- fprintf (f, "captures[%s]", n->val.str.text);
+ const char *id;
+ if (n->type == CPP_NUMBER)
+   id = (const char *)n->val.str.text;
+ else
+   id = (const char *)CPP_HASHNODE (n->val.node.node)->ident.str;
+ fprintf (f, "captures[%u]", (*capture_ids)[id]);
  ++i;
  continue;
}
@@ -2284,7 +2292,6 @@ private:
   vec active_ifs;
   vec > active_fors;
 
-  int capture_max;
   std::map *capture_ids;
 
 public:
@@ -2461,11 +2468,9 @@ parser::parse_capture (operand *op)
 id = get_ident ();
   else
 fatal_at (token, "expected number or identifier");
+  unsigned next_id = capture_ids->size ();
   std::pair::iterator, bool> res
-= capture_ids->insert
-(std::pair(id, capture_max + 1));
-  if (res.second)
-capture_max++;
+= capture_ids->insert (std::pair(id, next_id));
   return new capture ((*res.first).second, op);
 }
 
@@ -2571,7 +2576,7 @@ parser::parse_c_expr (cpp_ttype start)
   code.safe_push (*token);
 }
   while (1);
-  return new c_expr (r, code, nr_stmts);
+  return new c_expr (r, code, nr_stmts, vNULL, capture_ids);
 }
 
 /* Pa

[PATCH/RFC v2 3/14] Add new optabs for reducing vectors to scalars

2014-09-25 Thread Alan Lawrence
Ok, so, I've tried making reduc_plus optab take two modes: that of the vector to 
reduce, and the result; thus allowing platforms to provide a widening reduction. 
However, I'm keeping reduc_[us](min|max)_optab with only a single mode, as 
widening makes no sense there.


I've not gone as far as making the vectorizer use any such widening reduction, 
however: as previously stated, I'm not really sure what the input source code 
for that even looks like (maybe in a language other than C?). If we wanted to do 
a non-widening reduction using such an instruction (by discarding the extra 
bits), strikes me the platform can/should provide a non-widening optab for that 
case...


Testing: bootstrapped on x86_64 linux + check-gcc; cross-tested aarch64-none-elf 
check-gcc; cross-tested aarch64_be-none-elf aarch64.exp + vect.exp.


So, my feeling is that the extra complexity here doesn't really buy us anything; 
and that if we do want to support / use widening reductions in the future, we 
should do so with a separate, reduc_plus_widen... optab, and stick with the 
original patch/formulation for now. (In other words: this patch is a guide to 
how I think a dual-mode reduc_plus_optab looks, but I don't honestly like it!).


If you agree, I shall transplant the comments on scalar_reduc_to_vector from 
this patch into the original, and then post that revised version?



Cheers, Alan

Richard Biener wrote:

On Mon, Sep 22, 2014 at 3:26 PM, Alan Lawrence  wrote:

Richard Biener wrote:


scalar_reduc_to_vector misses a comment.


Ok to reuse the comment in optabs.h in optabs.c also?


Sure.


I wonder if at the end we wouldn't transition all backends and then
renaming reduc_*_scal_optab back to reduc_*_optab makes sense.


Yes, that sounds like a plan, the _scal is a bit of a mouthful.


The optabs have only one mode - I wouldn't be surprised if an ISA
invents for example v4si -> di reduction?  So do we want to make
reduc_plus_scal_optab a little bit more future proof (maybe there
is already an ISA that supports this kind of reduction?).


That sounds like a plausible thing for an ISA to do, indeed. However given
these names are only used by the autovectorizer rather than directly, the
question is what the corresponding source code looks like, and/or what
changes to the autovectorizer we might have to make to (look for code to)
exploit such an instruction.


Ah, indeed.  Would be sth like a REDUC_WIDEN_SUM_EXPR or so.


At this point I could go for a
reduc_{plus,min_max}_scal_ which reduces from the first vector
mode to the second scalar mode, and then make the vectorizer look only for
cases where the second mode was the element type of the first; but I'm not
sure I want to do anything more complicated than that at this stage.
(However, indeed it would leave the possibility open for the future.)


Yeah, agreed.  For the min/max case a widen variant isn't useful anyway.

Thanks,
Richard.


--Alan



diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 80e8bd6a079b8bf77ef396643aaba512cf83b317..0a9381fc3a26cdaad02e6f837b94c7738daa3a7f 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4783,29 +4783,49 @@ it is unspecified which of the two operands is returned as the result.
 @cindex @code{reduc_smax_@var{m}} instruction pattern
 @item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
 Find the signed minimum/maximum of the elements of a vector. The vector is
-operand 1, and the scalar result is stored in the least significant bits of
+operand 1, and the result is stored in the least significant bits of
 operand 0 (also a vector). The output and input vector should have the same
-modes.
+modes. These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_smin_scal_@var{m}} and @samp{reduc_smax_scal_@var{m}}.
 
 @cindex @code{reduc_umin_@var{m}} instruction pattern
 @cindex @code{reduc_umax_@var{m}} instruction pattern
 @item @samp{reduc_umin_@var{m}}, @samp{reduc_umax_@var{m}}
 Find the unsigned minimum/maximum of the elements of a vector. The vector is
-operand 1, and the scalar result is stored in the least significant bits of
+operand 1, and the result is stored in the least significant bits of
 operand 0 (also a vector). The output and input vector should have the same
-modes.
+modes. These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_umin_scal_@var{m}} and @samp{reduc_umax_scal_@var{m}}.
 
 @cindex @code{reduc_splus_@var{m}} instruction pattern
-@item @samp{reduc_splus_@var{m}}
-Compute the sum of the signed elements of a vector. The vector is operand 1,
-and the scalar result is stored in the least significant bits of operand 0
-(also a vector). The output and input vector should have the same modes.
-
 @cindex @code{reduc_uplus_@var{m}} instruction pattern
-@item @samp{reduc_uplus_@var{m}}
-Compute the sum of the unsigned elements of a vector. The vector is operand 1,
-and the scalar result is stored in the least significant bits of operand 0
+@item @samp{reduc_spl

[Patch 1/4] Hookize MOVE_BY_PIECES_P, remove most uses of MOVE_RATIO

2014-09-25 Thread James Greenhalgh

Hi,

This patch started off by Hookizing MOVE_RATIO, but pulling on that
thread made it clear that most users of MOVE_RATIO really want to know
whether move_by_pieces is going to be used or not. For that we have
MOVE_BY_PIECES_P.

We can hookize this, and clean up most other callers of MOVE_RATIO.
We leave behind one in SRA and one in tree-inline, which we will clean
up shortly.

Bootstrapped on x86_64, AArch64 and ARM. OK for trunk?

Thanks,
James

---
gcc/

2014-09-25  James Greenhalgh  

* target.def (move_by_pieces_profitable_p): New.
* doc/tm.texi.in (MOVE_BY_PIECES_P): Reduce documentation to a stub
describing that this macro is deprecated.
(TARGET_MOVE_BY_PIECES_PROFITABLE_P): Add hook.
* doc/tm.texi: Regenerate.
* expr.c (MOVE_BY_PIECES_P): Remove.
(STORE_BY_PIECES_P): Rewrite in terms of
TARGET_MOVE_BY_PIECES_PROFITABLE_P.
(can_move_by_pieces): Likewise.
(emit_block_move_hints): Rewrite in terms of can_move_by_pieces.
(emit_push_insn): Likewise.
(expand_constructor): Likewise.
* targhooks.c (get_move_ratio): New.
(default_move_by_pieces_profitable_p): Likewise.
* targhooks.h (default_move_by_pieces_profitable_p): New.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 10af50e..162aa30 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6114,11 +6114,38 @@ If you don't define this, a reasonable default is used.
 @end defmac
 
 @defmac MOVE_BY_PIECES_P (@var{size}, @var{alignment})
-A C expression used to determine whether @code{move_by_pieces} will be used to
-copy a chunk of memory, or whether some other block move mechanism
-will be used.  Defaults to 1 if @code{move_by_pieces_ninsns} returns less
-than @code{MOVE_RATIO}.
-@end defmac
+A C expression used to implement the default behaviour of
+@code{TARGET_MOVE_BY_PIECES_PROFITABLE_P}.  New ports should implement
+that hook in preference to this macro, which is deprecated.
+@end defmac
+
+@deftypefn {Target Hook} bool TARGET_MOVE_BY_PIECES_PROFITABLE_P (unsigned int @var{size}, unsigned int @var{alignment}, bool @var{speed_p})
+GCC will attempt several strategies when asked to copy between
+two areas of memory, for example when copying a @code{struct}.
+@code{move_by_pieces} implements such a copy as a sequence of
+memory-to-memory move insns.  Alternate strategies are to expand the
+@code{movmem} optab, to emit a library call, or to emit a unit-by-unit
+loop-based copy.
+
+This target hook should return true if, for a memory move with a given
+@var{size} and @var{alignment}, using the @code{move_by_pieces}
+infrastructure is expected to result in better code generation.
+Both @var{size} and @var{alignment} are measured in terms of storage
+units.
+
+The parameter @var{speed_p} is true if the code is currently being
+optimized for speed rather than size.
+
+Returning true for higher values of @var{size} can improve code generation
+for speed if the target does not provide an implementation of the
+@code{movmem} standard name, if the @code{movmem} implementation would be
+more expensive than a sequence of move insns, or if the overhead of a
+library call would dominate that of the body of the copy.
+
+Returning true for higher values of @var{size} may also cause an increase
+in code size, for example where the number of insns emitted to perform a
+move would be greater than that of a library call.
+@end deftypefn
 
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index f6f241b..1894745 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4597,12 +4597,13 @@ If you don't define this, a reasonable default is used.
 @end defmac
 
 @defmac MOVE_BY_PIECES_P (@var{size}, @var{alignment})
-A C expression used to determine whether @code{move_by_pieces} will be used to
-copy a chunk of memory, or whether some other block move mechanism
-will be used.  Defaults to 1 if @code{move_by_pieces_ninsns} returns less
-than @code{MOVE_RATIO}.
+A C expression used to implement the default behaviour of
+@code{TARGET_MOVE_BY_PIECES_PROFITABLE_P}.  New ports should implement
+that hook in preference to this macro, which is deprecated.
 @end defmac
 
+@hook TARGET_MOVE_BY_PIECES_PROFITABLE_P
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/expr.c b/gcc/expr.c
index 0af9b9a..59a85f7 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -157,14 +157,6 @@ static void do_tablejump (rtx, enum machine_mode, rtx, rtx, rtx, int);
 static rtx const_vector_from_tree (tree);
 static void write_complex_part (rtx, rtx, bool);
 
-/* This macro is used to determine whether move_by_pieces should be called
-   to perform a structure copy.  */
-#ifndef MOVE_BY_PIECES_P
-#define MOVE_BY_PIECES_P(SIZE, ALIGN) \
-  (move_by_p

[Patch 2/4] Hack out a use of MOVE_RATIO in tree-inline.c

2014-09-25 Thread James Greenhalgh

Hi,

This patch hookizes the use of MOVE_RATIO in
tree-inline.c:estimate_move_cost as TARGET_ESTIMATE_BLOCK_COPY_NINSNS.
This hook should return an estimate for the number of instructions
which will be emitted to copy a block of memory.

tree-inline.c uses this in inlining heuristics to estimate the cost of
moving an object. The implementation is lacking, and will likely
underestimate the size of most copies.

An initial iteration of this patch migrated tree-inline.c to use
move_by_pieces_profitable_p and move_by_pieces_ninsns, but this
proved painful for performance on ARM.

This patch puts the control in the hands of the backend, and uses
the existing logic as a default.

Bootstrapped on x86_64, ARM, AArch64.

Ok?

Thanks,
James

---
2014-09-25  James Greenhalgh  

* target.def (estimate_block_copy_ninsns): New.
* targhooks.h (default_estimate_block_copy_ninsns): New.
* targhooks.c (default_estimate_block_copy_ninsns): New.
* tree-inline.c (estimate_move_cost): Use new target hook.
* doc/tm.texi.in (TARGET_ESTIMATE_BLOCK_COPY_NINSNS): New.
* doc/tm.texi: Regenerate.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 162aa30..f59641a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6147,6 +6147,19 @@ in code size, for example where the number of insns emitted to perform a
 move would be greater than that of a library call.
 @end deftypefn
 
+@deftypefn {Target Hook} {unsigned int} TARGET_ESTIMATE_BLOCK_COPY_NINSNS (HOST_WIDE_INT @var{size}, bool @var{speed_p})
+This target hook should return an estimate of the number of
+instructions which will be emitted when copying an object with a size
+in units @var{size}.
+
+The parameter @var{speed_p} is true if the code is currently being
+optimized for speed rather than size.
+
+Where the block copy would be implemented using a library call, the
+estimate should be for the number of instructions required to set up
+and perform that call.
+@end deftypefn
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1894745..d2a4386 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4604,6 +4604,8 @@ that hook in preference to this macro, which is deprecated.
 
 @hook TARGET_MOVE_BY_PIECES_PROFITABLE_P
 
+@hook TARGET_ESTIMATE_BLOCK_COPY_NINSNS
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/target.def b/gcc/target.def
index 0fd6235..10f3b2e 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3079,6 +3079,21 @@ move would be greater than that of a library call.",
  bool, (unsigned int size, unsigned int alignment, bool speed_p),
  default_move_by_pieces_profitable_p)
 
+DEFHOOK
+(estimate_block_copy_ninsns,
+ "This target hook should return an estimate of the number of\n\
+instructions which will be emitted when copying an object with a size\n\
+in units @var{size}.\n\
+\n\
+The parameter @var{speed_p} is true if the code is currently being\n\
+optimized for speed rather than size.\n\
+\n\
+Where the block copy would be implemented using a library call, the\n\
+estimate should be for the number of instructions required to set up\n\
+and perform that call.",
+ unsigned int, (HOST_WIDE_INT size, bool speed_p),
+ default_estimate_block_copy_ninsns)
+
 /* True for MODE if the target expects that registers in this mode will
be allocated to registers in a small register class.  The compiler is
allowed to use registers explicitly used in the rtl as spill registers
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index ffe7080..eb0a4cd 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1437,6 +1437,16 @@ default_move_by_pieces_profitable_p (unsigned int size ATTRIBUTE_UNUSED,
 #endif
 }
 
+unsigned int
+default_estimate_block_copy_ninsns (HOST_WIDE_INT size, bool speed_p)
+{
+  if (size < 0 || size > MOVE_MAX_PIECES * get_move_ratio (speed_p))
+/* Cost of a memcpy call, 3 arguments and the call.  */
+return 4;
+  else
+return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
+}
+
 bool
 default_profile_before_prologue (void)
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 93f21f8..f76ad31 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -183,6 +183,7 @@ extern int default_register_move_cost (enum machine_mode, reg_class_t,
 
 extern bool default_move_by_pieces_profitable_p (unsigned int,
 		 unsigned int, bool);
+extern unsigned int default_estimate_block_copy_ninsns (HOST_WIDE_INT, bool);
 
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index ad474a5..e5f8653 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3617,7 +3617,7 @

[Patch 0/4] Re: Control SRA and IPA-SRA by a param rather than MOVE_RATIO

2014-09-25 Thread James Greenhalgh
On Wed, Aug 20, 2014 at 10:21:41AM +0100, Richard Biener wrote:
> I think this is overly complicated and instead SRA should only
> use the parameters.  Targets can adjust their default (like they
> do for other parameters).
> 
> The default should be MOVE_RATIO which should be applied
> where the common code adjusts parameters (see existing
> examples for not overriding user specified ones).

Hi,

My attempts to simplify this patch didn't work out so well...

If I move the target hook to the driver, I can't use MOVE_RATIO to
find a default value. MOVE_RATIO for some targets is wired to a
function in the back-end, or otherwise references symbols we don't
want to pull in to libcommon/libcommon-target.

My next approach was to hookize just this one use of MOVE_RATIO - again,
this was a failure as libcommon-target doesn't have enough access to the
CPU tuning tables used by backends (nor should it).

That took me to my current approach. Hookize each of the three unique
uses of MOVE_RATIO, allowing us to eliminate it entirely. This still
doesn't let us simplify the patch I sent in August, but it does neaten
up the users of MOVE_RATIO allowing us to separate out concerns.

This gives targets and micro-architectures much more fine-grained
control over the tuning parameters for inlining, SRA and move_by_pieces,
which were all previously wrapped in MOVE_RATIO.

I've bootstrapped and tested the series on x86_64, ARM and AArch64 with
no issues.

The patches coming are:

  [Patch 1/4] Hookize MOVE_BY_PIECES_P, remove most uses of MOVE_RATIO

Which moves everything consulting MOVE_RATIO to decide whether
the move_by_pieces infrastructure will be used to a new hook
TARGET_MOVE_BY_PIECES_PROFITABLE_P.

  [Patch 2/4] Hack out a use of MOVE_RATIO in tree-inline.c

Which adds the target hook TARGET_ESTIMATE_BLOCK_COPY_NINSNS,
used to estimate the number of instructions a target will require
to move a block. This is used by inlining to estimate the cost of
various parameters.

  [Patchv2 3/4] Control SRA and IPA-SRA by a param rather than
MOVE_RATIO

Which is a similar patch to that I sent in August, adding new
parameters and a new target hook to control when SRA should be used.

  [Patch AArch64 4/4] Wire up New target hooks

Which wires all of this up for AArch64.

Thanks,
James

[Patch AArch64 4/4] Wire up New target hooks

2014-09-25 Thread James Greenhalgh

Hi,

This patch wires up our new target hooks for AArch64. This also means
we can bring back the two failing SRA tests.

Bootstrapped on AArch64 with no issues.

OK for trunk?

Thanks,
James

---
gcc/

2014-09-25  James Greenhalgh  

* config/aarch64/aarch64.c
(aarch64_estimate_movmem_ninsns): New.
(aarch64_expand_movmem): Refactor old move costs.
(aarch64_move_by_pieces_profitable_p): New.
(aarch64_estimate_block_copy_ninsns): Likewise.
(aarch64_max_scalarization_size): Likewise.
(TARGET_MAX_SCALARIZATION_SIZE): Likewise.
(TARGET_MOVE_BY_PIECES_PROFITABLE_P): Likewise.
* config/aarch64/aarch64.h (AARCH64_MOVE_RATIO): New.
(MOVE_RATIO): Delete.

gcc/testsuite/

2014-09-25  James Greenhalgh  

* gcc.dg/tree-ssa/pr42585.c: Bring back for AArch64.
* gcc.dg/tree-ssa/sra-12.c: Likewise.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3483081..d8b5a4a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9616,6 +9616,34 @@ aarch64_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
   return false;
 }
 
+static unsigned int
+aarch64_estimate_movmem_ninsns (HOST_WIDE_INT size)
+{
+  HOST_WIDE_INT chunks = 0;
+  int n = size;
+
+  /* 3 bytes is a 2-byte then a 1-byte copy.  */
+  if (n == 3)
+return 2;
+
+  /* 5, 6, 7 bytes need an extra copy.  */
+  if (n > 4 && n < 8)
+chunks++;
+
+  /* If n was greater than 8, it is dealt with in 8/16-byte chunks
+ first.  */
+  chunks += n / 16;
+  n %= 16;
+  chunks += n / 8;
+  n %= 8;
+
+  /* Anything left is dealt with in one instruction.  */
+  if (n != 0)
+chunks++;
+
+  return chunks;
+}
+
 /* Return a new RTX holding the result of moving POINTER forward by
AMOUNT bytes.  */
 
@@ -9673,7 +9701,7 @@ aarch64_expand_movmem (rtx *operands)
 
   /* When optimizing for size, give a better estimate of the length of a
  memcpy call, but use the default otherwise.  */
-  unsigned int max_instructions = (speed_p ? 15 : AARCH64_CALL_RATIO) / 2;
+  unsigned int max_instructions = AARCH64_MOVE_RATIO (speed_p);
 
   /* We can't do anything smart if the amount to copy is not constant.  */
   if (!CONST_INT_P (operands[2]))
@@ -9681,10 +9709,9 @@ aarch64_expand_movmem (rtx *operands)
 
   n = UINTVAL (operands[2]);
 
-  /* Try to keep the number of instructions low.  For cases below 16 bytes we
- need to make at most two moves.  For cases above 16 bytes it will be one
- move for each 16 byte chunk, then at most two additional moves.  */
-  if (((n / 16) + (n % 16 ? 2 : 0)) > max_instructions)
+  /* Try to keep the number of instructions we emit low, fail expansion
+ if we are unable to and leave it to memcpy.  */
+  if (aarch64_estimate_movmem_ninsns (n) > max_instructions)
 return false;
 
   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
@@ -9774,6 +9801,57 @@ aarch64_expand_movmem (rtx *operands)
   return true;
 }
 
+/* Implement TARGET_MOVE_BY_PIECES_PROFITABLE_P.  */
+
+bool
+aarch64_move_by_pieces_profitable_p (unsigned int size,
+ unsigned int align,
+ bool speed_p)
+{
+  /* For strict alignment we don't want to use our unaligned
+ movmem implementation.  */
+  if (STRICT_ALIGNMENT)
+return (AARCH64_MOVE_RATIO (speed_p)
+	< move_by_pieces_ninsns (size, align, speed_p));
+
+  /* If we have an overhang of 3, 6 or 7 bytes, we would emit an unaligned
+ load to cover it, if this is likely to be slow we would do better
+ going through move_by_pieces.  */
+  if (size % 8 > 5)
+return SLOW_UNALIGNED_ACCESS (DImode, 1);
+  else if (size % 4 == 3)
+return SLOW_UNALIGNED_ACCESS (SImode, 1);
+
+  /* We can likely do a better job than the move_by_pieces infrastructure
+ can.  */
+  return false;
+}
+
+/* Implement TARGET_ESTIMATE_BLOCK_COPY_NINSNS.  */
+
+unsigned int
+aarch64_estimate_block_copy_ninsns (HOST_WIDE_INT size, bool speed_p)
+{
+  if (aarch64_move_by_pieces_profitable_p (size, 8, speed_p))
+return move_by_pieces_ninsns (size, 8, MOVE_MAX_PIECES);
+  else if (aarch64_estimate_movmem_ninsns (size)
+	   < AARCH64_MOVE_RATIO (speed_p))
+return aarch64_estimate_movmem_ninsns (size);
+  else
+/* memcpy.  Set up 3 arguments and make a call.  */
+return 4;
+}
+
+/* Implement TARGET_MAX_SCALARIZATION_SIZE.  */
+
+unsigned int
+aarch64_max_scalarization_size (bool speed_p)
+{
+  /* The maximum number of instructions we are willing to use * the
+ maximum size we can move in one instruction (LDP/STP).  */
+  return AARCH64_MOVE_RATIO (speed_p) * 16;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
@@ -9843,6 +9921,10 @@ aarch64_expand_movmem (rtx *operands)
 #undef TARGET_BUILTIN_DECL
 #define TARGET_BUILTIN_DECL aarch64_builtin_decl
 
+#undef TARGET_ESTIMATE_BLOCK_COPY_NINSNS
+#define TARGET_ESTIMATE_BLOCK_COPY_NINSNS \
+  aarch64_estimate_block_copy_ninsns
+
 #undef  TARGET_EXPAND

[Patchv2 3/4] Control SRA and IPA-SRA by a param rather than MOVE_RATIO

2014-09-25 Thread James Greenhalgh

Hi,

After hookizing MOVE_BY_PIECES_P and migrating tree-inline.c, we are
left with only one user of MOVE_RATIO - deciding the maximum size of
aggregate for SRA.

Past discussions have made it clear [1] that keeping this use of
MOVE_RATIO is undesirable. Clearly it is now also misnamed.

The previous iteration of this patch was rejected as too complicated. I
went off and tried simplifying it to use MOVE_RATIO, but if we do that we
end up breaking some interface boundaries between the driver and the
backend.

This patch partially hookizes MOVE_RATIO under the new name
TARGET_MAX_SCALARIZATION_SIZE and uses it to set default values for two
new parameters:

  sra-max-scalarization-size-Ospeed - The maximum size of aggregate
  to consider when compiling for speed
  sra-max-scalarization-size-Osize - The maximum size of aggregate
  to consider when compiling for size.

We then modify SRA to use these parameters rather than MOVE_RATIO.

Bootstrapped and regression tested for x86, arm and aarch64 with no
issues.

OK for trunk?

[1]: https://gcc.gnu.org/ml/gcc-patches/2014-08/msg01997.html

---
gcc/

2014-09-25  James Greenhalgh  

* doc/invoke.texi (sra-max-scalarization-size-Ospeed): Document.
(sra-max-scalarization-size-Osize): Likewise.
* doc/tm.texi.in
(MOVE_RATIO): Reduce documentation to a stub, deprecate.
(TARGET_MAX_SCALARIZATION_SIZE): Add hook.
* doc/tm.texi: Regenerate.
* defaults.h (MOVE_RATIO): Remove default implementation.
(SET_RATIO): Add a default implementation if MOVE_RATIO
is not defined.
* params.def (sra-max-scalarization-size-Ospeed): New.
(sra-max-scalarization-size-Osize): Likewise.
* target.def (max_scalarization_size): New.
* targhooks.c (default_max_scalarization_size): New.
* targhooks.h (default_max_scalarization_size): New.
* tree-sra.c (get_max_scalarization_size): New.
(analyze_all_variable_accesses): Use it.
diff --git a/gcc/defaults.h b/gcc/defaults.h
index c1776b0..f723e2c 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -1191,18 +1191,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define BRANCH_COST(speed_p, predictable_p) 1
 #endif
 
-/* If a memory-to-memory move would take MOVE_RATIO or more simple
-   move-instruction sequences, we will do a movmem or libcall instead.  */
-
-#ifndef MOVE_RATIO
-#if defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
-#define MOVE_RATIO(speed) 2
-#else
-/* If we are optimizing for space (-Os), cut down the default move ratio.  */
-#define MOVE_RATIO(speed) ((speed) ? 15 : 3)
-#endif
-#endif
-
 /* If a clear memory operation would take CLEAR_RATIO or more simple
move-instruction sequences, we will do a setmem or libcall instead.  */
 
@@ -1219,7 +1207,14 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
SET_RATIO or more simple move-instruction sequences, we will do a movmem
or libcall instead.  */
 #ifndef SET_RATIO
+#ifdef MOVE_RATIO
 #define SET_RATIO(speed) MOVE_RATIO (speed)
+#elif defined (HAVE_movmemqi) || defined (HAVE_movmemhi) || defined (HAVE_movmemsi) || defined (HAVE_movmemdi) || defined (HAVE_movmemti)
+#define SET_RATIO(speed) 2
+#else
+/* If we are optimizing for space (-Os), cut down the default move ratio.  */
+#define SET_RATIO(speed) ((speed) ? 15 : 3)
+#endif
 #endif
 
 /* Supply a default definition for FUNCTION_ARG_PADDING:
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index eae4ab1..c3e6eaa 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10301,6 +10301,16 @@ parameters only when their cumulative size is less or equal to
 @option{ipa-sra-ptr-growth-factor} times the size of the original
 pointer parameter.
 
+@item sra-max-scalarization-size-Ospeed
+@item sra-max-scalarization-size-Osize
+The two Scalar Replacement of Aggregates passes (SRA and IPA-SRA) aim to
+replace scalar parts of aggregates with uses of independent scalar
+variables.  These parameters control the maximum size, in storage units,
+of aggregate which will be considered for replacement when compiling for
+speed
+(@option{sra-max-scalarization-size-Ospeed}) or size
+(@option{sra-max-scalarization-size-Osize}) respectively.
+
 @item tm-max-aggregate-size
 When making copies of thread-local variables in a transaction, this
 parameter specifies the size in bytes after which variables are
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index f59641a..b4061eb 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6098,20 +6098,25 @@ this macro is defined, it should produce a nonzero value when
 @end defmac
 
 @defmac MOVE_RATIO (@var{speed})
-The threshold of number of scalar memory-to-memory move insns, @emph{below}
-which a sequence of insns should be generated instead of a
-string move insn or a library call.  Increasing the value will always
-make code faster, 

Commit: SH: Fix typo in atomic fetch sequence

2014-09-25 Thread Nick Clifton
Hi Alex, Hi Kaz, Hi Oleg,

  I am applying the patch below as an obvious fix for a typo in the
  instruction sequence of the atomic_fetch_nand_soft_imask
  pattern.  I hope that this is OK with you.

Cheers
  Nick

gcc/ChangeLog
2014-09-25  Nick Clifton  

PR target/62218
* config/sh/sync.md (atomic_fetch_nand_soft_imask): Fix typo
in instruction sequence.

Index: gcc/config/sh/sync.md
===
--- gcc/config/sh/sync.md   (revision 215540)
+++ gcc/config/sh/sync.md   (working copy)
@@ -903,7 +903,7 @@
 "  and %0,%3"  "\n"
 "  not %3,%3"  "\n"
 "  mov.   %3,@%1" "\n"
-"  stc %4,sr";
+"  ldc %4,sr";
 }
   [(set_attr "length" "20")])
 


Re: [PATCH, 2/2] shrink wrap a function with a single loop: split live_edge

2014-09-25 Thread Jiong Wang


On 25/09/14 09:52, Zhenqiang Chen wrote:



-Original Message-
From: Jiong Wang [mailto:jiong.w...@arm.com]
Sent: Thursday, September 25, 2014 2:13 AM
To: Jeff Law; Zhenqiang Chen
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop: split
live_edge


On 22/09/14 18:51, Jeff Law wrote:

On 09/22/14 04:24, Jiong Wang wrote:

Great.  Can you send an updated patchkit for review.

patch attached.

please review, thanks.

gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
live-in of new created BB as the intersection of live-in from
"old_dest" and live-out from "bb".

Looks good.  However, before committing we need a couple things.

1. Bootstrap & regression test this variant of the patch.  I know you
tested an earlier one, but please test this one just to be sure.

2. Testcase.  I think you could test for either the reduction in the
live-in set of the newly created block or that you're shrink wrapping
one or more functions you didn't previously shrink-wrap.  I think it's
fine if this test is target specific.

   bootstrap ok based on revision 215515.

   The x86 regression results are interesting, though: there are no regressions on
check-g++, but there are four regressions on check-gcc:

FAIL: gcc.dg/tree-ssa/loadpre10.c (internal compiler error)
FAIL: gcc.dg/tree-ssa/loadpre10.c (test for excess errors)
FAIL: gcc.dg/tree-ssa/pr21417.c (internal compiler error)
FAIL: gcc.dg/tree-ssa/pr21417.c (test for excess errors)

This is caused by our improving the accuracy of the live-in set for the newly
created basic block. We now split more than one edge for the above two
testcases, which triggers the following assert in move_insn_for_shrink_wrap:

/* We should not split more than once for a function.  */
gcc_assert (!(*split_p));

According to the algorithm, it is impossible to split one edge twice. It's 
possible to split two different edges. But for such cases, the control flow is 
too complex to perform shrink-wrapping.

Anyway, your patch improves the accuracy. You can replace the "gcc_assert" to "return"; or change 
"split_p" to "splitted_edge" then you can check one edge is not splitted twice.


thanks for the explanation.

actually, the old "bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));" will 
let any "dest" reg
in entry block alive in the new splitted block. If there is another block which 
"dest" also set in live_in, then
dest alive in two blocks, then those code in "live_edge_for_reg" will always 
return NULL, thus the old
inaccurate data flow will actually never make split two different edges 
happen... thus assert never triggered.

Across the whole x86 bootstrap and regression test run, only two cases trigger
splitting two different edges. I think that is a trivial case, so I prefer to
be conservative and keep the old logic: as suggested, just replace
"gcc_assert" with "return false".

or if we want to allow multi split, I think just remove the assert is OK, because 
"EDGE_COUNT (next_block->preds) == 2"
will guarantee split one edge twice never happen.

new patch updated.

pass bootstrap and no regression, both check-gcc and check-g++, on the x86.

OK for trunk?

thanks.

gcc/
   * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in of
   new created BB as the intersection of live-in from "old_dest" and live-out
   from "bb".
diff --git a/gcc/shrink-wrap.c b/gcc/shrink-wrap.c
index af23f02..bd4813c 100644
--- a/gcc/shrink-wrap.c
+++ b/gcc/shrink-wrap.c
@@ -250,16 +250,21 @@ move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
   if (!df_live)
 	return false;

+  basic_block old_dest = live_edge->dest;
   next_block = split_edge (live_edge);

   /* We create a new basic block.  Call df_grow_bb_info to make sure
 	 all data structures are allocated.  */
   df_grow_bb_info (df_live);
-  bitmap_copy (df_get_live_in (next_block), df_get_live_out (bb));
+
+  bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
+		  df_get_live_in (old_dest));
   df_set_bb_dirty (next_block);

   /* We should not split more than once for a function.  */
-  gcc_assert (!(*split_p));
+  if (*split_p)
+	return false;
+
   *split_p = true;
 }

diff --git a/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c
new file mode 100644
index 000..47f2468
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/shrink_wrap_1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-rtl-pro_and_epilogue" } */
+
+enum machine_mode
+{
+  FAKE_0,
+  FAKE_1,
+  FAKE_2,
+  FAKE_3,
+  FAKE_4,
+  FAKE_5,
+  NUM_MACHINE_MODES,
+};
+
+typedef int *rtx;
+typedef long unsigned int size_t;
+extern unsigned char mode_size[NUM_MACHINE_MODES];
+
+extern rtx c_readstr (const char *, enum machine_mode);
+extern rtx convert_to_mode (enum machine_mode, rtx, int);
+extern rtx expand_mult (enum machine_mode, rtx, rtx, rtx, int);
+extern rtx force_r

Re: [AArch64] Tighten predicates on SIMD shift intrinsics

2014-09-25 Thread James Greenhalgh

On Fri, Sep 19, 2014 at 05:57:06PM +0100, Richard Henderson wrote:
> On 09/11/2014 01:29 AM, James Greenhalgh wrote:
> > +;; Predicates used by the various SIMD shift operations.  These
> > +;; fall in to 3 categories.
> > +;;   Shifts with a range 0-(bit_size - 1) (aarch64_simd_shift_imm)
> > +;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
> > +;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
> > +(define_predicate "aarch64_simd_shift_imm_qi"
> > +  (and (match_code "const_int")
> > +   (match_test "aarch64_simd_const_bounds (op, 0, 7)")))
>
> The function call should be removed and this should be written as
>
>   (match_test "IN_RANGE (ival, 0, 7)")
>

Quite right, updated as attached.

Cross-tested for aarch64-none-elf with no issues.

OK?

Thanks,
James

---
gcc/

2014-09-25  James Greenhalgh  

* config/aarch64/aarch64-protos.h (aarch64_simd_const_bounds): Delete.
* config/aarch64/aarch64-simd.md (aarch64_qshl): Use
new predicates.
(aarch64_shll2_n): Likewise.
(aarch64_shr_n): Likewise.
(aarch64_sra_n): Likewise.
(aarch64_si_n): Likewise.
(aarch64_qshl_n): Likewise.
* config/aarch64/aarch64.c (aarch64_simd_const_bounds): Delete.
* config/aarch64/iterators.md (ve_mode): New.
(offsetlr): Remap to infix text for use in new predicates.
* config/aarch64/predicates.md (aarch64_simd_shift_imm_qi): New.
(aarch64_simd_shift_imm_hi): Likewise.
(aarch64_simd_shift_imm_si): Likewise.
(aarch64_simd_shift_imm_di): Likewise.
(aarch64_simd_shift_imm_offset_qi): Likewise.
(aarch64_simd_shift_imm_offset_hi): Likewise.
(aarch64_simd_shift_imm_offset_si): Likewise.
(aarch64_simd_shift_imm_offset_di): Likewise.
(aarch64_simd_shift_imm_bitsize_qi): Likewise.
(aarch64_simd_shift_imm_bitsize_hi): Likewise.
(aarch64_simd_shift_imm_bitsize_si): Likewise.
(aarch64_simd_shift_imm_bitsize_di): Likewise.

gcc/testsuite/

2014-09-25  James Greenhalgh  

* gcc.target/aarch64/simd/vqshlb_1.c: New.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e32ef64..b5f53d2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -256,7 +256,6 @@ void aarch64_emit_call_insn (rtx);
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
 
-void aarch64_simd_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 void aarch64_simd_disambiguate_copy (rtx *, rtx *, rtx *, unsigned int);
 
 /* Emit code to place a AdvSIMD pair result in memory locations (with equal
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 45ea9d7895e93d4c4b137de1c01f6a1e93942d11..cab26a341ecefb65b81d13d066b349d3be354616 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -3607,12 +3607,12 @@ (define_insn "aarch64_qshl
 (define_insn "aarch64_shll_n"
   [(set (match_operand: 0 "register_operand" "=w")
 	(unspec: [(match_operand:VDW 1 "register_operand" "w")
-			 (match_operand:SI 2 "immediate_operand" "i")]
+			 (match_operand:SI 2
+			   "aarch64_simd_shift_imm_bitsize_" "i")]
  VSHLL))]
   "TARGET_SIMD"
   "*
   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
   if (INTVAL (operands[2]) == bit_width)
   {
 return \"shll\\t%0., %1., %2\";
@@ -3633,7 +3633,6 @@ (define_insn "aarch64_shll2_n
   "TARGET_SIMD"
   "*
   int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
   if (INTVAL (operands[2]) == bit_width)
   {
 return \"shll2\\t%0., %1., %2\";
@@ -3649,13 +3648,11 @@ (define_insn "aarch64_shll2_n
 (define_insn "aarch64_shr_n"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
-			   (match_operand:SI 2 "immediate_operand" "i")]
+			   (match_operand:SI 2
+			 "aarch64_simd_shift_imm_offset_" "i")]
 			  VRSHR_N))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
-  aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
-  return \"shr\\t%0, %1, %2\";"
+  "shr\\t%0, %1, %2"
   [(set_attr "type" "neon_sat_shift_imm")]
 )
 
@@ -3665,13 +3662,11 @@ (define_insn "aarch64_sra_n"
   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
 	(unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
 		   (match_operand:VSDQ_I_DI 2 "register_operand" "w")
-   (match_operand:SI 3 "immediate_operand" "i")]
+   (match_operand:SI 3
+			 "aarch64_simd_shift_imm_offset_" "i")]
   VSRA))]
   "TARGET_SIMD"
-  "*
-  int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT;
-  aarch64_simd_const

Re: [Patch 1/4] Hookize MOVE_BY_PIECES_P, remove most uses of MOVE_RATIO

2014-09-25 Thread Steven Bosscher
On Thu, Sep 25, 2014 at 4:57 PM, James Greenhalgh wrote:
> * doc/tm.texi.in (MOVE_BY_PIECES_P): Reduce documentation to a stub
> describing that this macro is deprecated.

Remove it entirely and poison it in system.h?
It takes changes to only a few targets: mips, arc, s390, and sh.

Thanks for hookizing this!

Ciao!
Steven


Re: Commit: SH: Fix typo in atomic fetch sequence

2014-09-25 Thread Oleg Endo
On Thu, 2014-09-25 at 16:04 +0100, Nick Clifton wrote:
> Hi Alex, Hi Kaz, Hi Oleg,
> 
>   I am applying the patch below as an obvious fix for a typo in the
>   instruction sequence of the atomic_fetch_nand_soft_imask
>   pattern.  I hope that this is OK with you.

Yes, ouch.  Thanks!
I'll backport it to 4.8 and 4.9.


Cheers,
Oleg


> gcc/ChangeLog
> 2014-09-25  Nick Clifton  
> 
>   PR target/62218
>   * config/sh/sync.md (atomic_fetch_nand_soft_imask): Fix typo
>   in instruction sequence.
> 
> Index: gcc/config/sh/sync.md
> ===
> --- gcc/config/sh/sync.md (revision 215540)
> +++ gcc/config/sh/sync.md (working copy)
> @@ -903,7 +903,7 @@
>"  and %0,%3"  "\n"
>"  not %3,%3"  "\n"
>"  mov.   %3,@%1" "\n"
> -  "  stc %4,sr";
> +  "  ldc %4,sr";
>  }
>[(set_attr "length" "20")])
>  




Re: [PATCH] PR63300 'const volatile' sometimes stripped in debug info.

2014-09-25 Thread Mark Wielaard
On Wed, 2014-09-24 at 17:09 -0400, Jason Merrill wrote:
> On 09/24/2014 02:42 PM, Mark Wielaard wrote:
> > As my example shows even with older GCCs depending on whether a const
> > type or volatile type was encountered first a const volatile type comes
> > out either as a DIE chain const_type DIE -> volatile_type DIE -> type
> > DIE, or the other way around, in different compile units.
> 
> Sorry, I overlooked that you were talking about 4.8.3 in your earlier mail.
> 
> The patch is OK.

Thanks.

I did post to the DWARF mailinglist to ask for clarification of the
issue of ordering of type qualifier modifiers vs type unit signatures:
http://thread.gmane.org/gmane.comp.standards.dwarf/202

Cheers,

Mark


Re: Avoid privatization of TLS variables

2014-09-25 Thread Ian Lance Taylor
On Wed, Sep 24, 2014 at 6:58 PM, Jan Hubicka  wrote:
>
>b:   00 00
> 9: R_X86_64_TPOFF32 
> __gcov_indirect_call_counters_ltopriv

Look at the .o file where __gcov_indirect_call_counters_ltopriv is
defined.  That .o file must have the symbol marked as STT_TLS and it
must be defined in a section with the SHF_TLS flag.  If that is not
true, then that is your problem.

Ian


Re: [PATCH/RFC v2 3/14] Add new optabs for reducing vectors to scalars

2014-09-25 Thread Richard Biener
On Thu, Sep 25, 2014 at 4:32 PM, Alan Lawrence  wrote:
> Ok, so, I've tried making reduc_plus optab take two modes: that of the
> vector to reduce, and the result; thus allowing platforms to provide a
> widening reduction. However, I'm keeping reduc_[us](min|max)_optab with only
> a single mode, as widening makes no sense there.
>
> I've not gone as far as making the vectorizer use any such widening
> reduction, however: as previously stated, I'm not really sure what the input
> source code for that even looks like (maybe in a language other than C?). If
> we wanted to do a non-widening reduction using such an instruction (by
> discarding the extra bits), strikes me the platform can/should provide a
> non-widening optab for that case...

I expect it to apply to sth like

int foo (char *in, int n)
{
   int res = 0;
   for (int i = 0; i < n; ++i)
 res += *in;
   return res;
}

where you'd see

  temc = *in;
  tem = (int)temc;
  res += tem;

we probably handle this by widening the chars to ints and unrolling
the loop enough to make that work (thus for n == 16 it would maybe
fail to vectorize?).  It should be more efficient to pattern-detect
this as widening reduction.

> Testing: bootstrapped on x86_64 linux + check-gcc; cross-tested
> aarch64-none-elf check-gcc; cross-tested aarch64_be-none-elf aarch64.exp +
> vect.exp.
>
> So, my feeling is that the extra complexity here doesn't really buy us
> anything; and that if we do want to support / use widening reductions in the
> future, we should do so with a separate, reduc_plus_widen... optab, and
> stick with the original patch/formulation for now. (In other words: this
> patch is a guide to how I think a dual-mode reduc_plus_optab looks, but I
> don't honestly like it!).
>
> If you agree, I shall transplant the comments on scalar_reduc_to_vector from
> this patch into the original, and then post that revised version?

I agree.  We can come back once a target implements such widening
reduction.

Richard.

>
> Cheers, Alan
>
>
> Richard Biener wrote:
>>
>> On Mon, Sep 22, 2014 at 3:26 PM, Alan Lawrence 
>> wrote:
>>>
>>> Richard Biener wrote:


>>>> scalar_reduc_to_vector misses a comment.
>>>
>>>
>>> Ok to reuse the comment in optabs.h in optabs.c also?
>>
>>
>> Sure.
>>
>>>> I wonder if at the end we wouldn't transition all backends and then
>>>> renaming reduc_*_scal_optab back to reduc_*_optab makes sense.
>>>
>>>
>>> Yes, that sounds like a plan, the _scal is a bit of a mouthful.
>>>
>>>> The optabs have only one mode - I wouldn't be surprised if an ISA
>>>> invents for example v4si -> di reduction?  So do we want to make
>>>> reduc_plus_scal_optab a little bit more future proof (maybe there
>>>> is already an ISA that supports this kind of reduction?).
>>>
>>>
>>> That sounds like a plausible thing for an ISA to do, indeed. However
>>> given
>>> these names are only used by the autovectorizer rather than directly, the
>>> question is what the corresponding source code looks like, and/or what
>>> changes to the autovectorizer we might have to make to (look for code to)
>>> exploit such an instruction.
>>
>>
>> Ah, indeed.  Would be sth like a REDUC_WIDEN_SUM_EXPR or so.
>>
>>> At this point I could go for a
>>> reduc_{plus,min_max}_scal_ which reduces from the first
>>> vector
>>> mode to the second scalar mode, and then make the vectorizer look only
>>> for
>>> cases where the second mode was the element type of the first; but I'm
>>> not
>>> sure I want to do anything more complicated than that at this stage.
>>> (However, indeed it would leave the possibility open for the future.)
>>
>>
>> Yeah, agreed.  For the min/max case a widen variant isn't useful anyway.
>>
>> Thanks,
>> Richard.
>>
>>> --Alan
>>>
>>
>


[PING 2, PATCH C++] - SD-6 Implementation Part N/4.

2014-09-25 Thread Ed Smith-Rowland

*PING*

I would like review of my SD-6 implementation.

Part 1 - __has_include__ built-in in libcpp.
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00083.html

Part 2 - __has_include macro and C++ language feature macros.
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00084.html

Part 3 - libstdc++ library macros was already approved by Jonathan. 
Thank you.

https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00085.html
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00110.html

Part 4 - C++ front end tests.
https://gcc.gnu.org/ml/gcc-patches/2014-09/msg00553.html

These build and test cleanly on x86_64-linux.

Part 5 - __has_cpp_attribute and supporting machinery is proving more 
interesting than I thought.

But the rest should be put in.

Thank you,
Ed Smith-Rowland



Re: Avoid privatization of TLS variables

2014-09-25 Thread H.J. Lu
On Thu, Sep 25, 2014 at 8:24 AM, Ian Lance Taylor  wrote:
> On Wed, Sep 24, 2014 at 6:58 PM, Jan Hubicka  wrote:
>>
>>b:   00 00
>> 9: R_X86_64_TPOFF32 
>> __gcov_indirect_call_counters_ltopriv
>
> Look at the .o file where __gcov_indirect_call_counters_ltopriv is
> defined.  That .o file must have the symbol marked as STT_TLS and it
> must be defined in a section with the SHF_TLS flag.  If that is not
> true, then that is your problem.

SHF_TLS isn't required.

16: 0008 8 TLS GLOBAL HIDDEN   COM
__gcov_indirect_call_counters_ltopriv
17: 0008 8 TLS GLOBAL HIDDEN   COM
__gcov_indirect_call_callee_ltopriv

are also sufficient.

-- 
H.J.


Re: [PATCH 1/9] Gccgo port to s390[x] -- part I

2014-09-25 Thread Lynn A. Boger

Hi,

I removed my change to statements.cc and made a change as you suggested 
in libgo/runtime/go-defer.c in function __go_set_defering_fn:


+#if defined(__powerpc64__) && _CALL_ELF != 2
+g->defer->__defering_fn = *(void **)defering_fn;
+#else
+g->defer->__defering_fn = defering_fn;
+#endif

With this change along with PATCH 1/9, the recover.go testcase passes and
there are no new regressions for ppc64 LE or BE or ppc (32 bit BE)
when running the go and libgo testsuites, and I agree this is the right solution.


Thanks,

Lynn

On 09/25/2014 06:44 AM, Ulrich Weigand wrote:

Lynn Boger wrote:


I modified the patch for statements.cc and rebuilt and that eliminates
the regressions and fixes the original problem it was intended to fix
for both ppc64 BE & LE.  The ABIs are different between BE & LE, so that
make_func_code_reference on ppc64 BE is not returning the function's
code address but the function pointer from the .opd.  The first 8 bytes
of the entry in the .opd is the function's code address.  Here is the
change to statements.cc that made it work:
+#if defined(__powerpc64__) && _CALL_ELF != 2
+  Expression* pfn =
+Expression::make_func_code_reference(function, location);
+  Type* pfntype =
+  Type::make_pointer_type(
+  Type::make_pointer_type(Type::make_void_type()));
+  Expression* fn = Expression::make_unsafe_cast(pfntype, pfn,
location);
+  Expression* fn_code_addr = Expression::make_unary(OPERATOR_MULT, fn,
+location);
+#else
+  Expression* fn_code_addr =
+Expression::make_func_code_reference(function, location);
+#endif
+  Expression* call = Runtime::make_call(Runtime::SET_DEFERING_FN,
+location, 1, fn_code_addr);
+  Statement* s = Statement::make_statement(call, true);

This looks wrong when using gcc-go as a cross-compiler.  The #if is
evaluated in the context of the *host*, but you'd need to check the
processor architecture and ABI of the *target*.  This seems difficult
since you'd have to take into account -mabi= options, which are not
readily available to the front end.

It seems more straightforward to keep the front end as is, i.e. generate
code to pass a plain function pointer (as defined by the target ABI) to
the runtime, and have the *runtime* do whatever target-specific fiddling
is required to get from a function pointer to a code address.

For example, you could add something like:

#if defined(__powerpc64__) && _CALL_ELF != 2
   defering_fn = *(void **)defering_fn;
#endif

to __go_set_defering_fn (or possibly __go_can_recover).

[ Since the runtime is compiled for the target with the appropriate
ABI setting, the #if works as intended when in runtime code.  ]

Bye,
Ulrich





[patch] LWG DR 1339: uninitialized_fill_n should return the end of its range

2014-09-25 Thread Jonathan Wakely

http://cplusplus.github.io/LWG/lwg-defects.html#1339

Also make the same change to our __uninitialized_xxx_n_a extensions
and change std::vector to make use of the returned iterator instead of
recalculating it.

Tested x86_64-linux, committed to trunk.
commit 81456b32a7991351d40ca45c276892fdab5d91ab
Author: Jonathan Wakely 
Date:   Tue Sep 23 00:13:14 2014 +0100

	DR 1339
	* doc/xml/manual/status_cxx2011.xml: Update.
	* include/bits/stl_uninitialized.h (uninitialized_fill_n): Return
	an iterator.
	(__uninitialized_fill_n_a, __uninitialized_default_n_a): Likewise.
	* include/bits/stl_vector.h (vector::_M_fill_initialize,
	vector::_M_default_initialize): Use returned iterator.
	* include/bits/vector.tcc (vector::_M_fill_assign,
	vector::_M_fill_insert, vector::_M_default_append): Likewise.
	* testsuite/20_util/specialized_algorithms/uninitialized_fill_n/
	16505.cc: Adjust return type.
	* testsuite/20_util/specialized_algorithms/uninitialized_fill_n/
	dr1339.cc: New.

diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2011.xml b/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
index 4433c89..36630ce 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2011.xml
@@ -600,11 +600,10 @@ particular release.
   
 
 
-  
   20.6.12.3
   uninitialized_fill
-  Partial
-  Returns void..
+  Y
+  
 
 
   
diff --git a/libstdc++-v3/include/bits/stl_uninitialized.h b/libstdc++-v3/include/bits/stl_uninitialized.h
index cd2a482..c864fa14 100644
--- a/libstdc++-v3/include/bits/stl_uninitialized.h
+++ b/libstdc++-v3/include/bits/stl_uninitialized.h
@@ -190,7 +190,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __uninitialized_fill_n
 {
   template
-static void
+static _ForwardIterator
 __uninit_fill_n(_ForwardIterator __first, _Size __n,
 			const _Tp& __x)
 {
@@ -199,6 +199,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  for (; __n > 0; --__n, ++__cur)
 		std::_Construct(std::__addressof(*__cur), __x);
+	  return __cur;
 	}
 	  __catch(...)
 	{
@@ -212,12 +213,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __uninitialized_fill_n
 {
   template
-static void
+static _ForwardIterator
 __uninit_fill_n(_ForwardIterator __first, _Size __n,
 			const _Tp& __x)
-{ std::fill_n(__first, __n, __x); }
+{ return std::fill_n(__first, __n, __x); }
 };
 
+   // _GLIBCXX_RESOLVE_LIB_DEFECTS
+   // DR 1339. uninitialized_fill_n should return the end of its range
   /**
*  @brief Copies the value x into the range [first,first+n).
*  @param  __first  An input iterator.
@@ -228,7 +231,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  Like fill_n(), but does not require an initialized output range.
   */
   template
-inline void
+inline _ForwardIterator
 uninitialized_fill_n(_ForwardIterator __first, _Size __n, const _Tp& __x)
 {
   typedef typename iterator_traits<_ForwardIterator>::value_type
@@ -239,8 +242,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   // trivial types can have deleted assignment
   const bool __assignable = is_copy_assignable<_ValueType>::value;
 #endif
-
-  std::__uninitialized_fill_n<__is_trivial(_ValueType) && __assignable>::
+  return __uninitialized_fill_n<__is_trivial(_ValueType) && __assignable>::
 	__uninit_fill_n(__first, __n, __x);
 }
 
@@ -328,7 +330,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
-void
+_ForwardIterator
 __uninitialized_fill_n_a(_ForwardIterator __first, _Size __n, 
 			 const _Tp& __x, _Allocator& __alloc)
 {
@@ -338,6 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  typedef __gnu_cxx::__alloc_traits<_Allocator> __traits;
 	  for (; __n > 0; --__n, ++__cur)
 	__traits::construct(__alloc, std::__addressof(*__cur), __x);
+	  return __cur;
 	}
   __catch(...)
 	{
@@ -348,10 +351,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
-inline void
+inline _ForwardIterator
 __uninitialized_fill_n_a(_ForwardIterator __first, _Size __n, 
 			 const _Tp& __x, allocator<_Tp2>&)
-{ std::uninitialized_fill_n(__first, __n, __x); }
+{ return std::uninitialized_fill_n(__first, __n, __x); }
 
 
   // Extensions: __uninitialized_copy_move, __uninitialized_move_copy,
@@ -505,7 +508,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __uninitialized_default_n_1
 {
   template
-static void
+static _ForwardIterator
 __uninit_default_n(_ForwardIterator __first, _Size __n)
 {
 	  _ForwardIterator __cur = __first;
@@ -513,6 +516,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{
 	  for (; __n > 0; --__n, ++__cur)
 		std::_Construct(std::__addressof(*__cur));
+	  return __cur;
 	}
 	  __catch(...)
 	{
@@ -526,13 +530,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 struct __uninitialized_default_n_1
 {
   template
-   

Re: [AArch64] Tighten predicates on SIMD shift intrinsics

2014-09-25 Thread Richard Henderson
On 09/25/2014 08:05 AM, James Greenhalgh wrote:
> 
> On Fri, Sep 19, 2014 at 05:57:06PM +0100, Richard Henderson wrote:
>> On 09/11/2014 01:29 AM, James Greenhalgh wrote:
>>> +;; Predicates used by the various SIMD shift operations.  These
>>> +;; fall in to 3 categories.
>>> +;;   Shifts with a range 0-(bit_size - 1) (aarch64_simd_shift_imm)
>>> +;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
>>> +;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
>>> +(define_predicate "aarch64_simd_shift_imm_qi"
>>> +  (and (match_code "const_int")
>>> +   (match_test "aarch64_simd_const_bounds (op, 0, 7)")))
>>
>> The function call should be removed and this should be written as
>>
>>   (match_test "IN_RANGE (ival, 0, 7)")
>>
> 
> Quite right, updated as attached.
> 
> Cross-tested for aarch64-none-elf with no issues.
> 
> OK?
> 
> Thanks,
> James
> 
> ---
> gcc/
> 
> 2014-09-25  James Greenhalgh  
> 
>   * config/aarch64/aarch64-protos.h (aarch64_simd_const_bounds): Delete.
>   * config/aarch64/aarch64-simd.md (aarch64_qshl): Use
>   new predicates.
>   (aarch64_shll2_n): Likewise.
>   (aarch64_shr_n): Likewise.
>   (aarch64_sra_n): Likewise.
>   (aarch64_si_n): Likewise.
>   (aarch64_qshl_n): Likewise.
>   * config/aarch64/aarch64.c (aarch64_simd_const_bounds): Delete.
>   * config/aarch64/iterators.md (ve_mode): New.
>   (offsetlr): Remap to infix text for use in new predicates.
>   * config/aarch64/predicates.md (aarch64_simd_shift_imm_qi): New.
>   (aarch64_simd_shift_imm_hi): Likewise.
>   (aarch64_simd_shift_imm_si): Likewise.
>   (aarch64_simd_shift_imm_di): Likewise.
>   (aarch64_simd_shift_imm_offset_qi): Likewise.
>   (aarch64_simd_shift_imm_offset_hi): Likewise.
>   (aarch64_simd_shift_imm_offset_si): Likewise.
>   (aarch64_simd_shift_imm_offset_di): Likewise.
>   (aarch64_simd_shift_imm_bitsize_qi): Likewise.
>   (aarch64_simd_shift_imm_bitsize_hi): Likewise.
>   (aarch64_simd_shift_imm_bitsize_si): Likewise.
>   (aarch64_simd_shift_imm_bitsize_di): Likewise.

Looks good to me.


r~


Re: [AArch64] Tighten predicates on SIMD shift intrinsics

2014-09-25 Thread Marcus Shawcroft
On 25 September 2014 16:18, Richard Henderson  wrote:
> On 09/25/2014 08:05 AM, James Greenhalgh wrote:
>>
>> On Fri, Sep 19, 2014 at 05:57:06PM +0100, Richard Henderson wrote:
>>> On 09/11/2014 01:29 AM, James Greenhalgh wrote:
>>>> +;; Predicates used by the various SIMD shift operations.  These
>>>> +;; fall in to 3 categories.
>>>> +;;   Shifts with a range 0-(bit_size - 1) (aarch64_simd_shift_imm)
>>>> +;;   Shifts with a range 1-bit_size (aarch64_simd_shift_imm_offset)
>>>> +;;   Shifts with a range 0-bit_size (aarch64_simd_shift_imm_bitsize)
>>>> +(define_predicate "aarch64_simd_shift_imm_qi"
>>>> +  (and (match_code "const_int")
>>>> +   (match_test "aarch64_simd_const_bounds (op, 0, 7)")))
>>>
>>> The function call should be removed and this should be written as
>>>
>>>   (match_test "IN_RANGE (ival, 0, 7)")
>>>
>>
>> Quite right, updated as attached.
>>
>> Cross-tested for aarch64-none-elf with no issues.
>>
>> OK?

OK /Marcus


Re: Avoid privatization of TLS variables

2014-09-25 Thread Jan Hubicka
> On Wed, Sep 24, 2014 at 6:58 PM, Jan Hubicka  wrote:
> >
> >b:   00 00
> > 9: R_X86_64_TPOFF32 
> > __gcov_indirect_call_counters_ltopriv
> 
> Look at the .o file where __gcov_indirect_call_counters_ltopriv is
> defined.  That .o file must have the symbol marked as STT_TLS and it
> must be defined in a section with the SHF_TLS flag.  If that is not
> true, then that is your problem.

The variable is common, so all the object files define it the same way as the
.o file I attached.  Note that the .o file links just fine outside of the
LTO machinery.

Honza
> 
> Ian


[jit] Expose choose_tmpdir and use it when building tmpdir for jit compilation

2014-09-25 Thread David Malcolm
On Tue, 2014-09-23 at 23:27 +, Joseph S. Myers wrote:
[...]
> The code for compiling a .s file should:
> 
> * use choose_tmpdir from libiberty rather than hardcoding /tmp (or, 
> better, create the files directly with make_temp_file, and delete them 
> individually afterwards);
[...]

I believe that a tempdir is better than creating individual tempfiles:
for debugging all this it's handy to have dumpfiles, and if they're
enabled, they're written out relative to the supposed source file.
So if we have
  SOME_PATH_TO/fake.c
we get e.g.
  SOME_PATH_TO/fake.c.016t.ssa
and so on.  The simplest way to clean this all up seems to be to put
everything related to a compile in a tempdir, and to create that tempdir
as securely as we can.  The tempdir is deleted in the destructor for
the playback::context (unless GCC_JIT_BOOL_OPTION_KEEP_INTERMEDIATES
has been set).

So I went with the choose_tmpdir approach, to avoid hardcoding "/tmp/",
although it wasn't previously exposed in libiberty.h.

I've committed the following to branch dmalcolm/jit:

Expose choose_tmpdir and use it when building tmpdir for jit

compilation:

Fix the return type of libiberty's choose_tmpdir.

Expose it in libiberty.h

Use it within the JIT's playback::context::compile when building the
tempdir to avoid hardcoding "/tmp/".

gcc/jit/ChangeLog.jit:
* internal-api.c (make_tempdir_path_template): New.
(gcc::jit::playback::context::compile): Call
make_tempdir_path_template to make m_path_template, rather than
hardcoding "/tmp/" within "/tmp/libgccjit-XX".

include/ChangeLog.jit:
* libiberty.h (choose_tmpdir): New prototype.
* ChangeLog.jit: New.

libiberty/ChangeLog.jit:
* choose-temp.c (choose_tmpdir): Remove now-redundant local
copy of prototype.
* functions.texi: Regenerate.
* make-temp-file.c (choose_tmpdir): Convert return type from
char * to const char * - given that this returns a pointer to
a memoized allocation, the caller must not touch it.
---
 gcc/jit/ChangeLog.jit  |  7 +++
 gcc/jit/internal-api.c | 40 +++-
 include/ChangeLog.jit  | 11 +++
 include/libiberty.h|  5 +
 libiberty/ChangeLog.jit|  9 +
 libiberty/choose-temp.c|  1 -
 libiberty/functions.texi   | 13 ++---
 libiberty/make-temp-file.c |  4 ++--
 8 files changed, 79 insertions(+), 11 deletions(-)
 create mode 100644 include/ChangeLog.jit

diff --git a/gcc/jit/ChangeLog.jit b/gcc/jit/ChangeLog.jit
index b4700e4..d66203a 100644
--- a/gcc/jit/ChangeLog.jit
+++ b/gcc/jit/ChangeLog.jit
@@ -1,3 +1,10 @@
+2014-09-25  David Malcolm  
+
+   * internal-api.c (make_tempdir_path_template): New.
+   (gcc::jit::playback::context::compile): Call
+   make_tempdir_path_template to make m_path_template, rather than
+   hardcoding "/tmp/" within "/tmp/libgccjit-XX".
+
 2014-09-24  David Malcolm  
 
* docs/internals/index.rst ("Overview of code structure"): Add
diff --git a/gcc/jit/internal-api.c b/gcc/jit/internal-api.c
index 32fe7cb..50fd83b 100644
--- a/gcc/jit/internal-api.c
+++ b/gcc/jit/internal-api.c
@@ -4826,6 +4826,44 @@ block (function *func,
   m_label_expr = NULL;
 }
 
+/* Construct a tempdir path template suitable for use by mkdtemp
+   e.g. "/tmp/libgccjit-XX", but respecting the rules in
+   libiberty's choose_tempdir rather than hardcoding "/tmp/".
+
+   The memory is allocated using malloc and must be freed.
+   Aborts the process if allocation fails. */
+
+static char *
+make_tempdir_path_template ()
+{
+  const char *tmpdir_buf;
+  size_t tmpdir_len;
+  const char *file_template_buf;
+  size_t file_template_len;
+  char *result;
+
+  /* The result of choose_tmpdir is a cached buffer within libiberty, so
+ we must *not* free it.  */
+  tmpdir_buf = choose_tmpdir ();
+
+  /* choose_tmpdir aborts on malloc failure.  */
+  gcc_assert (tmpdir_buf);
+
+  tmpdir_len = strlen (tmpdir_buf);
+  /* tmpdir_buf should now have a dir separator as the final byte.  */
+  gcc_assert (tmpdir_len > 0);
+  gcc_assert (tmpdir_buf[tmpdir_len - 1] == DIR_SEPARATOR);
+
+  file_template_buf = "libgccjit-XX";
+  file_template_len = strlen (file_template_buf);
+
+  result = XNEWVEC (char, tmpdir_len + file_template_len + 1);
+  strcpy (result, tmpdir_buf);
+  strcpy (result + tmpdir_len, file_template_buf);
+
+  return result;
+}
+
 /* Compile a playback::context:
 
- Use the context's options to cconstruct command-line options, and
@@ -4845,7 +4883,7 @@ compile ()
   const char *fake_args[20];
   unsigned int num_args;
 
-  m_path_template = xstrdup ("/tmp/libgccjit-XX");
+  m_path_template = make_tempdir_path_template ();
   if (!m_path_template)
 return NULL;
 
diff --git a/include/ChangeLog.jit b/include/ChangeLog.jit
new file mode 100644
index 000..84acd33
--- /dev/null
+++ b/include/ChangeLog.jit
@

Re: [PATCH 2/14][Vectorizer] Make REDUC_xxx_EXPR tree codes produce a scalar result

2014-09-25 Thread Alan Lawrence

Many thanks indeed! :)

--Alan

Segher Boessenkool wrote:

On Wed, Sep 24, 2014 at 04:02:11PM +0100, Alan Lawrence wrote:
However my CompileFarm account is still pending, so to that end, if you 
were able to test patch 2/14 (attached inc. Richie's 
s/VIEW_CONVERT_EXPR/NOP_EXPR/) on the CompileFarm PowerPC machine, that'd 
be great, many thanks indeed. It should apply on its own without patch 1. 


Patch 2/14 on its own has no regressions on gcc110 (powerpc64-linux,
c,c++,fortran, -m64,-m32,-m32/-mpowerpc64,-m64/-mlra).

Cheers,


Segher






Re: Commit: SH: Fix typo in atomic fetch sequence

2014-09-25 Thread Oleg Endo
On Thu, 2014-09-25 at 17:23 +0200, Oleg Endo wrote:
> On Thu, 2014-09-25 at 16:04 +0100, Nick Clifton wrote:
> > Hi Alex, Hi Kaz, Hi Oleg,
> > 
> >   I am applying the patch below as an obvious fix for a typo in the
> >   instruction sequence of the atomic_fetch_nand_soft_imask
> >   pattern.  I hope that this is OK with you.
> 
> Yes, ouch.  Thanks!
> I'll backport it to 4.8 and 4.9.

It turned out there was another such ldc vs stc typo.  Fixed on trunk
with r215607.  Fixed both typos on 4.9 with r215608 and on 4.8 with
r215609.

Cheers,
Oleg
Index: gcc/config/sh/sync.md
===
--- gcc/config/sh/sync.md	(revision 215608)
+++ gcc/config/sh/sync.md	(revision 215609)
@@ -903,7 +903,7 @@
 	 "	and	%0,%3"			"\n"
 	 "	not	%3,%3"			"\n"
 	 "	mov.	%3,@%1"		"\n"
-	 "	stc	%4,sr";
+	 "	ldc	%4,sr";
 }
   [(set_attr "length" "20")])
 
@@ -1353,7 +1353,7 @@
 	 "	ldc	r0,sr"		"\n"
 	 "	mov.b	@%0,r0"		"\n"
 	 "	mov.b	%1,@%0"		"\n"
-	 "	stc	%2,sr"		"\n"
+	 "	ldc	%2,sr"		"\n"
 	 "	tst	r0,r0";
 }
   [(set_attr "length" "16")])


Re: [PATCH/RFC v2 3/14] Add new optabs for reducing vectors to scalars

2014-09-25 Thread Alan Lawrence
Well, even with that C source, you'd need to be careful and ensure that the
vectorized loop never went round more than once, or else the additions within 
the loop would be performed in 8 bits, different from the final reduction...


So: original patch with updated commenting attached...Segher, is there any 
chance you could test this on powerpc too? (in combination with patch 2/14, 
which will need to be applied first; you can skip patch 1, and >=4.)


--Alan

Richard Biener wrote:

On Thu, Sep 25, 2014 at 4:32 PM, Alan Lawrence  wrote:

Ok, so, I've tried making reduc_plus optab take two modes: that of the
vector to reduce, and the result; thus allowing platforms to provide a
widening reduction. However, I'm keeping reduc_[us](min|max)_optab with only
a single mode, as widening makes no sense there.

I've not gone as far as making the vectorizer use any such widening
reduction, however: as previously stated, I'm not really sure what the input
source code for that even looks like (maybe in a language other than C?). If
we wanted to do a non-widening reduction using such an instruction (by
discarding the extra bits), strikes me the platform can/should provide a
non-widening optab for that case...


I expect it to apply to sth like

int foo (char *in, int n)
{
   int res = 0;
   for (int i = 0; i < n; ++i)
 res += *in;
   return res;
}

where you'd see

  temc = *in;
  tem = (int)temc;
  res += tem;

we probably handle this by widening the chars to ints and unrolling
the loop enough to make that work (thus for n == 16 it would maybe
fail to vectorize?).  It should be more efficient to pattern-detect
this as widening reduction.


Testing: bootstrapped on x86_64 linux + check-gcc; cross-tested
aarch64-none-elf check-gcc; cross-tested aarch64_be-none-elf aarch64.exp +
vect.exp.

So, my feeling is that the extra complexity here doesn't really buy us
anything; and that if we do want to support / use widening reductions in the
future, we should do so with a separate, reduc_plus_widen... optab, and
stick with the original patch/formulation for now. (In other words: this
patch is a guide to how I think a dual-mode reduc_plus_optab looks, but I
don't honestly like it!).

If you agree, I shall transplant the comments on scalar_reduc_to_vector from
this patch into the original, and then post that revised version?


I agree.  We can come back once a target implements such widening
reduction.

Richard.


Cheers, Alan


Richard Biener wrote:

On Mon, Sep 22, 2014 at 3:26 PM, Alan Lawrence 
wrote:

Richard Biener wrote:


scalar_reduc_to_vector misses a comment.


Ok to reuse the comment in optabs.h in optabs.c also?


Sure.


I wonder if at the end we wouldn't transition all backends and then
renaming reduc_*_scal_optab back to reduc_*_optab makes sense.


Yes, that sounds like a plan, the _scal is a bit of a mouthful.


The optabs have only one mode - I wouldn't be surprised if an ISA
invents for example v4si -> di reduction?  So do we want to make
reduc_plus_scal_optab a little bit more future proof (maybe there
is already an ISA that supports this kind of reduction?).


That sounds like a plausible thing for an ISA to do, indeed. However
given
these names are only used by the autovectorizer rather than directly, the
question is what the corresponding source code looks like, and/or what
changes to the autovectorizer we might have to make to (look for code to)
exploit such an instruction.


Ah, indeed.  Would be sth like a REDUC_WIDEN_SUM_EXPR or so.


At this point I could go for a
reduc_{plus,min_max}_scal_ which reduces from the first
vector
mode to the second scalar mode, and then make the vectorizer look only
for
cases where the second mode was the element type of the first; but I'm
not
sure I want to do anything more complicated than that at this stage.
(However, indeed it would leave the possibility open for the future.)


Yeah, agreed.  For the min/max case a widen variant isn't useful anyway.

Thanks,
Richard.


--Alan



diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 80e8bd6..84e5261 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4783,29 +4783,48 @@ it is unspecified which of the two operands is returned as the result.
 @cindex @code{reduc_smax_@var{m}} instruction pattern
 @item @samp{reduc_smin_@var{m}}, @samp{reduc_smax_@var{m}}
 Find the signed minimum/maximum of the elements of a vector. The vector is
-operand 1, and the scalar result is stored in the least significant bits of
+operand 1, and the result is stored in the least significant bits of
 operand 0 (also a vector). The output and input vector should have the same
-modes.
+modes. These are legacy optabs, and platforms should prefer to implement
+@samp{reduc_smin_scal_@var{m}} and @samp{reduc_smax_scal_@var{m}}.
 
 @cindex @code{reduc_umin_@var{m}} instruction pattern
 @cindex @code{reduc_umax_@var{m}} instruction pattern
 @item @samp{reduc_umin_@var{m}}, @samp{reduc_umax_@var{m}}
 Find the unsigned minimum/maximum of the elements of

Re: [PATCH, 2/2] shrink wrap a function with a single loop: split live_edge

2014-09-25 Thread Jeff Law

On 09/25/14 09:04, Jiong Wang wrote:


On 25/09/14 09:52, Zhenqiang Chen wrote:



-Original Message-
From: Jiong Wang [mailto:jiong.w...@arm.com]
Sent: Thursday, September 25, 2014 2:13 AM
To: Jeff Law; Zhenqiang Chen
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH, 2/2] shrink wrap a function with a single loop:
split
live_edge


On 22/09/14 18:51, Jeff Law wrote:

On 09/22/14 04:24, Jiong Wang wrote:

Great.  Can you send an updated patchkit for review.

patch attached.

please review, thanks.

gcc/ * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the
live-in of new created BB as the intersection of live-in from
"old_dest" and live-out from "bb".

Looks good.  However, before committing we need a couple things.

1. Bootstrap & regression test this variant of the patch.  I know you
tested an earlier one, but please test this one just to be sure.

2. Testcase.  I think you could test for either the reduction in the
live-in set of the newly created block or that you're shrink wrapping
one or more functions you didn't previously shrink-wrap.  I think it's
fine if this test is target specific.

   bootstrap ok based on revision 215515.

   while the x86 regression result is interesting. there is no
regression on
check-g++, while there are four regressions on check-gcc:

FAIL: gcc.dg/tree-ssa/loadpre10.c (internal compiler error)
FAIL: gcc.dg/tree-ssa/loadpre10.c (test for excess errors)
FAIL: gcc.dg/tree-ssa/pr21417.c (internal compiler error)
FAIL: gcc.dg/tree-ssa/pr21417.c (test for excess errors)

this is caused by our improving the accuracy of live-in for new
created basic
block. Now we will split
more than one edge for the above two testcase. thus trigger the
following
assert in move_insn_for_shrink_wrap:

/* We should not split more than once for a function.  */
gcc_assert (!(*split_p));

According to the algorithm, it is impossible to split one edge twice.
It's possible to split two different edges. But for such cases, the
control flow is too complex to perform shrink-wrapping.

Anyway, your patch improves the accuracy. You can replace the
"gcc_assert" to "return"; or change "split_p" to "splitted_edge" then
you can check one edge is not splitted twice.


thanks for the explanation.

actually, the old "bitmap_copy (df_get_live_in (next_block),
df_get_live_out (bb));" will let any "dest" reg
in entry block alive in the new splitted block. If there is another
block which "dest" also set in live_in, then
dest alive in two blocks, then those code in "live_edge_for_reg" will
always return NULL, thus the old
inaccurate data flow will actually never make split two different edges
happen... thus assert never triggered.

as from the whole x86 bootstrap, and regression test, only two cases
trigger split two different edges, I think it's
a trivial case, thus prefer to be conservative to keep the old logic, as
suggested, just replace "gcc_assert" into "return false".

or if we want to allow multi split, I think just remove the assert is
OK, because "EDGE_COUNT (next_block->preds) == 2"
will guarantee split one edge twice never happen.

new patch updated.

pass bootstrap and no regression, both check-gcc and check-g++, on the x86.

OK for trunk?

thanks.

gcc/
* shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in of
new created BB as the intersection of live-in from "old_dest" and
live-out
from "bb".

Please include a ChangeLog entry for the testsuite.  Something like:

* gcc.target/i386/shrink_wrap_1.c: New test.

With that addition, OK for the trunk.

Jeff




Re: Avoid privatization of TLS variables

2014-09-25 Thread Ian Lance Taylor
On Thu, Sep 25, 2014 at 8:37 AM, H.J. Lu  wrote:
> On Thu, Sep 25, 2014 at 8:24 AM, Ian Lance Taylor  wrote:
>> On Wed, Sep 24, 2014 at 6:58 PM, Jan Hubicka  wrote:
>>>
>>>b:   00 00
>>> 9: R_X86_64_TPOFF32 
>>> __gcov_indirect_call_counters_ltopriv
>>
>> Look at the .o file where __gcov_indirect_call_counters_ltopriv is
>> defined.  That .o file must have the symbol marked as STT_TLS and it
>> must be defined in a section with the SHF_TLS flag.  If that is not
>> true, then that is your problem.
>
> SHF_TLS isn't required.
>
> 16: 0008 8 TLS GLOBAL HIDDEN   COM
> __gcov_indirect_call_counters_ltopriv
> 17: 0008 8 TLS GLOBAL HIDDEN   COM
> __gcov_indirect_call_callee_ltopriv
>
> are also sufficient.

I can create a .o file with a hidden common symbol, but I can't
recreate the problem.  When I try, gold creates a TLS section and TLS
segment itself.

How exactly is gold being invoked?

Ian


Re: Avoid privatization of TLS variables

2014-09-25 Thread Jan Hubicka
> On Thu, Sep 25, 2014 at 8:37 AM, H.J. Lu  wrote:
> > On Thu, Sep 25, 2014 at 8:24 AM, Ian Lance Taylor  wrote:
> >> On Wed, Sep 24, 2014 at 6:58 PM, Jan Hubicka  wrote:
> >>>
> >>>b:   00 00
> >>> 9: R_X86_64_TPOFF32 
> >>> __gcov_indirect_call_counters_ltopriv
> >>
> >> Look at the .o file where __gcov_indirect_call_counters_ltopriv is
> >> defined.  That .o file must have the symbol marked as STT_TLS and it
> >> must be defined in a section with the SHF_TLS flag.  If that is not
> >> true, then that is your problem.
> >
> > SHF_TLS isn't required.
> >
> > 16: 0008 8 TLS GLOBAL HIDDEN   COM
> > __gcov_indirect_call_counters_ltopriv
> > 17: 0008 8 TLS GLOBAL HIDDEN   COM
> > __gcov_indirect_call_callee_ltopriv
> >
> > are also sufficient.
> 
> I can create a .o file with a hidden common symbol, but I can't
> recreate the problem.  When I try, gold creates a TLS section and TLS
> segment itself.
> 
> How exactly is gold being invoked?

It seems to happen with LTO compilation only, just build mainline tree
and try the original testcase.

Honza
> 
> Ian


Re: [PATCH][AArch64] Fix PR63293

2014-09-25 Thread Jiong Wang


On 19/09/14 15:35, Wilco Dijkstra wrote:

Jiong Wang wrote:

when generating instructions to access local variable, for example a local 
array,

if the array size very big, then we need a temp reg to keep the intermediate 
index,
then use that temp reg as base reg, so that ldr is capable of indexing the 
element.

while this will cause trouble, because the introduce of temp reg break the 
dependence
between the stack variable access and stack adjustment instructions which is 
unsafe
when signal trampoline executed.

this patch add barrier before stack adjustment in epilogue.

ok for trunk?

I believe you need more barriers. Ie. for all SP modifying instructions 
(including ldp
with writeback) except for ones that just remove the outgoing arguments. You 
can avoid
emitting barriers if alloca is not used and there are no locals in the frame 
(common case).

Basically without that any memory access that may alias with the locals could be
scheduled incorrectly. It seems odd that the scheduler does not understand this 
by
default.


thanks for pointing this out.

patch updated, please review.


2014-09-25  Jiong Wang  
2014-09-25  Wilco Dijkstra  

gcc/
  PR target/63293
  * config/aarch64/aarch64.c (aarch64_expand_epilogue): Add barriers before 
stack adjustment.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 023f9fd..d258425 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2331,6 +2331,9 @@ aarch64_expand_epilogue (bool for_sibcall)
   HOST_WIDE_INT fp_offset;
   HOST_WIDE_INT hard_fp_offset;
   rtx_insn *insn;
+  /* We need to add memory barrier to prevent read from deallocated stack.  */
+  bool need_barrier_p = (get_frame_size () != 0
+			 || cfun->machine->frame.saved_varargs_size);
 
   aarch64_layout_frame ();
 
@@ -2365,6 +2368,9 @@ aarch64_expand_epilogue (bool for_sibcall)
   if (frame_pointer_needed
   && (crtl->outgoing_args_size || cfun->calls_alloca))
 {
+  if (cfun->calls_alloca)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
   insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
    hard_frame_pointer_rtx,
    GEN_INT (0)));
@@ -2391,6 +2397,9 @@ aarch64_expand_epilogue (bool for_sibcall)
   aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
 skip_wb, &cfi_ops);
 
+  if (need_barrier_p)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
   if (skip_wb)
 	{
 	  enum machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
@@ -2431,6 +2440,9 @@ aarch64_expand_epilogue (bool for_sibcall)
 
   if (frame_size > 0)
 {
+  if (need_barrier_p)
+	emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+
   if (frame_size >= 0x100)
 	{
 	  rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);

Re: [PATCH] Fix typo in comments

2014-09-25 Thread Jeff Law

On 09/25/14 08:04, Felix Yang wrote:

Please apply this patch if OK for trunk. Thanks.

[ ... ]

Index: gcc/ChangeLog
===
--- gcc/ChangeLog(revision 215598)
+++ gcc/ChangeLog(working copy)
@@ -1,3 +1,8 @@
+2014-09-25  Felix Yang  
+
+* lra.c (lra_set_insn_recog_data): Fix typo in comment.
+* genautomata.c (merge_states): Ditto.

Thanks.  Installed.
jeff



[COMMITTED][PATCH, 2/2] shrink wrap a function with a single loop: split live_edge

2014-09-25 Thread Jiong Wang


On 25/09/14 17:24, Jeff Law wrote:

On 09/25/14 09:04, Jiong Wang wrote:

new patch updated.

pass bootstrap and no regression, both check-gcc and check-g++, on the x86.

OK for trunk?

thanks.

gcc/
 * shrink-wrap.c (move_insn_for_shrink_wrap): Initialize the live-in of
 new created BB as the intersection of live-in from "old_dest" and
live-out
 from "bb".

Please include a ChangeLog entry for the testsuite.  Something like:

* gcc.target/i386/shrink_wrap_1.c: New test.

With that addition, OK for the trunk.


committed as r215611.

-- Jiong



Jeff








Re: [Patch, Fortran] Add CO_BROADCAST

2014-09-25 Thread Alessandro Fanfarillo
Hi, I'm implementing the co_broadcast on libcafmpi right now.

2014-09-25 8:08 GMT+02:00 Tobias Burnus :
> Hi Paul,
>
> Paul Richard Thomas wrote:
>>
>> In the check.c error messages, you use 'A argument'.  Should you not
>> use 'SOURCE argument', following CO BROADCAST (SOURCE, SOURCE IMAGE [,
>> STAT, ERRMSG]) ? I am looking at WG5/N1983 - is there some more recent
>> proposal?
>
>
> Looking at N2027, I see "A, SOURCE_IMAGE [, STAT, ERRMSG]". It might be that
> J3/WG5 decided that SOURCE= is a bad name on all but one image as it would
> for all other images a DESTINATION.
>
> Regarding the references: I try to keep
> https://gcc.gnu.org/wiki/GFortranStandards up to date; I think the latest
> draft is N2027:
> http://isotc.iso.org/livelink/livelink?func=ll&objId=16769292&objAction=Open
>
> Thanks for cross checking!
>
>> When do you intend to implement a _gfortran_caf_co_broadcast that does
>> something?
>
>
> Well, the current libgfortran/caf/single.c is fully compliant - for a single
> image. (Ignoring allocatable components and the lacking finalization.)
>
> I intend to leave the MPI and GASNet implementation to Alessandro, unless I
> feel really tempted to do it.
>
>> Anyway, the patch is OK for trunk.
>
>
> Thanks for the review! I committed the unmodified patch as Rev. 215579.
>
> Tobias
>
>
>> On 20 September 2014 16:09, Tobias Burnus  wrote:
>>>
>>> This patch adds a CO_BROADCAST and prepares a bit for CO_REDUCE.
>>>
>>> Both functions permit arguments with allocatable components
>>> (nonpolymorphic
>>> or polymorphic), CO_BROADCAST also permits polymorphic arguments. This
>>> patch
>>> doesn't support allocatable/polymorphic arguments but otherwise
>>> CO_BROADCAST
>>> should work. For CO_REDUCE only some parsing/argument checking is done
>>> but
>>> no actual implementation.
>>>
>>> The allocatables make life harder for general coarray communication,
>>> broadcast and reduction and have to be implemented at some point in a
>>> clever
>>> way. I am thinking of some call-back-able function - which could also be
>>> used for OpenMP 4.x/5.0 to handle copying to threadprivate variables and
>>> for
>>> copyin/out to accelerators; the current spec handles allocatable
>>> components
>>> by creating the copying code in the middle end, but that won't work for
>>> polymorphic allocatables.
>>>
>>> For CO_REDUCE, it becomes even harder as currently any pure function
>>> works
>>> (elemental or not, passing arguments with array descriptor, as pointer or
>>> as
>>> value, having a hidden string length argument or [with C binding] not
>>> etc.
>>> Requiring packed array arguments or not, whether gfortran returns the
>>> result
>>> as value or as argument - and possibly more). There is some J3 discussion
>>> if
>>> one could narrow down the possibilities a bit. In any case, implementing
>>> co_reduce requires some thinking.
>>>
>>> The attached patch was build and regtested on x86-64-gnu-linux.
>>> OK for the trunk?
>>>
>>> Tobias
>
>


Re: [PATCH C++] - SD-6 Implementation Part 1 - __has_include.

2014-09-25 Thread Jason Merrill

On 09/01/2014 09:34 PM, Ed Smith-Rowland wrote:

(open_file_failed()): Not an error to not find a header file for
__has_include__.


Hmm, looks like this means that __has_include__ will silently return 
false if a header exists but is unreadable; I would think that we want 
it to be true (and have an error when the user tries to include it).


Jason


Re: parallel check output changes?

2014-09-25 Thread Segher Boessenkool
On Thu, Sep 25, 2014 at 08:22:29AM -0400, Andrew MacLeod wrote:
> So to be fair, I could use test_summary, but I think the concern is 
> warranted because if this inconsistent ordering can happen to PASS, I 
> would expect the same non-deterministic behaviour if those tests happen 
> to FAIL.  we just have far less FAILS so we aren't seeing it with 
> test_summary at the moment...
> 
> Aggregating all my .sum files,  I see a sampling of about 257,000 PASSs, 
> whereas I see a total of 141 FAILs.  FAILs only account for < 0.06% of 
> the output. ( I'm getting an average of about 510 mis-ordered PASSs, so 
> it only affects a small portion of them as well.)

0.24% here (2241 FAILs, 917715 PASSes).

You're seeing about 1 in 500 misordered, so if it was independent (which
of course it is not) I should see it in the FAILs already.

> I would think the output of .sum needs to be consistent from one run to 
> the next in order for test_summary to consistently report its results as 
> well.

Yes.  There also is the problem of the summaries being messed up (which
they were already before the parallelisation changes, but now the result
is much worse).

I'll have another look.


Segher


Re: [RFC PATCH, RTL]: Fix PR63348, gcc.dg/pr43670.c fail -fcompare-debug on MIPS

2014-09-25 Thread Jeff Law

On 09/24/14 13:39, Uros Bizjak wrote:

Hello!

The failure was caused by barrier detection code, which failed to
detect barrier after call insn was to be split when
NOTE_CALL_ARG_LOCATION was present. This problem caused
-fcompare-debug failure.

Digging a bit deeper, and as hinted in the PR, the handling of
barriers in try_split seems to be broken. The code is emitting extra
barrier for non-debug compiles, but it "forgets" to remove the
existing one, leading to duplicated barriers. The barrier is not
detected at all for debug build.

I have removed special handling of barriers here (also, the comment in
removed code was not helpful at all), and this solved -fcompare-debug
failure.

The patch was also bootstrapped and regression tested on
x86_64-linux-gnu {,-m32} which in -m32 mode splits x87 FP jump insns,
and there were no regressions. However, I am not too familiar with
rtl-optimization part and I am not confident that this code surgery is
fully correct, so this is the reason for RFC status of the patch.

2014-09-24  Uros Bizjak  

 PR rtl-optimization/63348
 * emit-rtl.c (try_split): Do not emit extra barrier.
Good grief, the code you're removing pre-dates any version control we 
have.  ie, it's in the first revision of emit-rtl.c from 1992.   Egad.


It's going to be a hell of a time figuring out why that code exists in 
the first place.  I don't like removing code if we don't know why the 
code exists...  Any reason you picked that route rather than looking 
forward through the NOTEs to see if they're followed by a suitable BARRIER?




jeff


Re: [patch] Implement move semantics for iostreams

2014-09-25 Thread Rainer Orth
Hi Jonathan,

>>it does: {i386-pc, sparc-sun}-solaris2.1[01] bootstraps completed
>>successfully with it.
>
> Great. I hope the slightly-modified version I eventually checked in
> still works too :)

it does indeed, as just verified by a i386-pc-solaris2.11 bootstrap :-)

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [AArch64] Fix predicate and constraint mismatch in logical atomic operations

2014-09-25 Thread Michael Collison

Segher,

The problem is the "CONST_INT 0", not a large constant. This constant is 
not accepted by the predicate, but is accepted by the constraint.


On 09/25/2014 03:12 AM, Segher Boessenkool wrote:

On Wed, Sep 24, 2014 at 09:17:23PM -0700, Andrew Pinski wrote:

On Wed, Sep 24, 2014 at 9:13 PM, Michael Collison
 wrote:

I have that attached to the bug report at the URL provided. I will work on a
testcase if you think it is warranted.

Yes it is almost always warranted.

https://gcc.gnu.org/contribute.html#patches

Testcases   If you cannot follow the recommendations of the GCC coding
conventions about testcases, you should include a justification for
why adequate testcases cannot be added.

See the last part of that sentence.  You don't have any justification
on why you are not including testcases.

It is very hard to make a reliable testcase for such problems, because
they only happen when register allocation is under pressure.

The problem is not that "n" allows more than your predicate does.  The
predicate allows registers too, so the compiler happily made a register
contain some big const.  Now RA comes along, is out of registers but hey,
there is this "n", let's just put the big constant there!  Carnage.

So this is hard to test for; you can add some (big) code that exposed the
problem, but in a few months time that won't trigger the problem anymore
because earlier stages in the compiler will have generated slightly
different code.

It also does nothing to catch similar problems in other patterns.


Segher


--
Michael Collison
Linaro Toolchain Working Group
michael.colli...@linaro.org



[jit] Use pex_one rather than system when assembling/linking

2014-09-25 Thread David Malcolm
On Tue, 2014-09-23 at 23:27 +, Joseph S. Myers wrote:
[...]
> The code for compiling a .s file should:
[...]
> * use libiberty's pexecute to run subprocesses, not "system" (building up 
> a string to pass to the shell always looks like a security hole, though in 
> this case it may in fact be safe);
[...]

libiberty.h has this deprecation comment about pexecute:

/* pexecute and pwait are the old pexecute interface, still here for
   backward compatibility.  Don't use these for new code.  Instead,
   use pex_init/pex_run/pex_get_status/pex_free, or pex_one.  */

so I used pex_one when eliminating the "system" callsite.

Committed to branch dmalcolm/jit:

gcc/jit/ChangeLog.jit:
* internal-api.c (gcc::jit::playback::context::compile): Use
pex_one rather than system when invoking "gcc" to go from a .s
file to a .so file.
---
 gcc/jit/ChangeLog.jit  |  6 ++
 gcc/jit/internal-api.c | 46 +-
 2 files changed, 43 insertions(+), 9 deletions(-)

diff --git a/gcc/jit/ChangeLog.jit b/gcc/jit/ChangeLog.jit
index d66203a..9cbba20 100644
--- a/gcc/jit/ChangeLog.jit
+++ b/gcc/jit/ChangeLog.jit
@@ -1,5 +1,11 @@
 2014-09-25  David Malcolm  
 
+   * internal-api.c (gcc::jit::playback::context::compile): Use
+   pex_one rather than system when invoking "gcc" to go from a .s
+   file to a .so file.
+
+2014-09-25  David Malcolm  
+
* internal-api.c (make_tempdir_path_template): New.
(gcc::jit::playback::context::compile): Call
make_tempdir_path_template to make m_path_template, rather than
diff --git a/gcc/jit/internal-api.c b/gcc/jit/internal-api.c
index 50fd83b..05ef544 100644
--- a/gcc/jit/internal-api.c
+++ b/gcc/jit/internal-api.c
@@ -4992,18 +4992,46 @@ compile ()
  We could reuse parts of gcc/gcc.c to do this.
  For now, just use the /usr/bin/gcc on the system...
*/
-  /* FIXME: totally faking it for now, not even using pex */
   {
 auto_timevar assemble_timevar (TV_ASSEMBLE);
+const char *errmsg;
+const char *argv[6];
+int exit_status = 0;
+int err = 0;
+
+argv[0] = "gcc";
+argv[1] = "-shared";
+/* The input: assembler.  */
+argv[2] = m_path_s_file;
+/* The output: shared library.  */
+argv[3] = "-o";
+argv[4] = m_path_so_file;
+/* pex argv arrays are NULL-terminated.  */
+argv[5] = NULL;
+
+errmsg = pex_one (PEX_SEARCH, /* int flags, */
+ "gcc", /* const char *executable */
+ const_cast (argv),
+ ctxt_progname, /* const char *pname */
+ NULL, /* const char *outname */
+ NULL, /* const char *errname */
+ &exit_status, /* int *status */
+ &err); /* int *err*/
+if (errmsg)
+  {
+   add_error (NULL, "error invoking gcc harness: %s", errmsg);
+   return NULL;
+  }
 
-char cmd[1024];
-snprintf (cmd, 1024, "gcc -shared %s -o %s",
-  m_path_s_file, m_path_so_file);
-if (0)
-  printf ("cmd: %s\n", cmd);
-int ret = system (cmd);
-if (ret)
-  return NULL;
+/* pex_one can return a NULL errmsg when the executable wasn't
+   found (or doesn't exist), so trap these cases also.  */
+if (exit_status || err)
+  {
+   add_error (NULL,
+  "error invoking gcc harness: exit_status: %i err: %i",
+  exit_status, err);
+   return NULL;
+  }
   }
 
   // TODO: split out assembles vs linker
-- 
1.7.11.7



Re: [PATCH C++] - SD-6 Implementation Part 2 - __has_include macro and C++ language feature macros.

2014-09-25 Thread Jason Merrill

On 09/01/2014 09:41 PM, Ed Smith-Rowland wrote:

+ /* Return type deduction was added as an extension to C++11
+and was standardized for C++14.  */
+ cpp_define (pfile, "__cpp_return_type_deduction=201304");


When I try to use it with -std=c++11 I get

wa.C:1:8: error: ‘f’ function uses ‘auto’ type specifier without 
trailing return type

 auto f() { return 42; }
^
wa.C:1:8: note: deduced return type only available with -std=c++14 or 
-std=gnu++14


Let's move it to the C++14 section.


There is one bit: arrays of runtime bound.  These got kicked out of C++14 I 
think and is languishing in a TS.
OTOH, we still support it.  It's better than the C99 version we supported.
What direction should I take?
/*  Runtime sized arrays
+have C++14 semantics even for C++98.  */


The comment is inaccurate; most of the restrictions on arrays of runtime 
bound are only applied in C++14 mode.  Let's put this macro in the C++14 
section as well until there's a value or separate macro indicating C99 
VLA support.


Jason



Re: [PATCH] microblaze: microblaze.md: Use VOID instead of SI to fix "((void (*)(void)) 0)()" issue

2014-09-25 Thread Michael Eager

On 09/25/14 07:03, Chen Gang wrote:

Need use VOID instead of SI, or when real VOIDmode comes, it does not
match SImode, so cause issue. This patch can fix this issue and pass
testsuite.


Did you forget to attach the patch?

--
Michael Eager    ea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077


Re: [PATCH] microblaze: microblaze.md: Use VOID instead of SI to fix "((void (*)(void)) 0)()" issue

2014-09-25 Thread Michael Eager

On 09/25/14 10:38, Michael Eager wrote:

On 09/25/14 07:03, Chen Gang wrote:

Need use VOID instead of SI, or when real VOIDmode comes, it does not
match SImode, so cause issue. This patch can fix this issue and pass
testsuite.


Did you forget to attach the patch?


Never mind.  My eyes were playing tricks on me.

--
Michael Eager    ea...@eagercon.com
1960 Park Blvd., Palo Alto, CA 94306  650-325-8077


Re: [PATCH C++] - SD-6 Implementation Part 2 - __has_include macro and C++ language feature macros.

2014-09-25 Thread Jason Merrill

On 09/01/2014 09:41 PM, Ed Smith-Rowland wrote:

+ cpp_define (pfile, "__cpp_attribute_deprecated=201309");


Don't we support attribute deprecated in C++11?

Jason



Re: [RFC PATCH, RTL]: Fix PR63348, gcc.dg/pr43670.c fail -fcompare-debug on MIPS

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 7:26 PM, Jeff Law  wrote:
> On 09/24/14 13:39, Uros Bizjak wrote:
>>
>> Hello!
>>
>> The failure was caused by barrier detection code, which failed to
>> detect barrier after call insn was to be split when
>> NOTE_CALL_ARG_LOCATION was present. This problem caused
>> -fcompare-debug failure.
>>
>> Digging a bit deeper, and as hinted in the PR, the handling of
>> barriers in try_split seems to be broken. The code is emitting extra
>> barrier for non-debug compiles, but it "forgets" to remove the
>> existing one, leading to duplicated barriers. The barrier is not
>> detected at all for debug build.
>>
>> I have removed special handling of barriers here (also, the comment in
>> removed code was not helpful at all), and this solved -fcompare-debug
>> failure.
>>
>> The patch was also bootstrapped and regression tested on
>> x86_64-linux-gnu {,-m32} which in -m32 mode splits x87 FP jump insns,
>> and there were no regressions. However, I am not too familiar with
>> rtl-optimization part and I am not confident that this code surgery is
>> fully correct, so this is the reason for RFC status of the patch.
>>
>> 2014-09-24  Uros Bizjak  
>>
>>  PR rtl-optimization/63348
>>  * emit-rtl.c (try_split): Do not emit extra barrier.
>
> Good grief, the code you're removing pre-dates any version control we have.
> ie, it's in the first revision of emit-rtl.c from 1992.   Egad.
>
> It's going to be a hell of a time figuring out why that code exists in the
> first place.  I don't like removing code if we don't know why the code
> exists...  Any reason you picked that route rather than looking forward
> through the NOTEs to see if they're followed by a suitable BARRIER?

I have tried with alternative patch that just skipped the NOTE:

--cut here--
Index: emit-rtl.c
===
--- emit-rtl.c  (revision 215606)
+++ emit-rtl.c  (working copy)
@@ -3622,6 +3622,10 @@ try_split (rtx pat, rtx uncast_trial, int last)
   int njumps = 0;
   rtx call_insn = NULL_RTX;

+  if (after && NOTE_P (after)
+  && NOTE_KIND (after) == NOTE_INSN_CALL_ARG_LOCATION)
+after = NEXT_INSN (after);
+
   /* We're not good at redistributing frame information.  */
   if (RTX_FRAME_RELATED_P (trial))
 return trial;
--cut here--

and resulted in:

(call_insn 184 190 185 (parallel [
(call (mem:SI (reg:SI 25 $25 [217]) [0  S4 A32])
(const_int 16 [0x10]))
(clobber (reg:SI 31 $31))
(clobber (reg:SI 28 $28))
]) pr43670.c:29 595 {call_split}
 (expr_list:REG_NORETURN (const_int 0 [0])
(nil))
(expr_list (use (reg:SI 79 $fakec))
(expr_list (use (reg:SI 28 $28))
(nil
(barrier 185 184 175)
(note 175 185 130 (expr_list:REG_DEP_TRUE (concat:SI (pc)
(unspec:SI [
(reg:SI 28 $28)
(const:SI (unspec:SI [
(symbol_ref:SI ("abort") [flags 0x41]
)
] 227))
(reg:SI 79 $fakec)
] UNSPEC_LOAD_CALL))
(nil)) NOTE_INSN_CALL_ARG_LOCATION)
(barrier 130 175 174)

I have noticed that the barrier is always there, since without -g, we have:

(call_insn 76 82 77 (parallel [
(call (mem:SI (reg:SI 25 $25 [217]) [0  S4 A32])
(const_int 16 [0x10]))
(clobber (reg:SI 31 $31))
(clobber (reg:SI 28 $28))
]) pr43670.c:29 595 {call_split}
 (expr_list:REG_NORETURN (const_int 0 [0])
(nil))
(expr_list (use (reg:SI 79 $fakec))
(expr_list (use (reg:SI 28 $28))
(nil
(barrier 77 76 37)
(barrier 37 77 40)

and considering the fact that the code didn't process barriers
correctly with -g, I simply removed the emission. The - probably
stale - comment was not helpful at all.

Uros.


Re: [RFC PATCH, RTL]: Fix PR63348, gcc.dg/pr43670.c fail -fcompare-debug on MIPS

2014-09-25 Thread Jeff Law

On 09/24/14 13:39, Uros Bizjak wrote:

Hello!

The failure was caused by barrier detection code, which failed to
detect barrier after call insn was to be split when
NOTE_CALL_ARG_LOCATION was present. This problem caused
-fcompare-debug failure.

Digging a bit deeper, and as hinted in the PR, the handling of
barriers in try_split seems to be broken. The code is emitting extra
barrier for non-debug compiles, but it "forgets" to remove the
existing one, leading to duplicated barriers. The barrier is not
detected at all for debug build.

I have removed special handling of barriers here (also, the comment in
removed code was not helpful at all), and this solved -fcompare-debug
failure.

The patch was also bootstrapped and regression tested on
x86_64-linux-gnu {,-m32} which in -m32 mode splits x87 FP jump insns,
and there were no regressions. However, I am not too familiar with
rtl-optimization part and I am not confident that this code surgery is
fully correct, so this is the reason for RFC status of the patch.

2014-09-24  Uros Bizjak  

 PR rtl-optimization/63348
 * emit-rtl.c (try_split): Do not emit extra barrier.

Patch was bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

OK, so digging deeper

I'm pretty sure this code was originally intended to cope with 
delete_insn removing any barrier which immediately followed a jump insn 
when the jump insn is removed.


ie, if TRIAL was a JUMP_INSN and it was immediately followed by a 
BARRIER.  Assume we successfully split TRIAL.  In that case we call 
delete_insn (TRIAL).  That will in turn remove the BARRIER.


At least that's how things worked in gcc-2.0 ;-)

Walking through a modern try_split, delete_insn and friends, it doesn't 
look like we'll try to remove the original BARRIER anymore.


I'm comfortable removing this ancient code as I don't believe the 
original problem that folks were solving with it can occur anymore.


Approved for the trunk.

jeff








Re: [RFC PATCH, RTL]: Fix PR63348, gcc.dg/pr43670.c fail -fcompare-debug on MIPS

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 7:44 PM, Uros Bizjak  wrote:

>>> The failure was caused by barrier detection code, which failed to
>>> detect barrier after call insn was to be split when
>>> NOTE_CALL_ARG_LOCATION was present. This problem caused
>>> -fcompare-debug failure.
>>>
>>> Digging a bit deeper, and as hinted in the PR, the handling of
>>> barriers in try_split seems to be broken. The code is emitting extra
>>> barrier for non-debug compiles, but it "forgets" to remove the
>>> existing one, leading to duplicated barriers. The barrier is not
>>> detected at all for debug build.
>>>
>>> I have removed special handling of barriers here (also, the comment in
>>> removed code was not helpful at all), and this solved -fcompare-debug
>>> failure.
>>>
>>> The patch was also bootstrapped and regression tested on
>>> x86_64-linux-gnu {,-m32} which in -m32 mode splits x87 FP jump insns,
>>> and there were no regressions. However, I am not too familiar with
>>> rtl-optimization part and I am not confident that this code surgery is
>>> fully correct, so this is the reason for RFC status of the patch.
>>>
>>> 2014-09-24  Uros Bizjak  
>>>
>>>  PR rtl-optimization/63348
>>>  * emit-rtl.c (try_split): Do not emit extra barrier.
>>
>> Good grief, the code you're removing pre-dates any version control we have.
>> ie, it's in the first revision of emit-rtl.c from 1992.   Egad.
>>
>> It's going to be a hell of a time figuring out why that code exists in the
>> first place.  I don't like removing code if we don't know why the code
>> exists...  Any reason you picked that route rather than looking forward
>> through the NOTEs to see if they're followed by a suitable BARRIER?
>
> I have tried with alternative patch that just skipped the NOTE:
>
> --cut here--
> Index: emit-rtl.c
> ===
> --- emit-rtl.c  (revision 215606)
> +++ emit-rtl.c  (working copy)
> @@ -3622,6 +3622,10 @@ try_split (rtx pat, rtx uncast_trial, int last)
>int njumps = 0;
>rtx call_insn = NULL_RTX;
>
> +  if (after && NOTE_P (after)
> +  && NOTE_KIND (after) == NOTE_INSN_CALL_ARG_LOCATION)
> +after = NEXT_INSN (after);
> +
>/* We're not good at redistributing frame information.  */
>if (RTX_FRAME_RELATED_P (trial))
>  return trial;
> --cut here--
>
> and resulted in:
>
> (call_insn 184 190 185 (parallel [
> (call (mem:SI (reg:SI 25 $25 [217]) [0  S4 A32])
> (const_int 16 [0x10]))
> (clobber (reg:SI 31 $31))
> (clobber (reg:SI 28 $28))
> ]) pr43670.c:29 595 {call_split}
>  (expr_list:REG_NORETURN (const_int 0 [0])
> (nil))
> (expr_list (use (reg:SI 79 $fakec))
> (expr_list (use (reg:SI 28 $28))
> (nil
> (barrier 185 184 175)
> (note 175 185 130 (expr_list:REG_DEP_TRUE (concat:SI (pc)
> (unspec:SI [
> (reg:SI 28 $28)
> (const:SI (unspec:SI [
> (symbol_ref:SI ("abort") [flags 0x41]
> )
> ] 227))
> (reg:SI 79 $fakec)
> ] UNSPEC_LOAD_CALL))
> (nil)) NOTE_INSN_CALL_ARG_LOCATION)
> (barrier 130 175 174)
>
> I have noticed that the barrier is always there, since without -g, we have:
>
> (call_insn 76 82 77 (parallel [
> (call (mem:SI (reg:SI 25 $25 [217]) [0  S4 A32])
> (const_int 16 [0x10]))
> (clobber (reg:SI 31 $31))
> (clobber (reg:SI 28 $28))
> ]) pr43670.c:29 595 {call_split}
>  (expr_list:REG_NORETURN (const_int 0 [0])
> (nil))
> (expr_list (use (reg:SI 79 $fakec))
> (expr_list (use (reg:SI 28 $28))
> (nil
> (barrier 77 76 37)
> (barrier 37 77 40)
>
> and considering the fact that the code didn't process barriers
> correctly with -g, I simply removed the emission. The - probably
> stale - comment was not helpful at all.

FYI, unpatched gcc created (-g):

(call_insn 184 189 175 (parallel [
(call (mem:SI (reg:SI 25 $25 [217]) [0  S4 A32])
(const_int 16 [0x10]))
(clobber (reg:SI 31 $31))
(clobber (reg:SI 28 $28))
]) pr43670.c:29 595 {call_split}
 (expr_list:REG_NORETURN (const_int 0 [0])
(nil))
(expr_list (use (reg:SI 79 $fakec))
(expr_list (use (reg:SI 28 $28))
(nil
(note 175 184 130 (expr_list:REG_DEP_TRUE (concat:SI (pc)
(unspec:SI [
(reg:SI 28 $28)
(const:SI (unspec:SI [
(symbol_ref:SI ("abort") [flags 0x41]
)
] 227))
(reg:SI 79 $fakec)
] UNSPEC_LOAD_CALL))
(nil)) NOTE_INSN_CALL_ARG_LOCATION)
(barrier 130 175 174)

so, it didn't emit barrier at all.

Uros.


Re: [PATCH i386 AVX512] [52.1/n] Add vec2mask and mask2vec insn patterns.

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 11:42 AM, Kirill Yukhin  wrote:
> Hello,
> As suggested, this is the split-out part of the [52/n] patch,
> which introduces new vec2mask and mask2vec insn patterns.
>
> As suggested, I've got rid of the use of the UNSPEC_CVTINT2MASK
> unspec. Unfortunately, only partially.
> I suppose that a generic vec2mask RTX would be too complex:
> get the most significant bit of each vec elt & compare it
> with zero, setting the destination mask register accordingly.
> If this approach is preferable, I am ready to do that.

No, we won't benefit anything from overly-complex patterns. Combine is
not _that_ smart.

> Testing in progress.
> Is it ok for trunk if pass?
>
> gcc/
> * config/i386/predicates.md (define_predicate "constm1_operand"): New.
> * config/i386/sse.md
> (define_c_enum "unspec"): Add UNSPEC_CVTINT2MASK.
> (define_insn "_cvt2mask"): 
> New.
> (define_insn "_cvt2mask"): 
> Ditto.
> (define_expand 
> "_cvtmask2"): Ditto.
> (define_insn 
> "*_cvtmask2"): Ditto.
> (define_expand 
> "_cvtmask2"): Ditto.
> (define_insn 
> "*_cvtmask2"): Ditto.

Looks reasonable.

Updated patch is OK for mainline.

Thanks,
Uros.


[debug-early] fix fortran regressions

2014-09-25 Thread Aldy Hernandez
push_cfun() fails when there's no cfun stack.  With this patch, we use 
set_cfun if no stack is available.


This fixes the 16 Fortran guality regressions.

Now guality tests all pass, for all languages.

Committed to branch.
commit a6f19a625bc6f662db6f23679503458f22721de9
Author: Aldy Hernandez 
Date:   Mon Sep 22 10:37:16 2014 -0600

* dwarf2out.c (dwarf2out_early_global_decl): Set cfun correctly
even if cfun stack is empty.

diff --git a/gcc/ChangeLog.debug-early b/gcc/ChangeLog.debug-early
index a6b9e0a..f8b1880 100644
--- a/gcc/ChangeLog.debug-early
+++ b/gcc/ChangeLog.debug-early
@@ -1,3 +1,8 @@
+2014-09-22  Aldy Hernandez  
+
+   * dwarf2out.c (dwarf2out_early_global_decl): Set cfun correctly
+   even if cfun stack is empty.
+
 2014-09-19  Aldy Hernandez  
 
* dwarf2out.c (gen_subprogram_die): Remove DW_AT_declaration even
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 48b1106..0519839 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -20844,6 +20844,7 @@ dwarf2out_early_global_decl (tree decl)
   bool save = symtab->global_info_ready;
   symtab->global_info_ready = true;
 
+  bool fndecl_was_null = false;
   /* We don't handle TYPE_DECLs.  If required, they'll be reached via
  other DECLs and they can point to template types or other things
  that dwarf2out can't handle when done via dwarf2out_decl.  */
@@ -20857,7 +20858,13 @@ dwarf2out_early_global_decl (tree decl)
  if (!DECL_STRUCT_FUNCTION (decl))
goto early_decl_exit;
 
- push_cfun (DECL_STRUCT_FUNCTION (decl));
+ if (current_function_decl)
+   push_cfun (DECL_STRUCT_FUNCTION (decl));
+ else
+   {
+ set_cfun (DECL_STRUCT_FUNCTION (decl));
+ fndecl_was_null = true;
+   }
  current_function_decl = decl;
}
   dw_die_ref die = dwarf2out_decl (decl);
@@ -20865,7 +20872,10 @@ dwarf2out_early_global_decl (tree decl)
die->dumped_early = true;
   if (TREE_CODE (decl) == FUNCTION_DECL)
{
- pop_cfun ();
+ if (fndecl_was_null)
+   set_cfun (NULL);
+ else
+   pop_cfun ();
  current_function_decl = NULL;
}
 }


Re: [PATCH i386 AVX512] [56/n] Add plus/minus/abs/neg/andnot insn patterns.

2014-09-25 Thread Uros Bizjak
On Thu, Sep 25, 2014 at 4:12 PM, Kirill Yukhin  wrote:
> Hello,
> The patch at the bottom extends plus/minus/abs/andnot patterns
> to support AVX-512.
> I've used a questionable hack in the patterns.
> Instead of writing a dozen similar patterns with masking,
> I've simply subst'ed them, prohibiting the non-mask variant in
> the pattern condition. E.g.:
> (define_expand "3"
>   [(set (match_operand:VI12_AVX512VL 0 "register_operand")
>(plusminus:VI12_AVX512VL
>   (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
>   (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")))]
>   "TARGET_AVX512BW && "
>   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
> If this is not acceptable, I'll rewrite it to something like:
> (define_expand "3"
>   [(set (match_operand:VI12_AVX512VL 0 "register_operand")
> (vec_merge: VI12_AVX512VL
>   (plusminus:VI12_AVX512VL
> (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
> (match_operand:VI12_AVX512VL 2 "nonimmediate_operand"))
>   (match_operand:SUBST_V 2 "vector_move_operand" "0C")
>   (match_operand: 3 "register_operand" "Yk")))]
>   "TARGET_AVX512BW && "
>   "ix86_fixup_binary_operands_no_copy (, mode, operands);")

I'd rather go with the second approach, it is less confusing from the
maintainer POV. All other patterns with masking use some consistent
template, so I'd suggest using the same approach for everything. If it
is indeed too many patterns, then please split the patch to smaller
pieces.

Uros.

> Testing is in progress. Is it ok for trunk if pass?
>
> Also we might want to rename VI_AVX2, but I didn't do that
> since new (generic) name would be too long. Say: VI_AVX2_AVX512BW_AVX512F.

Iterator names try to follow some logic, but we can live with that.

> gcc/
> * config/i386/sse.md (define_mode_iterator VI_AVX2): Extend
> to support AVX-512BW.
> (define_mode_iterator VI124_AVX2_48_AVX512F): Remove.
> (define_expand "3"): Remove masking support.
> (define_insn "*3"): Ditto.
> (define_expand 
> "3"):
> New.
> (define_expand 
> "3"):
> Ditto.
> (define_insn "*3"): 
> Ditto.
> (define_insn "*3"): 
> Ditto.
> (define_expand "_andnot3"): Remove masking support.
> (define_insn "*andnot3"): Ditto.
> (define_expand 
> "_andnot3"): New.
> (define_expand 
> "_andnot3"): Ditto.
> (define_insn "*andnot3"): Ditto.
> (define_insn "*andnot3"): Ditto.
> (define_insn "*abs2"): Remove masking support.
> (define_insn "abs2"): 
> New.
> (define_insn "abs2"): 
> Ditto.
> (define_expand "abs2"): Use VI_AVX2 mode iterator.
>
> --
> Thanks, K
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index ffc831f..d6861e5 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -268,8 +268,8 @@
> (V4DI "TARGET_AVX") V2DI])
>
>  (define_mode_iterator VI_AVX2
> -  [(V32QI "TARGET_AVX2") V16QI
> -   (V16HI "TARGET_AVX2") V8HI
> +  [(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
> +   (V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI
> (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
> (V8DI "TARGET_AVX512F") (V4DI "TARGET_AVX2") V2DI])
>
> @@ -359,12 +359,6 @@
>[(V16HI "TARGET_AVX2") V8HI
> (V8SI "TARGET_AVX2") V4SI])
>
> -(define_mode_iterator VI124_AVX2_48_AVX512F
> -  [(V32QI "TARGET_AVX2") V16QI
> -   (V16HI "TARGET_AVX2") V8HI
> -   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX2") V4SI
> -   (V8DI "TARGET_AVX512F")])
> -
>  (define_mode_iterator VI124_AVX512F
>[(V32QI "TARGET_AVX2") V16QI
> (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX2") V8HI
> @@ -9051,20 +9045,37 @@
>"TARGET_SSE2"
>"operands[2] = force_reg (mode, CONST0_RTX (mode));")
>
> -(define_expand "3"
> +(define_expand "3"
>[(set (match_operand:VI_AVX2 0 "register_operand")
> (plusminus:VI_AVX2
>   (match_operand:VI_AVX2 1 "nonimmediate_operand")
>   (match_operand:VI_AVX2 2 "nonimmediate_operand")))]
> -  "TARGET_SSE2 && "
> +  "TARGET_SSE2"
>"ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
> -(define_insn "*3"
> +(define_expand "3"
> +  [(set (match_operand:VI48_AVX512VL 0 "register_operand")
> +   (plusminus:VI48_AVX512VL
> + (match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
> + (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")))]
> +  "TARGET_AVX512F && "
> +  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
> +
> +(define_expand "3"
> +  [(set (match_operand:VI12_AVX512VL 0 "register_operand")
> +   (plusminus:VI12_AVX512VL
> + (match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
> + (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")))]
> +  "TARGET_AVX512BW && "
> +  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
> +
> +(define_insn "*3"
>[(set (match_operand:VI_AVX2 0 "register_ope

Re: [PATCH] Add -Wno-abi in c++ struct-layout-1 tests

2014-09-25 Thread Mike Stump
On Sep 25, 2014, at 1:02 AM, Uros Bizjak  wrote:
>>> IMO, these should be converted to dg-additional-options.
>> 
>> That would be something like following, except that compat framework doesn't
>> support dg-additional-options:
>> WARNING: compat.exp does not support dg-additional-options
> 
> Rainer, Mike - any hope here?

Someone would have to pony up the work…  The drivers are a bit different.  I 
don’t know of anyone that has that work planned.

Re: [PATCH IRA] update_equiv_regs fails to set EQUIV reg-note for pseudo with more than one definition

2014-09-25 Thread Jeff Law

On 09/24/14 06:07, Felix Yang wrote:

Hi Jeff,

 Thanks for the comments. I updated the patch adding some enhancements.
 Bootstrapped on x86_64-suse-linux. Please apply this patch if OK for trunk.

 Three points:
 1. A multiple-set register does not qualify for an equiv
note once it is marked by no_equiv. The patch is updated with
this consideration.

Correct.


 2. For the new rtx_insn_list interface, I noticed that the old
style XEXP accessor macros are still used in function no_equiv.
So I chose to use the old style macros in this patch and will
come up with another patch to fix this issue, OK?
I'd rather any new code going in use the updated interfaces.  It's 
certainly OK to have a followup patch which converts more pre-existing 
code to the new interfaces.



 3. For the case of an insn on the init_insns list which
does not have a note, I reconsidered this and found that it can
never happen. So I replaced the check with a gcc assertion.

OK.

Also, I should have asked this earlier, do you have an assignment on 
file with the FSF, or does your employer have any kind of blanket 
assignment on file with the FSF?  These changes are large enough to 
require an assignment.




Index: gcc/ira.c
===
--- gcc/ira.c(revision 215550)
+++ gcc/ira.c(working copy)
@@ -2900,6 +2900,8 @@ struct equivalence
/* Set when an attempt should be made to replace a register
   with the associated src_p entry.  */
char replace;
+  /* Set if this register has no known equivalence.  */
+  char no_equiv;
  };
As a follow-up, can you turn is_arg_equivalence, replace and no_equiv 
into boolean bitfields and turn loop_depth into a short (to match 
assumptions elsewhere in GCC).



The point is to get better packing of these objects and ultimately use 
less memory.



+
+  /* Check if it is possible that this multiple-set register has
+ a known equivalence.  */
+  if (reg_equiv[regno].no_equiv)
+continue;

This comment is a bit confusing.  Please consider something like

/* If we have already processed this pseudo and determined it
   can not have an equivalence, then honor that decision.  */


Do you have a testcase we can add to the regression suite?  If at all 
possible please include one.  An execution test would be best, but you 
could also scan the RTL for bogus REG_EQUIV notes.


Please update to use the new type and interfaces for list walking the 
init_insns list.


Finally, you need to verify your patch will bootstrap and not cause any 
regressions in the testsuite.  If you're unsure how to do that, let me know.


I think we'll be ready to go once those tasks are complete.


jeff



Re: [PATCH 2/2] Add patch for debugging compiler ICEs.

2014-09-25 Thread Jeff Law

On 09/23/14 01:14, Maxim Ostapenko wrote:



2014-09-04  Jakub Jelinek
Max Ostapenko

* common.opt: New option.
* doc/invoke.texi: Describe new option.
* gcc.c (execute): Don't free first string early, but at the end
of the function.  Call retry_ice if compiler exited with
ICE_EXIT_CODE.
(main): Factor out common code.
(print_configuration): New function.
(files_equal_p): Likewise.
(check_repro): Likewise.
(run_attempt): Likewise.
(do_report_bug): Likewise.
(append_text): Likewise.
(try_generate_repro): Likewise

Approved.  Please install.

Thanks for your patience,
Jeff



Re: [PATCH] fix hardreg_cprop to honor HARD_REGNO_MODE_OK.

2014-09-25 Thread Jeff Law

On 09/01/14 04:29, Ilya Tocar wrote:


AVX512 added new 16 xmm registers (xmm16-xmm31).
Those registers require evex encoding.
Only 512-bit wide versions of instructions have evex encoding with
avx512f, but all versions have it with avx512vl.
Most instructions have same macroized pattern for 128/256/512 vector
length. They all use constraint 'v', which corresponds to
class ALL_SSE_REGS (xmm0 - xmm31). To disallow e. g. xmm20 in
256-bit case (avx512f) and allow it only in avx512vl case we have
HARD_REGNO_MODE_OK checking for regno being evex-only and
disallowing it if mode is not 512-bit.

Generally this kind of thing has been handled by splitting the register
class into two classes.  I strongly suspect there are numerous places where
we assume that two regs in the same class are interchangeable.

I'm not sure that there are many places where we replace hard regs
without checks. E. g. in regrename we have HARD_REGNO_RENAME_OK.
As far as I understand, idea behind HARD_REGNO_RENAME_OK is that we
should always check when substituting hard reg. Why is regcprop
different, and what's the point of HARD_REGNO_MODE_OK if it is ignored
by some passes?



I realize that's going to require some work in the x86 machine description,
but I think that's going to be a much better approach and save you work in
the long run.



This will approximately double sse.md, as we will need to split all
patterns with 512-bit versions in 2 (512 and 128/256 cases) and play
games with enabling/disabling alternatives depending on flags.
Are you sure that this is better than honoring HARD_REGNO_MODE_OK?
As far as I understand, honoring  HARD_REGNO_MODE_OK shouldn't produce
worse code.
I don't see how it doubles the size.  You split the class into two 
classes.  Whatever letter your second class has, you use it in 
conjunction with 'v' that you're already using.  Note you do not need 
different alternatives, you use them in the same alternative.


It's not a question of performance, but of design.  I suspect you're 
really just at the tip of the iceberg with this stuff if you continue to 
go down the path of having registers in the same class, some of which 
are allocatable and some of which are not.


The other approach that I believe has been taken has been to mark the 
new registers as fixed when compiling for hardware where they're not 
available.  But I'm not sure offhand if that would be sufficient to fix 
this problem.



Jeff



Re: [PATCH/RFC v2 3/14] Add new optabs for reducing vectors to scalars

2014-09-25 Thread Segher Boessenkool
On Thu, Sep 25, 2014 at 05:12:24PM +0100, Alan Lawrence wrote:
> So: original patch with updated commenting attached...Segher, is there any 
> chance you could test this on powerpc too? (in combination with patch 2/14, 
> which will need to be applied first; you can skip patch 1, and >=4.)

2+3/14, tested as before, on powerpc64-linux; no regressions.

Cheers,


Segher


Re: [AArch64] Fix predicate and constraint mismatch in logical atomic operations

2014-09-25 Thread Segher Boessenkool
On Thu, Sep 25, 2014 at 10:33:17AM -0700, Michael Collison wrote:
> The problem is the "CONST_INT 0", not a large constant. This constant is 
> not accepted by the predicate, but is accepted by the constraint.

Yes, bad choice of words, sorry.  Just read "big" as "not matching the
predicate".  The point is that everything works fine until RA, and that
makes it hard to make a useful test.


Segher


p.s.  Please don't top-post.  Thanks.


Re: [PATCH] Add direct support for Linux kernel __fentry__ patching

2014-09-25 Thread Jeff Law

On 09/01/14 21:24, Andi Kleen wrote:


Having a nop area at the beginning of each function can also
be useful for other things. For example it can be used to patch
functions at runtime to point to different functions, to do
binary updates without restarting the program (like ksplice or
similar)

ISTM the black-hats would love this too...



gcc/:

2014-09-01  Andi Kleen  

* config/i386/i386.c (x86_print_call_or_nop): New function.
(x86_function_profiler): Support -mnop-mcount and
-mrecord-mcount.
* config/i386/i386.opt (-mnop-mcount, -mrecord-mcount): Add
* doc/invoke.texi: Document -mnop-mcount, -mrecord-mcount
* testsuite/gcc/gcc.target/i386/nop-mcount.c: New file.
* testsuite/gcc/gcc.target/i386/record-mcount.c: New file.

OK.  Please install.



Jeff


Re: Prepare gcc for 64-bit obstacks

2014-09-25 Thread Jeff Law

On 08/04/14 05:04, Alan Modra wrote:

Two small changes to gcc code to support newer obstacks:

1) gcc currently calls _obstack_begin, which requires some ugly casts
on alloc and free functions, and the casts will change when/if 64-bit
obstacks are available.  It's cleaner to use the convenience functions
provided in obstack.h.

2) Current upstream obstack.h makes obstack_base() return a void*,
with the result that a few places in gcc need a (char *) cast.

Bootstrapped and regression tested x86_64-linux and powerpc-linux,
with both the new obstack.{h,c} and the old versions.  OK to apply?

gcc/
* gengtype.h (obstack_chunk_alloc, obstack_chunk_free): Remove cast.
* coretypes.h (obstack_chunk_alloc, obstack_chunk_free): Likewise.
(gcc_obstack_init): Use obstack_specify_allocation in place of
_obstack_begin.
* genautomata.c (next_sep_el): Cast result of obstack_base to (char *).
(regexp_representation): Likewise.
* godump.c (go_output_type): Likewise.
gcc/java/
* mangle.c (finish_mangling): Cast result of obstack_base to (char *).
* typeck.c (build_java_argument_signature): Likewise.
(build_java_signature): Likewise.
gcc/objc/
* objc-encoding.c (encode_array): Cast result of obstack_base.
(encode_type): Likewise.
libcpp/
* symtab.c (ht_create): Use obstack_specify_allocation in place of
_obstack_begin.
* files.c (_cpp_init_files): Likewise.
* init.c (cpp_create_reader): Likewise.
* identifiers.c (_cpp_init_hashtable): Likewise.

OK for the trunk.  Sorry for the delay in getting to this.

Thanks,
Jeff



ipa-devirt TLC 8 - break out polymorphic context code

2014-09-25 Thread Jan Hubicka
Hi,
this patch moves all code dealing with polymorphic_call_context to
ipa-polymorphic-call.c in hope that things are more manageable this way.
This code contains most of the magic of analyzing GIMPLE and determining
contexts.

ipa-devirt.c still contains several essentially independent things - the ODR
logic, the type inheritance graph, the code for determining possible targets,
and, the ipa_devirt pass itself.  Perhaps it would make sense to break out
ODR and type inheritance too, but first I want to revisit the APIs.

Honza

* ipa-utils.h (subbinfo_with_vtable_at_offset, 
type_all_derivations_known_p,
type_known_to_have_no_deriavations_p, types_must_be_same_for_odr,
types_odr_comparable): Declare.
(polymorphic_type_binfo_p): Move here from ipa-devirt.c
* ipa-polymorphic-call.c: New file.
(contains_polymorphic_type_p, possible_placement_new,
ipa_polymorphic_call_context::restrict_to_inner_class,
contains_type_p, decl_maybe_in_construction_p,
ipa_polymorphic_call_context::stream_out,
ipa_polymorphic_call_context::debug,
ipa_polymorphic_call_context::stream_in,
ipa_polymorphic_call_context::set_by_decl,
ipa_polymorphic_call_context::set_by_invariant,
walk_ssa_copies,
ipa_polymorphic_call_context::ipa_polymorphic_call_context,
type_change_info, noncall_stmt_may_be_vtbl_ptr_store,
extr_type_from_vtbl_ptr_store, record_known_type
check_stmt_for_type_change,
ipa_polymorphic_call_context::get_dynamic_type): Move here from
ipa-devirt.c
* ipa-devirt.c: No longer include data-streamer.h, lto-streamer.h
and streamer-hooks.h
(contains_polymorphic_type_p, possible_placement_new,
ipa_polymorphic_call_context::restrict_to_inner_class,
contains_type_p, decl_maybe_in_construction_p,
ipa_polymorphic_call_context::stream_out,
ipa_polymorphic_call_context::debug,
ipa_polymorphic_call_context::stream_in,
ipa_polymorphic_call_context::set_by_decl,
ipa_polymorphic_call_context::set_by_invariant,
walk_ssa_copies,
ipa_polymorphic_call_context::ipa_polymorphic_call_context,
type_change_info, noncall_stmt_may_be_vtbl_ptr_store,
extr_type_from_vtbl_ptr_store, record_known_type
check_stmt_for_type_change,
ipa_polymorphic_call_context::get_dynamic_type): Move to
ipa-polymorphic-call.c
(type_all_derivations_known_p, types_odr_comparable,
types_must_be_same_for_odr): Export.
(type_known_to_have_no_deriavations_p): New function.
* Makefile.in: Add ipa-polymorphic-call.c

Index: ipa-utils.h
===
--- ipa-utils.h (revision 215614)
+++ ipa-utils.h (working copy)
@@ -74,9 +74,14 @@ tree method_class_type (const_tree);
 bool decl_maybe_in_construction_p (tree, tree, gimple, tree);
 tree vtable_pointer_value_to_binfo (const_tree);
 bool vtable_pointer_value_to_vtable (const_tree, tree *, unsigned 
HOST_WIDE_INT *);
+tree subbinfo_with_vtable_at_offset (tree, unsigned HOST_WIDE_INT, tree);
 void compare_virtual_tables (varpool_node *, varpool_node *);
+bool type_all_derivations_known_p (const_tree);
+bool type_known_to_have_no_deriavations_p (tree);
 bool contains_polymorphic_type_p (const_tree);
 void register_odr_type (tree);
+bool types_must_be_same_for_odr (tree, tree);
+bool types_odr_comparable (tree, tree);
 
 /* Return vector containing possible targets of polymorphic call E.
If COMPLETEP is non-NULL, store true if the list is complette. 
@@ -162,6 +167,21 @@ odr_type_p (const_tree t)
   return (TYPE_NAME (t)
   && (DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t;
 }
+
+/* Return true if BINFO corresponds to a type with virtual methods. 
+
+   Every type has several BINFOs.  One is the BINFO associated by the type
+   while other represents bases of derived types.  The BINFOs representing
+   bases do not have BINFO_VTABLE pointer set when this is the single
+   inheritance (because vtables are shared).  Look up the BINFO of type
+   and check presence of its vtable.  */
+
+inline bool
+polymorphic_type_binfo_p (const_tree binfo)
+{
+  /* See if BINFO's type has an virtual table associtated with it.  */
+  return BINFO_VTABLE (TYPE_BINFO (BINFO_TYPE (binfo)));
+}
 #endif  /* GCC_IPA_UTILS_H  */
 
 
Index: ipa-polymorphic-call.c
===
--- ipa-polymorphic-call.c  (revision 0)
+++ ipa-polymorphic-call.c  (revision 0)
@@ -0,0 +1,1518 @@
+/* Analysis of polymorphic call context.
+   Copyright (C) 2013-2014 Free Software Foundation, Inc.
+   Contributed by Jan Hubicka
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3

[patch, testsuite] don't use dg-run in gcc.dg/vect tests

2014-09-25 Thread Sandra Loosemore
While doing some arm-none-eabi testing, I noticed that a bunch of 
gcc.dg/vect tests were causing the target to hang from trying to execute 
code compiled with "-mfpu=neon -mfloat-abi=softfp", on a target that 
doesn't support those instructions.  And, this was caused by tests 
specifying "dg-do run" unconditionally instead of allowing the 
dg-require-effective-target mechanism to decide whether the target can 
run code compiled with the vectorization options added by vect.exp.


This patch fixes the bad tests.  OK to check in?

-Sandra


2014-09-25  Sandra Loosemore  

gcc/testsuite/
* gcc.dg/vect/pr56933.c: Don't specify "dg-do run" explicitly; use
dg-require-effective-target to decide whether the testcase can
be run.
* gcc.dg/vect/pr57705.c: Likewise.
* gcc.dg/vect/pr57741-2.c: Likewise.
* gcc.dg/vect/pr57741-3.c: Likewise.
* gcc.dg/vect/pr59591-1.c: Likewise.
* gcc.dg/vect/pr59591-2.c: Likewise.
* gcc.dg/vect/pr60196-1.c: Likewise.
* gcc.dg/vect/pr60196-2.c: Likewise.
* gcc.dg/vect/pr60276.c: Likewise.
* gcc.dg/vect/pr61680.c: Likewise.
* gcc.dg/vect/pr63148.c: Likewise.
* gcc.dg/vect/pr63189.c: Likewise.
* gcc.dg/vect/vect-mask-load-1.c: Likewise.
* gcc.dg/vect/vect-mask-loadstore-1.c: Likewise.
* gcc.dg/vect/vect-nop-move.c: Likewise.
* gcc.dg/vect/vect-simd-clone-10.c: Likewise.
* gcc.dg/vect/vect-simd-clone-12.c: Likewise.



Index: gcc/testsuite/gcc.dg/vect/pr56933.c
===
--- gcc/testsuite/gcc.dg/vect/pr56933.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr56933.c	(working copy)
@@ -1,4 +1,3 @@
-/* { dg-do run } */
 /* { dg-require-effective-target vect_double } */
 
 #include "tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/pr57705.c
===
--- gcc/testsuite/gcc.dg/vect/pr57705.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr57705.c	(working copy)
@@ -1,4 +1,3 @@
-/* { dg-do run } */
 /* { dg-require-effective-target vect_int } */
 
 #include "tree-vect.h"
Index: gcc/testsuite/gcc.dg/vect/pr57741-2.c
===
--- gcc/testsuite/gcc.dg/vect/pr57741-2.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr57741-2.c	(working copy)
@@ -1,5 +1,4 @@
 /* PR tree-optimization/57741 */
-/* { dg-do run } */
 /* { dg-require-effective-target vect_float } */
 /* { dg-additional-options "-ffast-math" } */
 
Index: gcc/testsuite/gcc.dg/vect/pr57741-3.c
===
--- gcc/testsuite/gcc.dg/vect/pr57741-3.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr57741-3.c	(working copy)
@@ -1,5 +1,4 @@
 /* PR tree-optimization/57741 */
-/* { dg-do run } */
 /* { dg-require-effective-target vect_float } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-ffast-math" } */
Index: gcc/testsuite/gcc.dg/vect/pr59591-1.c
===
--- gcc/testsuite/gcc.dg/vect/pr59591-1.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr59591-1.c	(working copy)
@@ -1,5 +1,5 @@
 /* PR tree-optimization/59591 */
-/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-fopenmp-simd" } */
 
 #ifndef CHECK_H
Index: gcc/testsuite/gcc.dg/vect/pr59591-2.c
===
--- gcc/testsuite/gcc.dg/vect/pr59591-2.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr59591-2.c	(working copy)
@@ -1,5 +1,5 @@
 /* PR tree-optimization/59591 */
-/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-fopenmp-simd" } */
 
 #ifndef CHECK_H
Index: gcc/testsuite/gcc.dg/vect/pr60196-1.c
===
--- gcc/testsuite/gcc.dg/vect/pr60196-1.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr60196-1.c	(working copy)
@@ -1,6 +1,6 @@
 /* PR tree-optimization/63189 */
 /* { dg-additional-options "-fwrapv" } */
-/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
 
 #include "tree-vect.h"
 
Index: gcc/testsuite/gcc.dg/vect/pr60196-2.c
===
--- gcc/testsuite/gcc.dg/vect/pr60196-2.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr60196-2.c	(working copy)
@@ -1,5 +1,5 @@
 /* PR tree-optimization/63189 */
-/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
 
 #include "tree-vect.h"
 
Index: gcc/testsuite/gcc.dg/vect/pr60276.c
===
--- gcc/testsuite/gcc.dg/vect/pr60276.c	(revision 215576)
+++ gcc/testsuite/gcc.dg/vect/pr60276.c	(working copy)
@@ -1,4 +1,4 @@
-/* { dg-do run } */
+/* { dg-require-effective-target vect_int } */
 
 extern void abort (vo

Re: [patch,gomp-4_0-branch] openacc parallel reduction part 1

2014-09-25 Thread Thomas Schwinge
Hi!

On Tue, 8 Jul 2014 07:28:24 -0700, Cesar Philippidis wrote:
> On 07/07/2014 02:55 AM, Thomas Schwinge wrote:
> > On Sun, 6 Jul 2014 16:10:56 -0700, Cesar Philippidis wrote:
> >> This patch is the first step to enabling parallel reductions in openacc.

> --- /dev/null
> +++ b/gcc/testsuite/c-c++-common/goacc/reduction-1.c

> +#pragma acc parallel vector_length (vl)
> +#pragma acc loop reduction (+:result)
> +  for (i = 0; i < n; i++)
> +result += array[i];
> +#pragma acc end parallel

> [...]

Committed to gomp-4_0-branch in r215617:

commit a6b46623f7543f07c9b2ebcd080d3216c6b30d69
Author: tschwinge 
Date:   Thu Sep 25 20:04:16 2014 +

Remove erroneous "#pragma acc end parallel"s.

gcc/testsuite/
* c-c++-common/goacc/reduction-1.c: Remove erroneous "#pragma acc
end parallel"s.
* c-c++-common/goacc/reduction-2.c: Likewise.
* c-c++-common/goacc/reduction-3.c: Likewise.
* c-c++-common/goacc/reduction-4.c: Likewise.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@215617 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog.gomp   | 8 
 gcc/testsuite/c-c++-common/goacc/reduction-1.c | 9 -
 gcc/testsuite/c-c++-common/goacc/reduction-2.c | 6 --
 gcc/testsuite/c-c++-common/goacc/reduction-3.c | 6 --
 gcc/testsuite/c-c++-common/goacc/reduction-4.c | 6 --
 5 files changed, 8 insertions(+), 27 deletions(-)

diff --git gcc/testsuite/ChangeLog.gomp gcc/testsuite/ChangeLog.gomp
index f350c35..786f434 100644
--- gcc/testsuite/ChangeLog.gomp
+++ gcc/testsuite/ChangeLog.gomp
@@ -1,3 +1,11 @@
+2014-09-25  Thomas Schwinge  
+
+   * c-c++-common/goacc/reduction-1.c: Remove erroneous "#pragma acc
+   end parallel"s.
+   * c-c++-common/goacc/reduction-2.c: Likewise.
+   * c-c++-common/goacc/reduction-3.c: Likewise.
+   * c-c++-common/goacc/reduction-4.c: Likewise.
+
 2014-09-23  Thomas Schwinge  
 
* c-c++-common/goacc/acc_on_device-1.c: New file.
diff --git gcc/testsuite/c-c++-common/goacc/reduction-1.c 
gcc/testsuite/c-c++-common/goacc/reduction-1.c
index cff7d2d..0f50082 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-1.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-1.c
@@ -15,14 +15,12 @@ main(void)
 #pragma acc loop reduction (+:result)
   for (i = 0; i < n; i++)
 result += array[i];
-#pragma acc end parallel
 
   /* '*' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (*:result)
   for (i = 0; i < n; i++)
 result *= array[i];
-#pragma acc end parallel
 
 //   result = 0;
 //   vresult = 0;
@@ -32,49 +30,42 @@ main(void)
 // #pragma acc loop reduction (+:result)
 //   for (i = 0; i < n; i++)
 //   result = result > array[i] ? result : array[i];
-// #pragma acc end parallel
 //
 //   /* 'min' reductions.  */
 // #pragma acc parallel vector_length (vl)
 // #pragma acc loop reduction (+:result)
 //   for (i = 0; i < n; i++)
 //   result = result < array[i] ? result : array[i];
-// #pragma acc end parallel
 
   /* '&' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (&:result)
   for (i = 0; i < n; i++)
 result &= array[i];
-#pragma acc end parallel
 
   /* '|' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (|:result)
   for (i = 0; i < n; i++)
 result |= array[i];
-#pragma acc end parallel
 
   /* '^' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (^:result)
   for (i = 0; i < n; i++)
 result ^= array[i];
-#pragma acc end parallel
 
   /* '&&' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (&&:lresult)
   for (i = 0; i < n; i++)
 lresult = lresult && (result > array[i]);
-#pragma acc end parallel
 
   /* '||' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (||:lresult)
   for (i = 0; i < n; i++)
 lresult = lresult || (result > array[i]);
-#pragma acc end parallel
 
   return 0;
 }
diff --git gcc/testsuite/c-c++-common/goacc/reduction-2.c 
gcc/testsuite/c-c++-common/goacc/reduction-2.c
index 9686b37..1f95138 100644
--- gcc/testsuite/c-c++-common/goacc/reduction-2.c
+++ gcc/testsuite/c-c++-common/goacc/reduction-2.c
@@ -15,42 +15,36 @@ main(void)
 #pragma acc loop reduction (+:result)
   for (i = 0; i < n; i++)
 result += array[i];
-#pragma acc end parallel
 
   /* '*' reductions.  */
 #pragma acc parallel vector_length (vl)
 #pragma acc loop reduction (*:result)
   for (i = 0; i < n; i++)
 result *= array[i];
-#pragma acc end parallel
 
 //   /* 'max' reductions.  */
 // #pragma acc parallel vector_length (vl)
 // #pragma acc loop reduction (+:result)
 //   for (i = 0; i < n; i++)
 //   result = result > array[i] ? result : array[i];
-// #pragma acc end parallel
 // 
 //   /* 'min' reductions.  */
 // #pragma acc parallel vector_length (vl)
 // #pragma acc loop reduct

  1   2   >