RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Sandiford
This is a follow-up to:

  http://gcc.gnu.org/ml/gcc/2013-10/msg00075.html

The outcome on IRC was that __asm ought to behave like aliases eventually,
so we should abandom the renaming-in-C thing.  One way would be to code
the functions directly as asm files, or use inline asm, but it seems a
shame to do that when GCC already knows how to generate the function
body we want.

This patch instead compiles the functions under a different name
and postprocesses the asm output.  See the comment in the patch
for more details.

It would be possible to do the whole compilation using pipes,
but that wouldn't pick up failures in earlier commands (at least
not without relying on bashisms).

Tested on mips64-linux-gnu.  OK to install?

Thanks,
Richard


libgcc/
* Makefile.in (sync_compile): New macro.
($(libgcc-sync-size-funcs-o), $(libgcc-sync-funcs-o))
($(libgcc-sync-size-funcs-s-o), $(libgcc-sync-funcs-s-o)): Use it.
* sync.c: Remove aliases.  Rename all functions to the_function.

Index: libgcc/Makefile.in
===
--- libgcc/Makefile.in  2013-10-11 08:18:40.309571904 +0100
+++ libgcc/Makefile.in  2013-10-11 08:25:26.513967865 +0100
@@ -718,38 +718,49 @@ libgcc-sync-size-funcs := $(foreach pref
$(foreach suffix, 1 2 4 8 16, \
  $(prefix)_$(suffix)))
 
+# sync.c uses GCC's expansion of built-in functions to define out-of-line
+# versions of those same functions.  This cannot be expressed directly in C,
+# even with GNU extensions, so instead sync.c calls the out-of-line function
+# "the_function" and we postprocess the assembly code to rename it.
+#
+# This code is morally assembly code rather than C code (and so cannot
+# be included in LTO, for example).  However, having GCC do most of the
+# work saves recoding the functions as inline asm or .S files.
+sync_compile = $(gcc_compile_bare) $(compile_deps) $(SYNC_CFLAGS) -S \
+-Wno-missing-prototypes $(1) -o $@.s.in && \
+sed 's/the_function/__$*/g' < $@.s.in > $@.s && \
+$(gcc_compile_bare) -c $@.s -o $@ && \
+rm $@.s.in $@.s
+
 libgcc-sync-size-funcs-o = $(patsubst %,%$(objext),$(libgcc-sync-size-funcs))
 $(libgcc-sync-size-funcs-o): %$(objext): $(srcdir)/sync.c
-   $(gcc_compile) $(SYNC_CFLAGS) \
+   $(call sync_compile, \
  -DFN=`echo "$*" | sed 's/_[^_]*$$//'` \
  -DSIZE=`echo "$*" | sed 's/.*_//'` \
- -c $< $(vis_hide)
+ $< $(vis_hide))
+
 libgcc-objects += $(libgcc-sync-size-funcs-o)
 
 libgcc-sync-funcs := sync_synchronize
 
 libgcc-sync-funcs-o = $(patsubst %,%$(objext),$(libgcc-sync-funcs))
 $(libgcc-sync-funcs-o): %$(objext): $(srcdir)/sync.c
-   $(gcc_compile) $(SYNC_CFLAGS) \
- -DL$* \
- -c $< $(vis_hide)
+   $(call sync_compile, -DL$* $< $(vis_hide))
 libgcc-objects += $(libgcc-sync-funcs-o)
 
 ifeq ($(enable_shared),yes)
 libgcc-sync-size-funcs-s-o = $(patsubst %,%_s$(objext), \
   $(libgcc-sync-size-funcs))
 $(libgcc-sync-size-funcs-s-o): %_s$(objext): $(srcdir)/sync.c
-   $(gcc_s_compile) $(SYNC_CFLAGS) \
+   $(call sync_compile, \
  -DFN=`echo "$*" | sed 's/_[^_]*$$//'` \
  -DSIZE=`echo "$*" | sed 's/.*_//'` \
- -c $<
+ $< -DSHARED)
 libgcc-s-objects += $(libgcc-sync-size-funcs-s-o)
 
 libgcc-sync-funcs-s-o = $(patsubst %,%_s$(objext),$(libgcc-sync-funcs))
 $(libgcc-sync-funcs-s-o): %_s$(objext): $(srcdir)/sync.c
-   $(gcc_s_compile) $(SYNC_CFLAGS) \
- -DL$* \
- -c $<
+   $(call sync_compile, -DL$* $< -DSHARED)
 libgcc-s-objects += $(libgcc-sync-funcs-s-o)
 endif
 endif
Index: libgcc/sync.c
===
--- libgcc/sync.c   2013-10-11 08:18:40.321572005 +0100
+++ libgcc/sync.c   2013-10-11 08:18:40.664574880 +0100
@@ -72,22 +72,22 @@ Software Foundation; either version 3, o
TYPE is a type that has UNITS bytes.  */
 
 #define DEFINE_V_PV(NAME, UNITS, TYPE) \
-  static TYPE  \
-  NAME##_##UNITS (TYPE *ptr, TYPE value)   \
+  TYPE \
+  the_function (TYPE *ptr, TYPE value) \
   {\
 return __##NAME (ptr, value);  \
   }
 
-#define DEFINE_V_PVV(NAME, UNITS, TYPE)\
-  static TYPE  \
-  NAME##_##UNITS (TYPE *ptr, TYPE value1, TYPE value2) \
+#define DEFINE_V_PVV(NAME, UNITS, TYPE)
\
+  TYPE \
+  the_function (TYPE 

RE: [PATCH] Fix PR58682

2013-10-11 Thread Paulo Matos
> -Original Message-
> From: Kyrill Tkachov [mailto:kyrylo.tkac...@arm.com]
> Sent: 10 October 2013 17:15
> To: Paulo Matos
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] Fix PR58682
> 
> Hi Paul,
> 
> On 10/10/13 17:10, Paulo Matos wrote:
> > - inline_call (edge, true, &new_indirect_edges, &overall_size, true);
> > + if (inline_call (edge, true, &new_indirect_edges, &overall_size,
> true))
> > +{
> > +  /* Update max_count if new edges were found */
> Comments end in full stop and two spaces.
> 
> Also, a regression test to add to the testsuite would be nice...
> 

Thanks, fixed patch attached.
Working on how to submit a testcase for this given that I need to submit 5 
files + compile with profile-generate + execute + compile with profile-use to 
generate the ICE.

-- 
Paulo Matos


pr58682.patch
Description: pr58682.patch


Re: gimplify.c fix for rs6000 and mips

2013-10-11 Thread Richard Biener
On Thu, Oct 10, 2013 at 7:12 PM, Andrew MacLeod  wrote:
> On 10/10/2013 10:35 AM, Richard Biener wrote:
>>
>> On Thu, Oct 10, 2013 at 4:30 PM, Richard Biener
>>  wrote:
>>>
>>> On Thu, Oct 10, 2013 at 3:10 PM, Andrew MacLeod 
>>> wrote:

 More fun target specific stuff.  gimplify.c needs the back end supplied
 macros/function for the va-arg padding functions to bootstrap...
>>>
>>> Err ... don't we have the va_arg gimplify target hooks to hide this
>>> kind of target dependency?
>>
>> Ah - it's common code for the targets...  which makes me believe
>> it belongs in targhooks.c, kind of.
>>
>> Btw, you didn't move gimplify_va_arg_expr to gimplify.c for some reason?
>>
>>
> yeah, gimplify.c already was calling it, so it seemed more appropriate for a
> gimplification routine in gimplify.c.  If we didn't move it, gimpliify.c
> would need builtins.h for no reason other than to get the prototype (once it
> was moved out of tree.h where it happened to be declared)  I didn't realize
> it would drag crud with it :-P
>
> seems like there should be a better solution for PAD_VARARGS_DOWN (defined
> in config/targ.h) and ARGS_GROW_DOWNWARD.. and somewhere in there something
> gets expanded to ARGS_SIZE_RTX which required expr.h  :-P  . Its a bit ugly
> for sure.

Well, I think the factored std_ routine which is supposed to be used
by target hook implementations just resides in the wrong place.  It
should be in targhooks.c or target.c or somewhere similar.

Richard.


Re: [PATCH] rewrite stack vectors

2013-10-11 Thread Richard Biener
On Thu, Oct 10, 2013 at 8:07 PM,   wrote:
> From: Trevor Saunders 
>
> Hi,
>
> This makes the implementation of stack vectors simpler and easier to use.  
> This  
>   
> works by making the size of the on stack storage a template argument, so the
> size is embedded in the type.  This allows you to implicitly convert a
> stack_vec to a vec *, and it will just work.

Any particular reason to not go with a

 vec

partial specialization?

Btw, one reason against N being a template parameter is code bloat
(so hopefully the implementation dispatches to a single vec
set of template instantiations - I didn't look too closely at the patch yet).

Richard.

>  Because there's
> no need to support stack vectors in unions we can make them be a more normal
> c++ class with a constructor and destructor that are nontrivial.
>
> Trev
>
> 2013-10-08  Trevor Saunders  
>
> ada/
> * gcc-interface/decl.c (components_to_record): Adjust.
>
> gcc/
> * df-scan.c (df_collection_rec): Adjust.
> (copy_defs): New constant.
> (copy_uses): Likewise.
> (copy_eq_uses): Likewise.
> (copy_mw): Likewise.
> (copy_all): Likewise.
> (df_insn_rescan): Adjust.
> (df_notes_rescan): Likewise.
> (df_swap_refs): Likewise.
> (df_sort_and_compress_refs): Likewise.
> (df_sort_and_compress_mws): Likewise.
> (df_install_refs): Likewise.
> (df_install_mws): Likewise.
> (df_refs_add_to_chains): Add flags parameter controlling which vectors
> are coppied.
> (df_bb_refs_record): Adjust.
> (df_record_entry_block_defs): Likewise.
> (df_record_exit_block_defs): Likewise.
> (df_refs_verify): Likewise.
> (df_mws_verify): Likewise.
> (df_insn_refs_verify): Likewise.
> (df_bb_verify): Likewise.
> * ipa-pure-const.c (finish_state): Remove.
> (propagate): Adjust.
> * tree-data-ref.c tree-ssa-alias.c tree-ssa-loop-ivcanon.c
> tree-ssa-threadedge.c tree-vect-loop-manip.c tree-vect-slp.c 
> var-tracking.c
> Adjust.
> * vec.c (stack_vecs): Remove.
> (register_stack_vec): Likewise.
> (stack_vec_register_index): Likewise.
> (unregister_stack_vec): Likewise.
> * vec.h (struct va_stack): Remove.
> (struct vec): Specialize as
> struct vec instead since va_heap is the only
> allocation strategy compatable with the vl_ptr layout.
> (struct vec): Remove because it now gets an empty
> specialization anyway.
> (class stack_vec): New class.
> (vec_stack_alloc): Remove.
> (vec::using_auto_storage): New method.
>
> diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
> index 456d7ab..0debdae 100644
> --- a/gcc/ada/gcc-interface/decl.c
> +++ b/gcc/ada/gcc-interface/decl.c
> @@ -6994,13 +6994,11 @@ components_to_record (tree gnu_record_type, Node_Id 
> gnat_component_list,
>tree gnu_union_type, gnu_union_name;
>tree this_first_free_pos, gnu_variant_list = NULL_TREE;
>bool union_field_needs_strict_alignment = false;
> -  vec  variant_types;
> +  stack_vec  variant_types;
>vinfo_t *gnu_variant;
>unsigned int variants_align = 0;
>unsigned int i;
>
> -  vec_stack_alloc (vinfo_t, variant_types, 16);
> -
>if (TREE_CODE (gnu_name) == TYPE_DECL)
> gnu_name = DECL_NAME (gnu_name);
>
> @@ -7196,9 +7194,6 @@ components_to_record (tree gnu_record_type, Node_Id 
> gnat_component_list,
>   gnu_variant_list = gnu_field;
> }
>
> -  /* We are done with the variants.  */
> -  variant_types.release ();
> -
>/* Only make the QUAL_UNION_TYPE if there are non-empty variants.  */
>if (gnu_variant_list)
> {
> diff --git a/gcc/df-scan.c b/gcc/df-scan.c
> index f2e8ab2..aace96d 100644
> --- a/gcc/df-scan.c
> +++ b/gcc/df-scan.c
> @@ -86,10 +86,10 @@ static HARD_REG_SET elim_reg_set;
>
>  struct df_collection_rec
>  {
> -  vec def_vec;
> -  vec use_vec;
> -  vec eq_use_vec;
> -  vec mw_vec;
> +  stack_vec def_vec;
> +  stack_vec use_vec;
> +  stack_vec eq_use_vec;
> +  stack_vec mw_vec;
>  };
>
>  static df_ref df_null_ref_rec[1];
> @@ -131,7 +131,7 @@ static void df_ref_chain_delete_du_chain (df_ref *);
>  static void df_ref_chain_delete (df_ref *);
>
>  static void df_refs_add_to_chains (struct df_collection_rec *,
> -  basic_block, rtx);
> +  basic_block, rtx, unsigned int);
>
>  static bool df_insn_refs_verify (struct df_collection_rec *, basic_block, 
> rtx, bool);
>  static void df_entry_block_defs_collect (struct df_collection_rec *, bitmap);
> @@ -153,6 +153,14 @@ static void df_insn_info_delete (unsigned int);
> and epilogue to save

Re: [patch] omp-low.h

2013-10-11 Thread Richard Biener
On Fri, Oct 11, 2013 at 5:31 AM, Andrew MacLeod  wrote:
> Missed a bit in tree-flow.h..  I mistakenly assumed omp_region belonged
> there :-P
>
> Anyway by moving struct omp_region into omp_low.h, along with the prototypes
> from tree-flow.h, gimple.h and tree.h.  Everything works great with just a
> few files actually requiring omp-low.h.
>
> AS an extra bonus, omp-low.c was *exporting*  "struct omp_region
> *root_omp_region".   tree-cfg.c was checking it for non-null and calling
> free_omp_regions().   Well,  free_omp_regions works just fine will a NULL
> root_omp_region (basically does nothing and returns), so exporting it just
> for that one check seems nonsensical.  Its now static.
>
> Bootstraps (will really-all languages) on x86_64-unknown-linux-gnu with no
> new regressions.  also stage 1 cross builds on rs6000 and mips.  No more of
> that crap :-)
>
> OK?

Index: tree-flow.h
===
*** tree-flow.h (revision 203379)
--- tree-flow.h (working copy)
*** along with GCC; see the file COPYING3.
*** 41,91 
  OpenMP Region Tree
  ---*/

- /* Parallel region information.  Every parallel and workshare
-directive is enclosed between two markers, the OMP_* directive
-and a corresponding OMP_RETURN statement.  */
-
- struct omp_region

I can spot the end of a comment containing "OpenMP Region Tree"
that you didn't move - probably an oversight.

Otherwise ok.

Thanks,
Richard.

> Andrew
>
>
>
>
>


Re: [patch] omp-low.h

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 10:12:17AM +0200, Richard Biener wrote:
> I can spot the end of a comment containing "OpenMP Region Tree"
> that you didn't move - probably an oversight.
> 
> Otherwise ok.

Please wait for the gomp4 merge (which I'm going to commit today).

Jakub


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Biener
On Fri, Oct 11, 2013 at 9:29 AM, Richard Sandiford
 wrote:
> This is a follow-up to:
>
>   http://gcc.gnu.org/ml/gcc/2013-10/msg00075.html
>
> The outcome on IRC was that __asm ought to behave like aliases eventually,
> so we should abandom the renaming-in-C thing.  One way would be to code
> the functions directly as asm files, or use inline asm, but it seems a
> shame to do that when GCC already knows how to generate the function
> body we want.
>
> This patch instead compiles the functions under a different name
> and postprocesses the asm output.  See the comment in the patch
> for more details.
>
> It would be possible to do the whole compilation using pipes,
> but that wouldn't pick up failures in earlier commands (at least
> not without relying on bashisms).
>
> Tested on mips64-linux-gnu.  OK to install?

Btw, one other way of deliberately and forever get around GCC getting
what you are after is using a toplevel asm to build the alias manually.
Like

asm(".alias __sync_synchronize sync_synchronize");

at least unless Honza starts to implement a (toplevel) asm parser
to catch symbol definitions and uses (something we need a way
for).  Don't forget to mark sync_synchronize with the used attribute.

Richard.

> Thanks,
> Richard
>
>
> libgcc/
> * Makefile.in (sync_compile): New macro.
> ($(libgcc-sync-size-funcs-o), $(libgcc-sync-funcs-o))
> ($(libgcc-sync-size-funcs-s-o), $(libgcc-sync-funcs-s-o)): Use it.
> * sync.c: Remove aliases.  Rename all functions to the_function.
>
> Index: libgcc/Makefile.in
> ===
> --- libgcc/Makefile.in  2013-10-11 08:18:40.309571904 +0100
> +++ libgcc/Makefile.in  2013-10-11 08:25:26.513967865 +0100
> @@ -718,38 +718,49 @@ libgcc-sync-size-funcs := $(foreach pref
> $(foreach suffix, 1 2 4 8 16, \
>   $(prefix)_$(suffix)))
>
> +# sync.c uses GCC's expansion of built-in functions to define out-of-line
> +# versions of those same functions.  This cannot be expressed directly in C,
> +# even with GNU extensions, so instead sync.c calls the out-of-line function
> +# "the_function" and we postprocess the assembly code to rename it.
> +#
> +# This code is morally assembly code rather than C code (and so cannot
> +# be included in LTO, for example).  However, having GCC do most of the
> +# work saves recoding the functions as inline asm or .S files.
> +sync_compile = $(gcc_compile_bare) $(compile_deps) $(SYNC_CFLAGS) -S \
> +-Wno-missing-prototypes $(1) -o $@.s.in && \
> +sed 's/the_function/__$*/g' < $@.s.in > $@.s && \
> +$(gcc_compile_bare) -c $@.s -o $@ && \
> +rm $@.s.in $@.s
> +
>  libgcc-sync-size-funcs-o = $(patsubst %,%$(objext),$(libgcc-sync-size-funcs))
>  $(libgcc-sync-size-funcs-o): %$(objext): $(srcdir)/sync.c
> -   $(gcc_compile) $(SYNC_CFLAGS) \
> +   $(call sync_compile, \
>   -DFN=`echo "$*" | sed 's/_[^_]*$$//'` \
>   -DSIZE=`echo "$*" | sed 's/.*_//'` \
> - -c $< $(vis_hide)
> + $< $(vis_hide))
> +
>  libgcc-objects += $(libgcc-sync-size-funcs-o)
>
>  libgcc-sync-funcs := sync_synchronize
>
>  libgcc-sync-funcs-o = $(patsubst %,%$(objext),$(libgcc-sync-funcs))
>  $(libgcc-sync-funcs-o): %$(objext): $(srcdir)/sync.c
> -   $(gcc_compile) $(SYNC_CFLAGS) \
> - -DL$* \
> - -c $< $(vis_hide)
> +   $(call sync_compile, -DL$* $< $(vis_hide))
>  libgcc-objects += $(libgcc-sync-funcs-o)
>
>  ifeq ($(enable_shared),yes)
>  libgcc-sync-size-funcs-s-o = $(patsubst %,%_s$(objext), \
>$(libgcc-sync-size-funcs))
>  $(libgcc-sync-size-funcs-s-o): %_s$(objext): $(srcdir)/sync.c
> -   $(gcc_s_compile) $(SYNC_CFLAGS) \
> +   $(call sync_compile, \
>   -DFN=`echo "$*" | sed 's/_[^_]*$$//'` \
>   -DSIZE=`echo "$*" | sed 's/.*_//'` \
> - -c $<
> + $< -DSHARED)
>  libgcc-s-objects += $(libgcc-sync-size-funcs-s-o)
>
>  libgcc-sync-funcs-s-o = $(patsubst %,%_s$(objext),$(libgcc-sync-funcs))
>  $(libgcc-sync-funcs-s-o): %_s$(objext): $(srcdir)/sync.c
> -   $(gcc_s_compile) $(SYNC_CFLAGS) \
> - -DL$* \
> - -c $<
> +   $(call sync_compile, -DL$* $< -DSHARED)
>  libgcc-s-objects += $(libgcc-sync-funcs-s-o)
>  endif
>  endif
> Index: libgcc/sync.c
> ===
> --- libgcc/sync.c   2013-10-11 08:18:40.321572005 +0100
> +++ libgcc/sync.c   2013-10-11 08:18:40.664574880 +0100
> @@ -72,22 +72,22 @@ Software Foundation; either version 3, o
> TYPE is a type that has UNITS bytes.  */
>
>  #define DEFINE_V_PV(NAME, UNITS, TYPE) \
> -  static TYPE  \
> -  NAME##_##UNITS (TYPE *ptr, TYPE value)   \
> +  TYPE 

Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
> asm(".alias __sync_synchronize sync_synchronize");

It is .set, but not everywhere.
/* The MIPS assembler has different syntax for .set. We set it to
   .dummy to trap any errors.  */
#undef SET_ASM_OP
#define SET_ASM_OP "\t.dummy\t"
But perhaps it would require fewer variants than providing inline asm
of the __sync_* builtin by hand for all the targets that need it.

Jakub


Re: [C++ Patch] PR 31671

2013-10-11 Thread Paolo Carlini

Hi,

On 10/11/2013 05:55 AM, Jason Merrill wrote:

On 10/10/2013 03:31 PM, Paolo Carlini wrote:

On 10/10/2013 08:26 PM, Jason Merrill wrote:

On 10/10/2013 08:33 AM, Paolo Carlini wrote:

+  expr_type = TREE_TYPE (expr) = cp_build_qualified_type
+(TREE_TYPE (expr), cp_type_quals (TREE_TYPE (probe_type)));


Won't that end up being the same as the contents of expr_type before
this statement?  Can we just remove this assignment?



Sorry. Having figured out where the problem was, I messed up very badly
when I prepared the actual patch for submission. The below makes much
more sense to me.



+  expr_type = TREE_TYPE (probe_type);


Again, won't that set expr_type to the value it already had?  I'd 
prefer to just have a comment that we're leaving it alone.

Sorry forgot about that. No, that line isn't redundant.

The 6th time we get there when compiling the testcase, that is when we 
are converting to int&, at that line this is expr_type:


 size bitsizetype> constant 32>

   ...

Thus, if you think it makes the logic easier to follow we could likely 
do (only lightly tested):


expr_type = TREE_TYPE (expr_type);

but we can't possibly leave expr_type alone.

Thanks,
Paolo.


Re: [SKETCH] Refactor implicit function template implementation and fix 58534, 58536, 58548, 58549 and 58637.

2013-10-11 Thread Adam Butcher

On 2013-10-11 3:26, Jason Merrill wrote:
Can we put this in cp_parser, invert the sense of the flag, and 
only

set it during cp_parser_parameter_declaration_clause?


Done.  But it still needs a flag to disable in the case of explicit
template specialization.


Can't you just check processing_specialization?


Yep.  That's much cleaner.

Cheers,
Adam



Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Sandiford
Jakub Jelinek  writes:
> On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
>> asm(".alias __sync_synchronize sync_synchronize");
>
> It is .set, but not everywhere.
> /* The MIPS assembler has different syntax for .set. We set it to
>.dummy to trap any errors.  */
> #undef SET_ASM_OP
> #define SET_ASM_OP "\t.dummy\t"
> But perhaps it would require fewer variants than providing inline asm
> of the __sync_* builtin by hand for all the targets that need it.

Yeah, that's why I prefer the sed approach.  GCC knows how to do exactly
what we want, and what syntax to use.  We just need to take its output and
change the function name.

And like Richard says, parsing top-level asms would be fair game,
especially if GCC and binutils ever were integrated (for libgccjit.so).
So using top-level asms seems like putting off the inevitable
(albeit putting it off further than __asm renaming).

Do either of you object to the sed thing?

Thanks,
Richard


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Biener
On Fri, Oct 11, 2013 at 11:43 AM, Richard Sandiford
 wrote:
> Jakub Jelinek  writes:
>> On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
>>> asm(".alias __sync_synchronize sync_synchronize");
>>
>> It is .set, but not everywhere.
>> /* The MIPS assembler has different syntax for .set. We set it to
>>.dummy to trap any errors.  */
>> #undef SET_ASM_OP
>> #define SET_ASM_OP "\t.dummy\t"
>> But perhaps it would require fewer variants than providing inline asm
>> of the __sync_* builtin by hand for all the targets that need it.
>
> Yeah, that's why I prefer the sed approach.  GCC knows how to do exactly
> what we want, and what syntax to use.  We just need to take its output and
> change the function name.
>
> And like Richard says, parsing top-level asms would be fair game,
> especially if GCC and binutils ever were integrated (for libgccjit.so).
> So using top-level asms seems like putting off the inevitable
> (albeit putting it off further than __asm renaming).
>
> Do either of you object to the sed thing?

Well, ideally there would be a way to not lie about symbol names to GCC.
That is, have a "native" way of telling GCC what to do here (which is
as far as I understand to emit the code for the builtin-handled $SYM
in a function with $SYM - provide the out-of-line copy for a builtin).

For the __sync functions it's unfortunate that the library function has
the same 'name' as the builtin and the builtin doesn't have an alternate
spelling.  So - can't we just add __builtin__sync_... spellings and use

__sync_synchronize ()
{
  __builtin_sync_syncronize ();
}

?  (what if __builtin_sync_syncronize expands to a libcall?  if it can't,
what's the point of the library function?)

Why does a simple

__sync_synchronize ()
{
  __sync_synchronize ();
}

not work?  On x86_64 I get from that:

__sync_synchronize:
.LFB0:
.cfi_startproc
mfence
ret
.cfi_endproc

(similar to how you can have a definition of memcpy () and have
another memcpy inside it inline-expanded)

Richard.

> Thanks,
> Richard


Re: [0/6] Merge of gomp-4_0-branch to trunk

2013-10-11 Thread Jakub Jelinek
On Tue, Oct 08, 2013 at 09:50:55PM +0200, Jakub Jelinek wrote:
> I'd like to merge (most of) gomp-4_0-branch to trunk.

I've incorporated the feedback as r203339, r203391 and r203392
and committed the whole merge including those 3 commits to trunk
as r203408.  I've merged then trunk up to r203408 to gomp-4_0-branch
for further development (elementals, offloading, Fortran, OpenACC?).

The current diff from trunk@203408 to gomp-4_0-branch@203409 is just
following plus ChangeLog.gomp files.

--- libgomp/target.c(.../trunk) (revision 203408)
+++ libgomp/target.c(.../branches/gomp-4_0-branch)  (revision 203409)
@@ -31,10 +31,439 @@
 #include 
 #include 
 
+#ifdef PLUGIN_SUPPORT
+# include 
+# include 
+#endif
+
+static void gomp_target_init (void);
+
+static pthread_once_t gomp_is_initialized = PTHREAD_ONCE_INIT;
+
+/* Forward declaration for a node in the tree.  */
+typedef struct splay_tree_node_s *splay_tree_node;
+typedef struct splay_tree_s *splay_tree;
+typedef struct splay_tree_key_s *splay_tree_key;
+
+struct target_mem_desc {
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* All the splay nodes allocated together.  */
+  splay_tree_node array;
+  /* Start of the target region.  */
+  uintptr_t tgt_start;
+  /* End of the targer region.  */
+  uintptr_t tgt_end;
+  /* Handle to free.  */
+  void *to_free;
+  /* Previous target_mem_desc.  */
+  struct target_mem_desc *prev;
+  /* Number of items in following list.  */
+  size_t list_count;
+
+  /* Corresponding target device descriptor.  */
+  struct gomp_device_descr *device_descr;
+
+  /* List of splay keys to remove (or decrease refcount)
+ at the end of region.  */
+  splay_tree_key list[];
+};
+
+struct splay_tree_key_s {
+  /* Address of the host object.  */
+  uintptr_t host_start;
+  /* Address immediately after the host object.  */
+  uintptr_t host_end;
+  /* Descriptor of the target memory.  */
+  struct target_mem_desc *tgt;
+  /* Offset from tgt->tgt_start to the start of the target object.  */
+  uintptr_t tgt_offset;
+  /* Reference count.  */
+  uintptr_t refcount;
+  /* True if data should be copied from device to host at the end.  */
+  bool copy_from;
+};
+
+/* Array of descriptors of all available devices.  */
+static struct gomp_device_descr *devices;
+
+/* Total number of available devices.  */
+static int num_devices;
+
+/* The comparison function.  */
+
+static int
+splay_compare (splay_tree_key x, splay_tree_key y)
+{
+  if (x->host_start == x->host_end
+  && y->host_start == y->host_end)
+return 0;
+  if (x->host_end <= y->host_start)
+return -1;
+  if (x->host_start >= y->host_end)
+return 1;
+  return 0;
+}
+
+#include "splay-tree.h"
+
+/* This structure describes accelerator device.
+   It contains name of the corresponding libgomp plugin, function handlers for
+   interaction with the device, ID-number of the device, and information about
+   mapped memory.  */
+struct gomp_device_descr
+{
+  /* This is the ID number of device.  It could be specified in DEVICE-clause 
of
+ TARGET construct.  */
+  int id;
+
+  /* Plugin file handler.  */
+  void *plugin_handle;
+
+  /* Function handlers.  */
+  bool (*device_available_func) (void);
+
+  /* Splay tree containing information about mapped memory regions.  */
+  struct splay_tree_s dev_splay_tree;
+
+  /* Mutex for operating with the splay tree and other shared structures.  */
+  gomp_mutex_t dev_env_lock;
+};
+
 attribute_hidden int
 gomp_get_num_devices (void)
 {
-  return 0;
+  (void) pthread_once (&gomp_is_initialized, gomp_target_init);
+  return num_devices;
+}
+
+static struct gomp_device_descr *
+resolve_device (int device_id)
+{
+  if (device_id == -1)
+{
+  struct gomp_task_icv *icv = gomp_icv (false);
+  device_id = icv->default_device_var;
+}
+  if (device_id < 0
+  || (device_id >= gomp_get_num_devices ()
+ && device_id != 257))
+return NULL;
+
+  /* FIXME: Temporary hack for testing non-shared address spaces on host.  */
+  if (device_id == 257)
+return &devices[0];
+
+  return &devices[device_id];
+}
+
+
+/* Handle the case where splay_tree_lookup found oldn for newn.
+   Helper function of gomp_map_vars.  */
+
+static inline void
+gomp_map_vars_existing (splay_tree_key oldn, splay_tree_key newn,
+   unsigned char kind)
+{
+  if (oldn->host_start > newn->host_start
+  || oldn->host_end < newn->host_end)
+gomp_fatal ("Trying to map into device [%p..%p) object when"
+   "[%p..%p) is already mapped",
+   (void *) newn->host_start, (void *) newn->host_end,
+   (void *) oldn->host_start, (void *) oldn->host_end);
+  if (((kind & 7) == 2 || (kind & 7) == 3)
+  && !oldn->copy_from
+  && oldn->host_start == newn->host_start
+  && oldn->host_end == newn->host_end)
+oldn->copy_from = true;
+  oldn->refcount++;
+}
+
+static struct target_mem_desc *
+gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
+ 

Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 12:16:03PM +0200, Richard Biener wrote:
> For the __sync functions it's unfortunate that the library function has
> the same 'name' as the builtin and the builtin doesn't have an alternate
> spelling.  So - can't we just add __builtin__sync_... spellings and use
> 
> __sync_synchronize ()
> {
>   __builtin_sync_syncronize ();
> }
> 
> ?  (what if __builtin_sync_syncronize expands to a libcall?  if it can't,
> what's the point of the library function?)

Actually, we already have a different spelling for that,
__sync_synchronize ()
should be equivalent to
__atomic_thread_fence (__ATOMIC_SEQ_CST)
though no idea what exactly it does on targets I'm not familiar with.

Jakub


Re: [C++ Patch] PR 58633

2013-10-11 Thread Paolo Carlini

Hi,

On 10/10/2013 08:48 PM, Jason Merrill wrote:

I think that committing was important for two primary reasons:

 1. We should not have irreversible side-effects while in a tentative
state.  For example, we shouldn't create a permanent entry in the
symbol table, or issue an error message that might not apply if the
tentative parse is aborted.  The "tentative" stuff is only in the
parser; there's no easy way to remove a function or variable from
the symbol table, or undo other pieces of global state in the
compiler as a whole.


Right.  In cases where there aren't side-effects, this shouldn't be an 
issue.  We should only use Paolo's new function in such cases; Paolo, 
please add that to the comment.  The patch is OK with that change.

Thanks. Thus I'm finishing testing the below.

The issue is a regression in 4_7/4_8 too, what should we do in those 
branches? I'm thinking applying the change to 4_8 too and either not 
fixing in 4_7 or just reverting the cp_parser_commit_to_tentative_parse 
change which improved the diagnostic for 47277?


Thanks,
Paolo.

///
/cp
2013-10-11  Paolo Carlini  

PR c++/58633
* parser.c (cp_parser_commit_to_topmost_tentative_parse): New.
(cp_parser_pseudo_destructor_name): Use it.

/testsuite
2013-10-11  Paolo Carlini  

PR c++/58633
* g++.dg/cpp0x/decltype57.C: New.
Index: cp/parser.c
===
--- cp/parser.c (revision 203392)
+++ cp/parser.c (working copy)
@@ -2347,6 +2347,8 @@ static void cp_parser_parse_tentatively
   (cp_parser *);
 static void cp_parser_commit_to_tentative_parse
   (cp_parser *);
+static void cp_parser_commit_to_topmost_tentative_parse
+  (cp_parser *);
 static void cp_parser_abort_tentative_parse
   (cp_parser *);
 static bool cp_parser_parse_definitely
@@ -6693,7 +6695,7 @@ cp_parser_pseudo_destructor_name (cp_parser* parse
 
   /* Once we see the ~, this has to be a pseudo-destructor.  */
   if (!processing_template_decl && !cp_parser_error_occurred (parser))
-cp_parser_commit_to_tentative_parse (parser);
+cp_parser_commit_to_topmost_tentative_parse (parser);
 
   /* Look for the type-name again.  We are not responsible for
  checking that it matches the first type-name.  */
@@ -24346,6 +24348,32 @@ cp_parser_commit_to_tentative_parse (cp_parser* pa
 }
 }
 
+/* Commit to the topmost currently active tentative parse.
+
+   Note that this function shouldn't be called when there are
+   irreversible side-effects while in a tentative state.  For
+   example, we shouldn't create a permanent entry in the symbol
+   table, or issue an error message that might not apply if the
+   tentative parse is aborted.  */
+
+static void
+cp_parser_commit_to_topmost_tentative_parse (cp_parser* parser)
+{
+  cp_parser_context *context = parser->context;
+  cp_lexer *lexer = parser->lexer;
+
+  if (context)
+{
+  if (context->status == CP_PARSER_STATUS_KIND_COMMITTED)
+   return;
+  context->status = CP_PARSER_STATUS_KIND_COMMITTED;
+
+  while (!cp_lexer_saving_tokens (lexer))
+   lexer = lexer->next;
+  cp_lexer_commit_tokens (lexer);
+}
+}
+
 /* Abort the currently active tentative parse.  All consumed tokens
will be rolled back, and no diagnostics will be issued.  */
 
Index: testsuite/g++.dg/cpp0x/decltype57.C
===
--- testsuite/g++.dg/cpp0x/decltype57.C (revision 0)
+++ testsuite/g++.dg/cpp0x/decltype57.C (working copy)
@@ -0,0 +1,8 @@
+// PR c++/58633
+// { dg-do compile { target c++11 } }
+
+void foo(int i)
+{
+  typedef int I;
+  decltype(i.I::~I())* p;
+}


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Sandiford
Richard Biener  writes:
> On Fri, Oct 11, 2013 at 11:43 AM, Richard Sandiford
>  wrote:
>> Jakub Jelinek  writes:
>>> On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
 asm(".alias __sync_synchronize sync_synchronize");
>>>
>>> It is .set, but not everywhere.
>>> /* The MIPS assembler has different syntax for .set. We set it to
>>>.dummy to trap any errors.  */
>>> #undef SET_ASM_OP
>>> #define SET_ASM_OP "\t.dummy\t"
>>> But perhaps it would require fewer variants than providing inline asm
>>> of the __sync_* builtin by hand for all the targets that need it.
>>
>> Yeah, that's why I prefer the sed approach.  GCC knows how to do exactly
>> what we want, and what syntax to use.  We just need to take its output and
>> change the function name.
>>
>> And like Richard says, parsing top-level asms would be fair game,
>> especially if GCC and binutils ever were integrated (for libgccjit.so).
>> So using top-level asms seems like putting off the inevitable
>> (albeit putting it off further than __asm renaming).
>>
>> Do either of you object to the sed thing?
>
> Well, ideally there would be a way to not lie about symbol names to GCC.
> That is, have a "native" way of telling GCC what to do here (which is
> as far as I understand to emit the code for the builtin-handled $SYM
> in a function with $SYM - provide the out-of-line copy for a builtin).
>
> For the __sync functions it's unfortunate that the library function has
> the same 'name' as the builtin and the builtin doesn't have an alternate
> spelling.  So - can't we just add __builtin__sync_... spellings and use
>
> __sync_synchronize ()
> {
>   __builtin_sync_syncronize ();
> }
>
> ?  (what if __builtin_sync_syncronize expands to a libcall?  if it can't,
> what's the point of the library function?)

It can't expand to a libcall in nomips16 mode.  It always expands to a
libcall in mips16 mode.  The point is to provide out-of-line nomips16
functions that the mips16 code can call.

> Why does a simple
>
> __sync_synchronize ()
> {
>   __sync_synchronize ();
> }
>
> not work?  On x86_64 I get from that:
>
> __sync_synchronize:
> .LFB0:
> .cfi_startproc
> mfence
> ret
> .cfi_endproc
>
> (similar to how you can have a definition of memcpy () and have
> another memcpy inside it inline-expanded)

Is that with optimisation enabled?  -O2 gives me:

__sync_synchronize:
.LFB0:
.cfi_startproc
.p2align 4,,10
.p2align 3
.L2:
jmp .L2
.cfi_endproc
.LFE0:

We do want to compile this stuff with optimisation enabled.

Thanks,
Richard


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Biener
On Fri, Oct 11, 2013 at 12:48 PM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> On Fri, Oct 11, 2013 at 11:43 AM, Richard Sandiford
>>  wrote:
>>> Jakub Jelinek  writes:
 On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
> asm(".alias __sync_synchronize sync_synchronize");

 It is .set, but not everywhere.
 /* The MIPS assembler has different syntax for .set. We set it to
.dummy to trap any errors.  */
 #undef SET_ASM_OP
 #define SET_ASM_OP "\t.dummy\t"
 But perhaps it would require fewer variants than providing inline asm
 of the __sync_* builtin by hand for all the targets that need it.
>>>
>>> Yeah, that's why I prefer the sed approach.  GCC knows how to do exactly
>>> what we want, and what syntax to use.  We just need to take its output and
>>> change the function name.
>>>
>>> And like Richard says, parsing top-level asms would be fair game,
>>> especially if GCC and binutils ever were integrated (for libgccjit.so).
>>> So using top-level asms seems like putting off the inevitable
>>> (albeit putting it off further than __asm renaming).
>>>
>>> Do either of you object to the sed thing?
>>
>> Well, ideally there would be a way to not lie about symbol names to GCC.
>> That is, have a "native" way of telling GCC what to do here (which is
>> as far as I understand to emit the code for the builtin-handled $SYM
>> in a function with $SYM - provide the out-of-line copy for a builtin).
>>
>> For the __sync functions it's unfortunate that the library function has
>> the same 'name' as the builtin and the builtin doesn't have an alternate
>> spelling.  So - can't we just add __builtin__sync_... spellings and use
>>
>> __sync_synchronize ()
>> {
>>   __builtin_sync_syncronize ();
>> }
>>
>> ?  (what if __builtin_sync_syncronize expands to a libcall?  if it can't,
>> what's the point of the library function?)
>
> It can't expand to a libcall in nomips16 mode.  It always expands to a
> libcall in mips16 mode.  The point is to provide out-of-line nomips16
> functions that the mips16 code can call.
>
>> Why does a simple
>>
>> __sync_synchronize ()
>> {
>>   __sync_synchronize ();
>> }
>>
>> not work?  On x86_64 I get from that:
>>
>> __sync_synchronize:
>> .LFB0:
>> .cfi_startproc
>> mfence
>> ret
>> .cfi_endproc
>>
>> (similar to how you can have a definition of memcpy () and have
>> another memcpy inside it inline-expanded)
>
> Is that with optimisation enabled?  -O2 gives me:
>
> __sync_synchronize:
> .LFB0:
> .cfi_startproc
> .p2align 4,,10
> .p2align 3
> .L2:
> jmp .L2
> .cfi_endproc
> .LFE0:
>
> We do want to compile this stuff with optimisation enabled.

I compiled with -O1 only.  Yes, at -O2 I get the infinite loop.

Eventually we should simply not build cgraph edges _from_ calls
to builtins?  Or disable tail recursion for builtin calls (tail-recursion
is what does this optimization).

Honza?  For tailr this boils down to symtab_semantically_equivalent_p ().
I suppose we don't want to change that but eventually special case
builtins in tailr, thus

Index: gcc/tree-tailcall.c
===
--- gcc/tree-tailcall.c (revision 203409)
+++ gcc/tree-tailcall.c (working copy)
@@ -446,7 +446,9 @@ find_tail_calls (basic_block bb, struct
   /* We found the call, check whether it is suitable.  */
   tail_recursion = false;
   func = gimple_call_fndecl (call);
-  if (func && recursive_call_p (current_function_decl, func))
+  if (func
+  && ! DECL_BUILT_IN (func)
+  && recursive_call_p (current_function_decl, func))
 {
   tree arg;

which makes -O2 not turn it into an infinite loop (possibly also applies
to the original code with the alias declaration?)

Thanks,
Richard.

> Thanks,
> Richard


[PATCH] Mark overflowed constants in dumps, allow a_2(D)(ab)

2013-10-11 Thread Richard Biener

This adjusts dumping to show differences in TREE_OVERFLOW on
INTEGER_CSTs in the IL (to make the issue causing VRP differences
visible) and to also dump whether an abnormal SSA name is
a default def.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2013-10-11  Richard Biener  

* tree-pretty-print.c (dump_generic_node): Allow to dump
both (D) and (ab) for SSA_NAMEs.  Mark INTEGER_CSTs with
(OVF) if TREE_OVERFLOW is set.

Index: gcc/tree-pretty-print.c
===
--- gcc/tree-pretty-print.c (revision 203356)
+++ gcc/tree-pretty-print.c (working copy)
@@ -1065,6 +1065,8 @@ dump_generic_node (pretty_printer *buffe
   else
pp_double_int (buffer, tree_to_double_int (node),
   TYPE_UNSIGNED (TREE_TYPE (node)));
+  if (TREE_OVERFLOW (node))
+   pp_string (buffer, "(OVF)");
   break;
 
 case REAL_CST:
@@ -2052,10 +2054,10 @@ dump_generic_node (pretty_printer *buffe
   spc, flags, false);
   pp_underscore (buffer);
   pp_decimal_int (buffer, SSA_NAME_VERSION (node));
+  if (SSA_NAME_IS_DEFAULT_DEF (node))
+   pp_string (buffer, "(D)");
   if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (node))
pp_string (buffer, "(ab)");
-  else if (SSA_NAME_IS_DEFAULT_DEF (node))
-   pp_string (buffer, "(D)");
   break;
 
 case WITH_SIZE_EXPR:


Re: [PATCH, rs6000] Fix variable permute control vectors for little endian

2013-10-11 Thread David Edelsohn
On Wed, Oct 9, 2013 at 7:11 PM, Bill Schmidt
 wrote:
> Hi,
>
> This is a follow-up to the recent patch that fixed constant permute
> control vectors for little endian.  When the control vector is constant,
> we can adjust the constant and use a vperm without increasing code size.
> When the control vector is unknown, however, we have to generate two
> additional instructions to subtract each element of the control vector
> from 31 (equivalently, from -1, since only 5 bits are pertinent).  This
> patch adds the additional code generation.
>
> There are two main paths to the affected permutes:  via the known
> pattern vec_perm, and via an altivec builtin.  The builtin path
> causes a little difficulty because there's no way to dispatch a builtin
> to two different insns for BE and LE.  I solved this by adding two new
> unspecs for the builtins (UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X).  The
> insns for the builtins are changed from a define_insn to a
> define_insn_and_split.  We create the _X forms at expand time and later
> split them into the correct sequences for BE and LE, using the "real"
> UNSPEC_VPERM and UNSPEC_VPERM_UNS to generate the vperm instruction.
>
> For the path via the known pattern, I added a new routine in rs6000.c in
> similar fashion to the solution for the constant control vector case.
>
> When the permute control vector is a rotate vector loaded by lvsl or
> lvsr, we can generate the desired control vector more cheaply by simply
> changing to use the opposite instruction.  We are already doing that
> when expanding an unaligned load.  The changes in vector.md avoid
> undoing that effort by circumventing the subtract-from-splat (going
> straight to the UNSPEC_VPERM).
>
> I bootstrapped and tested this for big endian on
> powerpc64-unknown-linux-gnu with no new regressions.  I did the same for
> little endian on powerpc64le-unknown-linux-gnu.  Here the results were
> slightly mixed: the changes fix 32 test failures, but expose an
> unrelated bug in 9 others when -mvsx is permitted on LE (not currently
> allowed).  The bug is a missing permute for a vector load in the
> unaligned vector load logic that will be fixed in a subsequent patch.
>
> Is this okay for trunk?
>
> Thanks,
> Bill
>
>
> 2013-10-09  Bill Schmidt  
>
> * config/rs6000/vector.md (vec_realign_load): Generate vperm
> directly to circumvent subtract from splat{31} workaround.
> * config/rs6000/rs6000-protos.h (altivec_expand_vec_perm_le): New
> prototype.
> * config/rs6000/rs6000.c (altivec_expand_vec_perm_le): New.
> * config/rs6000/altivec.md (define_c_enum "unspec"): Add
> UNSPEC_VPERM_X and UNSPEC_VPERM_UNS_X.
> (altivec_vperm_): Convert to define_insn_and_split to
> separate big and little endian logic.
> (*altivec_vperm__internal): New define_insn.
> (altivec_vperm__uns): Convert to define_insn_and_split to
> separate big and little endian logic.
> (*altivec_vperm__uns_internal): New define_insn.
> (vec_permv16qi): Add little endian logic.

Okay.

Thanks, David


[PATCH, rs6000] Handle missing permute splits for V2DF/V4SF in little endian

2013-10-11 Thread Bill Schmidt
Hi,

In my previous patch to split LE VSX loads and stores to introduce
permutes, I managed to miss the vector float modes.  This patch corrects
the oversight, fixing up a few more test failures.

Bootstrapped and tested on both powerpc64-unknown-linux-gnu and
powerpc64le-unknown-linux-gnu with no regressions.  Ok for trunk?

Thanks,
Bill


2013-10-11  Bill Schmidt  

* config/rs6000/vsx.md (*vsx_le_perm_load_v2di): Generalize to
handle vector float as well.
(*vsx_le_perm_load_v4si): Likewise.
(*vsx_le_perm_store_v2di): Likewise.
(*vsx_le_perm_store_v4si): Likewise.


Index: gcc/config/rs6000/vsx.md
===
--- gcc/config/rs6000/vsx.md(revision 203246)
+++ gcc/config/rs6000/vsx.md(working copy)
@@ -219,18 +219,18 @@
 
 ;; The patterns for LE permuted loads and stores come before the general
 ;; VSX moves so they match first.
-(define_insn_and_split "*vsx_le_perm_load_v2di"
-  [(set (match_operand:V2DI 0 "vsx_register_operand" "=wa")
-(match_operand:V2DI 1 "memory_operand" "Z"))]
+(define_insn_and_split "*vsx_le_perm_load_"
+  [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+(match_operand:VSX_D 1 "memory_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   [(set (match_dup 2)
-(vec_select:V2DI
+(vec_select:
   (match_dup 1)
   (parallel [(const_int 1) (const_int 0)])))
(set (match_dup 0)
-(vec_select:V2DI
+(vec_select:
   (match_dup 2)
   (parallel [(const_int 1) (const_int 0)])))]
   "
@@ -242,19 +242,19 @@
   [(set_attr "type" "vecload")
(set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_load_v4si"
-  [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa")
-(match_operand:V4SI 1 "memory_operand" "Z"))]
+(define_insn_and_split "*vsx_le_perm_load_"
+  [(set (match_operand:VSX_W 0 "vsx_register_operand" "=wa")
+(match_operand:VSX_W 1 "memory_operand" "Z"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   [(set (match_dup 2)
-(vec_select:V4SI
+(vec_select:
   (match_dup 1)
   (parallel [(const_int 2) (const_int 3)
  (const_int 0) (const_int 1)])))
(set (match_dup 0)
-(vec_select:V4SI
+(vec_select:
   (match_dup 2)
   (parallel [(const_int 2) (const_int 3)
  (const_int 0) (const_int 1)])))]
@@ -333,18 +333,18 @@
   [(set_attr "type" "vecload")
(set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_store_v2di"
-  [(set (match_operand:V2DI 0 "memory_operand" "=Z")
-(match_operand:V2DI 1 "vsx_register_operand" "+wa"))]
+(define_insn_and_split "*vsx_le_perm_store_"
+  [(set (match_operand:VSX_D 0 "memory_operand" "=Z")
+(match_operand:VSX_D 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   [(set (match_dup 2)
-(vec_select:V2DI
+(vec_select:
   (match_dup 1)
   (parallel [(const_int 1) (const_int 0)])))
(set (match_dup 0)
-(vec_select:V2DI
+(vec_select:
   (match_dup 2)
   (parallel [(const_int 1) (const_int 0)])))]
   "
@@ -356,19 +356,19 @@
   [(set_attr "type" "vecstore")
(set_attr "length" "8")])
 
-(define_insn_and_split "*vsx_le_perm_store_v4si"
-  [(set (match_operand:V4SI 0 "memory_operand" "=Z")
-(match_operand:V4SI 1 "vsx_register_operand" "+wa"))]
+(define_insn_and_split "*vsx_le_perm_store_"
+  [(set (match_operand:VSX_W 0 "memory_operand" "=Z")
+(match_operand:VSX_W 1 "vsx_register_operand" "+wa"))]
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   "#"
   "!BYTES_BIG_ENDIAN && TARGET_VSX"
   [(set (match_dup 2)
-(vec_select:V4SI
+(vec_select:
   (match_dup 1)
   (parallel [(const_int 2) (const_int 3)
 (const_int 0) (const_int 1)])))
(set (match_dup 0)
-(vec_select:V4SI
+(vec_select:
   (match_dup 2)
   (parallel [(const_int 2) (const_int 3)
 (const_int 0) (const_int 1)])))]




Re: Optimize callers using nonnull attribute

2013-10-11 Thread Jakub Jelinek
On Mon, Oct 07, 2013 at 03:52:25PM +0200, Marc Glisse wrote:
> 2013-10-08  Marc Glisse  
> 
>   PR tree-optimization/58480
> gcc/
>   * tree-vrp.c (infer_nonnull_range): New function.
>   (infer_value_range): Call infer_nonnull_range.

This broke whole bunch of OpenMP tests.  Internal calls
have NULL gimple_call_fntype.

Fixed thusly, committed as obvious to trunk.

OT, do you plan to define ATTRIBUTE_RETURNS_NONNULL for
GCC_VERSION >= 4009 in ansidecl.h and use it on the various xmalloc
etc. prototypes?

2013-10-11  Jakub Jelinek  

* tree-vrp.c (infer_nonnull_range): Use is_gimple_call,
ignore internal calls.

--- gcc/tree-vrp.c.jj   2013-10-10 18:28:15.0 +0200
+++ gcc/tree-vrp.c  2013-10-11 14:41:22.625280236 +0200
@@ -4484,7 +4484,7 @@ infer_nonnull_range (gimple stmt, tree o
   if (num_loads + num_stores > 0)
 return true;
 
-  if (gimple_code (stmt) == GIMPLE_CALL)
+  if (is_gimple_call (stmt) && !gimple_call_internal_p (stmt))
 {
   tree fntype = gimple_call_fntype (stmt);
   tree attrs = TYPE_ATTRIBUTES (fntype);


Jakub


Re: [PATCH, rs6000] Handle missing permute splits for V2DF/V4SF in little endian

2013-10-11 Thread David Edelsohn
On Fri, Oct 11, 2013 at 8:48 AM, Bill Schmidt
 wrote:
> Hi,
>
> In my previous patch to split LE VSX loads and stores to introduce
> permutes, I managed to miss the vector float modes.  This patch corrects
> the oversight, fixing up a few more test failures.
>
> Bootstrapped and tested on both powerpc64-unknown-linux-gnu and
> powerpc64le-unknown-linux-gnu with no regressions.  Ok for trunk?
>
> Thanks,
> Bill
>
>
> 2013-10-11  Bill Schmidt  
>
> * config/rs6000/vsx.md (*vsx_le_perm_load_v2di): Generalize to
> handle vector float as well.
> (*vsx_le_perm_load_v4si): Likewise.
> (*vsx_le_perm_store_v2di): Likewise.
> (*vsx_le_perm_store_v4si): Likewise.

Okay.

Thanks, David


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread David Edelsohn
Jakub Jelinek  writes:
> On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
>> asm(".alias __sync_synchronize sync_synchronize");
>
> It is .set, but not everywhere.
> /* The MIPS assembler has different syntax for .set. We set it to
>.dummy to trap any errors.  */
> #undef SET_ASM_OP
> #define SET_ASM_OP "\t.dummy\t"
> But perhaps it would require fewer variants than providing inline asm
> of the __sync_* builtin by hand for all the targets that need it.

Please remember that GCC supports non-GNU/Linux and ELF targets.

Thanks, David


Re: [PATCH i386 3/8] [AVX512] [2/n] Add AVX-512 patterns: Fix missing `v' constraint.

2013-10-11 Thread Kirill Yukhin
Hello,

On 09 Oct 14:20, Richard Henderson wrote:
> On 10/09/2013 03:24 AM, Kirill Yukhin wrote:
> > Here's 2nd subpatch. It fixes missing `v' constraints.
> 
> And one v constraint that shouldn't have been.
Exactly!

> Ok.
Checked into main trunk: http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00379.html

Thanks, K


Re: [PATCH i386 3/8] [AVX512] [3/n] Add AVX-512 patterns: VF1 and VI iterators.

2013-10-11 Thread Kirill Yukhin
Hello,
On 09 Oct 14:25, Richard Henderson wrote:
> On 10/09/2013 03:24 AM, Kirill Yukhin wrote:
> > Here's 3rd subpatch. It extends VF1 and VI iterators.
> 
> Ok.
Thanks, checked into main trunk: 
http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00380.html

K


Re: Optimize callers using nonnull attribute

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, Jakub Jelinek wrote:


On Mon, Oct 07, 2013 at 03:52:25PM +0200, Marc Glisse wrote:

2013-10-08  Marc Glisse  

PR tree-optimization/58480
gcc/
* tree-vrp.c (infer_nonnull_range): New function.
(infer_value_range): Call infer_nonnull_range.


This broke whole bunch of OpenMP tests.  Internal calls
have NULL gimple_call_fntype.


Ah, in a first version I was checking (fntype != 0), but since nothing 
complained when I removed it, I assumed it could never be 0...



Fixed thusly, committed as obvious to trunk.


Thanks.


OT, do you plan to define ATTRIBUTE_RETURNS_NONNULL for
GCC_VERSION >= 4009 in ansidecl.h and use it on the various xmalloc
etc. prototypes?


I was planning to at least have a look at some point. I can do that now.

--
Marc Glisse


Re: [patch] omp-low.h

2013-10-11 Thread Andrew MacLeod

On 10/11/2013 04:16 AM, Jakub Jelinek wrote:

On Fri, Oct 11, 2013 at 10:12:17AM +0200, Richard Biener wrote:

I can spot the end of a comment containing "OpenMP Region Tree"
that you didn't move - probably an oversight.

Otherwise ok.

Please wait for the gomp4 merge (which I'm going to commit today).



wont be until next week anwyay :-)

Andrew


Re: RFA: Remove alias usage from libgcc/sync.c

2013-10-11 Thread Richard Sandiford
Richard Biener  writes:
> On Fri, Oct 11, 2013 at 12:48 PM, Richard Sandiford
>  wrote:
>> Richard Biener  writes:
>>> On Fri, Oct 11, 2013 at 11:43 AM, Richard Sandiford
>>>  wrote:
 Jakub Jelinek  writes:
> On Fri, Oct 11, 2013 at 10:17:41AM +0200, Richard Biener wrote:
>> asm(".alias __sync_synchronize sync_synchronize");
>
> It is .set, but not everywhere.
> /* The MIPS assembler has different syntax for .set. We set it to
>.dummy to trap any errors.  */
> #undef SET_ASM_OP
> #define SET_ASM_OP "\t.dummy\t"
> But perhaps it would require fewer variants than providing inline asm
> of the __sync_* builtin by hand for all the targets that need it.

 Yeah, that's why I prefer the sed approach.  GCC knows how to do exactly
 what we want, and what syntax to use.  We just need to take its output and
 change the function name.

 And like Richard says, parsing top-level asms would be fair game,
 especially if GCC and binutils ever were integrated (for libgccjit.so).
 So using top-level asms seems like putting off the inevitable
 (albeit putting it off further than __asm renaming).

 Do either of you object to the sed thing?
>>>
>>> Well, ideally there would be a way to not lie about symbol names to GCC.
>>> That is, have a "native" way of telling GCC what to do here (which is
>>> as far as I understand to emit the code for the builtin-handled $SYM
>>> in a function with $SYM - provide the out-of-line copy for a builtin).
>>>
>>> For the __sync functions it's unfortunate that the library function has
>>> the same 'name' as the builtin and the builtin doesn't have an alternate
>>> spelling.  So - can't we just add __builtin__sync_... spellings and use
>>>
>>> __sync_synchronize ()
>>> {
>>>   __builtin_sync_syncronize ();
>>> }
>>>
>>> ?  (what if __builtin_sync_syncronize expands to a libcall?  if it can't,
>>> what's the point of the library function?)
>>
>> It can't expand to a libcall in nomips16 mode.  It always expands to a
>> libcall in mips16 mode.  The point is to provide out-of-line nomips16
>> functions that the mips16 code can call.
>>
>>> Why does a simple
>>>
>>> __sync_synchronize ()
>>> {
>>>   __sync_synchronize ();
>>> }
>>>
>>> not work?  On x86_64 I get from that:
>>>
>>> __sync_synchronize:
>>> .LFB0:
>>> .cfi_startproc
>>> mfence
>>> ret
>>> .cfi_endproc
>>>
>>> (similar to how you can have a definition of memcpy () and have
>>> another memcpy inside it inline-expanded)
>>
>> Is that with optimisation enabled?  -O2 gives me:
>>
>> __sync_synchronize:
>> .LFB0:
>> .cfi_startproc
>> .p2align 4,,10
>> .p2align 3
>> .L2:
>> jmp .L2
>> .cfi_endproc
>> .LFE0:
>>
>> We do want to compile this stuff with optimisation enabled.
>
> I compiled with -O1 only.  Yes, at -O2 I get the infinite loop.
>
> Eventually we should simply not build cgraph edges _from_ calls
> to builtins?  Or disable tail recursion for builtin calls (tail-recursion
> is what does this optimization).
>
> Honza?  For tailr this boils down to symtab_semantically_equivalent_p ().
> I suppose we don't want to change that but eventually special case
> builtins in tailr, thus
>
> Index: gcc/tree-tailcall.c
> ===
> --- gcc/tree-tailcall.c (revision 203409)
> +++ gcc/tree-tailcall.c (working copy)
> @@ -446,7 +446,9 @@ find_tail_calls (basic_block bb, struct
>/* We found the call, check whether it is suitable.  */
>tail_recursion = false;
>func = gimple_call_fndecl (call);
> -  if (func && recursive_call_p (current_function_decl, func))
> +  if (func
> +  && ! DECL_BUILT_IN (func)
> +  && recursive_call_p (current_function_decl, func))
>  {
>tree arg;
>
> which makes -O2 not turn it into an infinite loop (possibly also applies
> to the original code with the alias declaration?)

If that's OK then I'm certainly happy with it :-)  FWIW, the alias case
was first optimised from __sync_synchronize->sync_synchronize, before it
got converted into a tail call.  That happened very early in the pipeline
and I suppose would stop the built-in expansion from kicking in,
even with tail calls disabled.  But if we say that:

foo () { foo (); }

is a supported way of defining out-of-line versions of built-in foo,
then that's much more convenient than the aliases anyway.

Thanks,
Richard


Re: [PATCH i386 3/8] [AVX512] [4/n] Add AVX-512 patterns: V iterator.

2013-10-11 Thread Kirill Yukhin
Hello,
On 09 Oct 14:32, Richard Henderson wrote:
> On 10/09/2013 03:25 AM, Kirill Yukhin wrote:
> > Here's 4th subpatch. It extends V iterator.
> Ok.
Thanks, checked into main trunk: 
http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00382.html

K


Re: [PATCH i386 3/8] [AVX512] [5/n] Add AVX-512 patterns: Introduce `multdiv' code iterator.

2013-10-11 Thread Kirill Yukhin
Hello,
On 09 Oct 14:34, Richard Henderson wrote:
> On 10/09/2013 03:25 AM, Kirill Yukhin wrote:
> > Here's 5th subpatch. It introduces `multdiv' code iterator.
> 
> This is the sort of patch I like to see.  It's the first one
> you've sent that's done exactly one thing.  Congratulations.
> 
> Ok.
Thanks, checked into main trunk: 
http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00383.html

K


Re: [PATCH i386 3/8] [AVX512] [6/n] Add AVX-512 patterns: VI2 and VI124 iterators.

2013-10-11 Thread Kirill Yukhin
Hello
On 09 Oct 14:35, Richard Henderson wrote:
> On 10/09/2013 03:26 AM, Kirill Yukhin wrote:
> > Here's 6th subpatch. It extends VI2 and VI124 iterators.
> 
> Ok.
Thanks, checked into main trunk: 
http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00384.html

K


Re: [PATCH i386 3/8] [AVX512] [5/n] Add AVX-512 patterns: Introduce `multdiv' code iterator.

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 05:39:05PM +0400, Kirill Yukhin wrote:
> Hello,
> On 09 Oct 14:34, Richard Henderson wrote:
> > On 10/09/2013 03:25 AM, Kirill Yukhin wrote:
> > > Here's 5th subpatch. It introduces `multdiv' code iterator.
> > 
> > This is the sort of patch I like to see.  It's the first one
> > you've sent that's done exactly one thing.  Congratulations.
> > 
> > Ok.
> Thanks, checked into main trunk: 
> http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00383.html

Everybody can read gcc-cvs mailing list, it's archives or svn log
or git log, there is no need to duplicate this info to gcc-patches
mailing list.

Jakub


Re: [PATCH i386 3/8] [AVX512] [7/n] Add AVX-512 patterns: VI4 and VI8 iterators.

2013-10-11 Thread Kirill Yukhin
Hello,
On 09 Oct 14:37, Richard Henderson wrote:
> On 10/09/2013 03:26 AM, Kirill Yukhin wrote:
> > Here's 7th subpatch. It extends VI4 and VI8 iterators.
> 
> Ok.
Thanks, checked into main trunk: 
http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00385.html

K


Re: [PATCH i386 3/8] [AVX512] [5/n] Add AVX-512 patterns: Introduce `multdiv' code iterator.

2013-10-11 Thread Kirill Yukhin
On 11 Oct 15:43, Jakub Jelinek wrote:
> On Fri, Oct 11, 2013 at 05:39:05PM +0400, Kirill Yukhin wrote:
> > Thanks, checked into main trunk: 
> > http://gcc.gnu.org/ml/gcc-cvs/2013-10/msg00383.html
> 
> Everybody can read gcc-cvs mailing list, it's archives or svn log
> or git log, there is no need to duplicate this info to gcc-patches
> mailing list.
Okay, I'll stop sending these updates!

Thanks, K


Re: [buildrobot] OMP: r203408 probably needs another operator& returning bool

2013-10-11 Thread Jakub Jelinek
Hi!

On Fri, Oct 11, 2013 at 02:44:16PM +0200, Jan-Benedict Glaw wrote:
> The recent change probably gave us this[1]:
> 
> g++ -c  -DIN_GCC_FRONTEND -DIN_GCC_FRONTEND -g -O2 -DIN_GCC 
> -DCROSS_DIRECTORY_STRUCTURE  -fno-exceptions -fno-rtti 
> -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings 
> -Wcast-qual -Wmissing-format-attribute -pedantic -Wno-long-long 
> -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. 
> -Ic-family -I../../../../gcc/gcc -I../../../../gcc/gcc/c-family 
> -I../../../../gcc/gcc/../include -I../../../../gcc/gcc/../libcpp/include  
> -I../../../../gcc/gcc/../libdecnumber 
> -I../../../../gcc/gcc/../libdecnumber/dpd -I../libdecnumber 
> -I../../../../gcc/gcc/../libbacktrace-o c-family/c-omp.o -MT 
> c-family/c-omp.o -MMD -MP -MF c-family/.deps/c-omp.TPo 
> ../../../../gcc/gcc/c-family/c-omp.c
> ../../../../gcc/gcc/c-family/c-omp.c: In function ‘void 
> c_omp_split_clauses(location_t, tree_code, omp_clause_mask, tree, 
> tree_node**)’:
> ../../../../gcc/gcc/c-family/c-omp.c:634:12: error: could not convert 
> ‘mask.omp_clause_mask::operator&(omp_clause_mask(1ul).omp_clause_mask::operator<<(22))’
>  from ‘omp_clause_mask’ to ‘bool’
>if (mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS))
> ^

The original omp_clause_mask fallback was tested, but since then several
changes have been made and I forgot to retest it; when HOST_WIDE_INT is
64-bit omp_clause_mask is a normal unsigned HOST_WIDE_INT and no C++
stuff is used to emulate 64-bit bitmask.

I have tested two different versions of a fix for this, dunno which one is
preferrable, Jason?

With the operator bool (), there is ambiguity in the
if (((mask >> something) & 1) == 0)
tests (so had to use OMP_CLAUSE_MASK_{1,0} instead of {1,0}), another
possibility is not to add operator bool () overload that introduces that
ambiguity, but then if (mask & something) needs to be replaced with
if ((mask & something) != 0) and operator != (int) added.
I guess I slightly prefer the first patch since it is smaller.

Jakub
2013-10-11  Jakub Jelinek  

c-family/
* c-common.h (OMP_CLAUSE_MASK_0): Define.
(omp_clause_mask::operator bool ()): New method.
c/
* c-parser.c (c_parser_omp_all_clauses): Use OMP_CLAUSE_MASK_1
and OMP_CLAUSE_MASK_0 instead of 1 and 0 to avoid ambiguity.
cp/
* parser.c (cp_parser_omp_all_clauses): Use OMP_CLAUSE_MASK_1
and OMP_CLAUSE_MASK_0 instead of 1 and 0 to avoid ambiguity.

--- gcc/c-family/c-common.h.jj  2013-10-11 11:23:59.0 +0200
+++ gcc/c-family/c-common.h 2013-10-11 15:40:32.581791018 +0200
@@ -1036,6 +1036,7 @@ extern void pp_dir_change (cpp_reader *,
 /* In c-omp.c  */
 #if HOST_BITS_PER_WIDE_INT >= 64
 typedef unsigned HOST_WIDE_INT omp_clause_mask;
+# define OMP_CLAUSE_MASK_0 ((omp_clause_mask) 0)
 # define OMP_CLAUSE_MASK_1 ((omp_clause_mask) 1)
 #else
 struct omp_clause_mask
@@ -1052,6 +1053,7 @@ struct omp_clause_mask
   inline omp_clause_mask operator >> (int);
   inline omp_clause_mask operator << (int);
   inline bool operator == (omp_clause_mask) const;
+  inline operator bool () const;
   unsigned HOST_WIDE_INT low, high;
 };
 
@@ -1156,6 +1158,13 @@ omp_clause_mask::operator == (omp_clause
   return low == b.low && high == b.high;
 }
 
+inline
+omp_clause_mask::operator bool () const
+{
+  return low || high;
+}
+
+# define OMP_CLAUSE_MASK_0 omp_clause_mask (0)
 # define OMP_CLAUSE_MASK_1 omp_clause_mask (1)
 #endif
 
--- gcc/c/c-parser.c.jj 2013-10-11 11:23:59.0 +0200
+++ gcc/c/c-parser.c2013-10-11 15:41:35.864464738 +0200
@@ -10644,7 +10644,8 @@ c_parser_omp_all_clauses (c_parser *pars
 
   first = false;
 
-  if (((mask >> c_kind) & 1) == 0 && !parser->error)
+  if (((mask >> c_kind) & OMP_CLAUSE_MASK_1) == OMP_CLAUSE_MASK_0
+ && !parser->error)
{
  /* Remove the invalid clause(s) from the list to avoid
 confusing the rest of the compiler.  */
--- gcc/cp/parser.c.jj  2013-10-11 11:23:59.0 +0200
+++ gcc/cp/parser.c 2013-10-11 15:40:50.939719303 +0200
@@ -27865,7 +27865,7 @@ cp_parser_omp_all_clauses (cp_parser *pa
 
   first = false;
 
-  if (((mask >> c_kind) & 1) == 0)
+  if (((mask >> c_kind) & OMP_CLAUSE_MASK_1) == OMP_CLAUSE_MASK_0)
{
  /* Remove the invalid clause(s) from the list to avoid
 confusing the rest of the compiler.  */
2013-10-11  Jakub Jelinek  

* c-common.h (omp_clause_mask::operator != (int)): New method.
* c-omp.c (c_omp_split_clauses): Use if ((mask & something) != 0)
instead of if (mask & something) tests everywhere.

--- gcc/c-family/c-common.h.jj  2013-10-11 11:23:59.0 +0200
+++ gcc/c-family/c-common.h 2013-10-11 15:33:12.954073363 +0200
@@ -1052,6 +1052,7 @@ struct omp_clause_mask
   inline omp_clause_mask operator >> (int);
   inline omp_clause_mask operator << (int);
   inline bool operator ==

Re: [C++ Patch] PR 31671

2013-10-11 Thread Jason Merrill

On 10/11/2013 04:51 AM, Paolo Carlini wrote:

The 6th time we get there when compiling the testcase, that is when we
are converting to int&, at that line this is expr_type:

  

Aha.  The patch is OK then.

Jason



Re: New attribute: returns_nonnull

2013-10-11 Thread Marc Glisse

On Mon, 7 Oct 2013, Marc Glisse wrote:


2013-10-08  Marc Glisse  

[...]

gcc/
* doc/extend.texi (returns_nonnull): New function attribute.


By the way, I'll commit this obvious doc fix next chance I get:

2013-10-12  Marc Glisse  

* doc/extend.texi (returns_nonnull): Remove arguments.

--- extend.texi (revision 203436)
+++ extend.texi (working copy)
@@ -3310,7 +3310,7 @@ my_memcpy (void *dest, const void *src,
 __attribute__((nonnull));
 @end smallexample

-@item returns_nonnull (@var{arg-index}, @dots{})
+@item returns_nonnull
 @cindex @code{returns_nonnull} function attribute
 The @code{returns_nonnull} attribute specifies that the function
 return value should be a non-null pointer.  For instance, the declaration:



--
Marc Glisse


Re: Optimize callers using nonnull attribute

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, Marc Glisse wrote:


On Fri, 11 Oct 2013, Jakub Jelinek wrote:


OT, do you plan to define ATTRIBUTE_RETURNS_NONNULL for
GCC_VERSION >= 4009 in ansidecl.h and use it on the various xmalloc
etc. prototypes?


I was planning to at least have a look at some point. I can do that now.


I notice that our definition:

#  define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ (m)))

is not very convenient.

I can't get the no-argument version of nonnull, and if I want to specify 2 
non-null arguments, I have to specify the attribute twice. IMHO this 
definition would have been more convenient:


#  define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ m))

but I guess it is too late to change that.

--
Marc Glisse


Re: [buildrobot] OMP: r203408 probably needs another operator& returning bool

2013-10-11 Thread Jason Merrill

On 10/11/2013 09:56 AM, Jakub Jelinek wrote:

With the operator bool (), there is ambiguity in the
if (((mask >> something) & 1) == 0)
tests (so had to use OMP_CLAUSE_MASK_{1,0} instead of {1,0})


This is an example of why operator bool is a bad idea in general.  If we 
were using C++11, we could make the operator 'explicit' so that it's 
only used in actual boolean context, but without 'explicit' it gets hit 
too often.


To deal with this issue, C++98 programmers tend to use a conversion to 
pointer-to-member, which is also testable as a boolean, instead.


http://www.artima.com/cppsource/safebool.html


another
possibility is not to add operator bool () overload that introduces that
ambiguity, but then if (mask & something) needs to be replaced with
if ((mask & something) != 0) and operator != (int) added.
I guess I slightly prefer the first patch since it is smaller.


Since the coding standards say "Conversion operators should be avoided" 
(because they can't be explicit), I think this is the way to go.


But it would be better to add operator!=(omp_clause_mask).

Jason



Re: Optimize callers using nonnull attribute

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, Jakub Jelinek wrote:


OT, do you plan to define ATTRIBUTE_RETURNS_NONNULL for
GCC_VERSION >= 4009 in ansidecl.h and use it on the various xmalloc
etc. prototypes?


I just read this note in libiberty/concat.c:

This function uses xmalloc() which is expected to be a front end
function to malloc() that deals with low memory situations.  In
typical use, if malloc() returns NULL then xmalloc() diverts to an
error handler routine which never returns, and thus xmalloc will
never return a NULL pointer.  If the client application wishes to
deal with low memory situations itself, it should supply an xmalloc
that just directly invokes malloc and blindly returns whatever
malloc returns.

I am afraid that if I add the returns_nonnull attribute to xmalloc in 
include/libiberty.h, it will break this supported use, no? Or is this 
comment out-of-date?


--
Marc Glisse


Re: Optimize callers using nonnull attribute

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 04:21:04PM +0200, Marc Glisse wrote:
> On Fri, 11 Oct 2013, Jakub Jelinek wrote:
> 
> >OT, do you plan to define ATTRIBUTE_RETURNS_NONNULL for
> >GCC_VERSION >= 4009 in ansidecl.h and use it on the various xmalloc
> >etc. prototypes?
> 
> I just read this note in libiberty/concat.c:
> 
> This function uses xmalloc() which is expected to be a front end
> function to malloc() that deals with low memory situations.  In
> typical use, if malloc() returns NULL then xmalloc() diverts to an
> error handler routine which never returns, and thus xmalloc will
> never return a NULL pointer.  If the client application wishes to
> deal with low memory situations itself, it should supply an xmalloc
> that just directly invokes malloc and blindly returns whatever
> malloc returns.
> 
> I am afraid that if I add the returns_nonnull attribute to xmalloc
> in include/libiberty.h, it will break this supported use, no? Or is
> this comment out-of-date?

That comment looks weird, because the libiberty/xmalloc.c clearly never
returns NULL, so you'd need to supply a different xmalloc (#define xmalloc
malloc, something else) and I don't see how concat function would handle that
case gracefully.  If
newstr = XNEWVEC (char, vconcat_length (first, args) + 1);
results in NULL newstr, then vconcat_copy will just segfault, it stores at
least '\000' to that unconditionally.

Jakub


Re: [buildrobot] OMP: r203408 probably needs another operator& returning bool

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 10:11:21AM -0400, Jason Merrill wrote:
> >another
> >possibility is not to add operator bool () overload that introduces that
> >ambiguity, but then if (mask & something) needs to be replaced with
> >if ((mask & something) != 0) and operator != (int) added.
> >I guess I slightly prefer the first patch since it is smaller.
> 
> Since the coding standards say "Conversion operators should be
> avoided" (because they can't be explicit), I think this is the way
> to go.

We then violate the coding standard in vec.h:
/* Type to provide NULL values for vec.  This is used to
   provide nil initializers for vec instances.  Since vec must be
   a POD, we cannot have proper ctor/dtor for it.  To initialize
   a vec instance, you can assign it the value vNULL.  */
struct vnull
{
  template 
  operator vec () { return vec(); }
};
extern vnull vNULL;

but perhaps we can keep that as an exception and don't allow further
exceptions...

> But it would be better to add operator!=(omp_clause_mask).

Thanks, that works too, thus here is what I've committed.

2013-10-11  Jakub Jelinek  

* c-common.h (omp_clause_mask::operator !=): New method.
* c-omp.c (c_omp_split_clauses): Use if ((mask & something) != 0)
instead of if (mask & something) tests everywhere.

--- gcc/c-family/c-common.h.jj  2013-10-11 16:14:35.157176208 +0200
+++ gcc/c-family/c-common.h 2013-10-11 16:15:30.031891445 +0200
@@ -1052,6 +1052,7 @@ struct omp_clause_mask
   inline omp_clause_mask operator >> (int);
   inline omp_clause_mask operator << (int);
   inline bool operator == (omp_clause_mask) const;
+  inline bool operator != (omp_clause_mask) const;
   unsigned HOST_WIDE_INT low, high;
 };
 
@@ -1156,6 +1157,12 @@ omp_clause_mask::operator == (omp_clause
   return low == b.low && high == b.high;
 }
 
+inline bool
+omp_clause_mask::operator != (omp_clause_mask b) const
+{
+  return low != b.low || high != b.high;
+}
+
 # define OMP_CLAUSE_MASK_1 omp_clause_mask (1)
 #endif
 
--- gcc/c-family/c-omp.c.jj 2013-10-11 16:14:35.153176228 +0200
+++ gcc/c-family/c-omp.c2013-10-11 16:14:48.164106509 +0200
@@ -631,7 +631,7 @@ c_omp_split_clauses (location_t loc, enu
 cclauses[i] = NULL;
   /* Add implicit nowait clause on
  #pragma omp parallel {for,for simd,sections}.  */
-  if (mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS))
+  if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS)) != 0)
 switch (code)
   {
   case OMP_FOR:
@@ -691,10 +691,10 @@ c_omp_split_clauses (location_t loc, enu
  OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_SIMD];
  cclauses[C_OMP_CLAUSE_SPLIT_SIMD] = c;
}
- if (mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_SCHEDULE))
+ if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_SCHEDULE)) != 0)
{
- if (mask & (OMP_CLAUSE_MASK_1
- << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE))
+ if ((mask & (OMP_CLAUSE_MASK_1
+  << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE)) != 0)
{
  c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
OMP_CLAUSE_COLLAPSE);
@@ -729,19 +729,20 @@ c_omp_split_clauses (location_t loc, enu
   target and simd.  Put it on the outermost of those and
   duplicate on parallel.  */
case OMP_CLAUSE_FIRSTPRIVATE:
- if (mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS))
+ if ((mask & (OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_THREADS))
+ != 0)
{
- if (mask & ((OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS)
- | (OMP_CLAUSE_MASK_1
-<< PRAGMA_OMP_CLAUSE_DIST_SCHEDULE)))
+ if ((mask & ((OMP_CLAUSE_MASK_1 << PRAGMA_OMP_CLAUSE_NUM_TEAMS)
+  | (OMP_CLAUSE_MASK_1
+ << PRAGMA_OMP_CLAUSE_DIST_SCHEDULE))) != 0)
{
  c = build_omp_clause (OMP_CLAUSE_LOCATION (clauses),
OMP_CLAUSE_FIRSTPRIVATE);
  OMP_CLAUSE_DECL (c) = OMP_CLAUSE_DECL (clauses);
  OMP_CLAUSE_CHAIN (c) = cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL];
  cclauses[C_OMP_CLAUSE_SPLIT_PARALLEL] = c;
- if (mask & (OMP_CLAUSE_MASK_1
- << PRAGMA_OMP_CLAUSE_NUM_TEAMS))
+ if ((mask & (OMP_CLAUSE_MASK_1
+  << PRAGMA_OMP_CLAUSE_NUM_TEAMS)) != 0)
s = C_OMP_CLAUSE_SPLIT_TEAMS;
  else
s = C_OMP_CLAUSE_SPLIT_DISTRIBUTE;
@@ -751,14 +752,15 @@ c_omp_split_clauses (location_t loc, enu
   #pragma omp parallel{, for{, simd}, sections}.  */
s = C_OMP_CLAUSE_SPLIT_PARALLEL;
}
- else if (mask & (OMP_CLAUSE_MASK

Re: [C++ Patch] PR 58633

2013-10-11 Thread Jason Merrill

On 10/11/2013 06:28 AM, Paolo Carlini wrote:

The issue is a regression in 4_7/4_8 too, what should we do in those
branches? I'm thinking applying the change to 4_8 too and either not
fixing in 4_7 or just reverting the cp_parser_commit_to_tentative_parse
change which improved the diagnostic for 47277?


I think let's revert the diagnostic fix for both 4.7 and 4.8.

Jason



Re: [C++ Patch] PR 58633

2013-10-11 Thread Paolo Carlini

On 10/11/2013 03:59 PM, Jason Merrill wrote:

On 10/11/2013 06:28 AM, Paolo Carlini wrote:

The issue is a regression in 4_7/4_8 too, what should we do in those
branches? I'm thinking applying the change to 4_8 too and either not
fixing in 4_7 or just reverting the cp_parser_commit_to_tentative_parse
change which improved the diagnostic for 47277?


I think let's revert the diagnostic fix for both 4.7 and 4.8.

Safer, agreed.

Thanks,
Paolo.


Re: RFA [reload]: Fix PR other/58545

2013-10-11 Thread Joern Rennecke
On 11 October 2013 04:53, Jeff Law  wrote:
 > With your change it seems to me that we do a single round of spilling &
> caller-save setup, then align the stack, then restart.  The net result being
> we align the stack a lot more often.

Yes, but AFAICT, that should not result in more space being used,
because assign_stack_local uses ASLK_RECORD_PAD, so
assign_stack_local_1 will record any space
added for alignment purposes with add_frame_space, so it can be used
for a subsequent spill of suitable size.

Also, trying to do register elimination with an unaligned frame size
could lead to invalid stack slot addresses; if not ICEs, these could
lead to extra reloads, extra spill register needs, and thus ultimately
more
spills and larger frame size.


[PATCH i386] Use ordered comparisons for rounding builtins when -mno-ieee-fp

2013-10-11 Thread Alexander Monakov
Hello,

With -mno-ieee-fp, or when that flag is implied by other flags such as
-ffast-math, i386 backend will use ordered rather than unordered comparisons
(fcom* instead of fucom*, or their SSE counterparts).

Expansion of several rounding builtins currently uses unordered comparisons
always, unconditionally.  That looks historical, or an oversight rather than
deliberate, and the following patch makes expansion use ix86_fp_compare_mode
like in other places.  Bootstrapped and regtested on x86_64-linux.

2013-10-11  Alexander Monakov  

* config/i386/i386.c (ix86_expand_sse_compare_and_jump): Use mode
provided by ix86_fp_compare_mode instead of CCFPUmode.

testsuite/:
* gcc.target/i386/builtin-ucmp.c: New test.

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 21fc531..69650ff 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -38263,6 +38263,7 @@ static rtx
 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
   bool swap_operands)
 {
+  enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
   rtx label, tmp;
 
   if (swap_operands)
@@ -38273,9 +38274,9 @@ ix86_expand_sse_compare_and_jump (enum rtx_code code, 
rtx op0, rtx op1,
 }
 
   label = gen_label_rtx ();
-  tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
+  tmp = gen_rtx_REG (fpcmp_mode, FLAGS_REG);
   emit_insn (gen_rtx_SET (VOIDmode, tmp,
- gen_rtx_COMPARE (CCFPUmode, op0, op1)));
+ gen_rtx_COMPARE (fpcmp_mode, op0, op1)));
   tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
   tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
  gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
diff --git a/gcc/testsuite/gcc.target/i386/builtin-ucmp.c 
b/gcc/testsuite/gcc.target/i386/builtin-ucmp.c
new file mode 100644
index 000..709804c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/builtin-ucmp.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -mfpmath=sse -msse2" } */
+
+double foo(double a)
+{
+  return __builtin_round(a);
+}
+
+/* { dg-final { scan-assembler-not "ucom" } } */



Re: [patch] combine ICE fix

2013-10-11 Thread Cesar Philippidis
On 10/10/13 9:25 AM, Jakub Jelinek wrote:

> That looks broken.  You leave everything from the last size till the current
> one uninitialized, so it would only work if combine_split_insns
> always increments max_reg_num () by at most one.

Good catch.

> Furthermore, there are macros which should be used to access
> the fields, and, if the vector is ever going to be resized, supposedly
> it should be vec.h vector rather than just array.
> Or perhaps take into account:
> /* If a pass need to change these values in some magical way or the
>pass needs to have accurate values for these and is not using
>incremental df scanning, then it should use REG_N_SETS and
>REG_N_USES.  If the pass is doing incremental scanning then it
>should be getting the info from DF_REG_DEF_COUNT and
>DF_REG_USE_COUNT.  */
> and not use REG_N_SETS etc. but instead the df stuff.

I was thinking about converting that array to a vec. But I don't want to
touch more code than I have to right now. Is this OK as a stopgap?

Thanks for the review!

Cesar
2013-10-11  Cesar Philippidis  

gcc/
* regs.h (REG_N_GROW): New function. 
* combine.c (combine_split_insns): Call REG_N_GROW when
new registers are created.

Index: gcc/regs.h
===
--- gcc/regs.h  (revision 203289)
+++ gcc/regs.h  (working copy)
@@ -89,6 +89,20 @@ REG_N_SETS (int regno)
 #define SET_REG_N_SETS(N,V) (regstat_n_sets_and_refs[N].sets = V)
 #define INC_REG_N_SETS(N,V) (regstat_n_sets_and_refs[N].sets += V)
 
+/* Indexed by n, inserts new registers (old_regno+1)..new_regno.  */
+static inline void
+REG_N_GROW (int new_regno, int old_regno)
+{
+  regstat_n_sets_and_refs = XRESIZEVEC (struct regstat_n_sets_and_refs_t, 
+   regstat_n_sets_and_refs, new_regno+1);
+
+  for (int i = old_regno + 1; i <= new_regno; ++i)
+{
+  SET_REG_N_SETS (i, 1);
+  SET_REG_N_REFS (i, 1);
+}
+}
+
 /* Given a REG, return TRUE if the reg is a PARM_DECL, FALSE otherwise.  */
 extern bool reg_is_parm_p (rtx);
 
Index: gcc/combine.c
===
--- gcc/combine.c   (revision 203289)
+++ gcc/combine.c   (working copy)
@@ -518,7 +518,10 @@ combine_split_insns (rtx pattern, rtx insn)
   ret = split_insns (pattern, insn);
   nregs = max_reg_num ();
   if (nregs > reg_stat.length ())
-reg_stat.safe_grow_cleared (nregs);
+{
+  REG_N_GROW (nregs, reg_stat.length ());
+  reg_stat.safe_grow_cleared (nregs);
+}
   return ret;
 }
 


Re: Patch: Add #pragma ivdep support to the ME and C FE

2013-10-11 Thread Joseph S. Myers
On Thu, 10 Oct 2013, Tobias Burnus wrote:

> Joseph: Could you have a look at the C part? Thanks!

Your error "missing loop condition loop with IVDEP pragma" should, I 
think, say "loop condition in loop".  Since the pragma is "ivdep" 
(case-sensitive), it should also say % not IVDEP.  This 
restriction should be clarified in the documentation, which could do with 
examples.  There should be at least one execution test for correct 
execution of loops with this pragma, and at least one test for this 
diagnostic.  The documentation refers to "safelen" which sounds like 
implementation terminology; this is the user manual and needs to describe 
things in user terms, not in terms of the implementation.

-- 
Joseph S. Myers
jos...@codesourcery.com


[AArch64] Fix early-clobber operands to vtbx[1,3]

2013-10-11 Thread James Greenhalgh

Hi,

The vtbx intrinsics are implemented in assembly without noting
that their tmp1 operand is early-clobber. This can, when the
wind blows the wrong way, result in us making a total mess of
the state of registers.

Fix by marking the required operands as early-clobber.

Regression tested against aarch64.exp with no problems.

OK?

Thanks,
James

---
2013-10-11  James Greenhalgh  

* config/aarch64/arm_neon.h
(vtbx<1,3>_8): Fix register constriants.
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 482d7d0..f7c9db6 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -15636,7 +15636,7 @@ vtbx1_s8 (int8x8_t r, int8x8_t tab, int8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
   return result;
@@ -15652,7 +15652,7 @@ vtbx1_u8 (uint8x8_t r, uint8x8_t tab, uint8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
   return result;
@@ -15668,7 +15668,7 @@ vtbx1_p8 (poly8x8_t r, poly8x8_t tab, uint8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {%2.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "w"(temp), "w"(idx), "w"(r)
: /* No clobbers */);
   return result;
@@ -15723,7 +15723,7 @@ vtbx3_s8 (int8x8_t r, int8x8x3_t tab, int8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
   return result;
@@ -15742,7 +15742,7 @@ vtbx3_u8 (uint8x8_t r, uint8x8x3_t tab, uint8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
   return result;
@@ -15761,7 +15761,7 @@ vtbx3_p8 (poly8x8_t r, poly8x8x3_t tab, uint8x8_t idx)
 	   "cmhs %0.8b, %3.8b, %0.8b\n\t"
 	   "tbl %1.8b, {v16.16b - v17.16b}, %3.8b\n\t"
 	   "bsl %0.8b, %4.8b, %1.8b\n\t"
-   : "+w"(result), "=w"(tmp1)
+   : "+w"(result), "=&w"(tmp1)
: "Q"(temp), "w"(idx), "w"(r)
: "v16", "v17", "memory");
   return result;

[C++ Patch] PR 58466

2013-10-11 Thread Paolo Carlini

Hi,

this is just an ICE on invalid, but it's a 4_8/4_9 Regression, and, more 
importantly, we don't emit any sensible error message before ICEing. It 
manifests itself as TEMPLATE_PARM_INDEX unhandled by the main 
cxx_eval_constant_expression switch, and just adding the code to the 
bunch of those returned untouched leads to the same error message of 4_7 
and no ICE. Not sure at the moment whether something deeper is going on, 
though.


Tested x86_64-linux.

Thanks,
Paolo.

//
/cp
2013-10-11  Paolo Carlini  

PR c++/58466
* semantics.c (cxx_eval_constant_expression): Handle 
TEMPLATE_PARM_INDEX.

/testsuite
2013-10-11  Paolo Carlini  

PR c++/58466
* g++.dg/cpp0x/variadic145.C: New.
Index: cp/semantics.c
===
--- cp/semantics.c  (revision 203449)
+++ cp/semantics.c  (working copy)
@@ -9335,6 +9335,7 @@ cxx_eval_constant_expression (const constexpr_call
 case FUNCTION_DECL:
 case TEMPLATE_DECL:
 case LABEL_DECL:
+case TEMPLATE_PARM_INDEX:
   return t;
 
 case PARM_DECL:
Index: testsuite/g++.dg/cpp0x/variadic145.C
===
--- testsuite/g++.dg/cpp0x/variadic145.C(revision 0)
+++ testsuite/g++.dg/cpp0x/variadic145.C(working copy)
@@ -0,0 +1,10 @@
+// PR c++/58466
+// { dg-do compile { target c++11 } }
+
+template struct A;
+
+template struct B;
+
+template struct B> {};
+
+B> b;// { dg-error "incomplete type" }


[PATCH] Transaction fix: New target hook special_function_flags

2013-10-11 Thread Andreas Krebbel
Hi,

the attached patch introduces a new target hook
(targetm.calls.special_function_flags) which can be used by a backend
to add specific call properties to builtins.

On S/390 this is used for the *tbegin* and *tabort builtins whose
control flow otherwise is not modelled correctly.

On S/390 this leads to problems since our TX intructions do not save
and restore the floating point registers.  On RTL level we deal with
this by adding FPR clobbers to the tbegin instruction pattern.
However, there are several optimizations taking place on tree level
which need to know about the special characteristics of tbegin as
well.

So e.g. constant copy propagation misoptimizes the following example
by propagating f = 77.0f into the f != 88.0f comparison:

int foo ()
{
  float f = 77.0f;
  if (__builtin_tbegin (0) == 0)
{
  f = 88.0f;
  __builtin_tabort (256);
  return 2;
}
  if (f != 88.0f)
return 3;
  return 4; 
}

Fixed with the attached patch.

Another side effect of the patch is that the "return 2" statement is
now optimized away due to __builtin_tabort being "noreturn".

Bootstrapped and regtested on s390 and s390x with --with-arch=zEC12.

htm-nofloat-2.c fails with that patch.  The returns-twice flag on
tbegin prevents several optimizations on the cfg and basically
disables the TX optimization in s390_optimize_nonescaping_tx that way.
I'll try to address this with a follow-on patch.

Ok for mainline and 4.8?

Bye,

-Andreas-


2013-10-11  Andreas Krebbel  

* calls.c (special_function_p): Call
targetm.calls.special_function_flags.
* target.def (special_function_flags): Add new target hook.
* targhooks.c (default_get_reg_raw_mode): New function.
* targhooks.h (default_get_reg_raw_mode): Add prototype.
* doc/tm.texi.in: Add doc placeholder.
* doc/tm.texi: Update.

* config/s390/s390.c (s390_special_function_flags): Implement the
new target hook for S/390.
(TARGET_SPECIAL_FUNCTION_FLAGS): Define new target hook for S/390.


2013-10-11  Andreas Krebbel  

* gcc.target/s390/htm-1.c: Move __builtin_tabort invokations into
separate functions.


---
 gcc/calls.c   |3 +
 gcc/config/s390/s390.c|   24 +++
 gcc/doc/tm.texi   |4 ++
 gcc/doc/tm.texi.in|2 +
 gcc/target.def|   10 ++
 gcc/targhooks.c   |8 +
 gcc/targhooks.h   |1 
 gcc/testsuite/gcc.target/s390/htm-1.c |   52 ++
 8 files changed, 92 insertions(+), 12 modifications(!)

Index: gcc/calls.c
===
*** gcc/calls.c.orig
--- gcc/calls.c
*** special_function_p (const_tree fndecl, i
*** 562,567 
--- 562,570 
else if (tname[0] == 'l' && tname[1] == 'o'
   && ! strcmp (tname, "longjmp"))
flags |= ECF_NORETURN;
+ 
+   /* Apply target specific flags.  */
+   flags |= targetm.calls.special_function_flags (name);
  }
  
return flags;
Index: gcc/config/s390/s390.c
===
*** gcc/config/s390/s390.c.orig
--- gcc/config/s390/s390.c
*** s390_expand_builtin (tree exp, rtx targe
*** 10065,10070 
--- 10065,10091 
  return const0_rtx;
  }
  
+ /* Return call flags to be added for target specific functions.  */
+ 
+ static int
+ s390_special_function_flags (const char *name)
+ {
+   int flags = 0;
+ 
+   if (!TARGET_HTM)
+ return 0;
+ 
+   if (strcmp (name, "__builtin_tbegin") == 0
+   || strcmp (name, "__builtin_tbegin_nofloat") == 0
+   || strcmp (name, "__builtin_tbegin_retry") == 0
+   || strcmp (name, "__builtin_tbegin_retry_nofloat") == 0)
+ flags |= ECF_RETURNS_TWICE;
+ 
+   if (strcmp (name, "__builtin_tabort") == 0)
+ flags |= ECF_NORETURN;
+ 
+   return flags;
+ }
  
  /* Output assembly code for the trampoline template to
 stdio stream FILE.
*** s390_loop_unroll_adjust (unsigned nunrol
*** 11833,11838 
--- 11854,11862 
  #undef TARGET_LIBCALL_VALUE
  #define TARGET_LIBCALL_VALUE s390_libcall_value
  
+ #undef TARGET_SPECIAL_FUNCTION_FLAGS
+ #define TARGET_SPECIAL_FUNCTION_FLAGS s390_special_function_flags
+ 
  #undef TARGET_FIXED_CONDITION_CODE_REGS
  #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
  
Index: gcc/target.def
===
*** gcc/target.def.orig
--- gcc/target.def
*** DEFHOOK
*** 4179,4184 
--- 4179,4194 
   enum machine_mode, (int regno),
   default_get_reg_raw_mode)
  
+ /* Return a mode wide enough to copy any argument value that might be
+passed.  */
+ DEFHOOK
+ (special_function_flags,
+  "This target hook returns the flags to be set for a function with the\
+  specified @v

Go patch committed: Better handling of invalid ASCII in Go frontend

2013-10-11 Thread Ian Lance Taylor
This patch improves the handling of invalid ASCII characters in the Go
frontend when they appear in identifiers.  Note that Go input is by
definition always UTF-8, so the ASCII-specific assumptions here are OK.
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.  Will commit to 4.8 branch when it reopens.

Ian

diff -r 99af66244a26 go/lex.cc
--- a/go/lex.cc	Thu Oct 10 20:11:40 2013 -0700
+++ b/go/lex.cc	Fri Oct 11 10:02:05 2013 -0700
@@ -873,7 +873,28 @@
 	  && (cc < 'a' || cc > 'z')
 	  && cc != '_'
 	  && (cc < '0' || cc > '9'))
-	break;
+	{
+	  // Check for an invalid character here, as we get better
+	  // error behaviour if we swallow them as part of the
+	  // identifier we are building.
+	  if ((cc >= ' ' && cc < 0x7f)
+		  || cc == '\t'
+		  || cc == '\r'
+		  || cc == '\n')
+		break;
+
+	  this->lineoff_ = p - this->linebuf_;
+	  error_at(this->location(),
+		   "invalid character 0x%x in identifier",
+		   cc);
+	  if (!has_non_ascii_char)
+		{
+		  buf.assign(pstart, p - pstart);
+		  has_non_ascii_char = true;
+		}
+	  if (!Lex::is_invalid_identifier(buf))
+		buf.append("$INVALID$");
+	}
 	  ++p;
 	  if (is_first)
 	{


Re: Optimize callers using nonnull attribute

2013-10-11 Thread Mike Stump
On Oct 11, 2013, at 7:21 AM, Marc Glisse  wrote:
> I just read this note in libiberty/concat.c:

> I am afraid that if I add the returns_nonnull attribute to xmalloc in 
> include/libiberty.h, it will break this supported use, no? Or is this comment 
> out-of-date?

I think the comment should be updated, it is just old.  Some people envisioned 
some weird things, and I think we can safely observe that no system makes 
xmalloc return 0 on low memory situations, and in-deed, that direction would be 
a bad direction to go in.  No caller of xmalloc expects it to return 0, so, 
therefore, it just can't.  Life goes on.

Re: [PATCH, PR 57748] Check for out of bounds access, Part 2

2013-10-11 Thread Eric Botcazou
> That would definitely be a good move. Maybe someone should approve it?

Yes, but I agree that we ought to restrict it to trailing zero-sized arrays. 
That would help to allay Jakub's concerns about possible ABI fallouts.

-- 
Eric Botcazou


Re: [PING] 3 patches waiting for approval/review

2013-10-11 Thread Andreas Krebbel
On 10/10/13 18:41, Jeff Law wrote:
> On 10/10/13 04:00, Andreas Krebbel wrote:
>> On 09/10/13 21:46, Jeff Law wrote:
>>> On 08/21/13 03:21, Andreas Krebbel wrote:
 [RFC] Allow functions calling mcount before prologue to be leaf functions
 http://gcc.gnu.org/ml/gcc-patches/2013-04/msg00993.html
>>> I don't think this is necessarily correct for all targets.  ISTM the
>>> ability to consider a function calling mcount as a leaf needs to be a
>>> property of the target.
>>
>> We have already "profile_before_prologue" as a target property. Shouldn't 
>> this be enough to decide
>> upon this? When a function calls mcount before the prologue it shouldn't 
>> matter whether the function
>> is leaf or not.
> I don't think so, I think it'd break the PA's 32 bit ABI, maybe the 64 
> bit ABI as well.  It's the caller's responsibility to build a mini stack 
> frame if the function makes any calls.  If the code in the prologue 
> expander uses "leafness" to make the decision about whether or not to 
> allocate the mini frame, then it'd do the wrong thing here.

Since it seems to be about PROFILE_HOOKS vs FUNCTION_PROFILER targets what 
about the following patch:

Index: gcc/final.c
===
*** gcc/final.c.orig2013-10-11 18:51:53.928452672 +0200
--- gcc/final.c 2013-10-11 18:56:20.761359142 +0200
*** leaf_function_p (void)
*** 4237,4244 
  {
rtx insn;

!   if (crtl->profile || profile_arc_flag)
  return 0;

for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
  {
--- 4237,4253 
  {
rtx insn;

! #ifdef PROFILE_HOOK
!   /* PROFILE_HOOK emits code always after prologue.  */
!   if (crtl->profile)
  return 0;
+ #else
+   /* This is for targets using the FUNCTION_PROFILER hook.  If the
+  code is emitted before prologue the function might still be a
+  leaf function.  */
+   if (!targetm.profile_before_prologue () && crtl->profile)
+ return 0;
+ #endif

for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
  {

I've verified with a cross that on hppa target nothing changes when using -pg.

Bye,

-Andreas-



RE: Fix scheduler ix86_issue_rate and ix86_adjust_cost for modern x86 chips

2013-10-11 Thread Gopalasubramanian, Ganesh
Hi Honza!

> I will give it a try.
I understand how the proposed patch would be. That is OK to me.

> OK, my undertanding always was, that the decoders works sort of sequentially.
>
> One decoder of bdver1 can do
> vector,
> double, single,
> single, single, signle
>
>where decoder 1 is somehow special but hardware is able to swap first and 
>second single.  Now if two decoders run, i would expect
>
> vector, vector
> single, single, signle
> double, single, double, single
> 
> to be decoded in once cycle

My understanding on the decode unit is mentioned below. Please correct me if I 
am wrong.

The sequential allotment of decoders is not there for bdver1.
Intel Sandybridge\core2 have four decoders. The first decoder is special for 
intel processors. 
For ex, In Sandybridge, the instructions that have only one µop can be decoded 
by any of the four decoders.
Instructions that have up to four µops will be decoded by the first decoder (of 
the four decoders available) only.

Bdver1 has four decoders. None of them is special unlike intel processors.
For bdver1, microcoded instructions are single issue. All four decoders are 
engaged for decoding microcoded instructions.

Decode unit of bdver3 has following specifications
* Four independent decoders which can execute in parallel
* Microcoded instructions are single issue. (All four decoders are engaged). 
This means that only one vectorpath instruction get issued in one cycle.
* The additional hardware instruction decoder increases the instruction decode 
capacity to eight instructions per clock cycle.
* The decoders are pipelined 4 cycles deep and are non-stalling.

So modeling vectorpath instructions is straightforward. No instructions are 
issued along with vector instructions.
We need to model only fastpath single and fastpath double instructions.
There are four decoders and they can execute in parallel. So they can take 
either two double or four single instructions.
We also don't need to model them in two stage as there is no sequence involved.
So, the modeling can be done such that in one cycle we schedule  2 singles + 1 
double (or) 4 singles (or) 2 doubles.

I have tried to model this for bdver3 (code changes are mentioned below). 
Please let me know your opinion.

Regards
Ganesh

Patch
-
diff --git a/gcc/config/i386/bdver3.md b/gcc/config/i386/bdver3.md
index 52418b5..9e59395 100644
--- a/gcc/config/i386/bdver3.md
+++ b/gcc/config/i386/bdver3.md
@@ -34,6 +34,8 @@

 (define_cpu_unit "bdver3-decode0" "bdver3")
 (define_cpu_unit "bdver3-decode1" "bdver3")
+(define_cpu_unit "bdver3-decode2" "bdver3")
+(define_cpu_unit "bdver3-decode3" "bdver3")
 (define_cpu_unit "bdver3-decodev" "bdver3")

 ;; Double decoded instructions take two cycles whereas
@@ -42,12 +44,15 @@
 ;; two decoders in two cycles.
 ;; Vectorpath instructions are single issue instructions.
 ;; So, we have separate unit for vector instructions.
-(exclusion_set "bdver3-decodev" "bdver3-decode0,bdver3-decode1")
+(exclusion_set "bdver3-decodev" 
"bdver3-decode0,bdver3-decode1,bdver3-decode2,bdver3-decode3")

 (define_reservation "bdver3-vector" "bdver3-decodev")
-(define_reservation "bdver3-direct" "(bdver3-decode0|bdver3-decode1)")
+(define_reservation "bdver3-direct" 
"(bdver3-decode0|bdver3-decode1|bdver3-decoder2|bdver3-decoder3)")

-(define_reservation "bdver3-double" "(bdver3-decode0|bdver3-decode1)*2")
+(define_reservation "bdver3-double" "(bdver3-decode0+bdver3-decode1)|
+   (bdver3-decode1+bdver3-decode2)|(bdver3-decode2+bdver3-decode3)|
+   (bdver3-decode0+bdver3-decode2)|(bdver3-decode1+bdver3-decode3)|
+   (bdver3-decode0+bdver3-decode3)") 


-Original Message-
From: Jan Hubicka [mailto:hubi...@ucw.cz] 
Sent: Wednesday, October 09, 2013 7:18 PM
To: Gopalasubramanian, Ganesh
Cc: Jan Hubicka; gcc-patches@gcc.gnu.org; hjl.to...@gmail.com
Subject: Re: Fix scheduler ix86_issue_rate and ix86_adjust_cost for modern x86 
chips

> Before merging the insn reservations, I need to compare the latency values 
> for bdver1 and bdver3. I know that they are different for some of the 
> instructions. 
> In that case, the merging should prop up another subset of latency 
> differences. I would like to keep these insn reservations in two .md files 
> (one for bdver1 and one for bdver3) even after the merger.

I am not really insisting on merging (define_insn_reservation "bdver3*") with 
(define_insn_reservation "bdver1*).  What I have in mind is merging actual 
atuomatons in cases it makes sense.  Latencies are not really encoded in those.

Bdver 12 has:
(define_automaton "bdver1,bdver1_ieu,bdver1_load,bdver1_fp,bdver1_agu")
while bdver 3:
(define_automaton "bdver3,bdver3_ieu,bdver3_load,bdver3_fp,bdver3_agu")

automatons bdver1 and bdver3 are very different, because one handles up to 3 
instructions, while other handles only 2.  I am still bit confused with this 
every second cycle logic, so lets discuss it incrementally.

I would propose to have
(defin

Re: [PATCH i386 3/8] [AVX512] [15/n] Add AVX-512 patterns: VI48F_512 iterator.

2013-10-11 Thread Richard Henderson
On 10/09/2013 03:29 AM, Kirill Yukhin wrote:
> +(define_insn "avx512f_vec_dup_mem"
> +  [(set (match_operand:VI48F_512 0 "register_operand" "=x")
> + (vec_duplicate:VI48F_512
> +   (match_operand: 1 "nonimmediate_operand" "xm")))]
> +  "TARGET_AVX512F"
> +  "vbroadcast\t{%1, %0|%0, %1}"
> +  [(set_attr "type" "ssemov")
> +   (set_attr "prefix" "evex")
> +   (set_attr "mode" "")])

Ought these be 'v' not 'x'?  All the v*broadcast insns that I see in the
document with zmm outputs are evex encoded.

Otherwise ok.


r~


Re: [PATCH PING] Move edge_within_scc to ipa-utils.c

2013-10-11 Thread Martin Jambor
Ping.

Thanks,

Martin

On Wed, Sep 11, 2013 at 03:02:02PM +0200, Martin Jambor wrote:
> Hi,
> 
> edge_within_scc should really be a part of API accompanying
> ipa_reduced_postorder just like ipa_get_nodes_in_cycle and so this
> patch moves it to ipa-utils.c and gives it the ipa_ prefix.
> 
> Bootstrapped and tested on x86_64-linux.  OK for trunk?
> 
> Thanks,
> 
> Martin
> 
> 
> 
> 2013-09-10  Martin Jambor  
> 
>   * ipa-utils.h (ipa_edge_within_scc): Declare.
>   * ipa-cp.c (edge_within_scc): Moved...
>   * ipa-utils.c (ipa_edge_within_scc): ...here.  Updated all callers.
> 
> Index: src/gcc/ipa-utils.c
> ===
> --- src.orig/gcc/ipa-utils.c
> +++ src/gcc/ipa-utils.c
> @@ -253,6 +253,22 @@ ipa_get_nodes_in_cycle (struct cgraph_no
>return v;
>  }
>  
> +/* Return true iff the CS is an edge within a strongly connected component as
> +   computed by ipa_reduced_postorder.  */
> +
> +bool
> +ipa_edge_within_scc (struct cgraph_edge *cs)
> +{
> +  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) 
> cs->caller->symbol.aux;
> +  struct ipa_dfs_info *callee_dfs;
> +  struct cgraph_node *callee = cgraph_function_node (cs->callee, NULL);
> +
> +  callee_dfs = (struct ipa_dfs_info *) callee->symbol.aux;
> +  return (caller_dfs
> +   && callee_dfs
> +   && caller_dfs->scc_no == callee_dfs->scc_no);
> +}
> +
>  struct postorder_stack
>  {
>struct cgraph_node *node;
> Index: src/gcc/ipa-utils.h
> ===
> --- src.orig/gcc/ipa-utils.h
> +++ src/gcc/ipa-utils.h
> @@ -42,6 +42,7 @@ int ipa_reduced_postorder (struct cgraph
> bool (*ignore_edge) (struct cgraph_edge *));
>  void ipa_free_postorder_info (void);
>  vec ipa_get_nodes_in_cycle (struct cgraph_node *);
> +bool ipa_edge_within_scc (struct cgraph_edge *);
>  int ipa_reverse_postorder (struct cgraph_node **);
>  tree get_base_var (tree);
>  void ipa_merge_profiles (struct cgraph_node *dst,
> Index: src/gcc/ipa-cp.c
> ===
> --- src.orig/gcc/ipa-cp.c
> +++ src/gcc/ipa-cp.c
> @@ -287,22 +287,6 @@ ipa_lat_is_single_const (struct ipcp_lat
>  return true;
>  }
>  
> -/* Return true iff the CS is an edge within a strongly connected component as
> -   computed by ipa_reduced_postorder.  */
> -
> -static inline bool
> -edge_within_scc (struct cgraph_edge *cs)
> -{
> -  struct ipa_dfs_info *caller_dfs = (struct ipa_dfs_info *) 
> cs->caller->symbol.aux;
> -  struct ipa_dfs_info *callee_dfs;
> -  struct cgraph_node *callee = cgraph_function_node (cs->callee, NULL);
> -
> -  callee_dfs = (struct ipa_dfs_info *) callee->symbol.aux;
> -  return (caller_dfs
> -   && callee_dfs
> -   && caller_dfs->scc_no == callee_dfs->scc_no);
> -}
> -
>  /* Print V which is extracted from a value in a lattice to F.  */
>  
>  static void
> @@ -957,7 +941,7 @@ add_value_to_lattice (struct ipcp_lattic
>for (val = lat->values; val; val = val->next)
>  if (values_equal_for_ipcp_p (val->value, newval))
>{
> - if (edge_within_scc (cs))
> + if (ipa_edge_within_scc (cs))
> {
>   struct ipcp_value_source *s;
>   for (s = val->sources; s ; s = s->next)
> @@ -1030,7 +1014,7 @@ propagate_vals_accross_pass_through (str
>   are arithmetic functions with circular dependencies, there is infinite
>   number of them and we would just make lattices bottom.  */
>if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR)
> -  and edge_within_scc (cs))
> +  && ipa_edge_within_scc (cs))
>  ret = set_lattice_contains_variable (dest_lat);
>else
>  for (src_val = src_lat->values; src_val; src_val = src_val->next)
> @@ -1061,7 +1045,7 @@ propagate_vals_accross_ancestor (struct
>struct ipcp_value *src_val;
>bool ret = false;
>  
> -  if (edge_within_scc (cs))
> +  if (ipa_edge_within_scc (cs))
>  return set_lattice_contains_variable (dest_lat);
>  
>for (src_val = src_lat->values; src_val; src_val = src_val->next)
> @@ -2129,7 +2113,7 @@ propagate_constants_topo (struct topo_in
> struct cgraph_edge *cs;
>  
> for (cs = v->callees; cs; cs = cs->next_callee)
> - if (edge_within_scc (cs)
> + if (ipa_edge_within_scc (cs)
>   && propagate_constants_accross_call (cs))
> push_node_to_stack (topo, cs->callee);
> v = pop_node_from_stack (topo);
> @@ -2146,7 +2130,7 @@ propagate_constants_topo (struct topo_in
>   estimate_local_effects (v);
>   add_all_node_vals_to_toposort (v);
>   for (cs = v->callees; cs; cs = cs->next_callee)
> -   if (!edge_within_scc (cs))
> +   if (!ipa_edge_within_scc (cs))
>   propagate_constants_accross_call (cs);
> }
>cycle_nodes.release ();
> @@ -3462,7 +3446,7 @@ spread_undeadness (struct cgraph_node *n
>struct cgraph_e

Re: [buildrobot] OMP: r203408 probably needs another operator& returning bool

2013-10-11 Thread Jason Merrill

On 10/11/2013 10:36 AM, Jakub Jelinek wrote:

>Since the coding standards say "Conversion operators should be
>avoided" (because they can't be explicit), I think this is the way
>to go.

We then violate the coding standard in vec.h:
/* Type to provide NULL values for vec.  This is used to
provide nil initializers for vec instances.  Since vec must be
a POD, we cannot have proper ctor/dtor for it.  To initialize
a vec instance, you can assign it the value vNULL.  */
struct vnull
{
   template 
   operator vec () { return vec(); }
};
extern vnull vNULL;

but perhaps we can keep that as an exception and don't allow further
exceptions...


I think we just need to have good reasons for exceptions.  This seems 
like a fine one; the entire purpose of this struct is to have a 
conversion operator.


Jason



Go patch committed: Accept an integral float as a string index

2013-10-11 Thread Ian Lance Taylor
This patch from Rémy Oudompheng fixes the Go frontend to accept an
integral float constant as an index into a string.  That was already
fixes for arrays, but not for strings.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.  Will
commit to 4.8 branch when the branch opens.

Ian

diff -r 57f0ef7df750 go/expressions.cc
--- a/go/expressions.cc	Fri Oct 11 10:03:09 2013 -0700
+++ b/go/expressions.cc	Fri Oct 11 10:40:53 2013 -0700
@@ -10888,11 +10888,20 @@
 void
 String_index_expression::do_check_types(Gogo*)
 {
-  if (this->start_->type()->integer_type() == NULL)
+  Numeric_constant nc;
+  unsigned long v;
+  if (this->start_->type()->integer_type() == NULL
+  && !this->start_->type()->is_error()
+  && (!this->start_->numeric_constant_value(&nc)
+	  || nc.to_unsigned_long(&v) == Numeric_constant::NC_UL_NOTINT))
 this->report_error(_("index must be integer"));
   if (this->end_ != NULL
   && this->end_->type()->integer_type() == NULL
-  && !this->end_->is_nil_expression())
+  && !this->end_->type()->is_error()
+  && !this->end_->is_nil_expression()
+  && !this->end_->is_error_expression()
+  && (!this->end_->numeric_constant_value(&nc)
+	  || nc.to_unsigned_long(&v) == Numeric_constant::NC_UL_NOTINT))
 this->report_error(_("slice end must be integer"));
 
   std::string sval;


Re: [C++ Patch] PR 58466

2013-10-11 Thread Jason Merrill
How does a TEMPLATE_PARM_INDEX get this far?  It should have been 
detected as dependent before this.


Jason


Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Marc Glisse

Hello,

here are some uses for returns_nonnull in libiberty.

Bootstrap+testsuite (default languages) on x86_64-unknown-linux-gnu.

2013-10-11  Marc Glisse  

PR tree-optimization/58689
include/
* ansidecl.h (ATTRIBUTE_RETURNS_NONNULL): New macro.
* libiberty.h (basename, lbasename, dos_lbasename, unix_lbasename,
concat_copy): Mark with attributes nonnull(1) and returns_nonnull.
(concat, reconcat, concat_copy2, choose_temp_base, xstrerror,
xmalloc, xrealloc, xcalloc, xstrdup, xstrndup, xmemdup, pex_init):
Mark with attribute returns_nonnull.

libiberty/
* concat.c: Remove note about xmalloc.

--
Marc Glisse


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Marc Glisse

With the patch now...

On Fri, 11 Oct 2013, Marc Glisse wrote:


Hello,

here are some uses for returns_nonnull in libiberty.

Bootstrap+testsuite (default languages) on x86_64-unknown-linux-gnu.

2013-10-11  Marc Glisse  

PR tree-optimization/58689
include/
* ansidecl.h (ATTRIBUTE_RETURNS_NONNULL): New macro.
* libiberty.h (basename, lbasename, dos_lbasename, unix_lbasename,
concat_copy): Mark with attributes nonnull(1) and returns_nonnull.
(concat, reconcat, concat_copy2, choose_temp_base, xstrerror,
xmalloc, xrealloc, xcalloc, xstrdup, xstrndup, xmemdup, pex_init):
Mark with attribute returns_nonnull.

libiberty/
* concat.c: Remove note about xmalloc.


--
Marc GlisseIndex: include/ansidecl.h
===
--- include/ansidecl.h  (revision 203451)
+++ include/ansidecl.h  (working copy)
@@ -304,20 +304,29 @@ So instead we use the macro below and te
 
 /* Attribute `nonnull' was valid as of gcc 3.3.  */
 #ifndef ATTRIBUTE_NONNULL
 # if (GCC_VERSION >= 3003)
 #  define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ (m)))
 # else
 #  define ATTRIBUTE_NONNULL(m)
 # endif /* GNUC >= 3.3 */
 #endif /* ATTRIBUTE_NONNULL */
 
+/* Attribute `returns_nonnull' was valid as of gcc 4.9.  */
+#ifndef ATTRIBUTE_RETURNS_NONNULL
+# if (GCC_VERSION >= 4009)
+#  define ATTRIBUTE_RETURNS_NONNULL __attribute__ ((__returns_nonnull__))
+# else
+#  define ATTRIBUTE_RETURNS_NONNULL
+# endif /* GNUC >= 4.9 */
+#endif /* ATTRIBUTE_RETURNS_NONNULL */
+
 /* Attribute `pure' was valid as of gcc 3.0.  */
 #ifndef ATTRIBUTE_PURE
 # if (GCC_VERSION >= 3000)
 #  define ATTRIBUTE_PURE __attribute__ ((__pure__))
 # else
 #  define ATTRIBUTE_PURE
 # endif /* GNUC >= 3.0 */
 #endif /* ATTRIBUTE_PURE */
 
 /* Use ATTRIBUTE_PRINTF when the format specifier must not be NULL.
Index: include/libiberty.h
===
--- include/libiberty.h (revision 203451)
+++ include/libiberty.h (working copy)
@@ -100,82 +100,82 @@ extern int countargv (char**);
across different systems, sometimes as "char *" and sometimes as
"const char *" */
 
 /* HAVE_DECL_* is a three-state macro: undefined, 0 or 1.  If it is
undefined, we haven't run the autoconf check so provide the
declaration without arguments.  If it is 0, we checked and failed
to find the declaration so provide a fully prototyped one.  If it
is 1, we found it so don't provide any declaration at all.  */
 #if !HAVE_DECL_BASENAME
 #if defined (__GNU_LIBRARY__ ) || defined (__linux__) || defined (__FreeBSD__) 
|| defined (__OpenBSD__) || defined(__NetBSD__) || defined (__CYGWIN__) || 
defined (__CYGWIN32__) || defined (__MINGW32__) || defined (HAVE_DECL_BASENAME)
-extern char *basename (const char *);
+extern char *basename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 #else
 /* Do not allow basename to be used if there is no prototype seen.  We
either need to use the above prototype or have one from
autoconf which would result in HAVE_DECL_BASENAME being set.  */
 #define basename basename_cannot_be_used_without_a_prototype
 #endif
 #endif
 
 /* A well-defined basename () that is always compiled in.  */
 
-extern const char *lbasename (const char *);
+extern const char *lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* Same, but assumes DOS semantics (drive name, backslash is also a
dir separator) regardless of host.  */
 
-extern const char *dos_lbasename (const char *);
+extern const char *dos_lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* Same, but assumes Unix semantics (absolute paths always start with
a slash, only forward slash is accepted as dir separator)
regardless of host.  */
 
-extern const char *unix_lbasename (const char *);
+extern const char *unix_lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* A well-defined realpath () that is always compiled in.  */
 
 extern char *lrealpath (const char *);
 
 /* Concatenate an arbitrary number of strings.  You must pass NULL as
the last argument of this function, to terminate the list of
strings.  Allocates memory using xmalloc.  */
 
-extern char *concat (const char *, ...) ATTRIBUTE_MALLOC ATTRIBUTE_SENTINEL;
+extern char *concat (const char *, ...) ATTRIBUTE_MALLOC ATTRIBUTE_SENTINEL 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* Concatenate an arbitrary number of strings.  You must pass NULL as
the last argument of this function, to terminate the list of
strings.  Allocates memory using xmalloc.  The first argument is
not one of the strings to be concatenated, but if not NULL is a
pointer to be freed after the new string is created, similar to the
way xrealloc works.  */
 
-extern char *reconcat (char *, const char *, ...) ATTRIBUTE_MALLOC 
ATTRIBUTE_SENTINEL;
+extern char *reconcat (char *, const char

Re: [PATCH] Do not append " *INTERNAL* " to the decl name

2013-10-11 Thread Jason Merrill
This needs a testcase (compile with -dA and use scan-assembler; see 
other tests in g++.dg/debug/dwarf2).


Jason


Re: [PATCH] Do not append " *INTERNAL* " to the decl name

2013-10-11 Thread Dehao Chen
It's hard to get a testcase without
http://gcc.gnu.org/viewcvs/gcc?view=revision&revision=201856 because
none of these *INTERNAL* symbols will be emitted in debug info.

Thanks,
Dehao

On Fri, Oct 11, 2013 at 10:55 AM, Jason Merrill  wrote:
> This needs a testcase (compile with -dA and use scan-assembler; see other
> tests in g++.dg/debug/dwarf2).
>
> Jason


Go patch committed: Better error for offsetof(method value)

2013-10-11 Thread Ian Lance Taylor
This patch to the Go frontend gives a more useful error message for an
invalid call to unsafe.Offsetof on a method value (method values look
like field references, which are valid for unsafe.offsetof).
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.  Will commit to 4.8 branch when it reopens.

Ian

diff -r 07ab2f26915d go/expressions.cc
--- a/go/expressions.cc	Fri Oct 11 10:44:51 2013 -0700
+++ b/go/expressions.cc	Fri Oct 11 11:05:45 2013 -0700
@@ -7253,6 +7253,15 @@
   if (this->code_ == BUILTIN_OFFSETOF)
 {
   Expression* arg = this->one_arg();
+
+  if (arg->bound_method_expression() != NULL
+	  || arg->interface_field_reference_expression() != NULL)
+	{
+	  this->report_error(_("invalid use of method value as argument "
+			   "of Offsetof"));
+	  return this;
+	}
+
   Field_reference_expression* farg = arg->field_reference_expression();
   while (farg != NULL)
 	{
@@ -7262,7 +7271,8 @@
 	  // it must not be reached through pointer indirections.
 	  if (farg->expr()->deref() != farg->expr())
 	{
-	  this->report_error(_("argument of Offsetof implies indirection of an embedded field"));
+	  this->report_error(_("argument of Offsetof implies "
+   "indirection of an embedded field"));
 	  return this;
 	}
 	  // Go up until we reach the original base.
@@ -7672,6 +7682,8 @@
 bool
 Builtin_call_expression::do_is_constant() const
 {
+  if (this->is_error_expression())
+return true;
   switch (this->code_)
 {
 case BUILTIN_LEN:


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread David Malcolm
On Fri, 2013-10-11 at 19:53 +0200, Marc Glisse wrote:
> With the patch now...

[...]

> -extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_SENTINEL;
> +extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_SENTINEL 
> ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURNS_NONNULL;

An aesthetic idea: should the attributes be ordered to reflect the order
that the related entities appear in the declaration?  Return types
appear before parameters in declarations, and thus perhaps the
attributes describing them should also.

This would make the above look like this (introducing a newline to avoid
overlong lines):

extern char *concat_copy (char *, const char *, ...)
  ATTRIBUTE_RETURNS_NONNULL ATTRIBUTE_NONNULL(1) ATTRIBUTE_SENTINEL;

Such runs of blockcaps might be made more readable by adding newlines:

extern char *concat_copy (char *, const char *, ...)
  ATTRIBUTE_RETURNS_NONNULL
  ATTRIBUTE_NONNULL(1)
  ATTRIBUTE_SENTINEL;

FWIW I do something like this with custom attributes in my
gcc-python-plugin, see e.g.:
https://gcc-python-plugin.readthedocs.org/en/latest/cpychecker.html#marking-functions-that-steal-references-to-their-arguments

Thoughts?
Dave



Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, David Malcolm wrote:


On Fri, 2013-10-11 at 19:53 +0200, Marc Glisse wrote:

With the patch now...


[...]


-extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_SENTINEL;
+extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_SENTINEL 
ATTRIBUTE_NONNULL(1) ATTRIBUTE_RETURNS_NONNULL;


An aesthetic idea: should the attributes be ordered to reflect the order
that the related entities appear in the declaration?  Return types
appear before parameters in declarations, and thus perhaps the
attributes describing them should also.


I have no opinion about it, I'll do whatever reviewers tell me.


This would make the above look like this (introducing a newline to avoid
overlong lines):


I didn't introduce new lines because there were already many overlong 
lines in this file, but I can wrap those as well if needed.


--
Marc Glisse


Re: [PATCH i386 3/8] [AVX512] [16/n] Add AVX-512 patterns: VI48_512 and VI4F_128 iterators.

2013-10-11 Thread Richard Henderson
On 10/09/2013 03:30 AM, Kirill Yukhin wrote:
> +;; Return true if OP is either -1 constant or stored in register.
> +(define_predicate "register_or_constm1_operand"
> +  (ior (match_operand 0 "register_operand")
> +   (match_test "op == constm1_rtx")))

This won't do the right thing, because you're not exposing
that const_int is a valid input.  You need a match_code too.


Otherwise ok.


r~


[PATCH, powerpc] PR target/58673, Fix power8 quad memory/no-vsx-timode interaction

2013-10-11 Thread Michael Meissner
When I patched the compiler for PR 58587 to not set -mvsx-timode automatically
for ISA 2.06 sytems (power7/power8), there was an unexpected problem between
ISA 2.07 quad memory support and not allowing TImode variables in VSX
registers.

The movti insn in the -mno-vsx-timode case, did not have quad memory support
enabled, and issued a split for quad memory load/stores, while the insn
splitter said the insn could be done by a quad memory load/store, and did not
split the insn.

This patch allows TImode values to have register + offset form in the
-mno-vsx-timode (-mvsx-timode currently limits TImode addresses to be a single
register, so that it can be used with both quad memory and VSX load -- this is
one more thing that needs to be addressed with secondary reload support), and
it fixes the non VSX-timode insn to add quad memory support.

I have done a bootstrap and run make check with no regressions.  I have built
all of Spec 2006 which showed the initial problem and it all built
successfully.  The only code changes that are due to these patches are due to
load and store quad allowing reg+offset addressing on 64-bit power8.  Are these
patches ok to install?

[gcc]
2013-10-11  Michael Meissner  

PR target/58673
* config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
restrict TImode addresses to single indirect registers if both
-mquad-memory and -mvsx-timode are used.
(rs6000_output_move_128bit): Use quad_load_store_p to determine if
we should emit load/store quad.  Remove using %y for quad memory
addresses.

* config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add
constraints to allow load/store quad on machines where TImode is
not allowed in VSX registers.

[gcc/testsuite]
2013-10-11  Michael Meissner  

PR target/58673
* gcc.target/powerpc/pr58673-1.c: New file to test whether
-mquad-word + -mno-vsx-timode causes errors.
* gcc.target/powerpc/pr58673-2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 203389)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -7182,12 +7182,12 @@ rs6000_legitimate_address_p (enum machin
   if (reg_offset_p
   && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
 return 1;
-  /* For TImode, if we have load/store quad, only allow register indirect
- addresses.  This will allow the values to go in either GPRs or VSX
- registers without reloading.  The vector types would tend to go into VSX
- registers, so we allow REG+REG, while TImode seems somewhat split, in that
- some uses are GPR based, and some VSX based.  */
-  if (mode == TImode && TARGET_QUAD_MEMORY)
+  /* For TImode, if we have load/store quad and TImode in VSX registers, only
+ allow register indirect addresses.  This will allow the values to go in
+ either GPRs or VSX registers without reloading.  The vector types would
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
+ somewhat split, in that some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
 return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
@@ -16060,12 +16060,11 @@ rs6000_output_move_128bit (rtx operands[
 {
   if (dest_gpr_p)
{
- if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
- && quad_memory_operand (src, mode)
- && !reg_overlap_mentioned_p (dest, src))
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
{
  /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
- return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+ /* return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1"; */
+ return "lq %0,%1";
}
  else
return "#";
@@ -16095,11 +16094,11 @@ rs6000_output_move_128bit (rtx operands[
 {
   if (src_gpr_p)
{
- if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
- && quad_memory_operand (dest, mode))
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
{
  /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
- return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+ /* return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0"; */
+ return "stq %1,%0";
}
  else
return "#";
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 203389)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -10312,1

Go patch committed: Error for same name for receiver and parameter

2013-10-11 Thread Ian Lance Taylor
This patch to the Go compiler makes it give an error if the same name is
used for a method receiver and some parameter to the method.
Bootstrapped and ran Go testsuite on x86_64-unknown-linux-gnu.
Committed to mainline.  Will commit to 4.8 branch when the branch
reopens.

Ian

diff -r 09a1bcee0ad3 go/parse.cc
--- a/go/parse.cc	Fri Oct 11 11:09:28 2013 -0700
+++ b/go/parse.cc	Fri Oct 11 11:26:45 2013 -0700
@@ -744,6 +744,8 @@
 return NULL;
 
   Parse::Names names;
+  if (receiver != NULL)
+names[receiver->name()] = receiver;
   if (params != NULL)
 this->check_signature_names(params, &names);
   if (results != NULL)


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread DJ Delorie

Your patch changes the rule that the application can override
xmalloc() and get functions that return NULL...


Re: [C++ Patch] PR 58466

2013-10-11 Thread Paolo Carlini

Hi,

On 10/11/2013 07:46 PM, Jason Merrill wrote:
How does a TEMPLATE_PARM_INDEX get this far?  It should have been 
detected as dependent before this.
Thanks. Interesting. We get there from the convert_nontype_argument call 
at line 6453 of pt.c (in convert_template_argument) , which is protected by:


  else if (!dependent_template_arg_p (orig_arg)
   && !uses_template_parms (t))

thus dependent_template_arg_p (orig_arg) returns *false* for that tree. 
This is trivially the case because dependent_template_arg_p begins:


  if (!processing_template_decl)
return false;

thus it doesn't go that far when processing_template_decl is 0. 
Otherwise it would return true.


Is this information already useful to you? Are we maybe failing to bump 
processing_template_decl somewhere while processing the specialization?


Thanks!
Paolo.


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, DJ Delorie wrote:


Your patch changes the rule that the application can override
xmalloc() and get functions that return NULL...


Only after Jakub and Mike told me, earlier today, that this rule didn't 
exist anymore. If it does, we can just forget about this patch.


--
Marc Glisse


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Jakub Jelinek
On Fri, Oct 11, 2013 at 02:30:13PM -0400, DJ Delorie wrote:
> 
> Your patch changes the rule that the application can override
> xmalloc() and get functions that return NULL...

Why would you want to do that?  Big part of libiberty itself
wouldn't allow such an implementation.
E.g. concat.c, argv.c, partition.c, xstrdup.c all dereference without
checking result of xmalloc.

In any case, if for some obscure reason we want to allow it for libiberty,
at least gcc itself certainly doesn't allow xmalloc etc. returning NULL,
so if you think it is wrong to put these into libiberty.h, we could
instead put those into gcc/system.h:
#if GCC_VERSION >= 4009
extern "C" {
__typeof (xmalloc) xmalloc ATTRIBUTE_RETURNS_NONNULL;
...
}
#endif

Jakub


PATCH: PR target/58690: internal compiler error: in copy_to_mode_reg, at explow.c:641

2013-10-11 Thread H.J. Lu
Hi,

In x32, when a TLS address is in DImode and Pmode is SImode,
copy_addr_to_reg will fail.  This patch adds ix86_copy_addr_to_reg
to first copy DImode address into a DImode register and then to generate
SImode SUBREG in this case.  Tested on x32.  OK for trunk?

Thanks.


H.J.
---
gcc/

2013-10-11  H.J. Lu  

PR target/58690
* config/i386/i386.c (ix86_copy_addr_to_reg): New function.
(ix86_expand_movmem): Replace copy_addr_to_reg with
ix86_copy_addr_to_reg.
(ix86_expand_setmem): Likewise.

gcc/testsuite/

2013-10-11  H.J. Lu  

PR target/58690
* gcc.target/i386/pr58690.c: New test

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 37c1bec..e39d63db 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -22076,6 +22076,21 @@ counter_mode (rtx count_exp)
   return SImode;
 }
 
+/* Copy the address to a register in Pmode.  Support x32 TLS address in
+   DImode and Pmode in SImode.  */
+
+static rtx
+ix86_copy_addr_to_reg (rtx addr)
+{
+  if (GET_MODE (addr) != Pmode)
+{
+  gcc_assert (GET_MODE (addr) == DImode && Pmode == SImode);
+  return gen_rtx_SUBREG (SImode, copy_to_mode_reg (DImode, addr), 0);
+}
+  else
+return copy_addr_to_reg (addr);
+}
+
 /* When SRCPTR is non-NULL, output simple loop to move memory
pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times,
overall size is COUNT specified in bytes.  When SRCPTR is NULL, output the
@@ -23032,8 +23047,8 @@ ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, 
rtx align_exp,
 
   if (!count)
 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
-  destreg = copy_addr_to_reg (XEXP (dst, 0));
-  srcreg = copy_addr_to_reg (XEXP (src, 0));
+  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
+  srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
 
   unroll_factor = 1;
   move_mode = word_mode;
@@ -23436,7 +23451,7 @@ ix86_expand_setmem (rtx dst, rtx count_exp, rtx 
val_exp, rtx align_exp,
 
   if (!count)
 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
-  destreg = copy_addr_to_reg (XEXP (dst, 0));
+  destreg = ix86_copy_addr_to_reg (XEXP (dst, 0));
 
   move_mode = word_mode;
   unroll_factor = 1;
diff --git a/gcc/testsuite/gcc.target/i386/pr58690.c 
b/gcc/testsuite/gcc.target/i386/pr58690.c
new file mode 100644
index 000..87a87cc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr58690.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { ! { ia32 } } } } */
+/* { dg-require-effective-target maybe_x32 } */
+/* { dg-options "-O2 -mx32 -maddress-mode=short" } */
+
+struct gomp_thread
+{
+  char foo[41];
+};
+extern __thread struct gomp_thread gomp_tls_data;
+void
+foo (void)
+{
+  __builtin_memset (&gomp_tls_data, '\0', sizeof (gomp_tls_data));
+}


[PATCH, powerpc] PR target/58673: Fix power8 quad memory/no-vsx-timode interaction (revised)

2013-10-11 Thread Michael Meissner
Whoops, I didn't notice I had left in old code for lq/stq that I had commented
out.  This patch replaces the previous patch, and eliminates the commented out
code.  Sorry about that.

[gcc]
2013-10-11  Michael Meissner  

PR target/58673
* config/rs6000/rs6000.c (rs6000_legitimate_address_p): Only
restrict TImode addresses to single indirect registers if both
-mquad-memory and -mvsx-timode are used.
(rs6000_output_move_128bit): Use quad_load_store_p to determine if
we should emit load/store quad.  Remove using %y for quad memory
addresses.

* config/rs6000/rs6000.md (mov_ppc64, TI/PTImode): Add
constraints to allow load/store quad on machines where TImode is
not allowed in VSX registers.

[gcc/testsuite]
2013-10-11  Michael Meissner  

PR target/58673
* gcc.target/powerpc/pr58673-1.c: New file to test whether
-mquad-word + -mno-vsx-timode causes errors.
* gcc.target/powerpc/pr58673-2.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 203389)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -7182,12 +7182,12 @@ rs6000_legitimate_address_p (enum machin
   if (reg_offset_p
   && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
 return 1;
-  /* For TImode, if we have load/store quad, only allow register indirect
- addresses.  This will allow the values to go in either GPRs or VSX
- registers without reloading.  The vector types would tend to go into VSX
- registers, so we allow REG+REG, while TImode seems somewhat split, in that
- some uses are GPR based, and some VSX based.  */
-  if (mode == TImode && TARGET_QUAD_MEMORY)
+  /* For TImode, if we have load/store quad and TImode in VSX registers, only
+ allow register indirect addresses.  This will allow the values to go in
+ either GPRs or VSX registers without reloading.  The vector types would
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
+ somewhat split, in that some uses are GPR based, and some VSX based.  */
+  if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
 return 0;
   /* If not REG_OK_STRICT (before reload) let pass any stack offset.  */
   if (! reg_ok_strict
@@ -16060,13 +16060,8 @@ rs6000_output_move_128bit (rtx operands[
 {
   if (dest_gpr_p)
{
- if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
- && quad_memory_operand (src, mode)
- && !reg_overlap_mentioned_p (dest, src))
-   {
- /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
- return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
-   }
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+   return "lq %0,%1";
  else
return "#";
}
@@ -16095,12 +16090,8 @@ rs6000_output_move_128bit (rtx operands[
 {
   if (src_gpr_p)
{
- if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
- && quad_memory_operand (dest, mode))
-   {
- /* lq/stq only has DQ-form, so avoid X-form that %y produces.  */
- return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
-   }
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+   return "stq %1,%0";
  else
return "#";
}
Index: gcc/config/rs6000/rs6000.md
===
--- gcc/config/rs6000/rs6000.md (revision 203389)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -10312,15 +10312,15 @@ (define_insn "*mov_string"
  (const_string "conditional")))])
 
 (define_insn "*mov_ppc64"
-  [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
-   (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+  [(set (match_operand:TI2 0 "nonimmediate_operand" "=wQ,Y,r,r,r,r")
+   (match_operand:TI2 1 "input_operand" "r,r,wQ,Y,r,F"))]
   "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (mode)
&& (gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode)))"
 {
   return rs6000_output_move_128bit (operands);
 }
-  [(set_attr "type" "store,load,*,*")
+  [(set_attr "type" "store,store,load,load,*,*")
(set_attr "length" "8")])
 
 (define_split
Index: gcc/testsuite/gcc.target/powerpc/pr58673-1.c
===
--- gcc/testsuite/gcc.target/powerpc/pr58673-1.c(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr58673-1.c(revision 0)
@@ -0,0 +1,78 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+

Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread DJ Delorie

> Why would you want to do that?

I wouldn't, but gcc isn't the only user of libiberty.


PATCH: Update x32 baseline_symbols.txt

2013-10-11 Thread H.J. Lu
Hi,

I checked in this patch to update baseline_symbols.txt for x32.

H.J.
---
Index: ChangeLog
===
--- ChangeLog   (revision 203455)
+++ ChangeLog   (working copy)
@@ -1,3 +1,7 @@
+2013-10-11  H.J. Lu  
+
+   * config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt: Update.
+
 2013-10-10  Marcus Shawcroft  
 
* testsuite/29_atomics/atomic/cons/49445.cc
Index: config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt
===
--- config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt   (revision 
203455)
+++ config/abi/post/x86_64-linux-gnu/x32/baseline_symbols.txt   (working copy)
@@ -403,6 +403,7 @@ FUNC:_ZNKSt15basic_streambufIwSt11char_t
 FUNC:_ZNKSt15basic_streambufIwSt11char_traitsIwEE6getlocEv@@GLIBCXX_3.4
 FUNC:_ZNKSt15basic_stringbufIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4
 FUNC:_ZNKSt15basic_stringbufIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4
+FUNC:_ZNKSt17bad_function_call4whatEv@@GLIBCXX_3.4.18
 FUNC:_ZNKSt18basic_stringstreamIcSt11char_traitsIcESaIcEE3strEv@@GLIBCXX_3.4
 FUNC:_ZNKSt18basic_stringstreamIcSt11char_traitsIcESaIcEE5rdbufEv@@GLIBCXX_3.4
 FUNC:_ZNKSt18basic_stringstreamIwSt11char_traitsIwESaIwEE3strEv@@GLIBCXX_3.4
@@ -590,6 +591,8 @@ FUNC:_ZNKSt7num_putIwSt19ostreambuf_iter
 
FUNC:_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_putES3_RSt8ios_basewm@@GLIBCXX_3.4
 
FUNC:_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_putES3_RSt8ios_basewx@@GLIBCXX_3.4
 
FUNC:_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_putES3_RSt8ios_basewy@@GLIBCXX_3.4
+FUNC:_ZNKSt8__detail20_Prime_rehash_policy11_M_next_bktEj@@GLIBCXX_3.4.18
+FUNC:_ZNKSt8__detail20_Prime_rehash_policy14_M_need_rehashEjjj@@GLIBCXX_3.4.18
 FUNC:_ZNKSt8bad_cast4whatEv@@GLIBCXX_3.4.9
 FUNC:_ZNKSt8ios_base7failure4whatEv@@GLIBCXX_3.4
 FUNC:_ZNKSt8messagesIcE18_M_convert_to_charERKSs@@GLIBCXX_3.4
@@ -1207,6 +1210,7 @@ FUNC:_ZNSt11range_errorD2Ev@@GLIBCXX_3.4
 FUNC:_ZNSt11regex_errorD0Ev@@GLIBCXX_3.4.15
 FUNC:_ZNSt11regex_errorD1Ev@@GLIBCXX_3.4.15
 FUNC:_ZNSt11regex_errorD2Ev@@GLIBCXX_3.4.15
+FUNC:_ZNSt11this_thread11__sleep_forENSt6chrono8durationIxSt5ratioILx1ELx1NS1_IxS2_ILx1ELx10@@GLIBCXX_3.4.18
 FUNC:_ZNSt12__basic_fileIcE2fdEv@@GLIBCXX_3.4
 FUNC:_ZNSt12__basic_fileIcE4fileEv@@GLIBCXX_3.4.1
 FUNC:_ZNSt12__basic_fileIcE4openEPKcSt13_Ios_Openmodei@@GLIBCXX_3.4
@@ -1485,6 +1489,11 @@ FUNC:_ZNSt13basic_ostreamIwSt11char_trai
 FUNC:_ZNSt13basic_ostreamIwSt11char_traitsIwEElsEt@@GLIBCXX_3.4
 FUNC:_ZNSt13basic_ostreamIwSt11char_traitsIwEElsEx@@GLIBCXX_3.4
 FUNC:_ZNSt13basic_ostreamIwSt11char_traitsIwEElsEy@@GLIBCXX_3.4
+FUNC:_ZNSt13random_device14_M_init_pretr1ERKSs@@GLIBCXX_3.4.18
+FUNC:_ZNSt13random_device16_M_getval_pretr1Ev@@GLIBCXX_3.4.18
+FUNC:_ZNSt13random_device7_M_finiEv@@GLIBCXX_3.4.18
+FUNC:_ZNSt13random_device7_M_initERKSs@@GLIBCXX_3.4.18
+FUNC:_ZNSt13random_device9_M_getvalEv@@GLIBCXX_3.4.18
 FUNC:_ZNSt13runtime_errorC1ERKSs@@GLIBCXX_3.4
 FUNC:_ZNSt13runtime_errorC2ERKSs@@GLIBCXX_3.4
 FUNC:_ZNSt13runtime_errorD0Ev@@GLIBCXX_3.4
@@ -1929,6 +1938,8 @@ FUNC:_ZNSt6__norm15_List_node_base7rever
 FUNC:_ZNSt6__norm15_List_node_base8transferEPS0_S1_@@GLIBCXX_3.4.9
 FUNC:_ZNSt6__norm15_List_node_base9_M_unhookEv@@GLIBCXX_3.4.14
 FUNC:_ZNSt6chrono12system_clock3nowEv@@GLIBCXX_3.4.11
+FUNC:_ZNSt6chrono3_V212steady_clock3nowEv@@GLIBCXX_3.4.19
+FUNC:_ZNSt6chrono3_V212system_clock3nowEv@@GLIBCXX_3.4.19
 FUNC:_ZNSt6gslice8_IndexerC1EjRKSt8valarrayIjES4_@@GLIBCXX_3.4
 FUNC:_ZNSt6gslice8_IndexerC2EjRKSt8valarrayIjES4_@@GLIBCXX_3.4
 FUNC:_ZNSt6locale11_M_coalesceERKS_S1_i@@GLIBCXX_3.4
@@ -2467,6 +2478,7 @@ FUNC:__cxa_guard_acquire@@CXXABI_1.3
 FUNC:__cxa_guard_release@@CXXABI_1.3
 FUNC:__cxa_pure_virtual@@CXXABI_1.3
 FUNC:__cxa_rethrow@@CXXABI_1.3
+FUNC:__cxa_thread_atexit@@CXXABI_1.3.7
 FUNC:__cxa_throw@@CXXABI_1.3
 FUNC:__cxa_tm_cleanup@@CXXABI_TM_1
 FUNC:__cxa_vec_cctor@@CXXABI_1.3
@@ -2491,6 +2503,7 @@ OBJECT:0:CXXABI_1.3.3
 OBJECT:0:CXXABI_1.3.4
 OBJECT:0:CXXABI_1.3.5
 OBJECT:0:CXXABI_1.3.6
+OBJECT:0:CXXABI_1.3.7
 OBJECT:0:CXXABI_TM_1
 OBJECT:0:GLIBCXX_3.4
 OBJECT:0:GLIBCXX_3.4.1
@@ -2502,6 +2515,8 @@ OBJECT:0:GLIBCXX_3.4.14
 OBJECT:0:GLIBCXX_3.4.15
 OBJECT:0:GLIBCXX_3.4.16
 OBJECT:0:GLIBCXX_3.4.17
+OBJECT:0:GLIBCXX_3.4.18
+OBJECT:0:GLIBCXX_3.4.19
 OBJECT:0:GLIBCXX_3.4.2
 OBJECT:0:GLIBCXX_3.4.3
 OBJECT:0:GLIBCXX_3.4.4
@@ -3033,6 +3048,8 @@ OBJECT:1:_ZNSt21__numeric_limits_base9is
 OBJECT:1:_ZNSt21__numeric_limits_base9is_moduloE@@GLIBCXX_3.4
 OBJECT:1:_ZNSt21__numeric_limits_base9is_signedE@@GLIBCXX_3.4
 OBJECT:1:_ZNSt6chrono12system_clock12is_monotonicE@@GLIBCXX_3.4.11
+OBJECT:1:_ZNSt6chrono3_V212steady_clock9is_steadyE@@GLIBCXX_3.4.19
+OBJECT:1:_ZNSt6chrono3_V212system_clock9is_steadyE@@GLIBCXX_3.4.19
 OBJECT:1:_ZSt10adopt_lock@@GLIBCXX_3.4.11
 OBJECT:1:_ZSt10defer_lock@@GLIBCXX_3.4.11
 OBJECT:1:_ZSt11try_to_lock@@GLIBCXX_3.

Re: [AArch64] Fix early-clobber operands to vtbx[1,3]

2013-10-11 Thread Marcus Shawcroft
On 11 October 2013 17:45, James Greenhalgh  wrote:
>
> Hi,
>
> The vtbx intrinsics are implemented in assembly without noting
> that their tmp1 operand is early-clobber. This can, when the
> wind blows the wrong way, result in us making a total mess of
> the state of registers.
>
> Fix by marking the required operands as early-clobber.
>
> Regression tested against aarch64.exp with no problems.
>
> OK?

OK, and back port to 4.8 please.
/Marcus


[patch] Fix altivec-7.C testsuite failure due to vector name mangling.

2013-10-11 Thread Brooks Moses
Hi, all -

We recently built a compiler with a default -fabi-version value that's
different from the 2 that's used as default on trunk.  The result of
this was a test failure in g++.dg/ext/altivec-7.C, which compiles a
bunch of empty functions with vector arguments and inspects the
generated assembly for the mangled names.

The failure is because the test is searching for mangled names of the
form "_Z3fooU8__vectorh", which are only generated by ABI versions 1,
2, and 3.  As noted in the documentation, "Version 4, which first
appeared in G++ 4.5, implements a standard mangling for vector types";
this standard mangling looks like "_Z3fooDv16_h" instead.

This patch fixes the failure by adjusting the test to look for the
names using the standard mangling.  It passes with all ABI versions;
the compiler always emits the standard symbols, and with versions 1,
2, and 3 it also emits duplicate symbols with the old mangling.

Ok to commit?

(An alternate approach would be to force -mabi-version=2, and check
for the presence of both sets of symbols.  That seems overkill to me,
but I can alter the test to do that if desired.)

Thanks,
- Brooks



2013-10-11  Brooks Moses  

* g++.dg/ext/altivec-7.C: Check for standard vector-type
 name mangling.


2013-10-11_altivec-7-fix.diff
Description: Binary data


Re: [C++ Patch] PR 58466

2013-10-11 Thread Paolo Carlini

On 10/11/2013 08:29 PM, Paolo Carlini wrote:
Are we maybe failing to bump processing_template_decl somewhere while 
processing the specialization?

... I'm finishing testing this, already past g++.dg/dg.exp...

Paolo.
Index: cp/pt.c
===
--- cp/pt.c (revision 203449)
+++ cp/pt.c (working copy)
@@ -18594,10 +18594,15 @@ most_specialized_class (tree type, tree tmpl, tsub
   if (spec_tmpl == error_mark_node)
return error_mark_node;
 
+  ++processing_template_decl;
+
   tree parms = DECL_INNERMOST_TEMPLATE_PARMS (spec_tmpl);
   spec_args = get_class_bindings (tmpl, parms,
  partial_spec_args,
  args);
+
+  --processing_template_decl;
+
   if (spec_args)
{
  if (outer_args)
Index: testsuite/g++.dg/cpp0x/variadic145.C
===
--- testsuite/g++.dg/cpp0x/variadic145.C(revision 0)
+++ testsuite/g++.dg/cpp0x/variadic145.C(working copy)
@@ -0,0 +1,10 @@
+// PR c++/58466
+// { dg-do compile { target c++11 } }
+
+template struct A;
+
+template struct B;
+
+template struct B> {};
+
+B> b;// { dg-error "incomplete type" }


Re: [PATCH] Add --enable-host-shared configuration option

2013-10-11 Thread David Malcolm
On Thu, 2013-10-10 at 01:05 +, Joseph S. Myers wrote:
> On Wed, 9 Oct 2013, David Malcolm wrote:
> 
> > This patch adds an "--enable-host-shared" option throughout the various
> > configure/Make machinery for host code, adding "-fPIC" where appropriate
> > when enabled.
> 
> Please document this in install.texi (even if it isn't particularly useful 
> at the stage where it just means PIC rather than actual shared libraries).

How does the following look:

gcc/
* doc/install.texi (--enable-shared): Add note contrasting it
with...
(--enable-host-shared): ...new option.

commit 23bf2a4389817352bca1bdcbe3d7971b4f607e4b
Author: David Malcolm 
Date:   Fri Oct 11 16:22:45 2013 -0400

Document --enable-host-shared

gcc/
	* doc/install.texi (--enable-shared): Add note contrasting it
	with...
	(--enable-host-shared): New option.

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 7be8e5a..5cb4d3c 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -925,6 +925,19 @@ Use @option{--disable-shared} to build only static libraries.  Note that
 @option{--disable-shared} does not accept a list of package names as
 argument, only @option{--enable-shared} does.
 
+Contrast with @option{--enable-host-shared}, which affects @emph{host}
+code.
+
+@item --enable-host-shared
+Specify that the @emph{host} code should be built into position-independent
+machine code (with -fPIC), allowing it to be used within shared libraries,
+but yielding a slightly slower compiler.
+
+Currently this option is only of use to people developing GCC itself.
+
+Contrast with @option{--enable-shared}, which affects @emph{target}
+libraries.
+
 @item @anchor{with-gnu-as}--with-gnu-as
 Specify that the compiler should assume that the
 assembler it finds is the GNU assembler.  However, this does not modify


[PATCH] FIx pr58640

2013-10-11 Thread Jeff Law


The problem shown by pr58640 is the more aggressive jump threading is 
recording a jump threading opportunity that crosses through two loop 
headers.


The updating code is not prepared to update the CFG and loop structures 
in that situation.  The net result is we have two latch edges for a 
particular loop.  This causes the full unrolling code to go a bit nuts 
with a particular block is considered unreachable and has no outgoing 
edges.  It is (of course) reachable and falling off the end of the block 
results in bad things happening.


The easiest fix would be to simply cancel the jump threading request 
entirely.  However, we can do better by merely truncating the request 
immediately prior to crossing the second loop entry point.


In fact, the code we generate for pr58640 is considerably better when we 
truncate the path rather than eliminating the jump threading request 
entirely.


Bootstrapped and regression tested on x86_64-unknown-linux-gnu. 
Installed onto the trunk.


commit 7aec1a83e5c18ddb0f053b28f62a1c242ab9e477
Author: Jeff Law 
Date:   Fri Oct 11 14:24:11 2013 -0600

PR tree-optimization/58640
* tree-ssa-threadupdate.c (mark_threaded_blocks): Truncate jump 
threading
paths that cross over two loop entry points.

* gcc.c-torture/execute/pr58640.c: New test.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 41e29dc..9f4e297 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2013-10-11  Jeff Law  
+
+   PR tree-optimization/58640
+   * tree-ssa-threadupdate.c (mark_threaded_blocks): Truncate jump 
threading
+   paths that cross over two loop entry points.
+
 2013-10-11  Bill Schmidt  
 
* config/rs6000/vsx.md (*vsx_le_perm_load_v2di): Generalize to
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 87ff2a7..bb2ede4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2013-10-11  Jeff Law  
+
+   * gcc.c-torture/execute/pr58640.c: New test.
+
 2013-10-11  Paolo Carlini  
 
PR c++/58633
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr58640.c 
b/gcc/testsuite/gcc.c-torture/execute/pr58640.c
new file mode 100644
index 000..7786b8d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr58640.c
@@ -0,0 +1,32 @@
+int a, b, c, d = 1, e;
+
+static signed char
+foo ()
+{
+  int f, g = a;
+
+  for (f = 1; f < 3; f++)
+for (; b < 1; b++)
+  {
+if (d)
+  for (c = 0; c < 4; c++)
+for (f = 0; f < 3; f++)
+  {
+for (e = 0; e < 1; e++)
+  a = g;
+if (f)
+  break;
+  }
+else if (f)
+  continue;
+return 0;
+  }
+  return 0;
+}
+
+int
+main ()
+{
+  foo ();
+  exit (0);
+}
diff --git a/gcc/tree-ssa-threadupdate.c b/gcc/tree-ssa-threadupdate.c
index 2adea1b..3e34567 100644
--- a/gcc/tree-ssa-threadupdate.c
+++ b/gcc/tree-ssa-threadupdate.c
@@ -1354,6 +1354,68 @@ mark_threaded_blocks (bitmap threaded_blocks)
   else
 bitmap_copy (threaded_blocks, tmp);
 
+  /* Look for jump threading paths which cross multiple loop headers.
+
+ The code to thread through loop headers will change the CFG in ways
+ that break assumptions made by the loop optimization code.
+
+ We don't want to blindly cancel the requests.  We can instead do better
+ by trimming off the end of the jump thread path.  */
+  EXECUTE_IF_SET_IN_BITMAP (tmp, 0, i, bi)
+{
+  basic_block bb = BASIC_BLOCK (i);
+  FOR_EACH_EDGE (e, ei, bb->preds)
+   {
+ if (e->aux)
+   {
+ vec *path = THREAD_PATH (e);
+
+ /* Basically we're looking for a situation where we can see
+3 or more loop structures on a jump threading path.  */
+
+ struct loop *first_father = (*path)[0]->e->src->loop_father;
+ struct loop *second_father = NULL;
+ for (unsigned int i = 0; i < path->length (); i++)
+   {
+ /* See if this is a loop father we have not seen before.  */
+ if ((*path)[i]->e->dest->loop_father != first_father
+ && (*path)[i]->e->dest->loop_father != second_father)
+   {
+ /* We've already seen two loop fathers, so we
+need to trim this jump threading path.  */
+ if (second_father != NULL)
+   {
+ /* Trim from entry I onwards.  */
+ for (unsigned int j = i; j < path->length (); j++)
+   delete (*path)[j];
+ path->truncate (i);
+
+ /* Now that we've truncated the path, make sure
+what's left is still valid.   We need at least
+two edges on the path and the last edge can not
+be a joiner.  This should

Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread Marc Glisse

On Fri, 11 Oct 2013, DJ Delorie wrote:


Your patch changes the rule that the application can override
xmalloc() and get functions that return NULL...


How about this more limited version? I'll see about following Jakub's 
advice to apply the attributes to the other functions only in gcc.


2013-10-11  Marc Glisse  

PR tree-optimization/58689
* ansidecl.h (ATTRIBUTE_RETURNS_NONNULL): New macro.
* libiberty.h (basename, lbasename, dos_lbasename, unix_lbasename,
concat_copy): Mark with attributes nonnull(1) and returns_nonnull.
(concat_copy2, xstrerror): Mark with attribute returns_nonnull.

--
Marc GlisseIndex: ansidecl.h
===
--- ansidecl.h  (revision 203451)
+++ ansidecl.h  (working copy)
@@ -304,20 +304,29 @@ So instead we use the macro below and te
 
 /* Attribute `nonnull' was valid as of gcc 3.3.  */
 #ifndef ATTRIBUTE_NONNULL
 # if (GCC_VERSION >= 3003)
 #  define ATTRIBUTE_NONNULL(m) __attribute__ ((__nonnull__ (m)))
 # else
 #  define ATTRIBUTE_NONNULL(m)
 # endif /* GNUC >= 3.3 */
 #endif /* ATTRIBUTE_NONNULL */
 
+/* Attribute `returns_nonnull' was valid as of gcc 4.9.  */
+#ifndef ATTRIBUTE_RETURNS_NONNULL
+# if (GCC_VERSION >= 4009)
+#  define ATTRIBUTE_RETURNS_NONNULL __attribute__ ((__returns_nonnull__))
+# else
+#  define ATTRIBUTE_RETURNS_NONNULL
+# endif /* GNUC >= 4.9 */
+#endif /* ATTRIBUTE_RETURNS_NONNULL */
+
 /* Attribute `pure' was valid as of gcc 3.0.  */
 #ifndef ATTRIBUTE_PURE
 # if (GCC_VERSION >= 3000)
 #  define ATTRIBUTE_PURE __attribute__ ((__pure__))
 # else
 #  define ATTRIBUTE_PURE
 # endif /* GNUC >= 3.0 */
 #endif /* ATTRIBUTE_PURE */
 
 /* Use ATTRIBUTE_PRINTF when the format specifier must not be NULL.
Index: libiberty.h
===
--- libiberty.h (revision 203451)
+++ libiberty.h (working copy)
@@ -100,43 +100,43 @@ extern int countargv (char**);
across different systems, sometimes as "char *" and sometimes as
"const char *" */
 
 /* HAVE_DECL_* is a three-state macro: undefined, 0 or 1.  If it is
undefined, we haven't run the autoconf check so provide the
declaration without arguments.  If it is 0, we checked and failed
to find the declaration so provide a fully prototyped one.  If it
is 1, we found it so don't provide any declaration at all.  */
 #if !HAVE_DECL_BASENAME
 #if defined (__GNU_LIBRARY__ ) || defined (__linux__) || defined (__FreeBSD__) 
|| defined (__OpenBSD__) || defined(__NetBSD__) || defined (__CYGWIN__) || 
defined (__CYGWIN32__) || defined (__MINGW32__) || defined (HAVE_DECL_BASENAME)
-extern char *basename (const char *);
+extern char *basename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 #else
 /* Do not allow basename to be used if there is no prototype seen.  We
either need to use the above prototype or have one from
autoconf which would result in HAVE_DECL_BASENAME being set.  */
 #define basename basename_cannot_be_used_without_a_prototype
 #endif
 #endif
 
 /* A well-defined basename () that is always compiled in.  */
 
-extern const char *lbasename (const char *);
+extern const char *lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* Same, but assumes DOS semantics (drive name, backslash is also a
dir separator) regardless of host.  */
 
-extern const char *dos_lbasename (const char *);
+extern const char *dos_lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* Same, but assumes Unix semantics (absolute paths always start with
a slash, only forward slash is accepted as dir separator)
regardless of host.  */
 
-extern const char *unix_lbasename (const char *);
+extern const char *unix_lbasename (const char *) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL;
 
 /* A well-defined realpath () that is always compiled in.  */
 
 extern char *lrealpath (const char *);
 
 /* Concatenate an arbitrary number of strings.  You must pass NULL as
the last argument of this function, to terminate the list of
strings.  Allocates memory using xmalloc.  */
 
 extern char *concat (const char *, ...) ATTRIBUTE_MALLOC ATTRIBUTE_SENTINEL;
@@ -154,28 +154,28 @@ extern char *reconcat (char *, const cha
strings.  You must pass NULL as the last argument of this function,
to terminate the list of strings.  */
 
 extern unsigned long concat_length (const char *, ...) ATTRIBUTE_SENTINEL;
 
 /* Concatenate an arbitrary number of strings into a SUPPLIED area of
memory.  You must pass NULL as the last argument of this function,
to terminate the list of strings.  The supplied memory is assumed
to be large enough.  */
 
-extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_SENTINEL;
+extern char *concat_copy (char *, const char *, ...) ATTRIBUTE_NONNULL(1) 
ATTRIBUTE_RETURNS_NONNULL ATTRIBUTE_SENTINEL;
 
 /* Concatenate an arbitrary number of strings into a G

Re: [PATCH] Add --enable-host-shared configuration option

2013-10-11 Thread Joseph S. Myers
On Fri, 11 Oct 2013, David Malcolm wrote:

> On Thu, 2013-10-10 at 01:05 +, Joseph S. Myers wrote:
> > On Wed, 9 Oct 2013, David Malcolm wrote:
> > 
> > > This patch adds an "--enable-host-shared" option throughout the various
> > > configure/Make machinery for host code, adding "-fPIC" where appropriate
> > > when enabled.
> > 
> > Please document this in install.texi (even if it isn't particularly useful 
> > at the stage where it just means PIC rather than actual shared libraries).
> 
> How does the following look:
> 
> gcc/
>   * doc/install.texi (--enable-shared): Add note contrasting it
>   with...
>   (--enable-host-shared): ...new option.

Seems reasonable to me.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: Apply attribute returns_nonnull in libiberty

2013-10-11 Thread DJ Delorie

PR tree-optimization/58689
* ansidecl.h (ATTRIBUTE_RETURNS_NONNULL): New macro.
* libiberty.h (basename, lbasename, dos_lbasename, unix_lbasename,
concat_copy): Mark with attributes nonnull(1) and returns_nonnull.
(concat_copy2, xstrerror): Mark with attribute returns_nonnull.

This part is OK.


Re: [PATCH] Add --enable-host-shared configuration option

2013-10-11 Thread David Malcolm
On Fri, 2013-10-11 at 20:45 +, Joseph S. Myers wrote:
> On Fri, 11 Oct 2013, David Malcolm wrote:
> 
> > On Thu, 2013-10-10 at 01:05 +, Joseph S. Myers wrote:
> > > On Wed, 9 Oct 2013, David Malcolm wrote:
> > > 
> > > > This patch adds an "--enable-host-shared" option throughout the various
> > > > configure/Make machinery for host code, adding "-fPIC" where appropriate
> > > > when enabled.
> > > 
> > > Please document this in install.texi (even if it isn't particularly 
> > > useful 
> > > at the stage where it just means PIC rather than actual shared libraries).
> > 
> > How does the following look:
> > 
> > gcc/
> > * doc/install.texi (--enable-shared): Add note contrasting it
> > with...
> > (--enable-host-shared): ...new option.
> 
> Seems reasonable to me.

Thanks.   Presumably the initially posted configure/make patch still
needs review, right?

Dave



[PATCH] increase builtin_expert probability in loop exit test

2013-10-11 Thread Rong Xu
Hi,

An earlier patch (r203167) changed the default probability for
builtin_expect to 90%.
It does not work properly for the following case:
   while (__builin_expect (expr, 1)) { }

W/o builtin_expect, the exit probability is 9% while w/
builtin_expect, the exit probability is 10%.
It seems to be wrong as we should estimate this loop has more
iterations than w/o the annotation.

This attached patch bump the probability for this particular case.

Tested with bootstrap.

Thanks,

-Rong


patch_diff
Description: Binary data


[google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Rong Xu
The trunk version of this patch is submitted for review.

David: can we have this patch for google/gcc-4_8 branch first?
It tested with regression and google internal benchmarks.

Thanks,

-Rong
2013-10-11  Rong Xu  

* predict.c (tree_predict_by_opcode): Bump the 
estimiated probability if builtin_expect expression is
in loop exit test.

Index: predict.c
===
--- predict.c   (revision 203462)
+++ predict.c   (working copy)
@@ -1951,7 +1951,31 @@ tree_predict_by_opcode (basic_block bb)
   if (val)
 {
   int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
+  void **preds;
 
+  /* This handles the cases like
+   while (__builtin_expect (exp, 1)) { ... }
+ W/o builtin_expect, the default HITRATE is 91%.
+It does not make sense to estimate a lower probability of 90%
+(current default for builtin_expect) with the annotation.
+So here, we bump the probability by a small amount.  */
+  preds = pointer_map_contains (bb_predictions, bb);
+  if (preds)
+{
+  struct edge_prediction *pred;
+
+  for (pred = (struct edge_prediction *) *preds; pred;
+  pred = pred->ep_next)
+   {
+ if (pred->ep_predictor == PRED_LOOP_EXIT
+ && predictor_info [(int) PRED_LOOP_EXIT].hitrate
+> HITRATE (percent))
+   percent += 4;
+ if (percent > 100)
+   percent = 100;
+   }
+   }
+
   gcc_assert (percent >= 0 && percent <= 100);
   if (integer_zerop (val))
 percent = 100 - percent;


Re: [patch] Fix altivec-7.C testsuite failure due to vector name mangling.

2013-10-11 Thread Mike Stump
On Oct 11, 2013, at 11:55 AM, Brooks Moses  wrote:
> This patch fixes the failure by adjusting the test to look for the
> names using the standard mangling.  It passes with all ABI versions;
> the compiler always emits the standard symbols, and with versions 1,
> 2, and 3 it also emits duplicate symbols with the old mangling.
> 
> Ok to commit?

Ok.

For those that want to see additional testing, they can always split it and run 
it once with -fabi-version=2, and once with =4; if they care enough.

Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Xinliang David Li
Why this 'percent += 4'  instead of taking the max?

David

On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
> The trunk version of this patch is submitted for review.
>
> David: can we have this patch for google/gcc-4_8 branch first?
> It tested with regression and google internal benchmarks.
>
> Thanks,
>
> -Rong


Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Rong Xu
I want to differentiate the cases w/o and w/ builtin.
If I take the max, they will be the same (91%).

-Rong

On Fri, Oct 11, 2013 at 2:26 PM, Xinliang David Li  wrote:
> Why this 'percent += 4'  instead of taking the max?
>
> David
>
> On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
>> The trunk version of this patch is submitted for review.
>>
>> David: can we have this patch for google/gcc-4_8 branch first?
>> It tested with regression and google internal benchmarks.
>>
>> Thanks,
>>
>> -Rong


Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Xinliang David Li
Should it be max + some_delta then?

David

On Fri, Oct 11, 2013 at 2:32 PM, Rong Xu  wrote:
> I want to differentiate the cases w/o and w/ builtin.
> If I take the max, they will be the same (91%).
>
> -Rong
>
> On Fri, Oct 11, 2013 at 2:26 PM, Xinliang David Li  wrote:
>> Why this 'percent += 4'  instead of taking the max?
>>
>> David
>>
>> On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
>>> The trunk version of this patch is submitted for review.
>>>
>>> David: can we have this patch for google/gcc-4_8 branch first?
>>> It tested with regression and google internal benchmarks.
>>>
>>> Thanks,
>>>
>>> -Rong


Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Rong Xu
ok. that makes sense.


On Fri, Oct 11, 2013 at 2:41 PM, Xinliang David Li  wrote:
> Should it be max + some_delta then?
>
> David
>
> On Fri, Oct 11, 2013 at 2:32 PM, Rong Xu  wrote:
>> I want to differentiate the cases w/o and w/ builtin.
>> If I take the max, they will be the same (91%).
>>
>> -Rong
>>
>> On Fri, Oct 11, 2013 at 2:26 PM, Xinliang David Li  
>> wrote:
>>> Why this 'percent += 4'  instead of taking the max?
>>>
>>> David
>>>
>>> On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
 The trunk version of this patch is submitted for review.

 David: can we have this patch for google/gcc-4_8 branch first?
 It tested with regression and google internal benchmarks.

 Thanks,

 -Rong


Go patch committed: Use backend interface for function code exprs

2013-10-11 Thread Ian Lance Taylor
This patch by Chris Manghane changes the Go frontend to use the backend
interface for function code expressions.  Bootstrapped and ran Go
testsuite on x86_64-unknown-linux-gnu.  Committed to mainline.

Ian


2013-10-11  Chris Manghane  

* go-gcc.cc (Gcc_backend::function_code_expression): New
function.


Index: gcc/go/gofrontend/gogo.h
===
--- gcc/go/gofrontend/gogo.h	(revision 203403)
+++ gcc/go/gofrontend/gogo.h	(working copy)
@@ -1090,8 +1090,8 @@ class Function
 this->descriptor_ = descriptor;
   }
 
-  // Return the function's decl given an identifier.
-  tree
+  // Return the backend representation.
+  Bfunction*
   get_or_make_decl(Gogo*, Named_object*);
 
   // Return the function's decl after it has been built.
@@ -1262,8 +1262,8 @@ class Function_declaration
   has_descriptor() const
   { return this->descriptor_ != NULL; }
 
-  // Return a decl for the function given an identifier.
-  tree
+  // Return a backend representation.
+  Bfunction*
   get_or_make_decl(Gogo*, Named_object*);
 
   // If there is a descriptor, build it into the backend
Index: gcc/go/gofrontend/gogo-tree.cc
===
--- gcc/go/gofrontend/gogo-tree.cc	(revision 203403)
+++ gcc/go/gofrontend/gogo-tree.cc	(working copy)
@@ -1089,7 +1089,7 @@ Named_object::get_tree(Gogo* gogo, Named
 case NAMED_OBJECT_FUNC:
   {
 	Function* func = this->u_.func_value;
-	decl = func->get_or_make_decl(gogo, this);
+	decl = function_to_tree(func->get_or_make_decl(gogo, this));
 	if (decl != error_mark_node)
 	  {
 	if (func->block() != NULL)
@@ -1214,83 +1214,9 @@ Variable::get_init_block(Gogo* gogo, Nam
   return block_tree;
 }
 
-// Get a tree for a function decl.
+// Get the backend representation.
 
-tree
-Function::get_or_make_decl(Gogo* gogo, Named_object* no)
-{
-  if (this->fndecl_ == NULL)
-{
-  std::string asm_name;
-  bool is_visible = false;
-  if (no->package() != NULL)
-;
-  else if (this->enclosing_ != NULL || Gogo::is_thunk(no))
-;
-  else if (Gogo::unpack_hidden_name(no->name()) == "init"
-   && !this->type_->is_method())
-;
-  else if (Gogo::unpack_hidden_name(no->name()) == "main"
-   && gogo->is_main_package())
-is_visible = true;
-  // Methods have to be public even if they are hidden because
-  // they can be pulled into type descriptors when using
-  // anonymous fields.
-  else if (!Gogo::is_hidden_name(no->name())
-   || this->type_->is_method())
-{
-  is_visible = true;
-  std::string pkgpath = gogo->pkgpath_symbol();
-  if (this->type_->is_method()
-  && Gogo::is_hidden_name(no->name())
-  && Gogo::hidden_name_pkgpath(no->name()) != gogo->pkgpath())
-{
-  // This is a method we created for an unexported
-  // method of an imported embedded type.  We need to
-  // use the pkgpath of the imported package to avoid
-  // a possible name collision.  See bug478 for a test
-  // case.
-  pkgpath = Gogo::hidden_name_pkgpath(no->name());
-  pkgpath = Gogo::pkgpath_for_symbol(pkgpath);
-}
-
-  asm_name = pkgpath;
-  asm_name.append(1, '.');
-  asm_name.append(Gogo::unpack_hidden_name(no->name()));
-  if (this->type_->is_method())
-{
-  asm_name.append(1, '.');
-  Type* rtype = this->type_->receiver()->type();
-  asm_name.append(rtype->mangled_name(gogo));
-}
-}
-
-  // If a function calls the predeclared recover function, we
-  // can't inline it, because recover behaves differently in a
-  // function passed directly to defer.  If this is a recover
-  // thunk that we built to test whether a function can be
-  // recovered, we can't inline it, because that will mess up
-  // our return address comparison.
-  bool is_inlinable = !(this->calls_recover_ || this->is_recover_thunk_);
-
-  // If this is a thunk created to call a function which calls
-  // the predeclared recover function, we need to disable
-  // stack splitting for the thunk.
-  bool disable_split_stack = this->is_recover_thunk_;
-
-  Btype* functype = this->type_->get_backend_fntype(gogo);
-  this->fndecl_ =
-  gogo->backend()->function(functype, no->get_id(gogo), asm_name,
-is_visible, false, is_inlinable,
-disable_split_stack,
-this->in_unique_section_, this->location());
-}
-  return function_to_tree(this->fndecl_);
-}
-
-// Get a tree for a function declaration.
-
-tree
+Bfunction*
 Function_declaration::get_or_make_decl(Gogo* gogo, Named_object* no)
 {
   if (this->fndec

Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Rong Xu
here is the new patch. Note that the hitrate won't change by this
patch for the case of
  while (__builtin_expect (exp, 0))

Index: predict.c
===
--- predict.c   (revision 203462)
+++ predict.c   (working copy)
@@ -1951,11 +1951,42 @@ tree_predict_by_opcode (basic_block bb)
   if (val)
 {
   int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
+  int hitrate;

   gcc_assert (percent >= 0 && percent <= 100);
   if (integer_zerop (val))
-percent = 100 - percent;
-  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
+hitrate = HITRATE (100 - percent);
+  else
+{
+  /* This handles the cases like
+   while (__builtin_expect (exp, 1)) { ... }
+ W/o builtin_expect, the default HITRATE is 91%.
+ It does not make sense to estimate a lower probability of 90%
+ (current default for builtin_expect) with the annotation.
+ So here, we bump the probability by a small amount.  */
+  void **preds = pointer_map_contains (bb_predictions, bb);
+
+  hitrate = HITRATE (percent);
+  if (preds)
+{
+  struct edge_prediction *pred;
+  int exit_hitrate = predictor_info [(int) PRED_LOOP_EXIT].hitrate;
+
+  for (pred = (struct edge_prediction *) *preds; pred;
+   pred = pred->ep_next)
+{
+  if (pred->ep_predictor == PRED_LOOP_EXIT
+  && exit_hitrate > hitrate)
+{
+  hitrate = exit_hitrate + HITRATE (4);
+  if (hitrate > REG_BR_PROB_BASE)
+hitrate = REG_BR_PROB_BASE;
+  break;
+}
+}
+}
+}
+  predict_edge (then_edge, PRED_BUILTIN_EXPECT, hitrate);
 }
   /* Try "pointer heuristic."
  A comparison ptr == 0 is predicted as false.

On Fri, Oct 11, 2013 at 2:45 PM, Rong Xu  wrote:
> ok. that makes sense.
>
>
> On Fri, Oct 11, 2013 at 2:41 PM, Xinliang David Li  wrote:
>> Should it be max + some_delta then?
>>
>> David
>>
>> On Fri, Oct 11, 2013 at 2:32 PM, Rong Xu  wrote:
>>> I want to differentiate the cases w/o and w/ builtin.
>>> If I take the max, they will be the same (91%).
>>>
>>> -Rong
>>>
>>> On Fri, Oct 11, 2013 at 2:26 PM, Xinliang David Li  
>>> wrote:
 Why this 'percent += 4'  instead of taking the max?

 David

 On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
> The trunk version of this patch is submitted for review.
>
> David: can we have this patch for google/gcc-4_8 branch first?
> It tested with regression and google internal benchmarks.
>
> Thanks,
>
> -Rong


Re: [google gcc-4_8] increase builtin_expect probability in loop exit test

2013-10-11 Thread Xinliang David Li
ok for google branch.

David

On Fri, Oct 11, 2013 at 3:17 PM, Rong Xu  wrote:
> here is the new patch. Note that the hitrate won't change by this
> patch for the case of
>   while (__builtin_expect (exp, 0))
>
> Index: predict.c
> ===
> --- predict.c   (revision 203462)
> +++ predict.c   (working copy)
> @@ -1951,11 +1951,42 @@ tree_predict_by_opcode (basic_block bb)
>if (val)
>  {
>int percent = PARAM_VALUE (BUILTIN_EXPECT_PROBABILITY);
> +  int hitrate;
>
>gcc_assert (percent >= 0 && percent <= 100);
>if (integer_zerop (val))
> -percent = 100 - percent;
> -  predict_edge (then_edge, PRED_BUILTIN_EXPECT, HITRATE (percent));
> +hitrate = HITRATE (100 - percent);
> +  else
> +{
> +  /* This handles the cases like
> +   while (__builtin_expect (exp, 1)) { ... }
> + W/o builtin_expect, the default HITRATE is 91%.
> + It does not make sense to estimate a lower probability of 90%
> + (current default for builtin_expect) with the annotation.
> + So here, we bump the probability by a small amount.  */
> +  void **preds = pointer_map_contains (bb_predictions, bb);
> +
> +  hitrate = HITRATE (percent);
> +  if (preds)
> +{
> +  struct edge_prediction *pred;
> +  int exit_hitrate = predictor_info [(int) 
> PRED_LOOP_EXIT].hitrate;
> +
> +  for (pred = (struct edge_prediction *) *preds; pred;
> +   pred = pred->ep_next)
> +{
> +  if (pred->ep_predictor == PRED_LOOP_EXIT
> +  && exit_hitrate > hitrate)
> +{
> +  hitrate = exit_hitrate + HITRATE (4);
> +  if (hitrate > REG_BR_PROB_BASE)
> +hitrate = REG_BR_PROB_BASE;
> +  break;
> +}
> +}
> +}
> +}
> +  predict_edge (then_edge, PRED_BUILTIN_EXPECT, hitrate);
>  }
>/* Try "pointer heuristic."
>   A comparison ptr == 0 is predicted as false.
>
> On Fri, Oct 11, 2013 at 2:45 PM, Rong Xu  wrote:
>> ok. that makes sense.
>>
>>
>> On Fri, Oct 11, 2013 at 2:41 PM, Xinliang David Li  
>> wrote:
>>> Should it be max + some_delta then?
>>>
>>> David
>>>
>>> On Fri, Oct 11, 2013 at 2:32 PM, Rong Xu  wrote:
 I want to differentiate the cases w/o and w/ builtin.
 If I take the max, they will be the same (91%).

 -Rong

 On Fri, Oct 11, 2013 at 2:26 PM, Xinliang David Li  
 wrote:
> Why this 'percent += 4'  instead of taking the max?
>
> David
>
> On Fri, Oct 11, 2013 at 2:10 PM, Rong Xu  wrote:
>> The trunk version of this patch is submitted for review.
>>
>> David: can we have this patch for google/gcc-4_8 branch first?
>> It tested with regression and google internal benchmarks.
>>
>> Thanks,
>>
>> -Rong


Re: [PATCH] Do not append " *INTERNAL* " to the decl name

2013-10-11 Thread Cary Coutant
> It's hard to get a testcase without
> http://gcc.gnu.org/viewcvs/gcc?view=revision&revision=201856 because
> none of these *INTERNAL* symbols will be emitted in debug info.

The original code was in there as a form of assembly-time assertion
that the symbol would never get output. Now that you want to emit it
in the debug info, the original intent is still valid, but without
that extra kruft, the "assertion" is now gone. I think Jason is
suggesting that you replace that with a test case, which would
explicitly materialize one or more of the inlined functions that would
get these mangled names, and then check the assembler output to verify
that the (now valid) mangled name doesn't appear anywhere as an
assembler label.

-cary


Go patch committed: Fix handling of hidden methods for unnamed types

2013-10-11 Thread Ian Lance Taylor
This patch to the Go frontend fixes the handling of hidden methods for
unnamed types.  If an interface has hidden methods, we must make the
interface table comdat if it is for an unnamed type.  When we create a
stub method for an unnamed type, we don't make it publically visible.
Bootstrapped and ran Go testsuite for x86_64-unknown-linux-gnu.
Committed to mainline.  Will commit to 4.8 branch when the branch
reopens.

Ian

diff -r 92e9a04996ea go/gogo-tree.cc
--- a/go/gogo-tree.cc	Fri Oct 11 15:14:51 2013 -0700
+++ b/go/gogo-tree.cc	Fri Oct 11 15:50:19 2013 -0700
@@ -2154,10 +2154,11 @@
   TREE_CONSTANT(decl) = 1;
   DECL_INITIAL(decl) = constructor;
 
-  // If the interface type has hidden methods, then this is the only
-  // definition of the table.  Otherwise it is a comdat table which
-  // may be defined in multiple packages.
-  if (has_hidden_methods)
+  // If the interface type has hidden methods, and the table is for a
+  // named type, then this is the only definition of the table.
+  // Otherwise it is a comdat table which may be defined in multiple
+  // packages.
+  if (has_hidden_methods && type->named_type() != NULL)
 TREE_PUBLIC(decl) = 1;
   else
 {
diff -r 92e9a04996ea go/gogo.cc
--- a/go/gogo.cc	Fri Oct 11 15:14:51 2013 -0700
+++ b/go/gogo.cc	Fri Oct 11 15:50:19 2013 -0700
@@ -3320,7 +3320,8 @@
 closure_var_(NULL), block_(block), location_(location), labels_(),
 local_type_count_(0), descriptor_(NULL), fndecl_(NULL), defer_stack_(NULL),
 is_sink_(false), results_are_named_(false), nointerface_(false),
-calls_recover_(false), is_recover_thunk_(false), has_recover_thunk_(false),
+is_unnamed_type_stub_method_(false), calls_recover_(false),
+is_recover_thunk_(false), has_recover_thunk_(false),
 in_unique_section_(false)
 {
 }
@@ -3844,7 +3845,8 @@
   else if (!Gogo::is_hidden_name(no->name())
|| this->type_->is_method())
 {
-  is_visible = true;
+	  if (!this->is_unnamed_type_stub_method_)
+	is_visible = true;
   std::string pkgpath = gogo->pkgpath_symbol();
   if (this->type_->is_method()
   && Gogo::is_hidden_name(no->name())
diff -r 92e9a04996ea go/gogo.h
--- a/go/gogo.h	Fri Oct 11 15:14:51 2013 -0700
+++ b/go/gogo.h	Fri Oct 11 15:50:19 2013 -0700
@@ -953,6 +953,15 @@
 this->nointerface_ = true;
   }
 
+  // Record that this function is a stub method created for an unnamed
+  // type.
+  void
+  set_is_unnamed_type_stub_method()
+  {
+go_assert(this->is_method());
+this->is_unnamed_type_stub_method_ = true;
+  }
+
   // Add a new field to the closure variable.
   void
   add_closure_field(Named_object* var, Location loc)
@@ -1178,6 +1187,9 @@
   bool results_are_named_ : 1;
   // True if this method should not be included in the type descriptor.
   bool nointerface_ : 1;
+  // True if this function is a stub method created for an unnamed
+  // type.
+  bool is_unnamed_type_stub_method_ : 1;
   // True if this function calls the predeclared recover function.
   bool calls_recover_ : 1;
   // True if this a thunk built for a function which calls recover.
diff -r 92e9a04996ea go/types.cc
--- a/go/types.cc	Fri Oct 11 15:14:51 2013 -0700
+++ b/go/types.cc	Fri Oct 11 15:50:19 2013 -0700
@@ -9031,6 +9031,8 @@
   fntype->is_varargs(), location);
 	  gogo->finish_function(fntype->location());
 
+	  if (type->named_type() == NULL && stub->is_function())
+	stub->func_value()->set_is_unnamed_type_stub_method();
 	  if (m->nointerface() && stub->is_function())
 	stub->func_value()->set_nointerface();
 	}


  1   2   >