fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE

2016-08-01 Thread Prathamesh Kulkarni
Hi Richard,
The attached patch tries to fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE.
I am not sure where was the ideal place to put this transform in and ended up
adding it to strlen_optimize_stmt().
Does that look OK ?

I needed to add TODO_update_ssa to strlen pass, otherwise we hit the
following assert in execute_todo():
if (flag_checking
  && cfun
  && need_ssa_update_p (cfun))
gcc_assert (flags & TODO_update_ssa_any);

Bootstrap+test in progress on x86_64-unknown-linux-gnu.

Thanks,
Prathamesh
2016-08-01  Prathamesh Kulkarni  

* tree-ssa-strlen.c (strlen_optimize_stmt): Fold strlen (s) eq/ne 0 to
*s eq/ne 0.
Change todo_flags_finish for pass_data_strlen from 0 to TODO_update_ssa.

testsuite/
* gcc.dg/strlenopt-30.c: New test-case.

diff --git a/gcc/testsuite/gcc.dg/strlenopt-30.c 
b/gcc/testsuite/gcc.dg/strlenopt-30.c
new file mode 100644
index 0000000..da9732f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/strlenopt-30.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-strlen" } */
+
+__attribute__((noinline, no_icf))
+_Bool f1(const char *s)
+{
+  unsigned long len = __builtin_strlen (s);
+  _Bool ret = (len == 0);
+  return ret;
+}
+
+/* Check CONVERT_EXPR's get properly handled.  */
+__attribute__((noinline, no_icf))
+_Bool f2(const char *s)
+{
+  unsigned len = __builtin_strlen (s);
+  return len == 0; 
+}
+
+/* { dg-final { scan-tree-dump-times "strlen \\(" 0 "strlen" } } */
diff --git a/gcc/tree-ssa-strlen.c b/gcc/tree-ssa-strlen.c
index 9d7b4df..54f8109 100644
--- a/gcc/tree-ssa-strlen.c
+++ b/gcc/tree-ssa-strlen.c
@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ipa-chkp.h"
 #include "tree-hash-traits.h"
 #include "builtins.h"
+#include "tree-pretty-print.h"
 
 /* A vector indexed by SSA_NAME_VERSION.  0 means unknown, positive value
is an index into strinfo vector, negative value stands for
@@ -2302,6 +2303,43 @@ strlen_optimize_stmt (gimple_stmt_iterator *gsi)
  else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
handle_pointer_plus (gsi);
}
+  /* strlen (s) eq/ne 0 -> *s eq/ne 0.  */
+  else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (TREE_TYPE 
(lhs)))
+   {
+ tree rhs2 = gimple_assign_rhs2 (stmt);
+ tree_code code = gimple_assign_rhs_code (stmt);
+
+ if ((code == EQ_EXPR || code == NE_EXPR) && integer_zerop (rhs2))
+   {
+ tree rhs1 = gimple_assign_rhs1 (stmt);
+ if (TREE_CODE (rhs1) == SSA_NAME)
+   {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (rhs1);
+ if (is_a <gassign *> (def_stmt)
+ && (gimple_assign_rhs_code (def_stmt) == CONVERT_EXPR
+ || gimple_assign_rhs_code (def_stmt) == NOP_EXPR)
+ && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME)
+   def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 
(def_stmt));
+
+ if (gcall *call_stmt = dyn_cast <gcall *> (def_stmt))
+   {
+ tree callee = gimple_call_fndecl (call_stmt);
+ if (valid_builtin_call (call_stmt)
+ && DECL_FUNCTION_CODE (callee) == BUILT_IN_STRLEN)
+   {
+ tree arg = gimple_call_arg (call_stmt, 0);
+ tree op = build2 (MEM_REF, char_type_node, arg, 
build_zero_cst (TREE_TYPE (arg)));
+ tree temp = make_temp_ssa_name (TREE_TYPE (op), NULL, 
"strlen");
+ gimple *memref_stmt = gimple_build_assign (temp, op);
+ gimple_stmt_iterator call_gsi = gsi_for_stmt 
(call_stmt);
+ gsi_insert_before (&call_gsi, memref_stmt, 
GSI_SAME_STMT);
+ gassign *g = gimple_build_assign (gimple_call_lhs 
(call_stmt), CONVERT_EXPR, temp);
+ gsi_replace (&call_gsi, g, true);
+   }
+   }
+   }
+   }
+   }
   else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
{
  tree type = TREE_TYPE (lhs);
@@ -2505,7 +2543,7 @@ const pass_data pass_data_strlen =
   0, /* properties_provided */
   0, /* properties_destroyed */
   0, /* todo_flags_start */
-  0, /* todo_flags_finish */
+  TODO_update_ssa, /* todo_flags_finish */
 };
 
 class pass_strlen : public gimple_opt_pass


Re: fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE

2016-08-01 Thread Andrew Pinski
On Mon, Aug 1, 2016 at 12:15 AM, Prathamesh Kulkarni
 wrote:
> Hi Richard,
> The attached patch tries to fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE.
> I am not sure where was the ideal place to put this transform in and ended up
> adding it to strlen_optimize_stmt().
> Does that look OK ?

I suspect it might be better in match.pd.

>
> I needed to add TODO_update_ssa to strlen pass, otherwise we hit the
> following assert in execute_todo():
> if (flag_checking
>   && cfun
>   && need_ssa_update_p (cfun))
> gcc_assert (flags & TODO_update_ssa_any);

Also you only need to update the virtual SSAs rather than a full SSA update.

Thanks,
Andrew

>
> Bootstrap+test in progress on x86_64-unknown-linux-gnu.
>
> Thanks,
> Prathamesh


Re: fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE

2016-08-01 Thread Andrew Pinski
On Mon, Aug 1, 2016 at 12:22 AM, Andrew Pinski  wrote:
> On Mon, Aug 1, 2016 at 12:15 AM, Prathamesh Kulkarni
>  wrote:
>> Hi Richard,
>> The attached patch tries to fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE.
>> I am not sure where was the ideal place to put this transform in and ended up
>> adding it to strlen_optimize_stmt().
>> Does that look OK ?
>
> I suspect it might be better in match.pd.

The main reason is it is already in fold-const.c:
  /* Optimize comparisons of strlen vs zero to a compare of the
 first character of the string vs zero.  To wit,
strlen(ptr) == 0   =>  *ptr == 0
strlen(ptr) != 0   =>  *ptr != 0
 Other cases should reduce to one of these two (or a constant)
 due to the return value of strlen being unsigned.  */
  if (TREE_CODE (arg0) == CALL_EXPR
  && integer_zerop (arg1))
{
  tree fndecl = get_callee_fndecl (arg0);

  if (fndecl
  && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
  && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_STRLEN
  && call_expr_nargs (arg0) == 1
  && TREE_CODE (TREE_TYPE (CALL_EXPR_ARG (arg0, 0))) ==
POINTER_TYPE)
{
  tree iref = build_fold_indirect_ref_loc (loc,
   CALL_EXPR_ARG (arg0, 0));
  return fold_build2_loc (loc, code, type, iref,
  build_int_cst (TREE_TYPE (iref), 0));
}
}

So you are basically moving that to match.pd instead of adding extra code.


Thanks,
Andrew Pinski

>
>>
>> I needed to add TODO_update_ssa to strlen pass, otherwise we hit the
>> following assert in execute_todo():
>> if (flag_checking
>>   && cfun
>>   && need_ssa_update_p (cfun))
>> gcc_assert (flags & TODO_update_ssa_any);
>
> Also you only need to update the virtual SSAs rather than a full SSA update.
>
> Thanks,
> Andrew
>
>>
>> Bootstrap+test in progress on x86_64-unknown-linux-gnu.
>>
>> Thanks,
>> Prathamesh


Re: [PATCH] Update libstdc++ baseline symbols

2016-08-01 Thread Uros Bizjak
Attached patch updates alpha.

2016-08-01  Uros Bizjak  

* config/abi/post/alpha-linux-gnu/baseline_symbols.txt: Update.

Tested on alphaev68-linux-gnu and committed.

Uros.
Index: config/abi/post/alpha-linux-gnu/baseline_symbols.txt
===
--- config/abi/post/alpha-linux-gnu/baseline_symbols.txt(revision 
238933)
+++ config/abi/post/alpha-linux-gnu/baseline_symbols.txt(working copy)
@@ -1,3 +1,72 @@
+FUNC:_ZGTtNKSt11logic_error4whatEv@@GLIBCXX_3.4.22
+FUNC:_ZGTtNKSt13bad_exception4whatEv@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt13bad_exceptionD1Ev@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt13runtime_error4whatEv@@GLIBCXX_3.4.22
+FUNC:_ZGTtNKSt9exception4whatEv@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt9exceptionD1Ev@@CXXABI_1.3.10
+FUNC:_ZGTtNSt11logic_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD2Ev@@GLIBCXX_3.4.22
 FUNC:_ZN10__cxxabiv116__enum_type_infoD0Ev@@CXXABI_1.3
 FUNC:_ZN10__cxxab

Re: [RFC] warn on dead function calls in ipa-pure-const [1/4]

2016-08-01 Thread Richard Biener
On Sun, 31 Jul 2016, Prathamesh Kulkarni wrote:

> On 31 July 2016 at 22:01, Jan Hubicka  wrote:
> >> On Tue, 26 Jul 2016, Prathamesh Kulkarni wrote:
> >>
> >> > +   warning_at (gimple_location (g), OPT_Wunused_value,
> >> > +   "Call from %s to %s has no effect",
> >> > +   e->caller->name (), e->callee->name ());
> >>
> >> Diagnostics should not start with capital letters.  Function names in
> >> diagnostics should be quoted, so %qs.  Also, what form is this name in?
> >> If it's the internal UTF-8 form, you need to use identifier_to_locale on
> >> it to produce something suitable for a diagnostic.  And for C++ you need
> >> to make sure the name is a proper pretty name (including classes /
> >> namespaces / type etc.) as produced by the decl_printable_name langhook,
> >> before passing it to identifier_to_locale.
> >
> > I think you just want to pass e->caller->decl (with corresponding % 
> > formatter)
> > rather than name()
> Hi,
> Thanks for the reviews. However after discussing with Richard,
> we decided to drop this warning for now, because it can lead to
> potentially false positives
> like for the following case in stor-layout.c:
> 
>/* Stop if the mode requires too much alignment.  */
>   if (GET_MODE_ALIGNMENT (m_mode) > m_align
>   && SLOW_UNALIGNED_ACCESS (m_mode, m_align))
> break;
> 
> On x86_64, SLOW_UNALIGNED_ACCESS is #defined to 0,
> so the condition essentially becomes:
> 
> if (get_mode_alignment (m_mode) > m_align && 0)
>   break;
> 
> and the patch warns for the above dead call.
> However the call might not always be dead, since it depends
> on conditionally defined macro SLOW_UNALIGNED_ACCESS,
> which other targets may perhaps define as a run-time value.
> 
> Unfortunately I don't have any good ideas to address this issue.
> We could restrict the warning for cases when call is not a
> sub-expression, however I suppose we would need some help from
> FE's to determine if call_expr is outermost expression ?
> I thought of adding another flag to tree_exp for this purpose,
> but that doesn't look like a good idea.
> I would be grateful for suggestions for addressing this issue.

Maybe it's possible to restructure it in a way to rely on
TREE_USED on the decl the result is assigned to and only warn
in the case the FE didn't set that (and the function later became
pure/const).  That would not warn for, say,

  x = foo ();
  x;

Well, I hope so.

Richard.


Re: Implement -Wimplicit-fallthrough (take 2): fix missing breaks

2016-08-01 Thread Richard Biener
On Fri, 29 Jul 2016, Marek Polacek wrote:

> On Fri, Jul 29, 2016 at 01:46:11AM -0500, Segher Boessenkool wrote:
> > On Thu, Jul 28, 2016 at 05:57:34PM +0200, Marek Polacek wrote:
> > > Segher, is the rs6000.c part ok?
> > 
> > Yes, that looks fine, thanks!  Will you handle the backports as well?
> 
> Happy to, just not sure about 4.9.  Richi, is it ok to commit a patch
> adding break; into 4.9?

Yes.

Thanks,
Richard.


Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Richard Biener
On Mon, 1 Aug 2016, Prathamesh Kulkarni wrote:

> On 30 July 2016 at 02:57, Joseph Myers  wrote:
> > On Tue, 26 Jul 2016, Richard Sandiford wrote:
> >
> >> (which are really just extended tree codes).  I suppose copysign is
> >> a special case since we can always open code it, but in general we
> >> shouldn't fall back to something that could generate a call.
> >
> > We can't always open code copysign (IBM long double, see PR 58797).
> Hi,
> Thanks for pointing that out.
> The attached patch doesn't transform x/abs(x) -> copysign(1.0, x)
> for long double.
> OK for trunk ?

I don't think so - the pattern is as much a canonicalization as an
optimization.  If the target cannot expand copysign then the
middle-end expander should try an alternative like the a / abs(x)
form.  Though if copysign cannot be open-coded then I don't see
how abs can be opencoded (which basically is copysign (1.0, x)).

Richard.


[patch,avr] Adjust insn length of forward branches.

2016-08-01 Thread Georg-Johann Lay
The length computation of "branch" flavour insn length attribute computed a 
length one instruction too short, e.g. in the following test case:


#define N0(N) if (X++ < N) __builtin_avr_nop()

#define N1(N) N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N)
#define N2(N) N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N)
#define N3(N) N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N)
#define N4(N) N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N)

void bra_63 (unsigned char x)
{
if (x > 0)
{
unsigned X = 0;
N4 (63);
}
}

void bra_2045 (unsigned char x)
{
if (x > 0)
{
unsigned X = 0;
N4 (2045);
}
}

when compiled for MCU with JMP instruction like -mmcu=atmega168

Ok to apply?

Johann


* config/avr/avr.md (length) [branch]: Correct insn length
attribute for forward branches.
Index: config/avr/avr.md
===
--- config/avr/avr.md	(revision 238871)
+++ config/avr/avr.md	(working copy)
@@ -112,12 +112,12 @@ (define_attr "type" "branch,branch1,arit
 (define_attr "length" ""
   (cond [(eq_attr "type" "branch")
  (if_then_else (and (ge (minus (pc) (match_dup 0))
-(const_int -63))
+(const_int -62))
 (le (minus (pc) (match_dup 0))
 (const_int 62)))
(const_int 1)
(if_then_else (and (ge (minus (pc) (match_dup 0))
-  (const_int -2045))
+  (const_int -2044))
   (le (minus (pc) (match_dup 0))
   (const_int 2045)))
  (const_int 2)


[patch,avr] Add built-in function to emit NOPs

2016-08-01 Thread Georg-Johann Lay
This adds a new built-in function that inserts a requested number of NOPs into 
the instruction stream.  I found it useful when testing avr-gcc.


Ok to apply?


Johann


* doc/extend.texi (AVR Built-in Functions): Document
__builtin_avr_nops.
* config/avr/builtins.def (NOPS): New.
* config/avr/avr.c (avr_expand_nops): New static function.
(avr_expand_builtin): Use it to handle AVR_BUILTIN_NOPS.
Index: doc/extend.texi
===
--- doc/extend.texi	(revision 238849)
+++ doc/extend.texi	(working copy)
@@ -12552,6 +12552,14 @@ __builtin_avr_insert_bits (0x3210, b
 __builtin_avr_insert_bits (0x01234567, bits, 0)
 @end smallexample
 
+@smallexample
+void __builtin_avr_nops (unsigned count)
+@end smallexample
+
+@noindent
+Insert @code{count} @code{NOP} instructions.
+The number of instructions must be a compile-time integer constant.
+
 @node Blackfin Built-in Functions
 @subsection Blackfin Built-in Functions
 
Index: config/avr/builtins.def
===
--- config/avr/builtins.def	(revision 238849)
+++ config/avr/builtins.def	(working copy)
@@ -51,6 +51,7 @@ DEF_BUILTIN (FMULSU, 2, int_ftype_char_u
 /* More complex stuff that cannot be mapped 1:1 to an instruction.  */
 
 DEF_BUILTIN (DELAY_CYCLES, -1, void_ftype_ulong, nothing, NULL)
+DEF_BUILTIN (NOPS, -1, void_ftype_ulong, nothing, NULL)
 DEF_BUILTIN (INSERT_BITS, 3, uchar_ftype_ulong_uchar_uchar, insert_bits, NULL)
 DEF_BUILTIN (FLASH_SEGMENT, 1, char_ftype_const_memx_ptr, flash_segment, NULL)
 
Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 238879)
+++ config/avr/avr.c	(working copy)
@@ -12714,6 +12714,18 @@ avr_expand_delay_cycles (rtx operands0)
 }
 
 
+static void
+avr_expand_nops (rtx operands0)
+{
+  unsigned HOST_WIDE_INT n_nops = UINTVAL (operands0) & GET_MODE_MASK (HImode);
+
+  while (n_nops--)
+{
+  emit_insn (gen_nopv (const1_rtx));
+}
+}
+
+
 /* Compute the image of x under f, i.e. perform   x --> f(x)*/
 
 static int
@@ -13387,6 +13399,19 @@ avr_expand_builtin (tree exp, rtx target
 
 return NULL_RTX;
   }
+
+case AVR_BUILTIN_NOPS:
+  {
+arg0 = CALL_EXPR_ARG (exp, 0);
+op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+
+if (!CONST_INT_P (op0))
+  error ("%s expects a compile time integer constant", bname);
+else
+  avr_expand_nops (op0);
+
+return NULL_RTX;
+  }
 
 case AVR_BUILTIN_INSERT_BITS:
   {


[PATCH 4/4] Add tests for __gcov_dump and __gcov_reset

2016-08-01 Thread marxin
gcc/testsuite/ChangeLog:

2016-07-28  Martin Liska  

* g++.dg/gcov/gcov-dump-1.C: New test.
* g++.dg/gcov/gcov-dump-2.C: New test.
---
 gcc/testsuite/g++.dg/gcov/gcov-dump-1.C | 23 +++
 gcc/testsuite/g++.dg/gcov/gcov-dump-2.C | 32 
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-dump-1.C
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-dump-2.C

diff --git a/gcc/testsuite/g++.dg/gcov/gcov-dump-1.C 
b/gcc/testsuite/g++.dg/gcov/gcov-dump-1.C
new file mode 100644
index 0000000..f0e81e9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov-dump-1.C
@@ -0,0 +1,23 @@
+/* { dg-options "-fprofile-generate -ftest-coverage -lgcov" } */
+/* { dg-do run { target native } } */
+
+int value;
+
+extern "C" { void __gcov_dump(void); }
+
+int main(int argc, char **argv)
+{
+  value = 123; /* count(1) */
+
+  for (unsigned i = 0; i < 100; i++)
+value += argc; /* count(100) */
+
+  __gcov_dump();
+
+  for (unsigned i = 0; i < 1000; i++)  /* count(#) */
+value += argc;
+
+  return 0;/* count(#) */
+}
+
+/* { dg-final { run-gcov gcov-dump-1.C } } */
diff --git a/gcc/testsuite/g++.dg/gcov/gcov-dump-2.C 
b/gcc/testsuite/g++.dg/gcov/gcov-dump-2.C
new file mode 100644
index 0000000..6234a81
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gcov/gcov-dump-2.C
@@ -0,0 +1,32 @@
+/* { dg-options "-fprofile-generate -ftest-coverage -lgcov" } */
+/* { dg-do run { target native } } */
+
+int value;
+
+extern "C"
+{
+  void __gcov_dump(void);
+  void __gcov_reset(void);
+}
+
+int main(int argc, char **argv)
+{
+  value = 123; /* count(1) */
+
+  for (unsigned i = 0; i < 100; i++)
+value += argc; /* count(100) */
+
+  __gcov_dump();
+
+  for (unsigned i = 0; i < 1000; i++)  /* count(#) */
+value += argc;
+
+  __gcov_reset ();
+
+  for (unsigned i = 0; i < 10000; i++) /* count(10001) */
+value += argc;
+
+  return 0;/* count(1) */
+}
+
+/* { dg-final { run-gcov gcov-dump-2.C } } */
-- 
2.9.2



[PATCH 2/4] Remove __gcov_indirect_call_profiler

2016-08-01 Thread marxin
libgcc/ChangeLog:

2016-07-28  Martin Liska  

* Makefile.in: Remove __gcov_indirect_call_profiler.
* libgcov-profiler.c (__gcov_indirect_call_profiler): Remove
function.
* libgcov.h: And the declaration of the function.
---
 libgcc/Makefile.in|  2 +-
 libgcc/libgcov-profiler.c | 27 ---
 libgcc/libgcov.h  |  2 --
 3 files changed, 1 insertion(+), 30 deletions(-)

diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index 8b0fdd9..e2295ca 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -859,7 +859,7 @@ include $(iterator)
 LIBGCOV_MERGE = _gcov_merge_add _gcov_merge_single _gcov_merge_delta   \
_gcov_merge_ior _gcov_merge_time_profile _gcov_merge_icall_topn
 LIBGCOV_PROFILER = _gcov_interval_profiler _gcov_pow2_profiler \
-   _gcov_one_value_profiler _gcov_indirect_call_profiler   \
+   _gcov_one_value_profiler\
_gcov_one_value_profiler_atomic \
_gcov_average_profiler _gcov_ior_profiler   \
_gcov_indirect_call_profiler_v2 \
diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
index 1b307ac..c1e287d 100644
--- a/libgcc/libgcov-profiler.c
+++ b/libgcc/libgcov-profiler.c
@@ -257,33 +257,6 @@ __gcov_indirect_call_topn_profiler (gcov_type value, void* 
cur_func)
 
 #endif
 
-#ifdef L_gcov_indirect_call_profiler
-/* This function exist only for workaround of binutils bug 14342.
-   Once this compatibility hack is obsolette, it can be removed.  */
-
-/* By default, the C++ compiler will use function addresses in the
-   vtable entries.  Setting TARGET_VTABLE_USES_DESCRIPTORS to nonzero
-   tells the compiler to use function descriptors instead.  The value
-   of this macro says how many words wide the descriptor is (normally 2).
-
-   It is assumed that the address of a function descriptor may be treated
-   as a pointer to a function.  */
-
-/* Tries to determine the most common value among its inputs. */
-void
-__gcov_indirect_call_profiler (gcov_type* counter, gcov_type value,
-   void* cur_func, void* callee_func)
-{
-  /* If the C++ virtual tables contain function descriptors then one
- function may have multiple descriptors and we need to dereference
- the descriptors to see if they point to the same function.  */
-  if (cur_func == callee_func
-  || (__LIBGCC_VTABLE_USES_DESCRIPTORS__ && callee_func
-  && *(void **) cur_func == *(void **) callee_func))
-__gcov_one_value_profiler_body (counter, value);
-}
-#endif
-
 #ifdef L_gcov_indirect_call_profiler_v2
 
 /* These two variables are used to actually track caller and callee.  Keep
diff --git a/libgcc/libgcov.h b/libgcc/libgcov.h
index 0bd905b..337e558 100644
--- a/libgcc/libgcov.h
+++ b/libgcc/libgcov.h
@@ -271,8 +271,6 @@ extern void __gcov_interval_profiler (gcov_type *, 
gcov_type, int, unsigned);
 extern void __gcov_pow2_profiler (gcov_type *, gcov_type);
 extern void __gcov_one_value_profiler (gcov_type *, gcov_type);
 extern void __gcov_one_value_profiler_atomic (gcov_type *, gcov_type);
-extern void __gcov_indirect_call_profiler (gcov_type*, gcov_type,
-   void*, void*);
 extern void __gcov_indirect_call_profiler_v2 (gcov_type, void *);
 extern void __gcov_indirect_call_profiler_v2_atomic (gcov_type, void *);
 extern void __gcov_time_profiler (gcov_type *);
-- 
2.9.2




[PATCH 3/4] Fix typo in gcov.texi

2016-08-01 Thread marxin
gcc/ChangeLog:

2016-07-28  Martin Liska  

* doc/gcov.texi: Change _gcov_dump to __gcov_dump and
_gcov_reset to __gcov_reset.
* doc/gcov-tool.texi: Fix typo.

libgcc/ChangeLog:

2016-08-01  Martin Liska  

* libgcov-util.c: Fix typo and GNU coding style.
---
 gcc/doc/gcov-tool.texi | 2 +-
 gcc/doc/gcov.texi  | 6 +++---
 libgcc/libgcov-util.c  | 3 ++-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/gcc/doc/gcov-tool.texi b/gcc/doc/gcov-tool.texi
index 845f14b..c4a9ce1 100644
--- a/gcc/doc/gcov-tool.texi
+++ b/gcc/doc/gcov-tool.texi
@@ -193,7 +193,7 @@ in the new profile.
 @end table
 
 @item overlap
-Computer the overlap score between the two specified profile directories.
+Compute the overlap score between the two specified profile directories.
 The overlap score is computed based on the arc profiles. It is defined as
 the sum of min (p1_counter[i] / p1_sum_all, p2_counter[i] / p2_sum_all),
 for all arc counter i, where p1_counter[i] and p2_counter[i] are two
diff --git a/gcc/doc/gcov.texi b/gcc/doc/gcov.texi
index 89d8049..df58df8 100644
--- a/gcc/doc/gcov.texi
+++ b/gcc/doc/gcov.texi
@@ -582,10 +582,10 @@ now be calculable at compile time in some instances.  
Because the
 coverage of all the uses of the inline function will be shown for the
 same source lines, the line counts themselves might seem inconsistent.
 
-Long-running applications can use the @code{_gcov_reset} and @code{_gcov_dump}
+Long-running applications can use the @code{__gcov_reset} and 
@code{__gcov_dump}
 facilities to restrict profile collection to the program region of
-interest. Calling @code{_gcov_reset(void)} will clear all profile counters
-to zero, and calling @code{_gcov_dump(void)} will cause the profile information
+interest. Calling @code{__gcov_reset(void)} will clear all profile counters
+to zero, and calling @code{__gcov_dump(void)} will cause the profile 
information
 collected at that point to be dumped to @file{.gcda} output files.
 
 @c man end
diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c
index 7b3bc31..c8fb52d 100644
--- a/libgcc/libgcov-util.c
+++ b/libgcc/libgcov-util.c
@@ -1391,7 +1391,8 @@ calculate_overlap (struct gcov_info *gcov_list1,
   return prg_val;
 }
 
-/* Computer the overlap score of two lists of gcov_info objects PROFILE1 and 
PROFILE2.
+/* Compute the overlap score of two lists of gcov_info objects PROFILE1 and
+   PROFILE2.
Return 0 on success: without mismatch. Reutrn 1 on error.  */
 
 int
-- 
2.9.2




[PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-01 Thread marxin
libgcc/ChangeLog:

2016-07-28  Martin Liska  

* Makefile.in: Add functions to LIBGCOV_PROFILER.
* libgcov-profiler.c (__gcov_one_value_profiler_body_atomic):
New function.
(__gcov_one_value_profiler_atomic): Likewise.
(__gcov_indirect_call_profiler_v2): Fix GNU coding style.
(__gcov_indirect_call_profiler_v2_atomic): New function.
* libgcov.h: Declare __gcov_indirect_call_profiler_v2_atomic and
__gcov_one_value_profiler_body_atomic.

gcc/ChangeLog:

2016-07-28  Martin Liska  

* common.opt (fprofile-generate-atomic): Add new flag.
* gcov-io.h: Declare GCOV_TYPE_ATOMIC_FETCH_ADD and
GCOV_TYPE_ATOMIC_FETCH_ADD_FN.
* tree-profile.c (gimple_init_edge_profiler): Generate
also atomic profiler update.
(gimple_gen_edge_profiler): Likewise.
* doc/invoke.texi: Document -fprofile-generate-atomic.

gcc/testsuite/ChangeLog:

2016-07-28  Martin Liska  

* g++.dg/gcov/gcov-threads-1.C: New test.
---
 gcc/common.opt |  9 +
 gcc/doc/invoke.texi| 11 ++
 gcc/gcov-io.h  | 22 +++
 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C | 46 ++
 gcc/tree-profile.c | 62 +-
 libgcc/Makefile.in |  4 +-
 libgcc/libgcov-profiler.c  | 42 +++-
 libgcc/libgcov.h   |  2 +
 8 files changed, 177 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C

diff --git a/gcc/common.opt b/gcc/common.opt
index 8a292ed..1adb1d7 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1916,6 +1916,15 @@ fprofile-correction
 Common Report Var(flag_profile_correction)
 Enable correction of flow inconsistent profile data input.
 
+; fprofile-generate-atomic=0: disable atomically update.
+; fprofile-generate-atomic=1: atomically update edge profile counters.
+; fprofile-generate-atomic=2: atomically update value profile counters.
+; fprofile-generate-atomic=3: atomically update edge and value profile 
counters.
+; other values will be ignored (fall back to the default of 0).
+fprofile-generate-atomic=
+Common Joined UInteger Report Var(flag_profile_gen_atomic) Init(0) Optimization
+fprofile-generate-atomic=[0..3] Atomically increments for profile counters.
+
 fprofile-generate
 Common
 Enable common options for generating profile info for profile feedback 
directed optimizations.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 22001f9..147b448 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9933,6 +9933,17 @@ the profile feedback data files. See 
@option{-fprofile-dir}.
 To optimize the program based on the collected profile information, use
 @option{-fprofile-use}.  @xref{Optimize Options}, for more information.
 
+@item -fprofile-generate-atomic
+@opindex fprofile-generate-atomic
+
+Enable atomic increments for profile counters.  By default, an instrumented
+application can produce a corrupted profile if it utilizes threads in
+a massive way.  The option provides atomic updates for edge profile counters
+(@option{-fprofile-generate-atomic=1}) and indirect call counters
+(@option{-fprofile-generate-atomic=2}).  Both can be enabled with
+(@option{-fprofile-generate-atomic=3}), default value of the option
+is equal to 0.
+
 @item -fsanitize=address
 @opindex fsanitize=address
 Enable AddressSanitizer, a fast memory error detector.
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index bbf013a..96ed78b 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -169,6 +169,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 
 typedef unsigned gcov_unsigned_t;
 typedef unsigned gcov_position_t;
+
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
+#define PROFILE_GEN_EDGE_ATOMIC (flag_profile_gen_atomic == 1 || \
+flag_profile_gen_atomic == 3)
+#define PROFILE_GEN_VALUE_ATOMIC (flag_profile_gen_atomic == 2 || \
+ flag_profile_gen_atomic == 3)
+
 /* gcov_type is typedef'd elsewhere for the compiler */
 #if IN_GCOV
 #define GCOV_LINKAGE static
@@ -196,6 +209,15 @@ typedef uint64_t gcov_type_unsigned;
 #endif
 
 #if IN_LIBGCOV
+
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif
+
 #define gcov_nonruntime_assert(EXPR) ((void)(0 && (EXPR)))
 #else
 #define gcov_nonruntime_assert(EXPR) gcc_asser

[PATCH 0/4] Various GCOV/PGO improvements

2016-08-01 Thread marxin
Hi.

My attempt in the following small series is to cover a couple of issues
I've recently observed. I'll briefly describe the changes with respect to
each individual patch:

marxin (4):
  Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

As mentioned in [1], our current implementation can produce a corrupted
profile due to a massive usage of threads. Well, the absolutely robust
solution would be to either utilize TLS or to use atomics & locking
mechanism. However, as David Li pointed out, the most interesting
counters that can suffer from multithreading are -fprofile-arcs counters
and indirect call counters. I've just cherry picked the functionality
from google/gcc-4_9 branch.

[1] https://gcc.gnu.org/ml/gcc/2016-07/msg00131.html

  Remove __gcov_indirect_call_profiler

The profiler function is unused, thus can be removed.

  Fix typo in gcov.texi

Just a small typo in names of functions that one can call from
a user application.

  Add tests for __gcov_dump and __gcov_reset

Adding tests for the aforementioned functions.

Patch can bootstrap on ppc64le-redhat-linux and survives regression tests
(tested together).

Thoughts?
Thanks,
Martin

marxin (4):
  Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch
  Remove __gcov_indirect_call_profiler
  Fix typo in gcov.texi
  Add tests for __gcov_dump and __gcov_reset

 gcc/common.opt |  9 
 gcc/doc/gcov-tool.texi |  2 +-
 gcc/doc/gcov.texi  |  6 +--
 gcc/doc/invoke.texi| 11 +
 gcc/gcov-io.h  | 22 ++
 gcc/testsuite/g++.dg/gcov/gcov-dump-1.C| 23 ++
 gcc/testsuite/g++.dg/gcov/gcov-dump-2.C| 32 ++
 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C | 46 
 gcc/tree-profile.c | 62 ++-
 libgcc/Makefile.in |  6 ++-
 libgcc/libgcov-profiler.c  | 67 ++
 libgcc/libgcov-util.c  |  3 +-
 libgcc/libgcov.h   |  4 +-
 13 files changed, 238 insertions(+), 55 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-dump-1.C
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-dump-2.C
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C

-- 
2.9.2



Re: [PATCH, vec-tails 07/10] Support loop epilogue combining

2016-08-01 Thread Ilya Enkovich
2016-07-26 18:38 GMT+03:00 Ilya Enkovich :
> 2016-07-26 18:26 GMT+03:00 Jeff Law :
>> On 07/26/2016 03:57 AM, Ilya Enkovich wrote:


 Ilya, what's the fundamental reason why we need to run
 if-conversion again? Yes, I know you want to if-convert the
 epilogue, but why?

 What are the consequences of not doing if-conversion on the
 epilogue? Presumably we miss a vectorization opportunity on the
 tail.  But that may be a reasonable limitation to allow the
 existing work to move forward while you go back and revamp things a
 little.
>>>
>>>
>>> If we have some control-flow in a loop then we have to if-convert it
>>> for vectorizer. We need to preserve both versions: if-converted one
>>> for vectorizer and the original one to be used if vectorization
>>> fails.  For epilogues we have similar situation and need two
>>> versions.  I do it by running if-conversion on a copy of original
>>> loop. Note that it doesn't run full if-conversion pass. If-conversion
>>> is called for epilogue loop only.
>>
>> Right.  So what I think Richi wants you to try is to use the if-converted
>> loop to construct the if-converted epilogue.  It seems conceptually simple
>> and low cost -- the question is on the implementation side.  I have no clue
>> how painful that would be.
>
> Probably another part of if-conversion may be re-used to build required
> epilogue.  I'll have a look.

Hi,

Yuri will continue my work from this point.

Thanks,
Ilya

>
> Thanks,
> Ilya
>
>>
>> jeff
>>


Re: [PATCH][RFC] PR middle-end/22141 GIMPLE store widening pass

2016-08-01 Thread Kyrill Tkachov


On 18/07/16 13:22, Richard Biener wrote:

On Fri, Jul 15, 2016 at 5:13 PM, Kyrill Tkachov
 wrote:

Hi all,

This is a GIMPLE pass to implement PR middle-end/22141, that is, to merge narrow
stores of constants
into fewer wider stores.  A 2009 patch from Jakub [1] contains many
testcases but a simple motivating
case can be:

struct bar {
   int a;
   char b;
   char c;
   char d;
   char e;
}; // packed 64-bit structure

void bar (struct bar *);

void
foo (struct bar *p)
{
   p->b = 0;
   p->a = 0;
   p->c = 0;
   p->d = 1;
   p->e = 0;
}

Currently on aarch64 this will generate:
foo:
 mov w1, 1
 str wzr, [x0]
 strb wzr, [x0, 4]
 strb wzr, [x0, 5]
 strb w1, [x0, 6]
 strb wzr, [x0, 7]
 ret

With this patch this can be improved into a single unaligned store:
foo:
 mov x1, 0x1
 str x1, [x0]
 ret

or, if compiled with -mstrict-align:
foo:
 mov w1, 0x1
 stp wzr, w1, [x0]
 ret

The pass is a tree-ssa pass that runs fairly late in the pipeline, after
pass_optimize_widening_mul.
I explain the approach taken in the comments in the new
tree-ssa-store-widening.c file but essentially
it has 3 phases applied to each basic block:

1) Scan through the statements recording assignments of constants to
destinations like ARRAY_REF,
COMPONENT_REF, MEM_REF which are determined to write to an ultimate common
destination. get_inner_reference
is used to decompose these destinations. Continue recording these until we
encounter a statement that may
interfere with the stores we've been recording (load or store that may
alias, volatile access etc).
These assignments of interest are recorded as store_immediate_info objects
in the m_store_info vector.

2) Analyse the stores recorded in phase one (they all write to a destination
offset from a common base)
and merge them into wider assignments up to BITS_PER_WORD bits wide. These
widened assignments are represented
as merged_store_group objects and they are recorded in the
m_merged_store_groups vector. This is the
coalesce_immediate_stores function. It sorts the stores by the bitposition
they write to and iterates through
them, merging consecutive stores (it fails the transformation on overlapping
stores, I don't think that case
appears often enough to warrant extra logic) up to BITS_PER_WORD-wide
accesses.

3) Go through the merged stores recorded in m_merged_store_groups and output
each widened store. Widened stores
that are not of a bitsize that is a power of two (for example 48 bits wide)
are output as multiple stores of decreasing
power-of-two width. So, for a widened store 48-bits wide this phase would
emit a 32-bit store followed by a
16-bit store. The new sequence is only emitted if it contains fewer
statements than the original sequence that it
will replace.  This phase also avoids outputting unaligned stores for
STRICT_ALIGNMENT targets or targets where
SLOW_UNALIGNED_ACCESS forbids it. Since some configurations/targets may want
to avoid generation of unaligned
stores even when it is legal I've added the new param
PARAM_STORE_WIDENING_ALLOW_UNALIGNED that can be used
to disallow unaligned store generation.  Its default setting is to allow
them (assuming that STRICT_ALIGNMENT
and SLOW_UNALIGNED_ACCESS allows it).

This is my first GIMPLE-level pass so please do point out places where I'm
not using the interfaces correctly.
This patch handles bitfields as well, but only if they are a multiple of
BITS_PER_UNIT. It should be easily
extensible to handle other bitfields as well, but I'm not entirely sure of
the rules for laying out such bitfields
and in particular the byteswap logic that needs to be applied for big-endian
targets. If someone can shed some light
on how they should be handled I'll be happy to try it out, but I believe this
patch is an improvement as it is.

This has been bootstrapped and tested on aarch64-none-linux-gnu,
arm-none-linux-gnueabihf and x86_64-unknown-linux-gnu.
I've also tested it on the big-endian targets: armeb-none-eabi,
aarch64_be-none-elf. Also tested aarch64-none-elf/-mabi=ilp32.

I've benchmarked it on SPEC2006 on AArch64 on a Cortex-A72 and there were no
regressions, the overall score improved a bit
(about 0.1%). The interesting improvements were:
458.sjeng (+0.8%)
483.xalancbmk (+1.1%)
416.gamess(+1.0%)
454.calculix  (+1.1%)

An interesting effect was in BZ2_decompress from bzip2 where at -Ofast it
transformed a long sequence of constant
byte stores into a much shorter sequence of word-size stores (from ~550
instructions to ~190).

On x86_64 SPECINT there was no change in the overall score. The code size at
-Ofast is consistently smaller
with this patch but the performance differences on sub-benchmarks are in the
noise.

I've included the testcases from Jakub's patch [1] and added a few of my
own.

Is this direction acceptable for the problem this is trying to solve?

+  /* Record the

PATCH: PR71818: Don't advance IVs with a variable step

2016-08-01 Thread Alan Hayward
In the given test case, the loop is split into vectorised and non
vectorised
versions due to peeling. At the end of the loop the IVs are incremented to
their latest value. This is achieved by taking the base of the loop (g_21)
and
adding the iterations (240) multiplied by the step (_6):

  :
  # _106 = PHI <_6(12)>
  _84 = _106 * 240;
  _85 = (char) _84;
  tmp.19_83 = g_21(D) + _85;

However, the step (_6) varies within the loop and therefore the
calculation is
incorrect.

This patch fixes the error by disallowing vectorization if the step of the
IV
is not an invariant within the loop.

Also added debug comment for when the optimisation fails due to chrec.

Tested on x86.

Ok to commit?

Alan.


gcc/
PR tree-optimization/71818
* tree-vect-loop-manip.c (vect_can_advance_ivs_p): Don't advance IVs
with non invariant evolutions

testsuite/
PR tree-optimization/71818
* gcc.dg/vect/pr71818.c: New





diff --git a/gcc/testsuite/gcc.dg/vect/pr71818.c
b/gcc/testsuite/gcc.dg/vect/pr71818.c
new file mode 100644
index 
..2946551f8bb8c552565c2e79b16359ca3
9d13ed6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr71818.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+char a;
+short b;
+int c, d;
+void fn1() {
+  char e = 75, g;
+  unsigned char *f = &e;
+  a = 21;
+  for (; a <= 48; a++) {
+for (; e <= 6;)
+  ;
+g -= e -= b || g <= c;
+  }
+  d = *f;
+}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index 
819abcda81a25c4ed25749c29b357110fca647d2..4d68f7143e1117085aae8d2168ed1425e
7e6aa08 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfgloop.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
+#include "tree-ssa-loop-ivopts.h"

 /*
   Simple Loop Peeling Utilities
@@ -1592,10 +1593,26 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
 }

   /* FORNOW: We do not transform initial conditions of IVs
+which evolution functions are not invariants in the loop.  */
+
+  if (!expr_invariant_in_loop_p (loop, evolution_part))
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"evolution not invariant in loop.\n");
+ return false;
+   }
+
+  /* FORNOW: We do not transform initial conditions of IVs
 which evolution functions are a polynomial of degree >= 2.  */

   if (tree_is_chrec (evolution_part))
-   return false;
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"evolution is chrec.\n");
+ return false;
+   }
 }

   return true;







[AARCH64/PATCH] update vulcan L1 cacheline size

2016-08-01 Thread Virendra Pathak
Hi gcc-patches group,

Please find the patch for updating vulcan L1 cacheline size.

Tested the patch by compiling a cross aarch64-linux-gcc,
bootstrapping natively on aarch64-unknown-linux-gnu and
running the gcc regression tests.

Kindly review and merge the patch to trunk, if the patch is okay.
Thanks.

gcc/ChangeLog:

Virendra Pathak  

* config/aarch64/aarch64.c (vulcan_tunings): Update
vulcan L1 cache_line_size.


with regards,
Virendra Pathak
From fb6ce7d1f4e4fd29b9a618467b5a52a8c69702e9 Mon Sep 17 00:00:00 2001
From: Virendra Pathak 
Date: Wed, 27 Jul 2016 03:41:47 -0700
Subject: [PATCH] AArch64: Update L1 cache line size for vulcan

---
 gcc/config/aarch64/aarch64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fe2683e..56e8650 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -768,7 +768,7 @@ static const struct tune_params vulcan_tunings =
   2,   /* min_div_recip_mul_sf.  */
   2,   /* min_div_recip_mul_df.  */
   0,   /* max_case_values.  */
-  0,   /* cache_line_size.  */
+  64,  /* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)/* tune_flags.  */
 };
-- 
2.1.0



[avr,backported,6] Fix PR 71151

2016-08-01 Thread Georg-Johann Lay

Applied the simple fix for PR71151: Set JUMP_TABLES_IN_TEXT_SECTION to 1.

https://gcc.gnu.org/r238935

Johann

gcc/
Backport from 2016-06-16 trunk r237536.

2016-06-16  Senthil Kumar Selvaraj  

PR target/71151
* config/avr/avr.c (avr_asm_init_sections): Remove setup of
progmem_swtable_section.
(progmem_swtable_section): Remove.
(avr_asm_function_rodata_section): Remove.
(TARGET_ASM_FUNCTION_RODATA_SECTION): Remove.
* config/avr/avr.h (JUMP_TABLES_IN_TEXT_SECTION): Define to 1.

testsuite/
Backport from 2016-06-16 trunk r237536, r237910.

2016-06-16  Senthil Kumar Selvaraj  

PR target/71151
* gcc.target/avr/pr71151-1.c: New test.
* gcc.target/avr/pr71151-2.c: New test.
* gcc.target/avr/pr71151-3.c: New test.
* gcc.target/avr/pr71151-4.c: New test.
* gcc.target/avr/pr71151-5.c: New test.
* gcc.target/avr/pr71151-6.c: New test.
* gcc.target/avr/pr71151-7.c: New test.
* gcc.target/avr/pr71151-8.c: New test.
* gcc.target/avr/pr71151-common.h: New file.


Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 238934)
+++ config/avr/avr.c	(working copy)
@@ -203,9 +203,6 @@ static GTY(()) rtx xstring_e;
 /* Current architecture.  */
 const avr_arch_t *avr_arch;
 
-/* Section to put switch tables in.  */
-static GTY(()) section *progmem_swtable_section;
-
 /* Unnamed sections associated to __attribute__((progmem)) aka. PROGMEM
or to address space __flash* or __memx.  Only used as singletons inside
avr_asm_select_section, but it must not be local there because of GTY.  */
@@ -9461,24 +9458,6 @@ avr_output_progmem_section_asm_op (const
 static void
 avr_asm_init_sections (void)
 {
-  /* Set up a section for jump tables.  Alignment is handled by
- ASM_OUTPUT_BEFORE_CASE_LABEL.  */
-
-  if (AVR_HAVE_JMP_CALL)
-{
-  progmem_swtable_section
-= get_unnamed_section (0, output_section_asm_op,
-   "\t.section\t.progmem.gcc_sw_table"
-   ",\"a\",@progbits");
-}
-  else
-{
-  progmem_swtable_section
-= get_unnamed_section (SECTION_CODE, output_section_asm_op,
-   "\t.section\t.progmem.gcc_sw_table"
-   ",\"ax\",@progbits");
-}
-
   /* Override section callbacks to keep track of `avr_need_clear_bss_p'
  resp. `avr_need_copy_data_p'.  */
 
@@ -9488,65 +9467,6 @@ avr_asm_init_sections (void)
 }
 
 
-/* Implement `TARGET_ASM_FUNCTION_RODATA_SECTION'.  */
-
-static section*
-avr_asm_function_rodata_section (tree decl)
-{
-  /* If a function is unused and optimized out by -ffunction-sections
- and --gc-sections, ensure that the same will happen for its jump
- tables by putting them into individual sections.  */
-
-  unsigned int flags;
-  section * frodata;
-
-  /* Get the frodata section from the default function in varasm.c
- but treat function-associated data-like jump tables as code
- rather than as user defined data.  AVR has no constant pools.  */
-  {
-int fdata = flag_data_sections;
-
-flag_data_sections = flag_function_sections;
-frodata = default_function_rodata_section (decl);
-flag_data_sections = fdata;
-flags = frodata->common.flags;
-  }
-
-  if (frodata != readonly_data_section
-  && flags & SECTION_NAMED)
-{
-  /* Adjust section flags and replace section name prefix.  */
-
-  unsigned int i;
-
-  static const char* const prefix[] =
-{
-  ".rodata",  ".progmem.gcc_sw_table",
-  ".gnu.linkonce.r.", ".gnu.linkonce.t."
-};
-
-  for (i = 0; i < sizeof (prefix) / sizeof (*prefix); i += 2)
-{
-  const char * old_prefix = prefix[i];
-  const char * new_prefix = prefix[i+1];
-  const char * name = frodata->named.name;
-
-  if (STR_PREFIX_P (name, old_prefix))
-{
-  const char *rname = ACONCAT ((new_prefix,
-name + strlen (old_prefix), NULL));
-  flags &= ~SECTION_CODE;
-  flags |= AVR_HAVE_JMP_CALL ? 0 : SECTION_CODE;
-
-  return get_section (rname, flags, frodata->named.decl);
-}
-}
-}
-
-  return progmem_swtable_section;
-}
-
-
 /* Implement `TARGET_ASM_NAMED_SECTION'.  */
 /* Track need of __do_clear_bss, __do_copy_data for named sections.  */
 
@@ -13749,9 +13669,6 @@ avr_fold_builtin (tree fndecl, int n_arg
 #undef  TARGET_FOLD_BUILTIN
 #define TARGET_FOLD_BUILTIN avr_fold_builtin
 
-#undef  TARGET_ASM_FUNCTION_RODATA_SECTION
-#define TARGET_ASM_FUNCTION_RODATA_SECTION avr_asm_function_rodata_section
-
 #undef  TARGET_SCALAR_MODE_SUPPORTED_P
 #define TARGET_SCALAR_MODE_SUPPORTED_P avr_scalar_mode_supported_p
 
Index: config/avr/avr.h
===

Re: [PATCH][AArch64] Allow multiple-of-8 immediate offsets for TImode LDP/STP

2016-08-01 Thread Kyrill Tkachov

Ping.
https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00737.html

Thanks,
Kyrill

On 13/07/16 17:14, Kyrill Tkachov wrote:

Hi all,

The most common way to load and store TImode value in aarch64 is to perform an 
LDP/STP of two X-registers.
This is the *movti_aarch64 pattern in aarch64.md.
There is a bug in the logic in aarch64_classify_address where it validates the 
offset in the address used
to load a TImode value. It passes down TImode to the 
aarch64_offset_7bit_signed_scaled_p check which rejects
offsets that are not a multiple of the mode size of TImode (16). However, this 
is too conservative as X-reg LDP/STP
instructions accept immediate offsets that are a multiple of 8.

Also, considering that the definition of aarch64_offset_7bit_signed_scaled_p is:
  return (offset >= -64 * GET_MODE_SIZE (mode)
  && offset < 64 * GET_MODE_SIZE (mode)
  && offset % GET_MODE_SIZE (mode) == 0);

I think the range check may even be wrong for TImode as this will accept 
offsets in the range [-1024, 1024)
(as long as they are a multiple of 16)
whereas X-reg LDP/STP instructions only accept offsets in the range [-512, 512).
So since the check is for an X-reg LDP/STP address we should be passing down 
DImode.

This patch does that and enables more aggressive generation of REG+IMM 
addressing modes for 64-bit aligned
TImode values, eliminating many address calculation instructions.
For the testcase in the patch we currently generate:
bar:
add x1, x1, 8
add x0, x0, 8
ldp x2, x3, [x1]
stp x2, x3, [x0]
ret

whereas with this patch we generate:
bar:
ldp x2, x3, [x1, 8]
stp x2, x3, [x0, 8]
ret

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2016-07-13  Kyrylo Tkachov  

* config/aarch64/aarch64.c (aarch64_classify_address): Use DImode when
performing aarch64_offset_7bit_signed_scaled_p check for TImode LDP/STP
addresses.

2016-07-13  Kyrylo Tkachov  

* gcc.target/aarch64/ldp_stp_unaligned_1.c: New test.




Re: [PATCH][AArch64] Allow multiple-of-8 immediate offsets for TImode LDP/STP

2016-08-01 Thread Richard Earnshaw (lists)
On 13/07/16 17:14, Kyrill Tkachov wrote:
> Hi all,
> 
> The most common way to load and store TImode value in aarch64 is to
> perform an LDP/STP of two X-registers.
> This is the *movti_aarch64 pattern in aarch64.md.
> There is a bug in the logic in aarch64_classify_address where it
> validates the offset in the address used
> to load a TImode value. It passes down TImode to the
> aarch64_offset_7bit_signed_scaled_p check which rejects
> offsets that are not a multiple of the mode size of TImode (16).
> However, this is too conservative as X-reg LDP/STP
> instructions accept immediate offsets that are a multiple of 8.
> 
> Also, considering that the definition of
> aarch64_offset_7bit_signed_scaled_p is:
>   return (offset >= -64 * GET_MODE_SIZE (mode)
>   && offset < 64 * GET_MODE_SIZE (mode)
>   && offset % GET_MODE_SIZE (mode) == 0);
> 
> I think the range check may even be wrong for TImode as this will accept
> offsets in the range [-1024, 1024)
> (as long as they are a multiple of 16)
> whereas X-reg LDP/STP instructions only accept offsets in the range
> [-512, 512).
> So since the check is for an X-reg LDP/STP address we should be passing
> down DImode.
> 
> This patch does that and enables more aggressive generation of REG+IMM
> addressing modes for 64-bit aligned
> TImode values, eliminating many address calculation instructions.
> For the testcase in the patch we currently generate:
> bar:
> add x1, x1, 8
> add x0, x0, 8
> ldp x2, x3, [x1]
> stp x2, x3, [x0]
> ret
> 
> whereas with this patch we generate:
> bar:
> ldp x2, x3, [x1, 8]
> stp x2, x3, [x0, 8]
> ret
> 
> Bootstrapped and tested on aarch64-none-linux-gnu.
> 
> Ok for trunk?

OK.

R.

> 
> Thanks,
> Kyrill
> 
> 2016-07-13  Kyrylo Tkachov  
> 
> * config/aarch64/aarch64.c (aarch64_classify_address): Use DImode when
> performing aarch64_offset_7bit_signed_scaled_p check for TImode LDP/STP
> addresses.
> 
> 2016-07-13  Kyrylo Tkachov  
> 
> * gcc.target/aarch64/ldp_stp_unaligned_1.c: New test.
> 
> aarch64-timode-addr.patch
> 
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> bea67f88b900be39b6f1ae002353b44c5a4a9f7d..8fd93a54c54ab86c6e600afba48fa441101b57c7
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4033,9 +4033,11 @@ aarch64_classify_address (struct aarch64_address_info 
> *info,
>X,X: 7-bit signed scaled offset
>Q:   9-bit signed offset
>We conservatively require an offset representable in either mode.
> -*/
> +  When performing the check for pairs of X registers i.e.  LDP/STP
> +  pass down DImode since that is the natural size of the LDP/STP
> +  instruction memory accesses.  */
> if (mode == TImode || mode == TFmode)
> - return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
> + return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
>   && offset_9bit_signed_unscaled_p (mode, offset));
>  
> /* A 7bit offset check because OImode will emit a ldp/stp
> diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_1.c 
> b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_1.c
> new file mode 100644
> index 
> ..a70f92100fb91bcfdcfd4af1cab6f58915038568
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_unaligned_1.c
> @@ -0,0 +1,20 @@
> +/* { dg-options "-O2" } */
> +
> +/* Check that we can use a REG + IMM addressing mode when moving an unaligned
> +   TImode value to and from memory.  */
> +
> +struct foo
> +{
> +  long long b;
> +  __int128 a;
> +} __attribute__ ((packed));
> +
> +void
> +bar (struct foo *p, struct foo *q)
> +{
> +  p->a = q->a;
> +}
> +
> +/* { dg-final { scan-assembler-not "add\tx\[0-9\]+, x\[0-9\]+" } } */
> +/* { dg-final { scan-assembler-times "ldp\tx\[0-9\]+, x\[0-9\], .*8" 1 } } */
> +/* { dg-final { scan-assembler-times "stp\tx\[0-9\]+, x\[0-9\], .*8" 1 } } */
> 



Re: [AARCH64/PATCH] update vulcan L1 cacheline size

2016-08-01 Thread Richard Earnshaw (lists)
On 01/08/16 10:40, Virendra Pathak wrote:
> Hi gcc-patches group,
> 
> Please find the patch for updating vulcan L1 cacheline size.
> 
> Tested the patch with compiling cross aarch64-linux-gcc,
> bootstrapped native aarch64-unknown-linux-gnu and
> run gcc regression.
> 
> Kindly review and merge the patch to trunk, if the patch is okay.
> Thanks.
> 
> gcc/ChangeLog:
> 
> Virendra Pathak  
> 
> * config/aarch64/aarch64.c (vulcan_tunings): Update
> vulcan L1 cache_line_size.
> 
> 

Thanks.  Installed.

R.

> with regards,
> Virendra Pathak
> 
> 
> 0001-AArch64-Update-L1-cache-line-size-for-vulcan.patch.txt
> 
> 
> From fb6ce7d1f4e4fd29b9a618467b5a52a8c69702e9 Mon Sep 17 00:00:00 2001
> From: Virendra Pathak 
> Date: Wed, 27 Jul 2016 03:41:47 -0700
> Subject: [PATCH] AArch64: Update L1 cache line size for vulcan
> 
> ---
>  gcc/config/aarch64/aarch64.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index fe2683e..56e8650 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -768,7 +768,7 @@ static const struct tune_params vulcan_tunings =
>2, /* min_div_recip_mul_sf.  */
>2, /* min_div_recip_mul_df.  */
>0, /* max_case_values.  */
> -  0, /* cache_line_size.  */
> +  64,/* cache_line_size.  */
>tune_params::AUTOPREFETCHER_OFF,   /* autoprefetcher_model.  */
>(AARCH64_EXTRA_TUNE_NONE)  /* tune_flags.  */
>  };
> 



Re: [Patch, libfortran] Multi-threaded random_number

2016-08-01 Thread Janne Blomqvist
PING?

Also, a minor bugfix on top of the previous patch:

diff --git a/libgfortran/intrinsics/random.c b/libgfortran/intrinsics/random.c
index 9b54a02..21395ea 100644
--- a/libgfortran/intrinsics/random.c
+++ b/libgfortran/intrinsics/random.c
@@ -787,7 +787,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4
*put, gfc_array_i4 *get)
   init_rand_state (true);

   /* Copy p & 15 */
-  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
+  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] % 15;
 }


@@ -862,7 +862,7 @@ random_seed_i8 (GFC_INTEGER_8 *size, gfc_array_i8
*put, gfc_array_i8 *get)

   njumps = 0;
   init_rand_state (true);
-  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
+  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] % 15;
  }


On Sun, Jul 24, 2016 at 4:45 PM, Janne Blomqvist
 wrote:
> Hi,
>
> the attached patch replaces the current random_number / random_seed
> implementations with an implementation that better supports threads.
> It's an improved version of the RFC patch I posted earlier at
> https://gcc.gnu.org/ml/gcc-patches/2015-12/msg02110.html . Please see
> that earlier message for a longer-winded explanation of what's wrong
> with the current implementation and how the patch addresses this.
>
> In short, with the patch the random number generator state is now
> per-thread and stored in a per-thread (TLS) variable, enabling a
> lockless fast-path. This provides up to 2 orders of magnitude better
> performance on a synthetic benchmark using 4 threads, and provides a
> more deterministic result as the order that threads are scheduled does
> not affect the random number streams for each thread.
>
> Compared to the RFC patch, a number of minor and not-so-minor bugs
> have been fixed, so the patch now passes the testsuite (with a few
> modifications to the suite, part of the patch). Also, for REAL kinds
> 4, 8, 10 the generated streams are identical (except precision, of
> course) (like the current implementation), enabling precision
> comparisons, as requested by Steve Kargl. However, this does not
> extend to REAL(16) as that would have necessitated doubling the size
> of the state, along with potential issues of slower escape from a
> low-entropy state, for a feature that I believe is not used by
> particularly many users in the end. So if one wants to do precision
> comparisons with REAL(16) one must employ a wrapper routine.
>
> Regtested on x86_64-pc-linux-gnu, Ok for trunk?
>
> frontend ChangeLog:
>
> 2016-07-27  Janne Blomqvist  
>
> * check.c (gfc_check_random_seed): Use new seed size in check.
> * intrinsic.texi (RANDOM_NUMBER): Updated documentation.
> (RANDOM_SEED): Likewise.
>
>
> testsuite:
>
> 2016-07-27  Janne Blomqvist  
>
> * gfortran.dg/random_7.f90: Take into account that the last seed
> value is the special p value.
> * gfortran.dg/random_seed_1.f90: Seed size is now constant.
>
>
> libgfortran:
> 2016-07-27  Janne Blomqvist  
>
> * intrinsics/random.c: Replace KISS with xorshift1024* with
> per-thread (TLS) state.
> * runtime/main.c (init): Don't call random_seed_i4.
>
>
> --
> Janne Blomqvist



-- 
Janne Blomqvist


Re: [PATCH] Fix early debug regression with DW_AT_string_length (PR debug/71906)

2016-08-01 Thread Richard Biener
On Fri, Jul 22, 2016 at 2:39 PM, Richard Biener
 wrote:
> On Fri, Jul 22, 2016 at 2:08 PM, Jakub Jelinek  wrote:
>> On Fri, Jul 22, 2016 at 01:55:22PM +0200, Richard Biener wrote:
>>> > @@ -19201,18 +19205,70 @@ gen_array_type_die (tree type, dw_die_re
>>> >if (size >= 0)
>>> > add_AT_unsigned (array_die, DW_AT_byte_size, size);
>>> >else if (TYPE_DOMAIN (type) != NULL_TREE
>>> > -  && TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != NULL_TREE
>>> > -  && DECL_P (TYPE_MAX_VALUE (TYPE_DOMAIN (type
>>> > +  && TYPE_MAX_VALUE (TYPE_DOMAIN (type)) != NULL_TREE)
>>> > {
>>> >   tree szdecl = TYPE_MAX_VALUE (TYPE_DOMAIN (type));
>>> > - dw_loc_list_ref loc = loc_list_from_tree (szdecl, 2, NULL);
>>> > + tree rszdecl = szdecl;
>>> > + HOST_WIDE_INT rsize = 0;
>>> >
>>> >   size = int_size_in_bytes (TREE_TYPE (szdecl));
>>> > - if (loc && size > 0)
>>> > + if (!DECL_P (szdecl))
>>> > {
>>> > - add_AT_location_description (array_die, 
>>> > DW_AT_string_length, loc);
>>> > - if (size != DWARF2_ADDR_SIZE)
>>> > -   add_AT_unsigned (array_die, DW_AT_byte_size, size);
>>> > + if (TREE_CODE (szdecl) == INDIRECT_REF
>>>
>>> So I wonder how this can happen with variable-size type
>>> gimplification.  Shouldn't
>>> this be on, say, DECL_VALUE_EXPR of the DECL_P TYPE_MAX_VALUE?
>>
>> If you mean the INDIRECT_REF, that only happens with PARM_DECLs, and
>> conceptually a dereference of the argument is the right spot where the
>> length lives (if you reallocate the string with different character length,
>> then that is where you store the value.  If you add some artificial
>> decl that will hold the value of *_varb, then the trouble is that the
>> variable won't be assigned before the function prologue and most likely will
>> be optimized away anyway.
>
> True.  I wonder how other cases look like with the length not based on a
> parameter.

Note that reading the dwarf standard, it looks like it accepts a location
which means we could do an implicit location description using
DW_OP_stack_value which gives us access to arbitrary dwarf
expressions (and thus the possibility to handle it similar to VLAs).

But maybe I am missing something?  (now running into the issue
with LTO debug and gfortran.dg/save_5.f90 where during early debug
we emit a location that ends up referring to a symbol that might be
optimized away later - early debug cannot sanitize referenced symbols
via resolv_addr obviously).  Annotating the DIE late is also not
what I want to do as I'd need to pull in all type DIEs into the late CU
that way (well, at least selected ones, but I'm really trying to avoid
going down that route).

Thanks,
Richard.

>>>  <1><28d>: Abbrev Number: 19 (DW_TAG_string_type)
>>>  <1><28e>: Abbrev Number: 19 (DW_TAG_string_type)
>>>  <1><28f>: Abbrev Number: 6 (DW_TAG_pointer_type)
>>>
>>> so there is nothing to annotate with a location later.
>>
>> With the patch there will be DW_OP_call4 in 2 DW_AT_string_length
>> attributes and one DW_OP_call4; DW_OP_deref.
>>
>>> Note that even with GCC 5 'varb' didn't get a DW_AT_string_length,
>>> 'vara' did, though.
>>
>> Yeah, I've mentioned that in the mail.
>>
>> Jakub


Re: [Patch, libfortran] Multi-threaded random_number

2016-08-01 Thread Andreas Schwab
On Mo, Aug 01 2016, Janne Blomqvist  wrote:

> PING?
>
> Also, a minor bugfix on top of the previous patch:
>
> diff --git a/libgfortran/intrinsics/random.c b/libgfortran/intrinsics/random.c
> index 9b54a02..21395ea 100644
> --- a/libgfortran/intrinsics/random.c
> +++ b/libgfortran/intrinsics/random.c
> @@ -787,7 +787,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4
> *put, gfc_array_i4 *get)
>init_rand_state (true);
>
>/* Copy p & 15 */
> -  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 15;
> +  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] % 15;

The comment no longer fits.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [Patch, libfortran] Multi-threaded random_number

2016-08-01 Thread Janne Blomqvist
On Mon, Aug 1, 2016 at 1:48 PM, Andreas Schwab  wrote:
> On Mo, Aug 01 2016, Janne Blomqvist  wrote:
>
>> PING?
>>
>> Also, a minor bugfix on top of the previous patch:
>>
>> diff --git a/libgfortran/intrinsics/random.c 
>> b/libgfortran/intrinsics/random.c
>> index 9b54a02..21395ea 100644
>> --- a/libgfortran/intrinsics/random.c
>> +++ b/libgfortran/intrinsics/random.c
>> @@ -787,7 +787,7 @@ random_seed_i4 (GFC_INTEGER_4 *size, gfc_array_i4
>> *put, gfc_array_i4 *get)
>>init_rand_state (true);
>>
>>/* Copy p & 15 */
>> -  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] & 
>> 15;
>> +  rand_state.p = put->base_addr[SZ * GFC_DESCRIPTOR_STRIDE(put, 0)] % 
>> 15;
>
> The comment no longer fits.

Indeed, thanks. I've fixed it in my own tree.

-- 
Janne Blomqvist


[PATCH] Add more Aarch64 NEON intrinsics

2016-08-01 Thread Tamar Christina

Hi all,

This patch adds the following NEON intrinsics to the ARM Aarch64 GCC:

* vmaxnm_f64
* vminnm_f64
* vmax_f64
* vmin_f64

Some refactoring was done as well to remove some superfluous UNSPECS
and iterators that could be combined into one.

3 was removed from aarch64-simd.md because after the
combining of the unspec FMAXMIN with FMAXMIN_UNS the pattern
immediately above it now generates the same cases.

Added new tests for these and ran regression tests on aarch64-none-elf.

Ok for trunk?

Thanks,
Tamar

gcc/
2016-07-08  Tamar Christina  

* config/aarch64/aarch64-simd-builtins.def
(__builtin_aarch64_fmindf): Change BUILTIN_VDQF to BUILTIN_VDQF_DF.
(__builtin_aarch64_fmaxdf): Likewise.
(__builtin_aarch64_smin_nandf): Likewise.
(__builtin_aarch64_smax_nandf): Likewise.
* config/aarch64/aarch64-simd.md (3): Remove.
* config/aarch64/aarch64.md (3): Rename to...
(3): ...this.
* config/aarch64/arm_neon.h (vmaxnm_f64): New.
(vminnm_f64): Likewise.
(vmin_f64): Likewise.
(vmax_f64): Likewise.
* config/aarch64/iterators.md (FMAXMIN): Merge with...
(FMAXMIN_UNS): ...this.
(fmaxmin): Merged with
(fmaxmin_op): ...this...
(maxmin_uns_op): ...in to this.

gcc/testsuite/
2016-07-11  Tamar Christina  

* gcc.target/aarch64/vminmaxnm.c: New.
* gcc.target/aarch64/simd/vminmaxnm_1.c (main): Added float64x1_t 
tests.
:100644 100644 c7fe08b... e1154b4... M	gcc/config/aarch64/aarch64-simd-builtins.def
:100644 100644 0bf3ac8... f2575a0... M	gcc/config/aarch64/aarch64-simd.md
:100644 100644 7d8b394... 08ce915... M	gcc/config/aarch64/aarch64.md
:100644 100644 ab3a00c... 07c04a3... M	gcc/config/aarch64/arm_neon.h
:100644 100644 91e2e64... f35d8d7... M	gcc/config/aarch64/iterators.md
:100644 100644 96608eb... 192bad9... M	gcc/testsuite/gcc.target/aarch64/simd/vminmaxnm_1.c
:00 100644 000... bdaa564... A	gcc/testsuite/gcc.target/aarch64/vminmaxnm.c

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index c7fe08b..e1154b4 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -241,19 +241,19 @@
   BUILTIN_VHSDF (UNOP, reduc_smax_nan_scal_, 10)
   BUILTIN_VHSDF (UNOP, reduc_smin_nan_scal_, 10)
 
-  /* Implemented by 3.
+  /* Implemented by 3.
  smax variants map to fmaxnm,
  smax_nan variants map to fmax.  */
   BUILTIN_VDQ_BHSI (BINOP, smax, 3)
   BUILTIN_VDQ_BHSI (BINOP, smin, 3)
   BUILTIN_VDQ_BHSI (BINOP, umax, 3)
   BUILTIN_VDQ_BHSI (BINOP, umin, 3)
-  BUILTIN_VHSDF (BINOP, smax_nan, 3)
-  BUILTIN_VHSDF (BINOP, smin_nan, 3)
+  BUILTIN_VHSDF_DF (BINOP, smax_nan, 3)
+  BUILTIN_VHSDF_DF (BINOP, smin_nan, 3)
 
-  /* Implemented by 3.  */
-  BUILTIN_VHSDF (BINOP, fmax, 3)
-  BUILTIN_VHSDF (BINOP, fmin, 3)
+  /* Implemented by 3.  */
+  BUILTIN_VHSDF_HSDF (BINOP, fmax, 3)
+  BUILTIN_VHSDF_HSDF (BINOP, fmin, 3)
 
   /* Implemented by aarch64_p.  */
   BUILTIN_VDQ_BHSI (BINOP, smaxp, 0)
@@ -549,8 +549,4 @@
   BUILTIN_GPI (UNOP, fix_truncdf, 2)
   BUILTIN_GPI_I16 (UNOPUS, fixuns_trunchf, 2)
   BUILTIN_GPI (UNOPUS, fixuns_truncsf, 2)
-  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
-
-  /* Implemented by 3.  */
-  VAR1 (BINOP, fmax, 3, hf)
-  VAR1 (BINOP, fmin, 3, hf)
+  BUILTIN_GPI (UNOPUS, fixuns_truncdf, 2)
\ No newline at end of file
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 0bf3ac8..f2575a0 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2038,6 +2038,9 @@
   [(set_attr "type" "neon_fp_minmax_")]
 )
 
+;; Vector forms for fmax, fmin, fmaxnm, fminnm.
+;; fmaxnm and fminnm are used for the fmax3 standard pattern names,
+;; which implement the IEEE fmax ()/fmin () functions.
 (define_insn "3"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
@@ -2048,17 +2051,6 @@
   [(set_attr "type" "neon_fp_minmax_")]
 )
 
-;; Auto-vectorized forms for the IEEE-754 fmax()/fmin() functions
-(define_insn "3"
-  [(set (match_operand:VHSDF 0 "register_operand" "=w")
-	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
-		   (match_operand:VHSDF 2 "register_operand" "w")]
-		   FMAXMIN))]
-  "TARGET_SIMD"
-  "\\t%0., %1., %2."
-  [(set_attr "type" "neon_fp_minmax_")]
-)
-
 ;; 'across lanes' add.
 
 (define_expand "reduc_plus_scal_"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7d8b394..08ce915 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4831,14 +4831,16 @@
   [(set_attr "type" "f_minmax")]
 )
 
-;; Scalar forms for the IEEE-754 fmax()/fmin() functions
-(define_insn "3"
+;; Scalar forms for fmax, fmin, fmaxnm, fminnm.
+;; fmaxnm and fminnm are used for the fmax3 standard pattern names,
+;; which implement the IEEE fmax ()/fmin () functions.
+(define_insn "3"
   [(set (match_operand:GPF_F16 0 "registe

[PATCH] Change dump expectation in PR71857

2016-08-01 Thread Martin Liška
Hello.

After changing the expected number of loop iterations, IVOPTS decides to pick 
up another IV.
Bin agreed that the test-case still makes sense with changed expectation.

Ready to be installed?
Martin
>From 8073251d3827e0c7cd26680e97aa416e23132386 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 1 Aug 2016 11:11:29 +0200
Subject: [PATCH] Change dump expectation in PR71857

gcc/testsuite/ChangeLog:

2016-08-01  Martin Liska  

	* gcc.dg/tree-ssa/ivopt_mult_4.c: Change expectation.
---
 gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
index 8abccf8..effb052 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopt_mult_4.c
@@ -21,4 +21,4 @@ long foo(long* p, long* p2, int N1, int N2)
   return s;
 }
 
-/* { dg-final { scan-tree-dump-times "Replacing" 0 "ivopts"} } */
+/* { dg-final { scan-tree-dump "Replacing exit test" "ivopts"} } */
-- 
2.9.2



Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Joseph Myers
On Mon, 1 Aug 2016, Richard Biener wrote:

> I don't think so - the pattern is as much a canonicalization as an
> optimization.  If the target cannot expand copysign then the
> middle-end expander should try an alternative like the a / abs(x)
> form.  Though if copysign cannot be open-coded then I don't see
> how abs can be opencoded (which basically is copysign (1.0, x)).

It's expanded incorrectly in those cases (see bug 29253).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE

2016-08-01 Thread Richard Biener
On Mon, 1 Aug 2016, Prathamesh Kulkarni wrote:

> Hi Richard,
> The attached patch tries to fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE.
> I am not sure where was the ideal place to put this transform in and ended up
> adding it to strlen_optimize_stmt().
> Does that look OK ?
> 
> I needed to add TODO_update_ssa to strlen pass, otherwise we hit the
> following assert in execute_todo():
> if (flag_checking
>   && cfun
>   && need_ssa_update_p (cfun))
> gcc_assert (flags & TODO_update_ssa_any);
> 
> Bootstrap+test in progress on x86_64-unknown-linux-gnu.

I believe you should factor small-size part of handle_builtin_memcmp and
re-use that for the code generation part.

You should also remove the corresponding fold-const.c code I think.

Richard.


Re: fold strlen (s) eq/ne 0 to *s eq/ne 0 on GIMPLE

2016-08-01 Thread Richard Biener
On Mon, 1 Aug 2016, Andrew Pinski wrote:

> On Mon, Aug 1, 2016 at 12:22 AM, Andrew Pinski  wrote:
> > On Mon, Aug 1, 2016 at 12:15 AM, Prathamesh Kulkarni
> >  wrote:
> >> Hi Richard,
> >> The attached patch tries to fold strlen (s) eq/ne 0 to *s eq/ne 0 on 
> >> GIMPLE.
> >> I am not sure where was the ideal place to put this transform in and ended 
> >> up
> >> adding it to strlen_optimize_stmt().
> >> Does that look OK ?
> >
> > I suspect it might be better in match.pd.
> 
> The main reason is it is already in fold-const.c:
>   /* Optimize comparisons of strlen vs zero to a compare of the
>  first character of the string vs zero.  To wit,
> strlen(ptr) == 0   =>  *ptr == 0
> strlen(ptr) != 0   =>  *ptr != 0
>  Other cases should reduce to one of these two (or a constant)
>  due to the return value of strlen being unsigned.  */
>   if (TREE_CODE (arg0) == CALL_EXPR
>   && integer_zerop (arg1))
> {
>   tree fndecl = get_callee_fndecl (arg0);
> 
>   if (fndecl
>   && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL
>   && DECL_FUNCTION_CODE (fndecl) == BUILT_IN_STRLEN
>   && call_expr_nargs (arg0) == 1
>   && TREE_CODE (TREE_TYPE (CALL_EXPR_ARG (arg0, 0))) ==
> POINTER_TYPE)
> {
>   tree iref = build_fold_indirect_ref_loc (loc,
>CALL_EXPR_ARG (arg0, 0));
>   return fold_build2_loc (loc, code, type, iref,
>   build_int_cst (TREE_TYPE (iref), 0));
> }
> }
> 
> So you are basically moving that to match.pd instead of adding extra code.

The issue is that we currently cannot update virtual operands in most
cases so

(simplify
 (ne (BUILT_IN_STRLEN @0) integer_zerop)
 (ne (MEM_REF:char_type_node @0 { build_int_cst (ptr_type_node, 0); } ) { 
build_zero_cst (char_type_node); } ))

will work but ICE on

int foo (const char *s)
{
  int a = 0;
  return __builtin_strlen (s) != a;
}

when optimizing.  I didn't yet arrive at a good way to solve this issue.
The most straight-forward way would maybe be sth explicit, like

(simplify
 (ne (BUILT_IN_STRLEN:vop@2 @0) integer_zerop)
 (ne (MEM_REF:char_type_node:vop@2 @0 { ...

but inventing a good syntax here is difficult.  Basically you have to
connect memory references in the matched pattern with those in the
replacement.

There is also the possibility to at least capture an existing VUSE during 
matching
and make sure to set that on the replacement.  That would work
automagically but doing this more generically to also handle
stores is difficult.

Note that it all works if the replacement has the only memory
reference in the last stmt.

Richard.


Re: [patch,avr] Add built-in function to emit NOPs

2016-08-01 Thread Denis Chertykov
2016-08-01 11:23 GMT+03:00 Georg-Johann Lay :
> This adds a new built-in function that inserts a requested number of NOPs
> into the instruction stream.  I found it useful when testing avr-gcc.
>
> Ok to apply?
>
>
> Johann
>
>
> * doc/extend.texi (AVR Built-in Functions): Document
> __builtin_avr_nops.
> * config/avr/builtins.def (NOPS): New.
> * config/avr/avr.c (avr_expand_nops): New static function.
> (avr_expand_builtin): Use it to handle AVR_BUILTIN_NOPS.

Please apply.


Re: [patch,avr] Adjust insn length of forward branches.

2016-08-01 Thread Denis Chertykov
2016-08-01 10:56 GMT+03:00 Georg-Johann Lay :
> The length computation of "branch" flavour insn length attribute computed a
> length one instruction too short, e.g. in the following test case:
>
> #define N0(N) if (X++ < N) __builtin_avr_nop()
>
> #define N1(N) N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N);N0(N)
> #define N2(N) N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N);N1(N)
> #define N3(N) N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N);N2(N)
> #define N4(N) N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N);N3(N)
>
> void bra_63 (unsigned char x)
> {
> if (x > 0)
> {
> unsigned X = 0;
> N4 (63);
> }
> }
>
> void bra_2045 (unsigned char x)
> {
> if (x > 0)
> {
> unsigned X = 0;
> N4 (2045);
> }
> }
>
> when compiled for MCU with JMP instruction like -mmcu=atmega168
>
> Ok to apply?
>
> Johann
>
>
> * config/avr/avr.md (length) [branch]: Correct insn length
> attribute for forward branches.

Please apply.


Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Richard Biener
On Mon, 1 Aug 2016, Joseph Myers wrote:

> On Mon, 1 Aug 2016, Richard Biener wrote:
> 
> > I don't think so - the pattern is as much a canonicalization as an
> > optimization.  If the target cannot expand copysign then the
> > middle-end expander should try an alternative like the a / abs(x)
> > form.  Though if copysign cannot be open-coded then I don't see
> > how abs can be opencoded (which basically is copysign (1.0, x)).
> 
> It's expanded incorrectly in those cases (see bug 29253).

Heh, looks like I worked on this 8 years ago...

Fixing the copysign expansion issue should be as easy as
making sure expand_copysign_absneg is not used if the mode
has signed zeros.

Richard.


Re: [PATCH] Change dump expectation in PR71857

2016-08-01 Thread Richard Biener
On Mon, Aug 1, 2016 at 1:30 PM, Martin Liška  wrote:
> Hello.
>
> After changing the expected number of loop iterations, IVOPTS decides to pick 
> up an another IV.
> Bin agreed that the test-case still makes sense with changed expectation.
>
> Ready to be installed?

Hmm, but do all targets replace the exit test?  I think you want to
allow either no "Replacing"
or just "Replacing exit test" - not sure how to encode that in a regex.

Richard.

> Martin


Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Joseph Myers
On Mon, 1 Aug 2016, Richard Biener wrote:

> On Mon, 1 Aug 2016, Joseph Myers wrote:
> 
> > On Mon, 1 Aug 2016, Richard Biener wrote:
> > 
> > > I don't think so - the pattern is as much a canonicalization as an
> > > optimization.  If the target cannot expand copysign then the
> > > middle-end expander should try an alternative like the a / abs(x)
> > > form.  Though if copysign cannot be open-coded then I don't see
> > > how abs can be opencoded (which basically is copysign (1.0, x)).
> > 
> > It's expanded incorrectly in those cases (see bug 29253).
> 
> Heh, looks like I worked on this 8 years ago...
> 
> Fixing the copysign expansion issue should be as easy as
> making sure expand_copysign_absneg is not used if the mode
> has signed zeros.

Correct expansion of abs or copysign for IBM long double (i.e., for the 
case where signbit_rw for the format is -1) requires special logic for 
that format; inhibiting particular incorrect expansion code may cause an 
external function call to be used in place of an incorrect inline 
expansion, but that won't help if you care about avoiding such a function 
call to fabsl or copysignl.  The logic for IBM long double isn't that 
complicated - a few bitwise manipulations - but we don't have it at 
present (for that soft-float case).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [RFC, v2] Test coverage for --param boundary values

2016-08-01 Thread Martin Liška
On 07/28/2016 11:26 PM, Joseph Myers wrote:
> On Mon, 18 Jul 2016, Martin Liška wrote:
> 
>> Well, I can imagine a guard which will test whether the 
>> "$objdir/../../params.options" file exists, and if so, then the tests are 
>> executed? Is it an acceptable approach?
> 
> The correct way to test for build-tree testing is [info exists 
> TESTING_IN_BUILD_TREE].  When testing outside the build tree, you should 
> not assume anything about directories outside of the test and source 
> directories, meaning you should not test for existence of paths in 
> $objdir/../ in that case.

Thank you for the hint, I'm attaching patch.

> 
> (The preferable approach is to factor out the code generating this file so 
> it can be run from the testsuite.  Then you don't need to distinguish 
> build-tree and other testing at all.)
> 

That would be the best approach, but I've got quite limited experience with 
DejaGNU,
I would postpone it and write it on my TODO list.

May I install the suggested patch?
Martin
>From 71b4f52b348098314c2b2351bcd7dbd72e051ec4 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Mon, 1 Aug 2016 13:59:34 +0200
Subject: [PATCH] Use TESTING_IN_BUILD_TREE in params.exp

gcc/testsuite/ChangeLog:

2016-08-01  Martin Liska  

	* gcc.dg/params/params.exp: Replace file exists with
	TESTING_IN_BUILD_TREE.
---
 gcc/testsuite/gcc.dg/params/params.exp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/params/params.exp b/gcc/testsuite/gcc.dg/params/params.exp
index 47f5af5..2eb797e 100644
--- a/gcc/testsuite/gcc.dg/params/params.exp
+++ b/gcc/testsuite/gcc.dg/params/params.exp
@@ -32,7 +32,7 @@ proc param_run_test { param_name param_value } {
 }
 
 set options_file "$objdir/../../params.options"
-if { [file exists $options_file] == 0 } {
+if { [info exists TESTING_IN_BUILD_TREE] == 0 } {
   return
 }
 
-- 
2.9.2



Re: [PATCH 0/4] Various GCOV/PGO improvements

2016-08-01 Thread Nathan Sidwell

On 08/01/16 04:48, marxin wrote:

Hi.

My attempt in the following small series is to cover a couple of issues
I've recently observed. I'll briefly describe the changes with respect to
each individual patch:

marxin (4):
  Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

As mentioned in [1], our current implementation can produce a corrupted
profile due to a massive usage of threads. Well, the absolutely robust
solution would be to either utilize TLS or to use atomics & locking
mechanism. However, as David Li pointed out, the most interesting
counters that can suffer from multithreading are -fprofile-arcs counters
and indirect call counters. I've just cherry picked the functionality
from google/gcc-4_9 branch.

[1] https://gcc.gnu.org/ml/gcc/2016-07/msg00131.html

  Remove __gcov_indirect_call_profiler

The profiler function is unused, thus can be removed.

  Fix typo in gcov.texi

Just a small typo in names of functions that one can call from
a user application.

  Add tests for __gcov_dump and __gcov_reset

Adding tests for the aforementioned functions.






Patches 2,3 & 4 are ok.  patch 1 (the fprofile-generate-atomic) needs work. 
I'll respond to that one directly.


nathan


Re: [PATCH] Change dump expectation in PR71857

2016-08-01 Thread Martin Liška
On 08/01/2016 01:58 PM, Richard Biener wrote:
> On Mon, Aug 1, 2016 at 1:30 PM, Martin Liška  wrote:
>> Hello.
>>
>> After changing the expected number of loop iterations, IVOPTS decides to 
>> pick up an another IV.
>> Bin agreed that the test-case still makes sense with changed expectation.
>>
>> Ready to be installed?
> 
> Hmm, but do all targets replace the exit test?  I think you want to
> allow either no "Replacing"
> or just "Replacing exit test" - not sure how to encode that in a regex.
> 
> Richard.

Well, the test-case is run just on { i?86-*-* x86_64-*-* } && lp64
with -m64, thus I guess the exit condition should be always replaced.

Martin


> 
>> Martin



[patch,avr] PR70677: Use -fno-caller-saves for avr

2016-08-01 Thread Georg-Johann Lay
Problem with -fcaller-saves is that there are situations where it triggers an 
expensive frame just to store a variable around a function call even though 
there are plenty of call-saved registers.


Example:

typedef __UINT8_TYPE__ uint8_t;

extern uint8_t uart0_getc (void);

void foo (uint8_t *buffer, uint8_t cnt)
{
  while (--cnt)
{
  *buffer++ = uart0_getc();
}
}

$ avr-gcc -Os -S -dp -mmcu=atmega8 loop-buf.c

$ avr-gcc gcc -B$TV -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c && avr-size 
loop-buf.o

   text    data     bss     dec     hex filename
     50       0       0      50      32 loop-buf.o

$ avr-gcc -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c -fno-caller-saves && 
avr-size loop-buf.o

   text    data     bss     dec     hex filename
     32       0       0      32      20 loop-buf.o

I actually never came across a situation where -fcaller-saves improved the code 
performance, hence this patch proposes to switch off -fcaller-saves by default.


I can test the patch without regressions, but what bothers me is the following 
lines in ira-color.c:allocno_reload_assign()


  if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0
  && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a),
  call_used_reg_set))
{
  ira_assert (flag_caller_saves);
  caller_save_needed = 1;
}

What's not clear is whether this assertion is about the inner working of IRA as 
alloc depends on caller-saves in other places of IRA, or if caller-saves is 
needed because otherwise IRA cannot resolve complicated reload situations and 
hence the proposed change might trigger ICEs for complex programs.


Therefore CCed Vladimir who added the assertion to IRA.

Ok to apply if IRA can do without caller-saves?


Johann


PR 70677
* common/config/avr/avr-common.c (avr_option_optimization_table)
[OPT_LEVELS_ALL]: Turn off -fcaller-saves.


Index: common/config/avr/avr-common.c
===
--- common/config/avr/avr-common.c	(revision 238849)
+++ common/config/avr/avr-common.c	(working copy)
@@ -28,6 +28,9 @@
 static const struct default_options avr_option_optimization_table[] =
   {
 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
+// The only effect of -fcaller-saves might be that it triggers
+// a frame without need when it tries to be smart around calls.
+{ OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
   };
 


[PATCH] More testsuite cleanup

2016-08-01 Thread Jonathan Wakely

Another bunch of fixes to make the testsuite work with any -std
option, and remove some redundancy.


I don't know why this file has "-x c" in the options, but since those
options come after the filename it's ignored and has no effect anyway:

   Remove "-x c" from dg-options in libstdc++ C++ test
   
   	* testsuite/17_intro/freestanding.cc: Remove "-x c" from dg-options.



Self-explanatory:

   Limit std::tuple tests to run for C++11 and later
   
   	* testsuite/20_util/tuple/cons/66338.cc: Limit test to C++11 and

later.
* testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc:
Likewise.

   Make libstdc++ debug mode test valid for C++98
   
   	* testsuite/25_algorithms/lower_bound/debug/irreflexive.cc: Use

C++98-compatible initialization for array.

   Remove unused headers and variables from libstdc++ tests
   
   	* testsuite/20_util/shared_ptr/assign/auto_ptr_rvalue.cc: Remove

unused header and variable from compile-only test.
* testsuite/20_util/shared_ptr/assign/unique_ptr_rvalue.cc: Likewise.
* testsuite/28_regex/basic_regex/assign/char/cstring.cc: Likewise.
* testsuite/28_regex/basic_regex/ctors/string_wchar_t.cc: Likewise.
* testsuite/experimental/memory/shared_ptr/cons/copy_ctor_neg.cc:
Likewise.

   Remove duplicate dg-options directive in string_view test
   
   	* testsuite/experimental/string_view/operations/rfind/char/2.cc:

Remove duplicate dg-options directive.


This is just to be consistent with every other pre-C++11 test, so that
grepping for (gnu|c)++98 finds them all:

   Use -std=gnu++98 instead of -std=gnu++03 in test
   
   	* testsuite/20_util/specialized_algorithms/uninitialized_copy/

808590.cc: Use -std=gnu++98 instead of -std=gnu++03.


The target-specific linker options became redundant a few years ago
when these were changed to use { dg-do compile } (r205766):

   Remove linker options from compile-only tests
   
   	* testsuite/20_util/make_signed/requirements/typedefs-2.cc: Remove

linker options from compile-only tests.
* testsuite/20_util/make_unsigned/requirements/typedefs-2.cc:
Likewise.


These tests currently use a C++14 alias template, but there's no
reason they can't be run for C++11 mode too:

   Run std::ios_base enum tests for C++11 and up
   
   	* testsuite/27_io/ios_base/types/fmtflags/case_label.cc: Make test

supported for C++11 and later.
* testsuite/27_io/ios_base/types/iostate/case_label.cc: Likewise.
* testsuite/27_io/ios_base/types/openmode/case_label.cc: Likewise.

(I suppose they could even run for C++98 if we added a preprocessor
check for __cplusplus >= 201103L around the static assertions at the
end.)

Tested powerpc64-linux, committed to trunk.

commit 87b28a03474977cea5e7135f5e914378acfb97cd
Author: Jonathan Wakely 
Date:   Mon Aug 1 12:29:09 2016 +0100

Remove "-x c" from dg-options in libstdc++ C++ test

	* testsuite/17_intro/freestanding.cc: Remove "-x c" from dg-options.

diff --git a/libstdc++-v3/testsuite/17_intro/freestanding.cc b/libstdc++-v3/testsuite/17_intro/freestanding.cc
index d0674dd..98e1d6c6 100644
--- a/libstdc++-v3/testsuite/17_intro/freestanding.cc
+++ b/libstdc++-v3/testsuite/17_intro/freestanding.cc
@@ -1,4 +1,4 @@
-// { dg-options "-x c -std=gnu++11 -lsupc++ -fvtable-verify=none" }
+// { dg-options "-std=gnu++11 -lsupc++ -fvtable-verify=none" }
 
 // Copyright (C) 2010-2016 Free Software Foundation, Inc.
 //

commit d54f9190792170a14be5dc4bde2a5da0325fb091
Author: Jonathan Wakely 
Date:   Mon Aug 1 11:03:26 2016 +0100

Limit std::tuple tests to run for C++11 and later

	* testsuite/20_util/tuple/cons/66338.cc: Limit test to C++11 and
	later.
	* testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc:
	Likewise.

diff --git a/libstdc++-v3/testsuite/20_util/tuple/cons/66338.cc b/libstdc++-v3/testsuite/20_util/tuple/cons/66338.cc
index f57eae9..8dd29b6 100644
--- a/libstdc++-v3/testsuite/20_util/tuple/cons/66338.cc
+++ b/libstdc++-v3/testsuite/20_util/tuple/cons/66338.cc
@@ -15,6 +15,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
+// { dg-do compile { target c++11 } }
+
 #include 
 
 struct S {
diff --git a/libstdc++-v3/testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc b/libstdc++-v3/testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc
index fe9bea6..a9bf954 100644
--- a/libstdc++-v3/testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc
+++ b/libstdc++-v3/testsuite/20_util/tuple/cons/element_accepts_anything_byval.cc
@@ -15,6 +15,8 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
+// { dg-do compile { target c++11 } }
+
 #include 
 using namespace std;
 
@@ -27,4 +29,3 @@ int main() {
 tuple t1;
 tuple t2 = t1;
 }
-

commit 651e790145a79cb3fd68bc23e161d474c8335605
Author: Jonathan Wak

Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-01 Thread Nathan Sidwell

As I just wrote, this patch needs work.  The general points are:
1) exposing integers 0-3 to the user as switch values.  Don't do that, give them 
names.  In this  case a comma separated list of orthogonal names seems 
appropriate.  But see below.
2) Poor documentation.  How might the user choose an appropriate setting? 
(What happens if compilations need to use different settings?)  What are 'edge' 
and 'value' counters?  Why might one want different settings for them?


I think this is jumping too deep into a solution with insufficient evidence. 
Particularly, why two edges and values can be set differently.  It doesn't lend 
itself to extending to TLS, if that proves to be a good solution (trading memory 
for time).  Something along the lines of 
'-fprofile-update={single,atomic,threaded},[edge,value]' might be better.  I.e. 
set the scheme as part of the option value, followed by  a list of the things it 
applies to.  (and as I hope I've  implied, it'd be good not to have that 
separate list until proven otherwise).



On 07/28/16 08:32, marxin wrote:

libgcc/ChangeLog:

2016-07-28  Martin Liska  


Shouldn't the original authors be named here too? (applies to the other patches 
too).




--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -169,6 +169,19 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see



+
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif

...

 #if IN_LIBGCOV
+
+#if LONG_LONG_TYPE_SIZE > 32
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
+#else
+#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
+#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
+#endif



BTW, these two blocks look stunningly similar.

nathan


Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Richard Biener
On Mon, 1 Aug 2016, Joseph Myers wrote:

> On Mon, 1 Aug 2016, Richard Biener wrote:
> 
> > On Mon, 1 Aug 2016, Joseph Myers wrote:
> > 
> > > On Mon, 1 Aug 2016, Richard Biener wrote:
> > > 
> > > > I don't think so - the pattern is as much a canonicalization as an
> > > > optimization.  If the target cannot expand copysign then the
> > > > middle-end expander should try an alternative like the a / abs(x)
> > > > form.  Though if copysign cannot be open-coded then I don't see
> > > > how abs can be opencoded (which basically is copysign (1.0, x)).
> > > 
> > > It's expanded incorrectly in those cases (see bug 29253).
> > 
> > Heh, looks like I worked on this 8 years ago...
> > 
> > Fixing the copysign expansion issue should be as easy as
> > making sure expand_copysign_absneg is not used if the mode
> > has signed zeros.
> 
> Correct expansion of abs or copysign for IBM long double (i.e., for the 
> case where signbit_rw for the format is -1) requires special logic for 
> that format; inhibiting particular incorrect expansion code may cause an 
> external function call to be used in place of an incorrect inline 
> expansion, but that won't help if you care about avoiding such a function 
> call to fabsl or copysignl.  The logic for IBM long double isn't that 
> complicated - a few bitwise manipulations - but we don't have it at 
> present (for that soft-float case).

So I read your comment as a general remark and not as an objection
to the patch in question?

Richard.


Re: [PR71078] x / abs(x) -> copysign (1.0, x)

2016-08-01 Thread Joseph Myers
On Mon, 1 Aug 2016, Richard Biener wrote:

> > Correct expansion of abs or copysign for IBM long double (i.e., for the 
> > case where signbit_rw for the format is -1) requires special logic for 
> > that format; inhibiting particular incorrect expansion code may cause an 
> > external function call to be used in place of an incorrect inline 
> > expansion, but that won't help if you care about avoiding such a function 
> > call to fabsl or copysignl.  The logic for IBM long double isn't that 
> > complicated - a few bitwise manipulations - but we don't have it at 
> > present (for that soft-float case).
> 
> So I read your comment as a general remark and not as an objection
> to the patch in question?

Yes.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [patch] Some testsuite cleanup

2016-08-01 Thread Jonathan Wakely

On 28/07/16 22:06 +0100, Jonathan Wakely wrote:

When I added _Temporary_object to vector recently I updated the
construct/destroy counts in this test to match, but the changes only
apply for C++11 and later. This makes the test pass whether run as
C++98 or C++11 and later.

  Fix std::vector test to pass in C++98 mode
  * testsuite/23_containers/vector/check_construct_destroy.cc: Account
  for different construct/destroy counts in C++98 mode.


The same change is needed for a __gnu_cxx::hash_set test, because that
container uses std::vector internally:

   Fix __gnu_cxx::hash_set test to pass in C++98 mode
   
   * testsuite/backward/hash_set/check_construct_destroy.cc: Account

   for different construct/destroy counts in C++98 mode.

Tested x86_64-linux, committed to trunk.


commit efb4d741206e916e7457ee90867d2de3501d2a06
Author: Jonathan Wakely 
Date:   Mon Aug 1 13:40:20 2016 +0100

Fix __gnu_cxx::hash_set test to pass in C++98 mode

	* testsuite/backward/hash_set/check_construct_destroy.cc: Account
	for different construct/destroy counts in C++98 mode.

diff --git a/libstdc++-v3/testsuite/backward/hash_set/check_construct_destroy.cc b/libstdc++-v3/testsuite/backward/hash_set/check_construct_destroy.cc
index 5740fe1..821cb29 100644
--- a/libstdc++-v3/testsuite/backward/hash_set/check_construct_destroy.cc
+++ b/libstdc++-v3/testsuite/backward/hash_set/check_construct_destroy.cc
@@ -39,48 +39,50 @@ int main()
 
   int buckets;
 
-  // Add 1 to all counts, because the std::vector used internally by the
-  // hashtable creates and destroys a temporary object using the allocator.
+  // For C++11 and later add 1 to all counts, because the std::vector used
+  // internally by the hashtable creates and destroys a temporary object
+  // using its allocator.
+  const int extra = __cplusplus >= 201102L ? 1 : 0;
 
   tracker_allocator_counter::reset();
   {
 Container c;
 buckets = c.bucket_count();
-ok = check_construct_destroy("empty container", buckets+1, 1) && ok;
+ok = check_construct_destroy("empty container", buckets+extra, extra) && ok;
   }
-  ok = check_construct_destroy("empty container", buckets+1, buckets+1) && ok;
+  ok = check_construct_destroy("empty container", buckets+extra, buckets+extra) && ok;
 
 
   tracker_allocator_counter::reset();
   {
 Container c(arr10, arr10 + 10);
-ok = check_construct_destroy("Construct from range", buckets+10+1, 1) && ok;
+ok = check_construct_destroy("Construct from range", buckets+10+extra, extra) && ok;
   }
-  ok = check_construct_destroy("Construct from range", buckets+10+1, buckets+10+1) && ok;
+  ok = check_construct_destroy("Construct from range", buckets+10+extra, buckets+10+extra) && ok;
 
   tracker_allocator_counter::reset();
   {
 Container c(arr10, arr10 + 10);
 c.insert(arr10a[0]);
-ok = check_construct_destroy("Insert element", buckets+11+1, 1) && ok;
+ok = check_construct_destroy("Insert element", buckets+11+extra, extra) && ok;
   }
-  ok = check_construct_destroy("Insert element", buckets+11+1, buckets+11+1) && ok;
+  ok = check_construct_destroy("Insert element", buckets+11+extra, buckets+11+extra) && ok;
 
   tracker_allocator_counter::reset();
   {
 Container c(arr10, arr10 + 10);
 c.insert(arr10a, arr10a+3);
-ok = check_construct_destroy("Insert short range", buckets+13+1, 1) && ok;
+ok = check_construct_destroy("Insert short range", buckets+13+extra, extra) && ok;
   }
-  ok = check_construct_destroy("Insert short range", buckets+13+1, buckets+13+1) && ok;
+  ok = check_construct_destroy("Insert short range", buckets+13+extra, buckets+13+extra) && ok;
 
   tracker_allocator_counter::reset();
   {
 Container c(arr10, arr10 + 10);
 c.insert(arr10a, arr10a+10);
-ok = check_construct_destroy("Insert long range", buckets+20+1, 1) && ok;
+ok = check_construct_destroy("Insert long range", buckets+20+extra, extra) && ok;
   }
-  ok = check_construct_destroy("Insert long range", buckets+20+1, buckets+20+1) && ok;
+  ok = check_construct_destroy("Insert long range", buckets+20+extra, buckets+20+extra) && ok;
 
   return ok ? 0 : 1;
 }


Re: [PATCH] Change dump expectation in PR71857

2016-08-01 Thread Richard Biener
On Mon, Aug 1, 2016 at 2:12 PM, Martin Liška  wrote:
> On 08/01/2016 01:58 PM, Richard Biener wrote:
>> On Mon, Aug 1, 2016 at 1:30 PM, Martin Liška  wrote:
>>> Hello.
>>>
>>> After changing the expected number of loop iterations, IVOPTS decides to 
>>> pick up an another IV.
>>> Bin agreed that the test-case still makes sense with changed expectation.
>>>
>>> Ready to be installed?
>>
>> Hmm, but do all targets replace the exit test?  I think you want to
>> allow either no "Replacing"
>> or just "Replacing exit test" - not sure how to encode that in a regex.
>>
>> Richard.
>
> Well, the test-case is run just on { i?86-*-* x86_64-*-* } && lp64
> with -m64, thus I guess the exit condition should be always replaced.

Ah, ok then.

Richard.

> Martin
>
>
>>
>>> Martin
>


libcpp: Fix comment typo

2016-08-01 Thread Andreas Schwab
Committed.

Andreas.

* include/cpplib.h: Fix comment typo.

diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
index 543f3b9..4e0084c 100644
--- a/libcpp/include/cpplib.h
+++ b/libcpp/include/cpplib.h
@@ -880,7 +880,7 @@ struct cpp_num
 #define CPP_N_FLOATING 0x0002
 
 #define CPP_N_WIDTH0x00F0
-#define CPP_N_SMALL0x0010  /* int, float, shrot _Fract/Accum  */
+#define CPP_N_SMALL0x0010  /* int, float, short _Fract/Accum  */
 #define CPP_N_MEDIUM   0x0020  /* long, double, long _Fract/_Accum.  */
 #define CPP_N_LARGE0x0040  /* long long, long double,
   long long _Fract/Accum.  */
-- 
2.9.2

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


[PATCH AArch64/V3]Add new patterns for vcond_mask and vec_cmp

2016-08-01 Thread Bin Cheng
Hi,
This is the 3rd version patch implementing vcond_mask and vec_cmp patterns on 
AArch64.
Bootstrapped and tested along with the next patch on AArch64.  Is it OK?

Thanks,
bin

2016-07-28  Alan Lawrence  
Renlin Li  
Bin Cheng  

* config/aarch64/aarch64-simd.md (vec_cmp): New pattern.
(vec_cmp): New pattern.
(vec_cmpu): New pattern.
(vcond_mask_): New pattern.

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index a19d171..3918136 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2271,6 +2271,277 @@
   DONE;
 })
 
+(define_expand "vcond_mask_"
+  [(match_operand:VALLDI 0 "register_operand")
+   (match_operand:VALLDI 1 "nonmemory_operand")
+   (match_operand:VALLDI 2 "nonmemory_operand")
+   (match_operand: 3 "register_operand")]
+  "TARGET_SIMD"
+{
+  /* If we have (a = (P) ? -1 : 0);
+ Then we can simply move the generated mask (result must be int).  */
+  if (operands[1] == CONSTM1_RTX (mode)
+  && operands[2] == CONST0_RTX (mode))
+emit_move_insn (operands[0], operands[3]);
+  /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask.  */
+  else if (operands[1] == CONST0_RTX (mode)
+  && operands[2] == CONSTM1_RTX (mode))
+emit_insn (gen_one_cmpl2 (operands[0], operands[3]));
+  else
+{
+  if (!REG_P (operands[1]))
+   operands[1] = force_reg (mode, operands[1]);
+  if (!REG_P (operands[2]))
+   operands[2] = force_reg (mode, operands[2]);
+  emit_insn (gen_aarch64_simd_bsl (operands[0], operands[3],
+operands[1], operands[2]));
+}
+
+  DONE;
+})
+
+;; Patterns comparing two vectors to produce a mask.
+
+(define_expand "vec_cmp"
+  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
+ (match_operator 1 "comparison_operator"
+   [(match_operand:VSDQ_I_DI 2 "register_operand")
+(match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  rtx mask = operands[0];
+  enum rtx_code code = GET_CODE (operands[1]);
+
+  switch (code)
+{
+case NE:
+case LE:
+case LT:
+case GE:
+case GT:
+case EQ:
+  if (operands[3] == CONST0_RTX (mode))
+   break;
+
+  /* Fall through.  */
+default:
+  if (!REG_P (operands[3]))
+   operands[3] = force_reg (mode, operands[3]);
+
+  break;
+}
+
+  switch (code)
+{
+case LT:
+  emit_insn (gen_aarch64_cmlt (mask, operands[2], operands[3]));
+  break;
+
+case GE:
+  emit_insn (gen_aarch64_cmge (mask, operands[2], operands[3]));
+  break;
+
+case LE:
+  emit_insn (gen_aarch64_cmle (mask, operands[2], operands[3]));
+  break;
+
+case GT:
+  emit_insn (gen_aarch64_cmgt (mask, operands[2], operands[3]));
+  break;
+
+case LTU:
+  emit_insn (gen_aarch64_cmgtu (mask, operands[3], operands[2]));
+  break;
+
+case GEU:
+  emit_insn (gen_aarch64_cmgeu (mask, operands[2], operands[3]));
+  break;
+
+case LEU:
+  emit_insn (gen_aarch64_cmgeu (mask, operands[3], operands[2]));
+  break;
+
+case GTU:
+  emit_insn (gen_aarch64_cmgtu (mask, operands[2], operands[3]));
+  break;
+
+case NE:
+  /* Handle NE as !EQ.  */
+  emit_insn (gen_aarch64_cmeq (mask, operands[2], operands[3]));
+  emit_insn (gen_one_cmpl2 (mask, mask));
+  break;
+
+case EQ:
+  emit_insn (gen_aarch64_cmeq (mask, operands[2], operands[3]));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  DONE;
+})
+
+(define_expand "vec_cmp"
+  [(set (match_operand: 0 "register_operand")
+   (match_operator 1 "comparison_operator"
+   [(match_operand:VDQF 2 "register_operand")
+(match_operand:VDQF 3 "nonmemory_operand")]))]
+  "TARGET_SIMD"
+{
+  int use_zero_form = 0;
+  enum rtx_code code = GET_CODE (operands[1]);
+  rtx tmp = gen_reg_rtx (mode);
+
+  rtx (*comparison) (rtx, rtx, rtx);
+
+  switch (code)
+{
+case LE:
+case LT:
+case GE:
+case GT:
+case EQ:
+  if (operands[3] == CONST0_RTX (mode))
+   {
+ use_zero_form = 1;
+ break;
+   }
+  /* Fall through.  */
+default:
+  if (!REG_P (operands[3]))
+   operands[3] = force_reg (mode, operands[3]);
+
+  break;
+}
+
+  switch (code)
+{
+case LT:
+  if (use_zero_form)
+   {
+ comparison = gen_aarch64_cmlt;
+ break;
+   }
+  /* Else, fall through.  */
+case UNGE:
+  std::swap (operands[2], operands[3]);
+  /* Fall through.  */
+case UNLE:
+case GT:
+  comparison = gen_aarch64_cmgt;
+  break;
+case LE:
+  if (use_zero_form)
+   {
+ comparison = gen_aarch64_cmle;
+ break;
+   }
+  /* Else, fall through.  */
+case UNGT:
+  std::swap (operands[2], operands[3]);
+  /* Fall through.  */
+case UNLT:
+case 

[PATCH AArch64][V3]Rewrite vcond patterns using vcond_mask/vec_cmp, also support missing vect_cond_mixed patterns

2016-08-01 Thread Bin Cheng
Hi,
This is the 3rd version patch implementing vcond patterns on AArch64.  It 
rewrites vcond patterns using newly introduced vcond_mask and vec_cmp patterns 
in previous patch.  It also adds missing vect_cond_mixed patterns for AArch64.  
Note we rely on RTL combiner to optimize "mask = !x; res = mask ? T : F" into 
"res = x ? F : T", thus an additional inversion instruction can be saved.  The 
patch includes a test for this purpose.

Bootstrapped and tested along with the previous one on AArch64.  Is it OK?

Thanks,
bin

2016-07-28  Alan Lawrence  
Renlin Li  
Bin Cheng  

* config/aarch64/iterators.md (V_cmp_mixed, v_cmp_mixed): New.
* config/aarch64/aarch64-simd.md (v2di3): Call
gen_vcondv2div2di instead of gen_aarch64_vcond_internalv2div2di.
(aarch64_vcond_internal): Delete pattern.
(aarch64_vcond_internal): Ditto.
(vcond): Ditto.
(vcond): Re-implement using vec_cmp and vcond_mask.
(vcondu): Ditto.
(vcond): New pattern.
(vcondu): New pattern.
(aarch64_cmtst): Revise comment using aarch64_vcond instead
of aarch64_vcond_internal.

gcc/testsuite/ChangeLog
2016-07-28  Bin Cheng  

* gcc.target/aarch64/simd/vcond-ne.c: New test.
diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index b31afb1..29d835e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1087,7 +1087,7 @@
 }
 
   cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
-  emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
+  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
   operands[2], cmp_fmt, operands[1], operands[2]));
   DONE;
 })
@@ -2528,314 +2528,6 @@
   DONE;
 })
 
-(define_expand "aarch64_vcond_internal"
-  [(set (match_operand:VSDQ_I_DI 0 "register_operand")
-   (if_then_else:VSDQ_I_DI
- (match_operator 3 "comparison_operator"
-   [(match_operand:VSDQ_I_DI 4 "register_operand")
-(match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
- (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
- (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
-  "TARGET_SIMD"
-{
-  rtx op1 = operands[1];
-  rtx op2 = operands[2];
-  rtx mask = gen_reg_rtx (mode);
-  enum rtx_code code = GET_CODE (operands[3]);
-
-  /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
- and desirable for other comparisons if it results in FOO ? -1 : 0
- (this allows direct use of the comparison result without a bsl).  */
-  if (code == NE
-  || (code != EQ
- && op1 == CONST0_RTX (mode)
- && op2 == CONSTM1_RTX (mode)))
-{
-  op1 = operands[2];
-  op2 = operands[1];
-  switch (code)
-{
-case LE: code = GT; break;
-case LT: code = GE; break;
-case GE: code = LT; break;
-case GT: code = LE; break;
-/* No case EQ.  */
-case NE: code = EQ; break;
-case LTU: code = GEU; break;
-case LEU: code = GTU; break;
-case GTU: code = LEU; break;
-case GEU: code = LTU; break;
-default: gcc_unreachable ();
-}
-}
-
-  /* Make sure we can handle the last operand.  */
-  switch (code)
-{
-case NE:
-  /* Normalized to EQ above.  */
-  gcc_unreachable ();
-
-case LE:
-case LT:
-case GE:
-case GT:
-case EQ:
-  /* These instructions have a form taking an immediate zero.  */
-  if (operands[5] == CONST0_RTX (mode))
-break;
-  /* Fall through, as may need to load into register.  */
-default:
-  if (!REG_P (operands[5]))
-operands[5] = force_reg (mode, operands[5]);
-  break;
-}
-
-  switch (code)
-{
-case LT:
-  emit_insn (gen_aarch64_cmlt (mask, operands[4], operands[5]));
-  break;
-
-case GE:
-  emit_insn (gen_aarch64_cmge (mask, operands[4], operands[5]));
-  break;
-
-case LE:
-  emit_insn (gen_aarch64_cmle (mask, operands[4], operands[5]));
-  break;
-
-case GT:
-  emit_insn (gen_aarch64_cmgt (mask, operands[4], operands[5]));
-  break;
-
-case LTU:
-  emit_insn (gen_aarch64_cmgtu (mask, operands[5], operands[4]));
-  break;
-
-case GEU:
-  emit_insn (gen_aarch64_cmgeu (mask, operands[4], operands[5]));
-  break;
-
-case LEU:
-  emit_insn (gen_aarch64_cmgeu (mask, operands[5], operands[4]));
-  break;
-
-case GTU:
-  emit_insn (gen_aarch64_cmgtu (mask, operands[4], operands[5]));
-  break;
-
-/* NE has been normalized to EQ above.  */
-case EQ:
-  emit_insn (gen_aarch64_cmeq (mask, operands[4], operands[5]));
-  break;
-
-default:
-  gcc_unreachable ();
-}
-
-/* If we have (a = (b CMP c) ? -1 : 0);
-   Then we can simply move the generated mask.  */
-
-if (op1 == CONSTM1_RTX (mode)
-   && op2 == CONST0_RTX (mode

Re: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9 branch

2016-08-01 Thread Martin Liška
On 08/01/2016 02:22 PM, Nathan Sidwell wrote:
> As I just wrote, this patch needs work.  The general points are:

Thanks for the comments.

> 1) exposing integers 0-3 to the user as switch values.  Don't do that, give 
> them names.  In this  case a comma separated list of orthogonal names seems 
> appropriate.  But see below.
> 2) Poor documentation.  How might the user might choose an appropriate 
> setting? (what happens if compilations need to use different settings).  What 
> are 'edge' and 'value' counters.   Why might one want different settings for 
> them?

Sure, fully agree that it currently doesn't make sense to distinguish between 
individual types of profiles (edge, value).

> 
> I think this is jumping too deep into a solution with insufficient evidence. 
> Particularly, why two edges and values can be set differently.  It doesn't 
> lend itself to extending to TLS, if that proves to be a good solution 
> (trading memory for time).  Something along the lines of 
> '-fprofile-update={single,atomic,threaded},[edge,value]' might be better.  
> I.e. set the scheme as part of the option value, followed by  a list of the 
> things it applies to.  (and as I hope I've  implied, it'd be good not to have 
> that separate list until proven otherwise).

Yes.

> 
> 
> On 07/28/16 08:32, marxin wrote:
>> libgcc/ChangeLog:
>>
>> 2016-07-28  Martin Liska  
> 
> Shouldn't the original authors be named here too? (applies to the other 
> patches too).

Adding a cherry-pick entry to the original commit of the functionality.

> 
> 
>> --- a/gcc/gcov-io.h
>> +++ b/gcc/gcov-io.h
>> @@ -169,6 +169,19 @@ see the files COPYING3 and COPYING.RUNTIME 
>> respectively.  If not, see
> 
>> +
>> +#if LONG_LONG_TYPE_SIZE > 32
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
>> +#else
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
>> +#endif
> ...
>>  #if IN_LIBGCOV
>> +
>> +#if LONG_LONG_TYPE_SIZE > 32
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_8
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_8
>> +#else
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD_FN __atomic_fetch_add_4
>> +#define GCOV_TYPE_ATOMIC_FETCH_ADD BUILT_IN_ATOMIC_FETCH_ADD_4
>> +#endif
> 
> 
> BTW, these two blocks look stunningly similar.

Fixed.

I also added a small hunk that describes the problem of an application having
not-joined (or detached) threads.
Can you please take a look at the documentation change?  Maybe it needs some
rewording.

Martin

> 
> nathan

>From 7e19e28f3d6e227bb67fb770575831d637abe3aa Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 28 Jul 2016 14:32:47 +0200
Subject: [PATCH 1/4] Cherry-pick fprofile-generate-atomic from google/gcc-4_9
 branch

libgcc/ChangeLog:

2016-07-28  Martin Liska  

	Cherry picked (and modified) from google-4_7 branch
	2012-12-26  Rong Xu  

	* Makefile.in: Add functions to LIBGCOV_PROFILER.
	* libgcov-profiler.c (__gcov_one_value_profiler_body_atomic):
	New function.
	(__gcov_one_value_profiler_atomic): Likewise.
	(__gcov_indirect_call_profiler_v2): Fix GNU coding style.
	(__gcov_indirect_call_profiler_v2_atomic): New function.
	* libgcov.h: Declare __gcov_indirect_call_profiler_v2_atomic and
	__gcov_one_value_profiler_body_atomic.

gcc/ChangeLog:

2016-07-28  Martin Liska  

	Cherry picked (and modified) from google-4_7 branch
	2012-12-26  Rong Xu  

	* common.opt (fprofile-update): Add new flag.
	* gcov-io.h: Declare GCOV_TYPE_ATOMIC_FETCH_ADD and
	GCOV_TYPE_ATOMIC_FETCH_ADD_FN.
	* tree-profile.c (gimple_init_edge_profiler): Generate
	also atomic profiler update.
	(gimple_gen_edge_profiler): Likewise.
	* doc/invoke.texi: Document -fprofile-update.

gcc/testsuite/ChangeLog:

2016-07-28  Martin Liska  

	* g++.dg/gcov/gcov-threads-1.C: New test.
---
 gcc/common.opt | 13 +++
 gcc/coretypes.h|  6 +++
 gcc/doc/invoke.texi| 12 ++
 gcc/gcov-io.h  |  8 
 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C | 46 ++
 gcc/tree-profile.c | 61 --
 libgcc/Makefile.in |  4 +-
 libgcc/libgcov-profiler.c  | 42 +++-
 libgcc/libgcov.h   |  2 +
 9 files changed, 173 insertions(+), 21 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/gcov/gcov-threads-1.C

diff --git a/gcc/common.opt b/gcc/common.opt
index 8a292ed..44adae8 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1916,6 +1916,19 @@ fprofile-correction
 Common Report Var(flag_profile_correction)
 Enable correction of flow inconsistent profile data input.
 
+fprofile-update=
+Common Joined RejectNegative Enum(profile_update) Var(flag_profile_update) Init(PROFILE_UPDATE_SINGLE)
+-fprofile-update=[single|atomic]	Set the pr

Re: PATCH: PR71818: Don't advance IVs with a variable step

2016-08-01 Thread Richard Biener
On Mon, Aug 1, 2016 at 11:24 AM, Alan Hayward  wrote:
> In the given test case, the loop is split into vectorised and non
> vectorised
> versions due to peeling. At the end of the loop the IVs are incremented to
> their latest value. This is achieved by taking the base of the loop (g_21)
> and
> adding the iterations (240) multiplied by the step (_6):
>
>   :
>   # _106 = PHI <_6(12)>
>   _84 = _106 * 240;
>   _85 = (char) _84;
>   tmp.19_83 = g_21(D) + _85;
>
> However, the step (_6) varies within the loop and therefore the
> calculation is
> incorrect.
>
> This patch fixes the error by disallowing vectorization if the step of the
> IV
> is not an invariant within the loop.
>
> Also added debug comment for when the optimisation fails due to chrec.
>
> Tested on x86.
>
> Ok to commit?

Ok.

To fix this we'd have to vectorize the induction variable itself, correct?

Richard.

> Alan.
>
>
> gcc/
> PR tree-optimization/71818
> * tree-vect-loop-manip.c (vect_can_advance_ivs_p): Don't advance IVs
> with non invariant evolutions
>
> testsuite/
> PR tree-optimization/71818
> * gcc.dg/vect/pr71818.c: New
>
>
>
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/pr71818.c
> b/gcc/testsuite/gcc.dg/vect/pr71818.c
> new file mode 100644
> index
> ..2946551f8bb8c552565c2e79b16359ca3
> 9d13ed6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/pr71818.c
> @@ -0,0 +1,16 @@
> +/* { dg-do compile } */
> +
> +char a;
> +short b;
> +int c, d;
> +void fn1() {
> +  char e = 75, g;
> +  unsigned char *f = &e;
> +  a = 21;
> +  for (; a <= 48; a++) {
> +for (; e <= 6;)
> +  ;
> +g -= e -= b || g <= c;
> +  }
> +  d = *f;
> +}
> diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
> index
> 819abcda81a25c4ed25749c29b357110fca647d2..4d68f7143e1117085aae8d2168ed1425e
> 7e6aa08 100644
> --- a/gcc/tree-vect-loop-manip.c
> +++ b/gcc/tree-vect-loop-manip.c
> @@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "cfgloop.h"
>  #include "tree-scalar-evolution.h"
>  #include "tree-vectorizer.h"
> +#include "tree-ssa-loop-ivopts.h"
>
>  /*
>Simple Loop Peeling Utilities
> @@ -1592,10 +1593,26 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
>  }
>
>/* FORNOW: We do not transform initial conditions of IVs
> +which evolution functions are not invariants in the loop.  */
> +
> +  if (!expr_invariant_in_loop_p (loop, evolution_part))
> +   {
> + if (dump_enabled_p ())
> +   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +"evolution not invariant in loop.\n");
> + return false;
> +   }
> +
> +  /* FORNOW: We do not transform initial conditions of IVs
>  which evolution functions are a polynomial of degree >= 2.  */
>
>if (tree_is_chrec (evolution_part))
> -   return false;
> +   {
> + if (dump_enabled_p ())
> +   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> +"evolution is chrec.\n");
> + return false;
> +   }
>  }
>
>return true;
>
>
>
>
>


Re: [PATCH][AArch64] Optimize prolog/epilog

2016-08-01 Thread Richard Earnshaw (lists)
On 29/07/16 12:49, Wilco Dijkstra wrote:
> This patch optimizes the prolog and epilog code to reduce the number of
> instructions and avoid multiple writes to SP.  The key idea is that epilogs
> are almost exact reverses of prologs, and thus all the decisions only need
> to be taken once.  The frame layout is decided in aarch64_layout_frame()
> and decisions recorded in the new aarch64_frame fields initial_adjust,
> callee_adjust, callee_offset and final_adjust.
> 
> A generic frame setup consists of 5 basic steps:
> 
> 1. sub sp, sp, initial_adjust
> 2. stp reg1, reg2, [sp, -callee_adjust]!  (push if callee_adjust != 0)
> 3. add fp, sp, callee_offset  (if frame_pointer_needed)
> 4. stp reg3, reg4, [sp, callee_offset + N*16] (store remaining callee-saves)
> 5. sub sp, sp, final_adjust
> 
> The epilog reverses this, and may omit step 3 if alloca wasn't used.
> 
> Bootstrap, GCC & gdb regression OK.
> 
> ChangeLog:
> 2016-07-29  Wilco Dijkstra  
> 
> gcc/
>   * config/aarch64/aarch64.h (aarch64_frame):
>   Remove padding0 and hardfp_offset.  Add locals_offset,
>   initial_adjust, callee_adjust, callee_offset and final_adjust.
>   * config/aarch64/aarch64.c (aarch64_layout_frame):
>   Remove unused padding0 and hardfp_offset initializations.
>   Choose frame layout and set frame variables accordingly.
>   Use INVALID_REGNUM instead of FIRST_PSEUDO_REGISTER.
>   (aarch64_push_regs): Use INVALID_REGNUM, not FIRST_PSEUDO_REGISTER.
>   (aarch64_pop_regs): Likewise.
>   (aarch64_expand_prologue): Remove all decision code, just emit
>   prolog according to frame variables.
>   (aarch64_expand_epilogue): Remove all decision code, just emit
>   epilog according to frame variables.
>   (aarch64_initial_elimination_offset): Use offset to local/arg area.
> 
> testsuite/
>   * gcc.target/aarch64/test_frame_10.c: Fix test to check for a
>   single stack adjustment, no writeback.  
>   * gcc.target/aarch64/test_frame_12.c: Likewise.
>   * gcc.target/aarch64/test_frame_13.c: Likewise.
>   * gcc.target/aarch64/test_frame_15.c: Likewise.
>   * gcc.target/aarch64/test_frame_6.c: Likewise.
>   * gcc.target/aarch64/test_frame_7.c: Likewise.
>   * gcc.target/aarch64/test_frame_8.c: Likewise.
>   * gcc.target/aarch64/test_frame_16.c: New test.


Two minor nits, but otherwise OK.

R.

> ---
> 
> diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
> index 
> 58959229c004e58405076b0e691b6b5634720140..455869f074dd72a38b6f8e1b199d83aa75b408b1
>  100644
> --- a/gcc/config/aarch64/aarch64.h
> +++ b/gcc/config/aarch64/aarch64.h
> @@ -550,11 +550,14 @@ struct GTY (()) aarch64_frame
>   STACK_BOUNDARY.  */
>HOST_WIDE_INT saved_varargs_size;
>  
> +  /* The size of the saved callee-save int/FP registers.  */
> +
>HOST_WIDE_INT saved_regs_size;
> -  /* Padding if needed after the all the callee save registers have
> - been saved.  */
> -  HOST_WIDE_INT padding0;
> -  HOST_WIDE_INT hardfp_offset;   /* HARD_FRAME_POINTER_REGNUM */
> +
> +  /* Offset from the base of the frame (incomming SP) to the
> + top of the locals area.  This value is always a multiple of
> + STACK_BOUNDARY.  */
> +  HOST_WIDE_INT locals_offset;
>  
>/* Offset from the base of the frame (incomming SP) to the
>   hard_frame_pointer.  This value is always a multiple of
> @@ -564,12 +567,25 @@ struct GTY (()) aarch64_frame
>/* The size of the frame.  This value is the offset from base of the
> * frame (incomming SP) to the stack_pointer.  This value is always
> * a multiple of STACK_BOUNDARY.  */
> +  HOST_WIDE_INT frame_size;
> +
> +  /* The size of the initial stack adjustment before saving callee-saves.  */
> +  HOST_WIDE_INT initial_adjust;
> +
> +  /* The writeback value when pushing callee-save registers.
> + It is zero when no push is used.  */
> +  HOST_WIDE_INT callee_adjust;
> +
> +  /* The offset from SP to the callee-save registers after initial_adjust.
> + It may be non-zero if no push is used (ie. callee_adjust == 0).  */
> +  HOST_WIDE_INT callee_offset;
> +
> +  /* The size of the stack adjustment after saving callee-saves.  */
> +  HOST_WIDE_INT final_adjust;
>  
>unsigned wb_candidate1;
>unsigned wb_candidate2;
>  
> -  HOST_WIDE_INT frame_size;
> -
>bool laid_out;
>  };
>  
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 2023cb45fc8e87d94b48ae894bea78235056d4a4..7179dac29736409e1679e4bc932b95ba4c9aa1a5
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -2728,8 +2728,8 @@ aarch64_layout_frame (void)
>  #define SLOT_NOT_REQUIRED (-2)
>  #define SLOT_REQUIRED (-1)
>  
> -  cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
> -  cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
> +  cfun->machine->frame.wb_candidate1 = INVALID_REGNUM;
> +  cfun->machine->frame.wb_can

[PATCH] Remove ignored "xfail" from dg-do compile directives

2016-08-01 Thread Jonathan Wakely

As documented at https://gcc.gnu.org/onlinedocs/gccint/Directives.html
an "xfail" selector in a { dg-do what } directive is ignored unless what is "run".

This removes five such ignored uses of xfail in the dg-do directive.
These tests are already expected to XFAIL because of the
dg-excess-errors directives (with optional target selector for the
uclibc ones).

Removing the xfail from the dg-do directive allows { target c++11 }
there instead (if appropriate).

* testsuite/23_containers/unordered_map/requirements/53339.cc: Remove
ignored "xfail" from { dg-do compile { xfail selector } } directive.
* testsuite/23_containers/unordered_multimap/requirements/53339.cc:
Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c++11.cc: Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c++98.cc: Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c.cc: Likewise.

Tested x86_64-linux, powerpc64-linux, committed to trunk.

commit 065333c062fffbb5d349f7d11a30ab605f180706
Author: Jonathan Wakely 
Date:   Mon Aug 1 14:53:43 2016 +0100

Remove ignored "xfail" from dg-do compile directives

* testsuite/23_containers/unordered_map/requirements/53339.cc: Remove
ignored "xfail" from { dg-do compile { xfail selector } } directive.
* testsuite/23_containers/unordered_multimap/requirements/53339.cc:
Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c++11.cc: Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c++98.cc: Likewise.
* testsuite/26_numerics/headers/cmath/
c99_classification_macros_c.cc: Likewise.

diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_map/requirements/53339.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_map/requirements/53339.cc
index cef863b..009cf5e 100644
--- a/libstdc++-v3/testsuite/23_containers/unordered_map/requirements/53339.cc
+++ b/libstdc++-v3/testsuite/23_containers/unordered_map/requirements/53339.cc
@@ -1,7 +1,5 @@
-// XFAIL because of PR libstdc++/55043 fix
-// { dg-do compile { xfail *-*-* } }
-// { dg-excess-errors "" }
-// { dg-options "-std=gnu++11" }
+// { dg-do compile { target c++11 } }
+// { dg-excess-errors "XFAIL because of PR libstdc++/55043 fix" }
 
 // Copyright (C) 2012-2016 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/23_containers/unordered_multimap/requirements/53339.cc 
b/libstdc++-v3/testsuite/23_containers/unordered_multimap/requirements/53339.cc
index 2bf11a2..b9c4898 100644
--- 
a/libstdc++-v3/testsuite/23_containers/unordered_multimap/requirements/53339.cc
+++ 
b/libstdc++-v3/testsuite/23_containers/unordered_multimap/requirements/53339.cc
@@ -1,7 +1,5 @@
-// XFAIL because of PR libstdc++/55043 fix
-// { dg-do compile { xfail *-*-* } }
-// { dg-excess-errors "" }
-// { dg-options "-std=gnu++11" }
+// { dg-do compile { target c++11 } }
+// { dg-excess-errors "XFAIL because of PR libstdc++/55043 fix" }
 
 // Copyright (C) 2012-2016 Free Software Foundation, Inc.
 //
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++11.cc
 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++11.cc
index 1638667..8db9fdb 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++11.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++11.cc
@@ -15,8 +15,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-options "-std=gnu++11" }
-// { dg-do compile { xfail uclibc } }
+// { dg-do compile { target c++11 } }
 // { dg-excess-errors "" { target uclibc } }
 
 #include 
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++98.cc
 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++98.cc
index 4c4cc4e..6480f93 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++98.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c++98.cc
@@ -18,7 +18,7 @@
 // .
 
 // { dg-options "-std=gnu++98" }
-// { dg-do compile { xfail uclibc } }
+// { dg-do compile }
 // { dg-excess-errors "" { target uclibc } }
 
 #include 
diff --git 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c.cc
 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c.cc
index cbced7d..8a42db4 100644
--- 
a/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c.cc
+++ 
b/libstdc++-v3/testsuite/26_numerics/headers/cmath/c99_classification_macros_c.cc
@@ -17,7 +17,7 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
-// { dg-do compile { xfail uclibc } }
+// { dg-do compile

Re: [PATCH] accept flexible arrays in struct in unions (c++/71912 - [6/7 regression])

2016-08-01 Thread Jason Merrill
On Sun, Jul 31, 2016 at 4:27 PM, Martin Sebor  wrote:
> On 07/31/2016 10:28 AM, Jason Merrill wrote:
>> On Fri, Jul 29, 2016 at 7:22 PM, Martin Sebor  wrote:
>>> On 07/26/2016 12:53 PM, Jason Merrill wrote:
 On 07/23/2016 01:18 PM, Martin Sebor wrote:
>
> +  /* A pair of the first non-static non-empty data members following
> + either the flexible array member, if found, or the zero-length
> + array member otherwise.  AFTER[1] refers to the first such data
> + member of a union that the struct containing the flexible array
> + member or zero-length array is a member, or NULL when no such
> + union exists.  AFTER[0] refers to the first such data member
> + that is not a member of such a union.  */

 This is pretty hard to follow, could you add an example?  Why do we want
 to track these separately rather than look at DECL_CONTEXT at diagnostic
 time?
>>>
>>> Sure, I've added an example.
>>>
>>> Whether or not a given flexible array member is valid depends not
>>> only on the context in which it's defined (its enclosing class) but
>>> also on the members of other classes whose objects may be defined
>>> in the same or other contexts (e.g., enclosing structs).  I don't
>>> know of a way to reach those other members from the context of
>>> the ARRAY.
>>
>> Not from the context of the array, but when we see a field following the
>> flexible array (or aggregate ending in a flexible array), can't we look
>> at the DECL_CONTEXT of that field and see whether it's a union or not?
>
> I don't think that would work in cases like this:
>
>   struct S1 {
> struct S2 { int i, a[]; } s2;
> union U { int x; } u;
>   };
>
> that need to be treated differently from this one:
>
>   union U1 {
> struct S { int i, a[]; } s;
> union U2 { int x; } u2;
>   };

Ah, I'm thinking of the following field as u/u2 rather than x.  Why
does it improve clarity to look inside U/U2 for a following field?

>>> + For example, in the following, the flexible array member
>>> + S::U::X::a overlaps S::U::Y::i and so AFTER[1] is set to refer to
>>> + the latter.  This potential problem is independent of union U's
>>> + membership in struct S.  In addition, in the definition of struct
>>> + S, S::U::x::a is followed by S::z, and so AFTER[0] is set to refer
>>> + to the latter.  The two problems result in two diagnostics, the
>>> + first one being a pedantic warning and the second a hard error.
>>> +
>>> +   struct S {
>>> + union U {
>>> +   struct X { int i, a[]; } x;
>>> +   struct Y { long i, a[]; } y;
>>> + } u;
>>> +int z;
>>> +   };
>>
>>
>> Hmm, I'm not convinced that the first problem is really a problem.  Only
>> one of the union members is active at any time, so overlapping with
>> another union member seems irrelevant.
>>
>> I also can't find the language in the C standard that prohibits nesting
>> a struct ending with a flexible array in another struct or union, do you
>> have a citation?
>
> 6.7.2.1, p3:
>
>   A structure or union shall not contain a member with incomplete
>   or function type [...], except that the last member of a structure
>   with more than one named member may have incomplete array type; such
>   a structure (and any union containing, possibly recursively, a member
>   that is such a structure) shall not be a member of a structure or
>   an element of an array.

Ah, thanks.  I note that this says "structure or union" at the
beginning of the paragraph but not at the end, which suggests strongly
to me that such a structure can be a member of a union.

> +  /* The type in which an anonymous struct or union containing ARRAY
> + is defined or null if no such anonymous struct or union exists.  */
> +  tree anonctx;

 It seems clearer to find this at diagnostic time by following
 TYPE_CONTEXT.
>>>
>>> I tried this approach and while it's doable (with recursion) I'm
>>> not happy with the results.  The diagnostics point to places that
>>> I think are unclear.  For example, given:
>>>
>>>   struct A { int i, a[]; };
>>>   struct B { long j, b[]; };
>>>   struct D: A, B { };
>>
>> I don't see any anonymous aggregates in this example, so how does
>> anonctx come into it?
>
> If there is no anonctx then diagnose_flexarrays will have to figure
> out whether the array is a member of an anonymous struct and if so,
> find the struct of which it's effectively a member, and otherwise
> use the array's immediate context.  The above was an example of
> the result of a simple implementation of your suggestion.

And I don't understand why it would change the output on this testcase.

> As I said, the code could probably be tweaked to produce the same result
> as it does now in both cases (anonymous and not), but at the cost
> of additional complexity and, IMO, to the detriment of clarity.
> What aspect of clarity do you find lacking in the cur

Re: PATCH: PR71818: Don't advance IVs with a variable step

2016-08-01 Thread Alan Hayward


On 01/08/2016 14:49, "Richard Biener"  wrote:

>On Mon, Aug 1, 2016 at 11:24 AM, Alan Hayward 
>wrote:
>> In the given test case, the loop is split into vectorised and non
>> vectorised
>> versions due to peeling. At the end of the loop the IVs are incremented
>>to
>> their latest value. This is achieved by taking the base of the loop
>>(g_21)
>> and
>> adding the iterations (240) multiplied by the step (_6):
>>
>>   :
>>   # _106 = PHI <_6(12)>
>>   _84 = _106 * 240;
>>   _85 = (char) _84;
>>   tmp.19_83 = g_21(D) + _85;
>>
>> However, the step (_6) varies within the loop and therefore the
>> calculation is
>> incorrect.
>>
>> This patch fixes the error by disallowing vectorization if the step of
>>the
>> IV
>> is not an invariant within the loop.
>>
>> Also added debug comment for when the optimisation fails due to chrec.
>>
>> Tested on x86.
>>
>> Ok to commit?
>
>Ok.
>
>To fix this we'd have to vectorize the induction variable itself, correct?


Yes, then extract final value with BIT_FIELD_REF (like
vectorise_live_operation).


Alan.




Re: [PATCH][expr.c] PR middle-end/71700: zero-extend sub-word value when widening constructor element

2016-08-01 Thread Kyrill Tkachov


On 11/07/16 18:55, Bernd Schmidt wrote:

On 07/11/2016 04:52 PM, Kyrill Tkachov wrote:

Based on that, I think that code block is a useful optimisation, we just
need
to take care with immediates.

What do you think?


Yeah, I think the patch is ok.



This patch (https://gcc.gnu.org/ml/gcc-patches/2016-07/msg00017.html) has been 
in trunk
for a month with no reported problems.

I'd like to backport it to the GCC 5 and 6 branches.
I've bootstrapped and tested it there on arm-none-linux-gnueabihf.

Ok?

Thanks,
Kyrill



Bernd





Create multiple directory through fixinc.in script.

2016-08-01 Thread mbilal

Hi,

This patch fixes the mkheaders directory creation.  It failed to create
multiple multilib directories in some cases.

e.g
For i686-pc-linux-gnu target config, I have following 'fixinc_list' for 
multilibs.


/system32;
/system64;/64
/sgxx-glibc;/sgxx-glibc
/sgxx-glibc;/64/sgxx-glibc
/sgxx-glibc;/x32/sgxx-glibc

Now, mkheaders fails to create the fixed include directory for the case of
 '/sgxx-glibc;/x32/sgxx-glibc'.
Here fixinc.sh tries to run 'mkdir /x32/sgxx-glibc' and
fails because the parent 'x32' directory doesn't exist.  (We defined
MULTILIB_EXCEPTIONS for the x32 directory.)



Following patch fixes the problem.

fixincludes/ChangeLog:
2016-08-01  Muhammad Bilal  

* fixinc.in: Use --parents option to make LIB directory.


Index: fixincludes/fixinc.in
===
--- fixincludes/fixinc.in(revision 238952)
+++ fixincludes/fixinc.in(working copy)
@@ -41,7 +41,7 @@

 # Make sure it exists.
 if [ ! -d $LIB ]; then
-  mkdir $LIB || {
+  mkdir -p $LIB || {
 echo fixincludes:  output dir '`'$LIB"' cannot be created"
 exit 1
   }


I think the 'mkdir -p' option is now portable, as I see that the
mkinstalldirs script also uses 'mkdir -p'.



OK to commit?


Thanks,
-Bilal
/system32;
/system64;/64
/sgxx-glibc;/sgxx-glibc
/sgxx-glibc;/64/sgxx-glibc
/sgxx-glibc;/x32/sgxx-glibc


C++ PATCH for c++/72766 (ICE with VLA)

2016-08-01 Thread Jason Merrill
cxx_eval_pointer_plus_expression was assuming that
array_type_nelts_top would be a constant expression, which it might
not be.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit b9f0335823eff3e6d6f562268cb36c05b41790ab
Author: Jason Merrill 
Date:   Mon Aug 1 10:28:50 2016 -0400

PR c++/72766 - ICE with VLA

* constexpr.c (cxx_eval_pointer_plus_expression): Check constancy
of nelts.
* cp-gimplify.c (cp_fully_fold): Only maybe_constant_value in
C++11 and up.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 8bda973..edade48 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -3581,6 +3581,10 @@ cxx_eval_pointer_plus_expression (const constexpr_ctx 
*ctx, tree t,
   tree type = TREE_TYPE (op00);
   t = fold_convert_loc (loc, ssizetype, TREE_OPERAND (op00, 1));
   tree nelts = array_type_nelts_top (TREE_TYPE (TREE_OPERAND (op00, 0)));
+  nelts = cxx_eval_constant_expression (ctx, nelts, false, non_constant_p,
+   overflow_p);
+  if (*non_constant_p)
+   return NULL_TREE;
   /* Don't fold an out-of-bound access.  */
   if (!tree_int_cst_le (t, nelts))
return NULL_TREE;
diff --git a/gcc/cp/cp-gimplify.c b/gcc/cp/cp-gimplify.c
index 59953a6..e28c9df 100644
--- a/gcc/cp/cp-gimplify.c
+++ b/gcc/cp/cp-gimplify.c
@@ -1967,7 +1967,8 @@ cp_fully_fold (tree x)
 return x;
   /* FIXME cp_fold ought to be a superset of maybe_constant_value so we don't
  have to call both.  */
-  x = maybe_constant_value (x);
+  if (cxx_dialect >= cxx11)
+x = maybe_constant_value (x);
   return cp_fold (x);
 }
 
diff --git a/gcc/testsuite/g++.dg/ext/vla16.C b/gcc/testsuite/g++.dg/ext/vla16.C
new file mode 100644
index 000..c3e6ea1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/vla16.C
@@ -0,0 +1,8 @@
+// PR c++/72766
+// { dg-options "-Wno-vla" }
+
+long fn1() {
+  const int a = fn1();
+  int b[a];
+  int c = *(&b[0] + sizeof(0));
+}


Use verify_oacc_routine_clauses for C/C++

2016-08-01 Thread Thomas Schwinge
Hi!

Some checking of OpenACC clauses currently done in the front ends should
be moved later, and be unified.  (Also, I suppose, for supporting of the
device_type clause, such checking actually *must* be moved later, into
the oaccdevlow pass, or similar.)  Here is a first preparatory patch.  OK
for trunk?

commit e02a9b65c505b404f8d985b0ec6ccb99d73515d3
Author: Thomas Schwinge 
Date:   Wed Jul 27 15:54:38 2016 +0200

Use verify_oacc_routine_clauses for C/C++

gcc/
* omp-low.c (build_oacc_routine_dims): Move some of its processing
into...
(verify_oacc_routine_clauses): ... this new function.
* omp-low.h (verify_oacc_routine_clauses): New prototype.
gcc/c/
* c-parser.c (c_parser_oacc_routine): Normalize order of clauses.
(c_finish_oacc_routine): Call verify_oacc_routine_clauses.
gcc/cp/
* parser.c (cp_parser_oacc_routine)
(cp_parser_late_parsing_oacc_routine): Normalize order of clauses.
(cp_finalize_oacc_routine): Call verify_oacc_routine_clauses.
gcc/testsuite/
* c-c++-common/goacc/routine-2.c: Update, and move some test
into...
* c-c++-common/goacc/routine-level-of-parallelism-1.c: ... this
new file.
---
 gcc/c/c-parser.c   |   8 +
 gcc/cp/parser.c|   9 +
 gcc/omp-low.c  |  67 +-
 gcc/omp-low.h  |   1 +
 gcc/testsuite/c-c++-common/goacc/routine-2.c   |  20 +-
 .../goacc/routine-level-of-parallelism-1.c | 265 +
 6 files changed, 341 insertions(+), 29 deletions(-)

diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index 795842f..24f60cf 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -14072,6 +14072,9 @@ c_parser_oacc_routine (c_parser *parser, enum 
pragma_context context)
   data.clauses
= c_parser_oacc_all_clauses (parser, OACC_ROUTINE_CLAUSE_MASK,
 "#pragma acc routine");
+  /* The clauses are in reverse order; fix that to make later diagnostic
+emission easier.  */
+  data.clauses = nreverse (data.clauses);
 
   if (TREE_CODE (decl) != FUNCTION_DECL)
{
@@ -14086,6 +14089,9 @@ c_parser_oacc_routine (c_parser *parser, enum 
pragma_context context)
   data.clauses
= c_parser_oacc_all_clauses (parser, OACC_ROUTINE_CLAUSE_MASK,
 "#pragma acc routine");
+  /* The clauses are in reverse order; fix that to make later diagnostic
+emission easier.  */
+  data.clauses = nreverse (data.clauses);
 
   /* Emit a helpful diagnostic if there's another pragma following this
 one.  Also don't allow a static assertion declaration, as in the
@@ -14149,6 +14155,8 @@ c_finish_oacc_routine (struct oacc_routine_data *data, 
tree fndecl,
   return;
 }
 
+  verify_oacc_routine_clauses (&data->clauses, data->loc);
+
   if (get_oacc_fn_attrib (fndecl))
 {
   error_at (data->loc,
diff --git gcc/cp/parser.c gcc/cp/parser.c
index 1008725..6197fc9 100644
--- gcc/cp/parser.c
+++ gcc/cp/parser.c
@@ -36614,6 +36614,9 @@ cp_parser_oacc_routine (cp_parser *parser, cp_token 
*pragma_tok,
= cp_parser_oacc_all_clauses (parser, OACC_ROUTINE_CLAUSE_MASK,
  "#pragma acc routine",
  cp_lexer_peek_token (parser->lexer));
+  /* The clauses are in reverse order; fix that to make later diagnostic
+emission easier.  */
+  data.clauses = nreverse (data.clauses);
 
   if (decl && is_overloaded_fn (decl)
  && (TREE_CODE (decl) != FUNCTION_DECL
@@ -36710,6 +36713,9 @@ cp_parser_late_parsing_oacc_routine (cp_parser *parser, 
tree attrs)
   parser->oacc_routine->clauses
 = cp_parser_oacc_all_clauses (parser, OACC_ROUTINE_CLAUSE_MASK,
  "#pragma acc routine", pragma_tok);
+  /* The clauses are in reverse order; fix that to make later diagnostic
+ emission easier.  */
+  parser->oacc_routine->clauses = nreverse (parser->oacc_routine->clauses);
   cp_parser_pop_lexer (parser);
   /* Later, cp_finalize_oacc_routine will process the clauses, and then set
  fndecl_seen.  */
@@ -36744,6 +36750,9 @@ cp_finalize_oacc_routine (cp_parser *parser, tree 
fndecl, bool is_defn)
  return;
}
 
+  verify_oacc_routine_clauses (&parser->oacc_routine->clauses,
+  parser->oacc_routine->loc);
+
   if (get_oacc_fn_attrib (fndecl))
{
  error_at (parser->oacc_routine->loc,
diff --git gcc/omp-low.c gcc/omp-low.c
index c75452c..ec5704c 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -12599,9 +12599,62 @@ set_oacc_fn_attrib (tree fn, tree clauses, bool 
is_kernel, vec *args)
 }
 }
 
-/*  Process the routine's dimension clauess to generate an attribute
-value.  Issue diagnostics as 

Use verify_oacc_routine_clauses for Fortran (was: Use verify_oacc_routine_clauses for C/C++)

2016-08-01 Thread Thomas Schwinge
Hi!

On Mon, 01 Aug 2016 17:21:37 +0200, I wrote:
> Some checking of OpenACC clauses currently done in the front ends should
> be moved later, and be unified.  (Also, I suppose, for supporting of the
> device_type clause, such checking actually *must* be moved later, into
> the oaccdevlow pass, or similar.)  Here is a first preparatory patch.  OK
> for trunk?
> 
> commit e02a9b65c505b404f8d985b0ec6ccb99d73515d3
> Author: Thomas Schwinge 
> Date:   Wed Jul 27 15:54:38 2016 +0200
> 
> Use verify_oacc_routine_clauses for C/C++

Here is a Fortran patch.  This depends on other Fortran patches in flight
(such as Cesar's), and on PR72741 "Fortran OpenACC routine directive
doesn't properly handle clauses specifying the level of parallelism" being
resolved, and thereabouts, but I'm posting it anyway, in case anyone has
any review comments already.  I suppose, to begin with, the call of
gfc_oacc_routine_dims will move later into the Fortran front end
pipeline, to the point when function declarations' attributes are set, or
similar.  Also, as discussed already, the Fortran front end currently is
very "forgetful" in regards to OpenACC/OpenMP clauses' specific location
information, so we're not able at present to produce diagnostics with
precise location information.

commit 6480b966af617e61b35b59bb089dd009064743e5
Author: Thomas Schwinge 
Date:   Fri Jul 29 13:12:49 2016 +0200

Use verify_oacc_routine_clauses for Fortran

gcc/fortran/
* openmp.c: Include "trans-stmt.h".
(gfc_oacc_routine_dims): Move function...
* trans-openmp.c: ... here, and re-implement it.  Adjust all
users.
* trans-stmt.h (gfc_oacc_routine_dims): New prototype.
gcc/testsuite/
* gfortran.dg/goacc/pr72741.f90: Update.
* gfortran.dg/goacc/routine-level-of-parallelism-1.f: New file.
---
 gcc/fortran/openmp.c   |  47 +-
 gcc/fortran/trans-openmp.c |  67 +
 gcc/fortran/trans-stmt.h   |   7 +-
 gcc/testsuite/gfortran.dg/goacc/pr72741.f90|  10 +-
 .../goacc/routine-level-of-parallelism-1.f | 104 +
 5 files changed, 181 insertions(+), 54 deletions(-)

diff --git gcc/fortran/openmp.c gcc/fortran/openmp.c
index f7a907d..b0a10a8 100644
--- gcc/fortran/openmp.c
+++ gcc/fortran/openmp.c
@@ -27,6 +27,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "parse.h"
 #include "diagnostic.h"
 #include "gomp-constants.h"
+#include "trans-stmt.h"
 
 /* Match an end of OpenMP directive.  End of OpenMP directive is optional
whitespace, followed by '\n' or comment '!'.  */
@@ -1714,44 +1715,6 @@ gfc_match_oacc_cache (void)
   return MATCH_YES;
 }
 
-/* Determine the loop level for a routine.  Returns OACC_FUNCTION_NONE if
-   any error is detected.  */
-
-static oacc_function
-gfc_oacc_routine_dims (gfc_omp_clauses *clauses)
-{
-  int level = -1;
-  oacc_function ret = OACC_FUNCTION_SEQ;
-
-  if (clauses)
-{
-  unsigned mask = 0;
-
-  if (clauses->gang)
-   {
- level = GOMP_DIM_GANG, mask |= GOMP_DIM_MASK (level);
- ret = OACC_FUNCTION_GANG;
-   }
-  if (clauses->worker)
-   {
- level = GOMP_DIM_WORKER, mask |= GOMP_DIM_MASK (level);
- ret = OACC_FUNCTION_WORKER;
-   }
-  if (clauses->vector)
-   {
- level = GOMP_DIM_VECTOR, mask |= GOMP_DIM_MASK (level);
- ret = OACC_FUNCTION_VECTOR;
-   }
-  if (clauses->seq)
-   level = GOMP_DIM_MAX, mask |= GOMP_DIM_MASK (level);
-
-  if (mask != (mask & -mask))
-   ret = OACC_FUNCTION_NONE;
-}
-
-  return ret;
-}
-
 match
 gfc_match_oacc_routine (void)
 {
@@ -1828,13 +1791,7 @@ gfc_match_oacc_routine (void)
  != MATCH_YES))
 return MATCH_ERROR;
 
-  dims = gfc_oacc_routine_dims (c);
-  if (dims == OACC_FUNCTION_NONE)
-{
-  gfc_error ("Multiple loop axes specified for routine %C");
-  gfc_current_locus = old_loc;
-  return MATCH_ERROR;
-}
+  dims = gfc_oacc_routine_dims (c, old_loc);
 
   if (isym != NULL)
 /* There is nothing to do for intrinsic procedures.  */
diff --git gcc/fortran/trans-openmp.c gcc/fortran/trans-openmp.c
index 0d646ed..254732c 100644
--- gcc/fortran/trans-openmp.c
+++ gcc/fortran/trans-openmp.c
@@ -4570,3 +4570,70 @@ gfc_trans_omp_declare_simd (gfc_namespace *ns)
   DECL_ATTRIBUTES (fndecl) = c;
 }
 }
+
+/* Determine and verify the level of parallelism for an OpenACC routine.  */
+
+oacc_function
+gfc_oacc_routine_dims (gfc_omp_clauses *clauses, locus location)
+{
+  /* This is implemented in terms of OMP_CLAUSE trees, so that we can use the
+ generic functions for checking validity.  This has a little bit of
+ overhead, but as the number of clauses on OpenACC routine directives as
+ well as the number of OpenACC routine directives will both be rather
+ small, this is acceptable.  */
+  tree clauses_t = NULL_TREE;
+  /

Repeated use of the OpenACC routine directive

2016-08-01 Thread Thomas Schwinge
Hi!

We found that it's not correct that we currently unconditionally diagnose
an error for repeated use of the OpenACC routine directive on one
function/declaration.  (For reference, it is also permissible for an
"ordinary" function to have several declarations plus a definition, as
long as these are compatible.)  That is, the following shall be valid:

#pragma acc routine worker
void f(void)
{
}
#pragma acc routine (f) worker
#pragma acc routine worker
extern void f(void);

As it is not detailed in the specification, the semantics of repeated use
of OpenACC routine will be implementation-defined.  Within one
translation unit, we just remove the existing diagnostic, and declare it
user error if any two usages are not compatible, but we try to be
helpful, and produce a compile-time diagnostic.  For incompatible use
spanning over multiple translation units, we're not currently attempting
to produce meaningful diagnostics/semantics; this would be more difficult
(out of scope at present).

Depending on other patches posted (such as "Use
verify_oacc_routine_clauses for C/C++"), here is a patch to implement
this for C/C++ -- Fortran is blocked on the issues raised in the "Use
verify_oacc_routine_clauses for Fortran" patch submission.

One change is to attach an OpenACC routine directive's clauses to the
"omp declare target" attribute (in spirit similar to how they're attached
to OMP_*/GIMPLE_OMP_* codes).  (It will then also be possible to get rid
of the "oacc function" attribute and instead have these represented as
OpenACC gang/worker/vector/seq clauses attached to "omp declare target".)
While currently not used for OpenMP, it won't do any harm: continue using
NULL_TREE there, the "empty" clause list.  As discussed months ago, on
gomp-4_0-branch we're already using that for implementing the OpenACC
bind and nohost clauses.

Is this conceptually OK for trunk?

commit 91f0742f64f9f0671480023c9949a596691e8476
Author: Thomas Schwinge 
Date:   Fri Jul 29 17:47:47 2016 +0200

Repeated use of the OpenACC routine directive

gcc/
* omp-low.c (verify_oacc_routine_clauses): Change formal
parameters.  Add checking if already marked as an accelerator
routine.  Adjust all users.
gcc/c-family/
* c-common.c (c_common_attribute_table): Set -1 max_len for "omp
declare target".
gcc/c/
* c-parser.c (c_finish_oacc_routine): Rework checking if already
marked as an accelerator routine.
gcc/cp/
* parser.c (cp_finalize_oacc_routine): Rework checking if already
marked as an accelerator routine.
gcc/fortran/
* f95-lang.c (gfc_attribute_table): Set -1 max_len for "omp
declare target".
gcc/testsuite/
* c-c++-common/goacc/oaccdevlow-routine.c: Update.
* c-c++-common/goacc/routine-5.c: Likewise.
* c-c++-common/goacc/routine-level-of-parallelism-1.c: Likewise.
* c-c++-common/goacc/routine-level-of-parallelism-2.c: New file.
---
 gcc/c-family/c-common.c|   2 +-
 gcc/c/c-parser.c   |  42 ++--
 gcc/cp/parser.c|  47 +++--
 gcc/fortran/f95-lang.c |   2 +-
 gcc/fortran/trans-openmp.c |   4 +-
 gcc/omp-low.c  |  61 +-
 gcc/omp-low.h  |   2 +-
 .../c-c++-common/goacc/oaccdevlow-routine.c|   2 +-
 gcc/testsuite/c-c++-common/goacc/routine-5.c   |  46 +---
 .../goacc/routine-level-of-parallelism-1.c | 233 ++---
 .../goacc/routine-level-of-parallelism-2.c |  71 +++
 11 files changed, 395 insertions(+), 117 deletions(-)

diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
index 16e3965..63ed75a 100644
--- gcc/c-family/c-common.c
+++ gcc/c-family/c-common.c
@@ -822,7 +822,7 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_omp_declare_simd_attribute, false },
   { "simd",  0, 1, true,  false, false,
  handle_simd_attribute, false },
-  { "omp declare target", 0, 0, true, false, false,
+  { "omp declare target", 0, -1, true, false, false,
  handle_omp_declare_target_attribute, false },
   { "omp declare target link", 0, 0, true, false, false,
  handle_omp_declare_target_attribute, false },
diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index 24f60cf..8e67ed8 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -14155,33 +14155,37 @@ c_finish_oacc_routine (struct oacc_routine_data 
*data, tree fndecl,
   return;
 }
 
-  verify_oacc_routine_clauses (&data->clauses, data->loc);
-
-  if (get_oacc_fn_attrib (fndecl))
+  int compatible
+= verify_oacc_routine_clauses (fndecl, &data->clauses, data->loc,
+

[PATCH] Disable std::string and std::wstring extern templates for C++17

2016-08-01 Thread Jonathan Wakely

As I mentioned last week, the explicit instantiation definitions for
std::string and std::wstring are compiled as C++11, so do not
instantiate the new member functions added for C++17.

Rather than change what we instantiate and export from the library I'd
prefer to suppress the explicit instantiation declarations for C++17.
Once C++17 is stable and we know which new symbols are needed we can
decide whether to change how we compile the instantiations, and add
new exports to the linker script.

* include/bits/basic_string.tcc: Disable explicit instantiation
declarations for C++17.

Tested powerpc64-linux, committed to trunk.


commit 248b39f993ea5f1fcc9668e38b7683e58c0facb8
Author: Jonathan Wakely 
Date:   Mon Aug 1 16:28:09 2016 +0100

Disable std::string and std::wstring extern templates for C++17

* include/bits/basic_string.tcc: Disable explicit instantiation
declarations for C++17.

diff --git a/libstdc++-v3/include/bits/basic_string.tcc 
b/libstdc++-v3/include/bits/basic_string.tcc
index 2b6644d..0560b46 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1569,7 +1569,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   // Inhibit implicit instantiations for required instantiations,
   // which are defined via explicit instantiations elsewhere.
-#if _GLIBCXX_EXTERN_TEMPLATE > 0
+#if _GLIBCXX_EXTERN_TEMPLATE > 0 && __cplusplus <= 201402L
   extern template class basic_string;
   extern template
 basic_istream&


Re: Create multiple directory through fixinc.in script.

2016-08-01 Thread Jeff Law

On 08/01/2016 08:55 AM, mbilal wrote:

Hi,

This patch fixes the mkheaders directory creation. It failed to create
the multiple multilib directory on some cases.
e.g
For i686-pc-linux-gnu target config, I have following 'fixinc_list' for
multilibs.

/system32;
/system64;/64
/sgxx-glibc;/sgxx-glibc
/sgxx-glibc;/64/sgxx-glibc
/sgxx-glibc;/x32/sgxx-glibc

Now, mkheaders failed to create fixed include directory for the case of
 '/sgxx-glibc;/x32/sgxx-glibc'.
here fixinc.sh tries to make 'mkdir /x32/sgxx-glibc' and
failed because parent 'x32' directory doesn't exist. (we defined
MULTILIB_EXCEPTIONS for x32 directory)


Following patch fixes the problem.

fixincludes/ChangeLog:
2016-08-01  Muhammad Bilal  

* fixinc.in: Use --parents option to make LIB directory.
OK.  Please install if you have permissions.  If not, let me know and 
I'll do the commit.





Index: fixincludes/fixinc.in
===
--- fixincludes/fixinc.in(revision 238952)
+++ fixincludes/fixinc.in(working copy)
@@ -41,7 +41,7 @@

 # Make sure it exists.
 if [ ! -d $LIB ]; then
-  mkdir $LIB || {
+  mkdir -p $LIB || {
 echo fixincludes:  output dir '`'$LIB"' cannot be created"
 exit 1
   }


I think 'mkdir -p' option is now portable as I'm seeing that
mkinstalldirs script also uses 'mkdir -p' options.
Right.   I actually double-checked and -p is part of the current posix 
specification.


Thanks,
Jeff


Re: [PATCH][RFC] PR middle-end/22141 GIMPLE store widening pass

2016-08-01 Thread Jeff Law

On 08/01/2016 03:15 AM, Kyrill Tkachov wrote:


On 18/07/16 13:22, Richard Biener wrote:

On Fri, Jul 15, 2016 at 5:13 PM, Kyrill Tkachov
 wrote:

Hi all,

This is a GIMPLE pass to implement PR middle-end/22141. that is merge
narrow
stores of constants
into fewer wider stores.  A 2009 patch from Jakub [1] contains many
testcases but a simple motivating

[ ... ]
Given your work on 22141, you might want to pick up my work on 33562. 
They're different issues, but they touch on similar concepts.  IIRC the 
major thing left on my plate for 33562 was rewriting existing stores 
into smaller pieces when parts of the original store are found to be dead.


jeff



Re: [PATCH, vec-tails 07/10] Support loop epilogue combining

2016-08-01 Thread Jeff Law

On 08/01/2016 03:09 AM, Ilya Enkovich wrote:

2016-07-26 18:38 GMT+03:00 Ilya Enkovich :

2016-07-26 18:26 GMT+03:00 Jeff Law :

On 07/26/2016 03:57 AM, Ilya Enkovich wrote:



Ilya, what's the fundamental reason why we need to run
if-conversion again? Yes, I know you want to if-convert the
epilogue, but why?

What are the consequences of not doing if-conversion on the
epilogue? Presumably we miss a vectorization opportunity on the
tail.  But that may be a reasonable limitation to allow the
existing work to move forward while you go back and revamp things a
little.



If we have some control-flow in a loop then we have to if-convert it
for vectorizer. We need to preserve both versions: if-converted one
for vectorizer and the original one to be used if vectorization
fails.  For epilogues we have similar situation and need two
versions.  I do it by running if-conversion on a copy of original
loop. Note that it doesn't run full if-conversion pass. If-conversion
is called for epilogue loop only.


Right.  So what I think Richi wants you to try is to use the if-converted
loop to construct the if-converted epilogue.  It seems conceptually simple
and low cost -- the question is on the implementation side.  I have no clue
how painful that would be.


Probably another part of if-conversion may be re-used to build required
epilogue.  I'll have a look.


Hi,

Yuri will continue my work from this point.
Understood.  I've actually got some comments on #5 and Yuri is already on
the CC list for that draft message.


Jeff


Re: [patch] Some testsuite cleanup

2016-08-01 Thread Mike Stump
On Jul 31, 2016, at 1:30 PM, Jonathan Wakely  wrote:
> 
> -fno-show-column 

is a good general option.  If you guys want to add column number test cases, 
they can avoid it, and test down to the column.  Most people don't care, and 
most tests aren't interested in column testing anyway.  But, if you want to do a
sea change, you can add column numbers to every expected line and that way, 
they all will fail, if any of the numbers go wrong.  If someone wants to sign 
up for that, it is slightly better, but requires that someone do all the work.


Re: [patch] Some testsuite cleanup

2016-08-01 Thread Jonathan Wakely

On 01/08/16 09:23 -0700, Mike Stump wrote:

On Jul 31, 2016, at 1:30 PM, Jonathan Wakely  wrote:


-fno-show-column


is a good general option.  If you guys want to add column number test cases, 
they can avoid it, and test down to the column.  Most people don't care, and 
most tests aren't interested in column testing anyway.  But, if you want to do a
sea change, you can add column numbers to every expected line and that way, 
they all will fail, if any of the numbers go wrong.  If someone wants to sign 
up for that, it is slightly better, but requires that someone do all the work.


OK, thanks Mike.

I plan to make this change then (rather than adding it to individual
tests as and when we find one that needs it).

--- a/libstdc++-v3/scripts/testsuite_flags.in
+++ b/libstdc++-v3/scripts/testsuite_flags.in
@@ -56,7 +56,7 @@ case ${query} in
  echo ${CC}
  ;;
--cxxflags)
-  CXXFLAGS_default="-D_GLIBCXX_ASSERT -fmessage-length=0"
+  CXXFLAGS_default="-D_GLIBCXX_ASSERT -fmessage-length=0 -fno-show-column"
  CXXFLAGS_config="@SECTION_FLAGS@ @EXTRA_CXX_FLAGS@"
  echo ${CXXFLAGS_default} ${CXXFLAGS_config} 
  ;;


This adds it to the default flags used for the entire libstdc++ testsuite. 





Re: [PATCH 2/4] BRIG (HSAIL) frontend: The FE itself.

2016-08-01 Thread Martin Jambor
Hi,

On Mon, May 16, 2016 at 08:25:57PM +0300, Pekka Jääskeläinen wrote:
> The BRIG frontend itself.

thanks for making the effort to submit patches against trunk.  I would be
very glad to see this included in the upcoming gcc version, not least
because it would make it easy to do basic testing of the HSA back-end
on any computer.

Even though I have spent considerable time reading the patches, I have
not yet read it all, but I think I have accumulated enough notes to
post them back.  Also please note that I have actually never written a
front-end myself so it can easily happen that on many accounts you
will be right and my feedback wrong.  I hope it will still be useful,
though.

Generally speaking, I think you have made quite a good effort trying
to adhere to the GNU coding standard (you know, blanks before
parentheses and stuff), but:

  - I would definitely appreciate more comments.  All but the most
trivial functions should have one that describes what the function
does, what are its arguments and what it returns.  (And there
should be one blank line between the comment and the function
itself).

  - We very much prefer c-style comments to c++ ones.  I hope they can
be converted easily in some automated way.

So far I have the following specific comments:

- brig-c.h
  + please remove commented out extern GTY (()) tree brig_non_zero_struct

- brig-lang.c:
  + In the long run I would like to get rid of
  opts->x_flag_whole_program = 0 in
brig_langhook_init_options_struct, when did it cause issues, when
you tried LTO?  Since there obviously is no linker-plugin support,
I think we can leave it this way for now, though.
  + brig_langhook_handle_option has argument scode marked as unused 
but it is used.
  + brig_langhook_type_for_size uses both supposedly unused arguments
and I am surprised that handling just 64 bits is sufficient.
  + brig_langhook_type_for_mode: the "FIXME: This static_cast should
be in machmode.h" can be removed, the cast is in machmode.h
  + brig_langhook_eh_personality comment refers to file
libbrig/runtime/brig-unwind.c which does not seem to exist?
  + convert has attributes marked as unused but they are used
  + The "FIXME: This is a hack to preserve trees that we create from
the garbage collector." IMHO does not describe any real issue,
using GTY roots for that is common practice.

- brigspec.c:
  + lang_specific_driver: if (len > 3 && strcmp (arg + len - 3,
".brig") == 0) can never be true, even if the string ends with ".brig",
because only the last three characters are compared against a
five-character string.
Consequently, the if branch marked by FIXME later on in the
function never gets executed.  So perhaps it is not necessary?

- brigfrontend/*.cc in general:

  + A lot of functions should have a comment.  I prefer to have them
above the function body but if it is a method, a comment in the
class definition is fine too (I think).  Often you have helpful
comments inside the function but it really helps if you know what
to expect before you start reading the function.  For example,
brig_to_generic::add_global_variable needs a comment that it adds
VAR_DECL to the list of global variables and if there is a "host
def var" (I guess I know what that means but an explanation
somewhere would not hurt either), it makes sure it points to the
new VAR_DECL.  Another example: call_builtin is very difficult to
review without a comment explaining what arguments it expects.
Please make sure that all functions have a comment somewhere,
perhaps with the exception of only the most trivial and
self-evident.

  + Is there any reason why you use internal_error instead of
more common gcc_assert and/or gcc_unreachable?


- brigfrontend/brig_to_generic.cc: 

  + Why does this filename have underscores while all the others have
dashes? :-)
  + What should the sanity check of data section in parse accomplish?
  + In build_reinterpret_cast, it would be best if you could avoid
constructing VIEW_CONVERT_EXPRs that have type with a different
size from the type of its operand (even though I think that Ada
also does this, it is considered unfortunate).  What are the cases
when this happens?

OK, adding another note later: For converting scalars (anything
!AGGREGATE_TYPE_P), I think you pretty much always want to use
NOP_EXPR rather than generating a V_C_E.  V_C_E is mainly used to
change how we interpret aggregates (or type-cast between
aggregates and scalars).  In particular, NOP_EXPR will also
sign-extend properly when you are extending integers according to
the type, whereas what will be in the "extended" part of a V_C_E
is basically undefined.

  + in brig_to_generic::append_group_variable, I think you should not
add align_padding to m_next_group_offset twice.  You do not do
that in append_private_variable.

  + Remove the commented out debug_function in
brig_to_generic::finish_function.

  + call_builtin - diff

Re: [PATCH] Fix wrong code on aarch64 due to paradoxical subreg

2016-08-01 Thread Jeff Law

On 07/30/2016 02:17 AM, Bernd Edlinger wrote:


In your first mail you showed reg 481 as _not_ being REG_POINTER:

(insn 1047 1046 1048 (set (reg:DI 481)
 (subreg:DI (reg/f:SI 479) 0)) y.c:12702 -1
  (nil))

(note the lack of /f).  So which is it?  REG_POINTER here is not correct
as far as I can see.



Oh yes, that's an interesting point, in expand I still see this:


(insn 1047 1046 1048 (set (reg:DI 481)
 (subreg:DI (reg/f:SI 479) 0)) isl_input.c:2496 -1
  (nil))

But in the last dump before combine I have this:

(insn 1047 1044 1048 101 (set (reg/f:DI 481)
 (subreg:DI (reg/f:SI 545) 0)) isl_input.c:2496 50 {*movdi_aarch64}
  (nil))


However I was not really surprised by that, because the reg 545 does
indeed hold a pointer value: &isl_obj_map_vtable
So just an FYI.  It should always be safe to fail to mark something with 
REG_POINTER -- though it is possible that something has violated that 
design decision.


So one interesting test would be to hack up things so that REG_POINTER 
never gets set on anything and see what that does to your testcase.





(insn 22 17 23 51 (set (reg/f:SI 544)
 (high:SI (symbol_ref:SI ("isl_obj_map_vtable") [flags 0xc0]
))) isl_input.c:2415 49
{*movsi_aarch64}
  (nil))
(insn 23 22 24 51 (set (reg/f:SI 545)
 (lo_sum:SI (reg/f:SI 544)
 (symbol_ref:SI ("isl_obj_map_vtable") [flags 0xc0]
))) isl_input.c:2415 917
{add_losym_si}
  (expr_list:REG_DEAD (reg/f:SI 544)
 (expr_list:REG_EQUAL (symbol_ref:SI ("isl_obj_map_vtable")
[flags 0xc0] )
 (nil

The "reg/f:DI 481" first appeared in cse1.


I'll try to see what's happening there next



Ok, the incorrect REG_POINTER marking is done here:

cse_main -> reg_scan -> reg_scan_mark_refs -> set_reg_attrs_from_value
(reg 544) is technically not a pointer, though I think we have allowed 
REG_POINTER to be set on (HIGH (SYMBOL_REF)).  I would expect (reg 545) 
to be marked as a pointer.


The hesitation I have is because Pmode != SImode on this target, so 
technically the value has to be zero extended out to Pmode to ensure its 
validity.  One could argue that only a properly extended object should 
have REG_POINTER set.








and here I see a bug, because when POINTERS_EXTEND_UNSIGNED is set,
can_be_reg_pointer is only set to false if x is a SIGN_EXTEND, but not
if x is a SUBREG as in this case.

Seems like a bug to me.





So I think that should be fixed this way:

Index: emit-rtl.c
===
--- emit-rtl.c  (revision 238891)
+++ emit-rtl.c  (working copy)
@@ -1155,7 +1155,7 @@ set_reg_attrs_from_value (rtx reg, rtx x)
 || (GET_CODE (x) == SUBREG && subreg_lowpart_p (x)))
  {
  #if defined(POINTERS_EXTEND_UNSIGNED)
-  if (((GET_CODE (x) == SIGN_EXTEND && POINTERS_EXTEND_UNSIGNED)
+  if (((GET_CODE (x) != ZERO_EXTEND && POINTERS_EXTEND_UNSIGNED)
   || (GET_CODE (x) != SIGN_EXTEND && ! POINTERS_EXTEND_UNSIGNED))
  && !targetm.have_ptr_extend ())
can_be_reg_pointer = false;


What do you think, does this look like the right fix?
As Segher pointed out, I think you also want to look at 
SUBREG_PROMOTED_VAR_P and SUBREG_PROMOTED_UNSIGNED_P as well.  I don't 
think they're applicable for this target, but it still seems like the 
right thing to do.




With this patch the reg/f:DI 481 no longer appears,
and the invalid combine no longer happens either.

However the test case from pr70903 does not get fixed by this.

But when I look at the dumps, I see again the first incorrect
transformation in cse2 (again cse!):

(insn 20 19 21 2 (set (reg:DI 94)
 (subreg:DI (reg:QI 93) 0)) pr.c:8 50 {*movdi_aarch64}
  (expr_list:REG_EQUAL (const_int 255 [0xff])
 (expr_list:REG_DEAD (reg:QI 93)
 (nil

but that is simply wrong, because later optimization passes
expect reg 94 to be 0x00ff but the upper bits are unspecified,
so that REG_EQUAL should better not exist.
Now this one could be related to PROMOTE_MODE and friends.  You might 
want to review Jim W's comments in pr65932 which describe some problems 
with the way the port uses PROMOTE_MODE.





When I looked at cse.c I saw a comment in #if 0, which exactly
describes the problem that we have with paradoxical subreg here:

Index: cse.c
===
--- cse.c   (revision 238891)
+++ cse.c   (working copy)
@@ -4716,10 +4716,6 @@ cse_insn (rtx_insn *insn)
}
}

-#if 0
-  /* It is no longer clear why we used to do this, but it doesn't
-appear to still be needed.  So let's try without it since this
-code hurts cse'ing widened ops.  */
/* If source is a paradoxical subreg (such as QI treated as an SI),
 treat it as volatile.  It may do the work of an SI in one context
 where the extra bits are not being used, but cannot replace an SI
@@ -4726,7 +4722,6 

Re: [PATCH] Fix wrong code on aarch64 due to paradoxical subreg

2016-08-01 Thread Jeff Law

On 07/31/2016 04:44 AM, Bernd Edlinger wrote:


like this?

Index: emit-rtl.c
===
--- emit-rtl.c  (revision 238891)
+++ emit-rtl.c  (working copy)
@@ -1156,7 +1156,11 @@
  {
  #if defined(POINTERS_EXTEND_UNSIGNED)
if (((GET_CODE (x) == SIGN_EXTEND && POINTERS_EXTEND_UNSIGNED)
-  || (GET_CODE (x) != SIGN_EXTEND && ! POINTERS_EXTEND_UNSIGNED))
+  || (GET_CODE (x) == ZERO_EXTEND && ! POINTERS_EXTEND_UNSIGNED)
+  || (paradoxical_subreg_p (x)
+  && ! (SUBREG_PROMOTED_VAR_P (x)
+&& SUBREG_CHECK_PROMOTED_SIGN (x,
+   POINTERS_EXTEND_UNSIGNED
  && !targetm.have_ptr_extend ())
can_be_reg_pointer = false;
  #endif

In the test case of pr71779 the subreg is not a promoted var, so this
has no influence at this time.  Also I do not have a target where
pointers are sign-extended (! POINTERS_EXTEND_UNSIGNED), but for
symmetry it ought to check explicitly for ZERO_EXTEND as well, and
allow the pointer value to pass through a TRUNCATE.
I believe MIPS is likely the only target that extends signed, a few need 
special extension code (ia64/s390).  But this looks reasonable to me.  I 
don't think it's worth the complication of dealing with truncation.







I debugged the cse again, to see how it works and why it mis-compiles
this example.

I found out that the trouble starts one instruction earlier:


[ Fixing the missing bits from the insn using your later message... ]

(insn 19 40 20 2 (set (subreg:DI (reg:QI 93) 0)
 (const_int 255 [0xff])) pr.c:8 50 {*movdi_aarch64}
  (expr_list:REG_DEAD (reg:DI 110 [ D.3037 ])
 (nil)))




cse_main sees the constant value and maps:
(reg:QI 93)  =>  (const_int 255 [0xff])
OK.  This looks OK to me.  We know unambiguously that (reg 93) has the 
value 0xff -- that's because (reg 93) is a QImode register.   There are 
no bits outside QImode.






plus (I mean that is wrong):
(subreg:DI (reg:QI 93) 0)  =>  (const_int 255 [0xff])

When the next insn is scanned

(insn 20 19 21 2 (set (reg:DI 94)
 (subreg:DI (reg:QI 93) 0)) pr.c:8 50 {*movdi_aarch64}
  (expr_list:REG_DEAD (reg:QI 93)
 (nil)))

I see fold_rtx (subreg:DI (reg:QI 93) 0))
return (const_int 255 [0xff]) which is wrong.
I think this is the key point where things have gone wrong.  While we 
know the QImode bits are 0xff the bits outside QImode are undefined.  So 
we can't legitimately return 0xff when folding that rtx.






now cse maps:
(reg:DI 94)  =>  (const_int 255 [0xff])

And now we propagate the mistake from the previous step






Now I think I found a better place for a patch, where the first bogus
mapping is recorded:

Index: cse.c
===
--- cse.c   (revision 238891)
+++ cse.c   (working copy)
@@ -5898,15 +5898,7 @@ cse_insn (rtx_insn *insn)
|| GET_MODE (dest) == BLKmode
/* If we didn't put a REG_EQUAL value or a source into the hash
   table, there is no point is recording DEST.  */
-   || sets[i].src_elt == 0
-   /* If DEST is a paradoxical SUBREG and SRC is a ZERO_EXTEND
-  or SIGN_EXTEND, don't record DEST since it can cause
-  some tracking to be wrong.
-
-  ??? Think about this more later.  */
-   || (paradoxical_subreg_p (dest)
-   && (GET_CODE (sets[i].src) == SIGN_EXTEND
-   || GET_CODE (sets[i].src) == ZERO_EXTEND)))
+   || sets[i].src_elt == 0)
  continue;

/* STRICT_LOW_PART isn't part of the value BEING set,
@@ -5925,6 +5917,11 @@ cse_insn (rtx_insn *insn)
  sets[i].dest_hash = HASH (dest, GET_MODE (dest));
}

+   /* If DEST is a paradoxical SUBREG, don't record DEST since it can
+  cause some tracking to be wrong.  */
+   if (paradoxical_subreg_p (dest))
+ continue;
+
elt = insert (dest, sets[i].src_elt,
  sets[i].dest_hash, GET_MODE (dest));
Instead of saying "cause some tracking to be wrong", it might be better 
to say "the bits outside the mode of GET_MODE (SUBREG_REG (dest)) are 
undefined".







So apparently there was already an attempt of a fix for a similar bug,
and svn blame points to:

svn log -v -r8354

r8354 | kenner | 1994-10-28 23:55:05 +0100 (Fri, 28 Oct 1994) | 3 lines
Changed paths:
M /trunk/gcc/cse.c

(cse_insn): Don't record a DEST a paradoxical SUBREG and SRC is a
SIGN_EXTEND or ZERO_EXTEND.



This way we can still map the underlying QI register to 255 but
not the SUBREG if it is a paradoxical subreg.

In the test case this patch still works (output code does not change).

What do you think?
Looks like you've probably nailed it.  It'll be interesting to see if 
there's

Re: [patch,avr] PR70677: Use -fno-caller-saves for avr

2016-08-01 Thread Denis Chertykov
2016-08-01 15:17 GMT+03:00 Georg-Johann Lay :
> Problem with -fcaller-saves is that there are situations where it triggers
> an expensive frame just to store a variable around a function call even
> though there are plenty of call-saved registers.
>
> Example:
>
> typedef __UINT8_TYPE__ uint8_t;
>
> extern uint8_t uart0_getc (void);
>
> void foo (uint8_t *buffer, uint8_t cnt)
> {
>   while (--cnt)
> {
>   *buffer++ = uart0_getc();
> }
> }
>
> $ avr-gcc -Os -S -dp -mmcu=atmega8 loop-buf.c
>
> $ avr-gcc gcc -B$TV -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c &&
> avr-size loop-buf.o
>textdata bss dec hex filename
>  50   0   0  50  32 loop-buf.o
>
> $ avr-gcc -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c -fno-caller-saves
> && avr-size loop-buf.o
>textdata bss dec hex filename
>  32   0   0  32  20 loop-buf.o
>
> I actually never came across a situation where -fcaller-saves improved the
> code performance, hence this patch proposes to switch off -fcaller-saves per
> default.
>
> I can test the patch without regressions, but what bothers me is the
> following lines in ira-color.c:allocno_reload_assign()
>
>   if (ALLOCNO_CALLS_CROSSED_NUM (a) != 0
>   && ira_hard_reg_set_intersection_p (hard_regno, ALLOCNO_MODE (a),
>   call_used_reg_set))
> {
>   ira_assert (flag_caller_saves);
>   caller_save_needed = 1;
> }
>
> What's not clear is whether this assertion is about the inner working of IRA
> as alloc depends on caller-saves in other places of IRA, or if caller-saves
> is needed because otherwise IRA cannot resolve complicated reload situations
> and hence the proposed change might trigger ICEs for complex programs.
>
> Therefore CCed Vladimir who added the assertion to IRA.
>
> Ok to apply if IRA can do without caller-saves?


Ok.



>
> Johann
>
>
> PR 70677
> * common/config/avr/avr-common.c (avr_option_optimization_table)
> [OPT_LEVELS_ALL]: Turn off -fcaller-saves.
>
>


Re: [PATCH] Fix wrong code on aarch64 due to paradoxical subreg

2016-08-01 Thread Bernd Edlinger
Hi Jeff,

On 08/01/16 19:54, Jeff Law wrote:
> Looks like you've probably nailed it.  It'll be interesting to see if
> there's any fallout (though our RTL optimizer testing is pretty weak, so
> even if there were, I doubt we'd catch it).
>

If there is, it will probably be a performance regression...

Anyway I'd say these two patches do just disable actually wrong
transformations.  So here are both patches as separate diffs
with your suggestion for the comment in cse_insn.

I believe that on x86_64 both patches do not change a single bit.

However I think there are more paradoxical subregs generated all over,
but the aarch64 insv code pattern did trigger more hidden bugs than
any other port.  It is certainly unfortunate that the major source
of paradoxical subreg is in a target-dependent code path :(

I apologize that I am not able to reduce/finalize the aarch64 test
case at this time, as I usually only work with arm and intel targets, 
but I made an exception here, because a bug like that may affect all
targets sooner or later.


Bootstrapped and reg-tested on x86_64-linux-gnu.
Plus aarch64 bootstrap and isl-testing by Andreas.


Is it OK for trunk?



Thanks
Bernd.
2016-08-01  Bernd Edlinger  

	PR rtl-optimization/70903
	* cse.c (cse_insn): If DEST is a paradoxical SUBREG, don't record DEST.

Index: gcc/cse.c
===
--- gcc/cse.c	(revision 238915)
+++ gcc/cse.c	(working copy)
@@ -5898,15 +5898,7 @@ cse_insn (rtx_insn *insn)
 	|| GET_MODE (dest) == BLKmode
 	/* If we didn't put a REG_EQUAL value or a source into the hash
 	   table, there is no point is recording DEST.  */
-	|| sets[i].src_elt == 0
-	/* If DEST is a paradoxical SUBREG and SRC is a ZERO_EXTEND
-	   or SIGN_EXTEND, don't record DEST since it can cause
-	   some tracking to be wrong.
-
-	   ??? Think about this more later.  */
-	|| (paradoxical_subreg_p (dest)
-		&& (GET_CODE (sets[i].src) == SIGN_EXTEND
-		|| GET_CODE (sets[i].src) == ZERO_EXTEND)))
+	|| sets[i].src_elt == 0)
 	  continue;
 
 	/* STRICT_LOW_PART isn't part of the value BEING set,
@@ -5925,6 +5917,11 @@ cse_insn (rtx_insn *insn)
 	  sets[i].dest_hash = HASH (dest, GET_MODE (dest));
 	}
 
+	/* If DEST is a paradoxical SUBREG, don't record DEST since the bits
+	   outside the mode of GET_MODE (SUBREG_REG (dest)) are undefined.  */
+	if (paradoxical_subreg_p (dest))
+	  continue;
+
 	elt = insert (dest, sets[i].src_elt,
 		  sets[i].dest_hash, GET_MODE (dest));
 
2016-08-01  Bernd Edlinger  

	PR rtl-optimization/71779
	* emit-rtl.c (set_reg_attrs_from_value): Only propagate REG_POINTER,
	if the value was sign-extended according to POINTERS_EXTEND_UNSIGNED
	or if it was truncated.

Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c	(revision 238915)
+++ gcc/emit-rtl.c	(working copy)
@@ -1156,7 +1156,11 @@ set_reg_attrs_from_value (rtx reg, rtx x)
 {
 #if defined(POINTERS_EXTEND_UNSIGNED)
   if (((GET_CODE (x) == SIGN_EXTEND && POINTERS_EXTEND_UNSIGNED)
-	   || (GET_CODE (x) != SIGN_EXTEND && ! POINTERS_EXTEND_UNSIGNED))
+	   || (GET_CODE (x) == ZERO_EXTEND && ! POINTERS_EXTEND_UNSIGNED)
+	   || (paradoxical_subreg_p (x)
+	   && ! (SUBREG_PROMOTED_VAR_P (x)
+		 && SUBREG_CHECK_PROMOTED_SIGN (x,
+		POINTERS_EXTEND_UNSIGNED
 	  && !targetm.have_ptr_extend ())
 	can_be_reg_pointer = false;
 #endif


[PATCH] Add non-const std::basic_string::data() for C++17

2016-08-01 Thread Jonathan Wakely

This is a minimal version of the change I reverted recently. This adds
the non-const data() overload, without supporting fancy pointers for
allocator_type::pointer and without needing to export the new function
(because we suppress the explicit instantiation declarations for C++17
now).

* include/bits/basic_string.h (data() const): Update comment.
(data()): Add non-const overload for C++17.
* testsuite/21_strings/basic_string/operations/data/char/2.cc: New.
* testsuite/21_strings/basic_string/operations/data/wchar_t/2.cc: New.

Tested powerpc64-linux, committed to trunk.


commit f03b0d6c21528ee11a0e46c3e9393ce1ebfae990
Author: Jonathan Wakely 
Date:   Mon Aug 1 18:33:20 2016 +0100

Add non-const std::basic_string::data() for C++17

* include/bits/basic_string.h (data() const): Update comment.
(data()): Add non-const overload for C++17.
* testsuite/21_strings/basic_string/operations/data/char/2.cc: New.
* testsuite/21_strings/basic_string/operations/data/wchar_t/2.cc: New.

diff --git a/libstdc++-v3/include/bits/basic_string.h 
b/libstdc++-v3/include/bits/basic_string.h
index 49290ad..59f1c64 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -2159,13 +2159,27 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
   /**
*  @brief  Return const pointer to contents.
*
-   *  This is a handle to internal data.  Do not modify or dire things may
-   *  happen.
+   *  This is a pointer to internal data.  It is undefined to modify
+   *  the contents through the returned pointer. To get a pointer that
+   *  allows modifying the contents use @c &str[0] instead,
+   *  (or in C++17 the non-const @c str.data() overload).
   */
   const _CharT*
   data() const _GLIBCXX_NOEXCEPT
   { return _M_data(); }
 
+#if __cplusplus > 201402L
+  /**
+   *  @brief  Return non-const pointer to contents.
+   *
+   *  This is a pointer to the character sequence held by the string.
+   *  Modifying the characters in the sequence is allowed.
+  */
+  _CharT*
+  data() noexcept
+  { return _M_data(); }
+#endif
+
   /**
*  @brief  Return copy of allocator used to construct this string.
   */
@@ -4658,13 +4672,27 @@ _GLIBCXX_END_NAMESPACE_CXX11
   /**
*  @brief  Return const pointer to contents.
*
-   *  This is a handle to internal data.  Do not modify or dire things may
-   *  happen.
+   *  This is a pointer to internal data.  It is undefined to modify
+   *  the contents through the returned pointer. To get a pointer that
+   *  allows modifying the contents use @c &str[0] instead,
+   *  (or in C++17 the non-const @c str.data() overload).
   */
   const _CharT*
   data() const _GLIBCXX_NOEXCEPT
   { return _M_data(); }
 
+#if __cplusplus > 201402L
+  /**
+   *  @brief  Return non-const pointer to contents.
+   *
+   *  This is a pointer to the character sequence held by the string.
+   *  Modifying the characters in the sequence is allowed.
+  */
+  _CharT*
+  data() noexcept
+  { return _M_data(); }
+#endif
+
   /**
*  @brief  Return copy of allocator used to construct this string.
   */
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string/operations/data/char/2.cc 
b/libstdc++-v3/testsuite/21_strings/basic_string/operations/data/char/2.cc
new file mode 100644
index 000..7608a0d
--- /dev/null
+++ b/libstdc++-v3/testsuite/21_strings/basic_string/operations/data/char/2.cc
@@ -0,0 +1,40 @@
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// { dg-options "-std=gnu++17" }
+
+// C++17 21.3.1.7 [string.ops] string operations
+
+#include <string>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  std::string s;
+  char* p = s.data();
+  VERIFY( *p == '\0' );
+  s = "a string that is longer than a short string";
+  p = s.data();
+  VERIFY( p == &s.front() );
+}
+
+int
+main()
+{
+  test01();
+}
diff --git 
a/libstdc++-v3/testsuite/21_strings/basic_string/operations/data/wchar_t/2.cc 
b/libstdc++-v3/testsuite/21_strings/basic_string/operations/data/wchar_t/2.cc
new file mode 100644
index 000..3c977e0
--- /dev/

C++ PATCHes to tweak -Wabi mangling warning

2016-08-01 Thread Jason Merrill
The first patch fixes the warning about mangled name changes for some
current testcases to associate a mangled name with the right ABI
version; they had been swapped around.

The second patch changes that warning to be given when mangling
external references, not just when mangling a definition.

The third patch simplifies some code dealing with ABI tags by
factoring out the code for looking up ABI tags on a decl or type.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 07a860836dfff34c8d4d891ffdd2da84d5cb77bb
Author: Jason Merrill 
Date:   Fri Jul 29 22:30:01 2016 -0400

* mangle.c (mangle_decl): Fix mangled name change warning.

diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index d5b26d6..e0bbfc9 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -1,4 +1,4 @@
-/* Name mangling for the 3.0 C++ ABI.
+/* Name mangling for the 3.0 -*- C++ -*- ABI.
Copyright (C) 2000-2016 Free Software Foundation, Inc.
Written by Alex Samuel 
 
@@ -3732,6 +3732,7 @@ mangle_decl (const tree decl)
  id2 = mangle_decl_string (decl);
  id2 = targetm.mangle_decl_assembler_name (decl, id2);
}
+ flag_abi_version = save_ver;
 
  if (id2 == id)
/* OK.  */;
@@ -3740,8 +3741,8 @@ mangle_decl (const tree decl)
warning_at (DECL_SOURCE_LOCATION (G.entity), OPT_Wabi,
"the mangled name of %qD changed between "
"-fabi-version=%d (%D) and -fabi-version=%d (%D)",
-   G.entity, save_ver, id2,
-   warn_abi_version, id);
+   G.entity, warn_abi_version, id2,
+   save_ver, id);
  else
warning_at (DECL_SOURCE_LOCATION (G.entity), OPT_Wabi,
"the mangled name of %qD changes between "
diff --git a/gcc/testsuite/g++.dg/abi/Wabi-2-3.C 
b/gcc/testsuite/g++.dg/abi/Wabi-2-3.C
index 5aadf5d..96a0f22 100644
--- a/gcc/testsuite/g++.dg/abi/Wabi-2-3.C
+++ b/gcc/testsuite/g++.dg/abi/Wabi-2-3.C
@@ -12,5 +12,5 @@ template  struct S { };
 
 // Expect the diagnostic to reference the ABI version specified via
 // -fabi-version=3 and the ABI version specified via -Wabi=2.
-void foo (S) { }   // { dg-warning "the mangled name of .void 
foo\\(S\\). changed between -fabi-version=3 \\(_Z3foo1SILZ1NEE\\) and 
-fabi-version=2 \\(_Z3foo1SIL_Z1NEE\\)" }
+void foo (S) { }   // { dg-warning "the mangled name of .void 
foo\\(S\\). changed between -fabi-version=2 \\(_Z3foo1SILZ1NEE\\) and 
-fabi-version=3 \\(_Z3foo1SIL_Z1NEE\\)" }
 
diff --git a/gcc/testsuite/g++.dg/abi/Wabi-3-2.C 
b/gcc/testsuite/g++.dg/abi/Wabi-3-2.C
index c65e3b6..46bd713 100644
--- a/gcc/testsuite/g++.dg/abi/Wabi-3-2.C
+++ b/gcc/testsuite/g++.dg/abi/Wabi-3-2.C
@@ -12,5 +12,5 @@ template  struct S { };
 
 // Expect the diagnostic to reference the ABI version specified via
 // -fabi-version=2 and the ABI version specified via -Wabi=3.
-void foo (S) { }   // { dg-warning "the mangled name of .void 
foo\\(S\\). changed between -fabi-version=2 \\(_Z3foo1SIL_Z1NEE\\) and 
-fabi-version=3 \\(_Z3foo1SILZ1NEE\\)" }
+void foo (S) { }   // { dg-warning "the mangled name of .void 
foo\\(S\\). changes between -fabi-version=2 \\(_Z3foo1SILZ1NEE\\) and 
-fabi-version=3 \\(_Z3foo1SIL_Z1NEE\\)" }
 
commit 136f83de4c5c144e1671f823a3cf0a397ef28e22
Author: Jason Merrill 
Date:   Fri Jul 29 22:09:28 2016 -0400

Warn about mangled name change even if DECL_REALLY_EXTERN.

* mangle.c (mangle_decl): Warn about mangled name change even if
DECL_REALLY_EXTERN.

diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index 29be7fd..d1bf3ef 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -3700,50 +3700,52 @@ mangle_decl (const tree decl)
   SET_DECL_ASSEMBLER_NAME (decl, id);
 
   if (id != DECL_NAME (decl)
-  && !DECL_REALLY_EXTERN (decl)
   /* Don't do this for a fake symbol we aren't going to emit anyway.  */
   && TREE_CODE (decl) != TYPE_DECL
   && !DECL_MAYBE_IN_CHARGE_CONSTRUCTOR_P (decl)
   && !DECL_MAYBE_IN_CHARGE_DESTRUCTOR_P (decl))
 {
-  bool set = false;
+  int save_ver = flag_abi_version;
+  tree id2 = NULL_TREE;
 
-  /* Check IDENTIFIER_GLOBAL_VALUE before setting to avoid redundant
-errors from multiple definitions.  */
-  tree d = IDENTIFIER_GLOBAL_VALUE (id);
-  if (!d || decl_implicit_alias_p (d))
+  if (!DECL_REALLY_EXTERN (decl))
{
- set = true;
- SET_IDENTIFIER_GLOBAL_VALUE (id, decl);
-   }
+ bool set = false;
 
-  if (!G.need_abi_warning)
-   return;
+ /* Check IDENTIFIER_GLOBAL_VALUE before setting to avoid redundant
+errors from multiple definitions.  */
+ tree d = IDENTIFIER_GLOBAL_VALUE (id);
+ if (!d || decl_implicit_alias_p (d))
+   {
+ set = true;
+ SET_IDENTIFIER_GLOBAL_VALUE (id, decl);
+   }
 
-  /* If the mangling will change in the

Re: RFA: new pass to warn on questionable uses of alloca() and VLAs

2016-08-01 Thread Joseph Myers
On Tue, 19 Jul 2016, Aldy Hernandez wrote:

> +  // Do not warn on VLAs occurring in a loop, since VLAs are
> +  // guaranteed to be cleaned up when they go out of scope.
> +  // That is, there is a corresponding __builtin_stack_restore
> +  // at the end of the scope in which the VLA occurs.

Given this ...

> + case ALLOCA_IN_LOOP:
> +   warning_at (loc, wcode,
> +   is_vla ? "use of variable-length array "
> +   "within a loop"
> +   : "use of %<alloca%> within a loop");
> +   break;

 ... why is there a VLA case for this diagnostic at all?  I'd expect an 
assertion that only the alloca case can reach this diagnostic.

Also, if the format string for a diagnostic function is a ? : conditional 
expression you need to mark up each half with G_() so that both halves are 
properly extracted for translation.  This applies to lots of diagnostics 
in this patch.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] nvptx: do not implicitly enable -ftoplevel-reorder

2016-08-01 Thread Joseph Myers
On Fri, 22 Jul 2016, Bernd Schmidt wrote:

> What's the motivation for supporting -fno-toplevel-reorder anyway? That's
> practically just a legacy mode as far as I know.

It's for code that uses toplevel asms in ways for which it matters where 
they appear in relation to functions in the .s file, or otherwise uses 
asms or has a build system that manipulates the .s file in ways that break 
with reordering.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] nvptx: do not implicitly enable -ftoplevel-reorder

2016-08-01 Thread Bernd Schmidt

On 08/01/2016 10:44 PM, Joseph Myers wrote:

On Fri, 22 Jul 2016, Bernd Schmidt wrote:


What's the motivation for supporting -fno-toplevel-reorder anyway? That's
practically just a legacy mode as far as I know.


It's for code that uses toplevel asms in ways for which it matters where
they appear in relation to functions in the .s file, or otherwise uses
asms or has a build system that manipulates the .s file in ways that break
with reordering.


Yes, but that sort of thing tends not to work with nvptx anyway, so I 
still don't really see the point.



Bernd



Re: [PATCH,rs6000] Add built-in function support Power9 binary floating point operations

2016-08-01 Thread Segher Boessenkool
Hi Kelvin,

On Fri, Jul 29, 2016 at 09:47:55AM -0600, Kelvin Nilsen wrote:
> This patch adds built-in support for the following fourteen new binary
> floating point instructions introduced with the Power9 architecture: 

Some comments, mostly about whitespace:

> --- gcc/doc/extend.texi   (revision 238014)
> +++ gcc/doc/extend.texi   (working copy)

> +int scalar_test_data_class (float source, unsigned int condition);
> +int scalar_test_data_class (double source, unsigned int condition);
> +
> +int scalar_test_neg (float source)
> +int scalar_test_neg (double source)

These last two probably want a semicolon as well?

> +The @code{scalar_extract_sig} and @code{scalar_insert_exp}
> +functions require a 64-bit environment supporting ISA 3.0 or later.
> +The @code{scalar_extract_exp} and @code{vec_extract_sig} built-in
> +functions return the significand and exponent respectively of their
> +@code{source} arguments.  The 

Trailing space.

> +The @code{scalar_cmp_exp_gt}, @code{scalar_cmp_exp_lt},
> +@code{scalar_cmp_exp_eq}, and @code{scalar_cmp_exp_unordered} built-in
> +functions return a non-zero value if @code{arg1} is greater than, less
> +than, equal to, or not comparable to @code{arg2} respectively.  The
> +arguments are not comparable if one or the other equals NaN (not a
> +number). 

Trailing space.

> +__vector float
> +vec_insert_exp (__vector unsigned int significands,  __vector unsigned int 
> exponents);
> +__vector double
> +vec_insert_exp (__vector unsigned long long int significands,
> +__vector unsigned long long int exponents);

Break up the first of these to two lines as well?

> +/* { dg-skip-if "" { powerpc*-*-aix* } } */

I think you can do this in bfp.exp, for all tests at the same time?
Or will bfp/ include tests that can run on AIX, eventually?

> +/* This test should succeed only 64-bit configuration.  */

Maybe "only on 64-bit configurations"?

> +;; Iterator for scalar floating point types
> +(define_mode_iterator VSX_SF [DF SF])

There is an iterator SFDF already; is there a reason to use a new one?

> +(define_mode_attr vsx_sf_suffix [(DF "dp") (SF "sp")])

That is Fvsx already.

> +(define_mode_attr vsx_f_suffix [(V4SF "dp") (V2DF "sp")])

Those seem swapped?

You can make vsx_sf_suffix (or Fvsx) do these types as well.

> +(define_mode_attr VSX_F_INTEGER [(V4SF "V4SI") (V2DF "V2DI")])

VSi does this, too.

> +;; ISA 3.0 Binary Floating-Point Support
> +
> +;; VSX Scalar Extract Exponent Double-Precision
> +(define_insn "xsxexpdp"
> +  [(set (match_operand:DI 0 "register_operand" "=r")
> + (unspec:DI [(match_operand:DF 1 "vsx_register_operand" "wa")] 
> +UNSPEC_VSX_SXEXPDP))]

These last two lines aren't indented correctly (no spaces before tabs,
and no spaces where you could use a tab).  Many variations below.
"contrib/check_GNU_style.sh" might help a bit.

> +  "xsxexpdp %0,%x1"
> +  [(set_attr "type" "fp")])

I think this should be "fpsimple"?

> +(define_insn "*xscmpexpdp"
> +  [(set (match_operand:CCFP 0 "" "=y")

No predicate?  cc_reg_operand?

> +;; VSX Scalar Test Data Class Double- and Single-Precision
> +;;  (The lt bit is set if operand 1 is negative.  The eq bit is set 
> +;;   if any of the conditions tested by operand 2 are satisfied.  
> +;;   The gt and unordered bits are cleared to zero.)

Trailing spaces.  There are more.

> +(define_expand "xststdc"
> +  [(set (match_dup 3)
> +(compare:CCFP
> + (unspec:VSX_SF
> +  [(match_operand:VSX_SF 1 "vsx_register_operand" "wa")
> +   (match_operand:SI 2 "u7bit_cint_operand" "n")]
> +  UNSPEC_VSX_STSTDC)
> +(match_dup 4)))
> +   (set (match_operand:SI 0 "register_operand" "=r")
> +(eq:SI (match_dup 3)
> +   (const_int 0)))
> +  ]

Don't put this ] on a separate line please.

> +(define_insn "*xststdc"
> +  [(set (match_operand:CCFP 0 "" "=y")
> +(compare:CCFP
> + (unspec:VSX_SF [(match_operand:VSX_SF 1 "vsx_register_operand" "wa")
> +(match_operand:SI 2 "u7bit_cint_operand" "n")]

The ( should line up here.  And, tabs.

> +;; VSX Vector Test Data Class Double and Single Precision
> +;;  The corresponding elements of the result vector are all ones
> +;;   if any of the conditions tested by operand 3 are satisfied.  

Something went wrong with the leading spaces here :-)

> @@ -4721,6 +4799,7 @@ altivec_resolve_overloaded_builtin (location_t loc
>tree types[3], args[3];
>const struct altivec_builtin_types *desc;
>unsigned int n;
> +  bool unsupported_builtin;
>  
>if (!rs6000_overloaded_builtin_p (fcode))
>  return NULL_TREE;
> @@ -5480,6 +5559,7 @@ assignment for unaligned loads and stores");
>return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
>  }
>  
> +  unsupported_builtin = false;

Declare it here, instead of 750 lines above?

> +BU_P9V_VSX_2 (VSCEDPGT,  "scalar_cmp_exp_dp_gt", CONST, xscmpexpdp_gt)
> +BU_P9V_VSX_2 (VSCEDP

Re: [PATCH 1/3] (v2) On-demand locations within string-literals

2016-08-01 Thread Joseph Myers
On Thu, 28 Jul 2016, Martin Sebor wrote:

> like it as well.  So perhaps the problem to solve is how to teach
> LTO to talk to the front end.  One way to do it would be to build
> the front ends as shared libraries.

I think building front ends as shared libraries would run into different 
platforms (e.g. Windows) having very different conceptual models for 
shared libraries, especially when you get into shared libraries depending 
on symbols from the main executable (you might need to make all the 
language-independent parts of the compiler into a shared library as well).  
But a useful starting point could be to eliminate all cases where 
different front ends define external functions / variables with the same 
name (which would also enable statically linking multiple front ends 
together, to do such things without depending on shared libraries at all).

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH, 4 of 4], Enhance PowerPC vec_extract support for power8/power9 machines

2016-08-01 Thread Michael Meissner
This is the 4th patch to enhance vec_extract on 64-bit power8/power9 machines.

This patch uses the load from memory support and the variable element patch that
were part of the previous patches for vector long/vector double, and adds the
same support for vector float, vector int, vector short, and vector char.

I have tested these patches with bootstrap builds and running make check on:

1) Big endian power7 (both -m32 and -m64 tests done)
2) Big endian power8 (only -m64 tests were done)
3) Little endian power8

There were no regressions.  Can I check these patches into the trunk?

One further optimization would be to add support for constant element extracts
if the vector is currently in GPRs rather than vector registers on 64-bit
systems.  I'm not sure if it would be a win in general, or if it would cause
the register allocators to generate more moves between the GPR and vector
register banks.

[gcc]
2016-08-01  Michael Meissner  

* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Add support for vec_extract on vector float, vector int, vector
short, and vector char vector types.
* config/rs6000/rs6000.c (rs6000_expand_vector_extract): Add
vector float, vector int, vector short, and vector char
optimizations on 64-bit ISA 2.07 systems for both constant and
variable element numbers.
(rs6000_split_vec_extract_var): Likewise.
* config/rs6000/vsx.md (vsx_xscvspdp_scalar2): Allow SFmode to be
Altivec registers on ISA 2.07 and above.
(vsx_extract_v4sf): Delete alternative that hard coded element 0,
which never was matched due to the split occurring before register
allocation (and the code would not have worked on little endian
systems if it did match).  Allow extracts to go to the Altivec
registers if ISA 2.07 (power8).  Change from using "" around the
C++ code to using {}'s.
(vsx_extract_v4sf__load): New insn to optimize vector float
vec_extracts when the vector is in memory.
(vsx_extract_v4sf_var): New insn to optimize vector float
vec_extracts when the element number is variable on 64-bit ISA
2.07 systems.
(vsx_extract_, VSX_EXTRACT_I iterator): Add optimizations
for 64-bit ISA 2.07 as well as ISA 3.0.
(vsx_extract__p9, VSX_EXTRACT_I iterator): Likewise.
(vsx_extract__p8, VSX_EXTRACT_I iterator): Likewise.
(vsx_extract__load, VSX_EXTRACT_I iterator): New insn to
optimize vector int, vector short, and vector char vec_extracts
when the vector is in memory.
(vsx_extract__var, VSX_EXTRACT_I iterator): New insn to
optimize vector int, vector short, and vector char vec_extracts
when the element number is variable.

[gcc/testsuite]
2016-08-01  Michael Meissner  

* gcc.target/powerpc/vec-extract-5.c: New tests to test
vec_extract for vector float, vector int, vector short, and vector
char.
* gcc.target/powerpc/vec-extract-6.c: Likewise.
* gcc.target/powerpc/vec-extract-7.c: Likewise.
* gcc.target/powerpc/vec-extract-8.c: Likewise.
* gcc.target/powerpc/vec-extract-9.c: Likewise.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000-c.c
===
--- gcc/config/rs6000/rs6000-c.c(revision 238892)
+++ gcc/config/rs6000/rs6000-c.c(working copy)
@@ -5135,6 +5135,25 @@ altivec_resolve_overloaded_builtin (loca
case V2DImode:
  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
  break;
+
+   case V4SFmode:
+ call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+ break;
+
+   case V4SImode:
+ if (TARGET_DIRECT_MOVE_64BIT)
+   call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+ break;
+
+   case V8HImode:
+ if (TARGET_DIRECT_MOVE_64BIT)
+   call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+ break;
+
+   case V16QImode:
+ if (TARGET_DIRECT_MOVE_64BIT)
+   call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+ break;
}
}
 
@@ -5154,6 +5173,22 @@ altivec_resolve_overloaded_builtin (loca
case V2DImode:
  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
  break;
+
+   case V4SFmode:
+ call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+ break;
+
+   case V4SImode:
+ call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+ brea

Re: [PATCH, 4 of 4], Enhance PowerPC vec_extract support for power8/power9 machines

2016-08-01 Thread Segher Boessenkool
On Mon, Aug 01, 2016 at 06:37:42PM -0400, Michael Meissner wrote:
> One further optimization would be to add support for constant element extracts
> if the vector is currently in GPRs rather than vector registers on 64-bit
> systems.  I'm not sure if it would be a win in general, or if it would cause
> the register allocators to generate more moves between the GPR and vector
> register banks.

I don't know if it'll help either, you'll have to try it to make sure.
I don't think it will be terribly important, either way.

One nit:

>  ;; Extraction of a single element in a small integer vector.  None of the 
> small
>  ;; types are currently allowed in a vector register, so we extract to a 
> DImode
>  ;; and either do a direct move or store.
> -(define_insn_and_split  "vsx_extract_"
> +(define_expand  "vsx_extract_"
> +  [(parallel [(set (match_operand: 0 "nonimmediate_operand" "")
> +(vec_select:
> + (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand" "")
> + (parallel [(match_operand:QI 2 "const_int_operand" "")])))
> +   (clobber (match_dup 3))])]

Drop the superfluous ""s?  And the predicates are never used either I think?

This is okay for trunk.  Thanks,


Segher


Re: [PATCH] Fix wrong code on aarch64 due to paradoxical subreg

2016-08-01 Thread Segher Boessenkool
Hi,

On Mon, Aug 01, 2016 at 06:52:54PM +, Bernd Edlinger wrote:
> On 08/01/16 19:54, Jeff Law wrote:
> > Looks like you've probably nailed it.  It'll be interesting see if
> > there's any fallout (though our RTL optimizer testing is pretty weak, so
> > even if there were, I doubt we'd catch it).
> 
> If there is, it will probably be a performance regression...

I tested building Linux with and without the patch, on many archs.
The few that show differences are:

   alpha   6148872   6148776
ia64  16946958  16946670
s390  12345770  12345850
tile  12016086  12016070

(left before, right after; arm and aarch64 did not build, kernel problems).

So all except s390 generate smaller code even.

> However I think there are more paradoxical subregs generated all over,
> but the aarch64 insv code pattern did trigger more hidden bugs than
> any other port.  It is certainly unfortunate that the major source
> of paradoxical subreg is in a target-dependent code path :(

It is certainly unfortunate that paradoxical subregs exist at all!  :-)


Segher


Re: [PATCH build/doc] Replacing libiberty with gnulib

2016-08-01 Thread Manuel López-Ibáñez
On 29 July 2016 at 17:55, Manuel López-Ibáñez  wrote:
> On 29 July 2016 at 17:51, Joseph Myers  wrote:
>> On Wed, 20 Jul 2016, Manuel López-Ibáñez wrote:
>>
>>> On 20 July 2016 at 19:21, ayush goel  wrote:
>>> > Hey,
>>> > As a first step of my GSOC project
>>> > (https://gcc.gnu.org/wiki/replacelibibertywithgnulib) I have imported
>>> > the gnulib library inside the gcc tree. I have created gnulib as a top
>>> > level directory which contains the necessary scripts to import the
>>> > modules. It also contains the necessary Makefile.in and configure.ac
>>> > files.
>>>
>>> Looks good to me, but I cannot approve it. Joseph, what do you think?
>>
>> That this would best be reviewed by a build-system maintainer.
>
> Sure, who could that be?

Jeff, as a global reviewer, how can we move this forward? You have
said in the past: "I suspect we'll probably want to go with direct use
of gnulib obstack at some point." Well, here it is.

If there is something wrong or missing, ideally we would like to know
so that Ayush can work on fixing it before the Summer of Code is over
in less than two weeks.

Cheers,

Manuel.


Re: [PR49366] emit loc exprs for C++ non-virtual pmf template value parms

2016-08-01 Thread Alexandre Oliva
On Jul 23, 2016, Alexandre Oliva  wrote:

> We used to emit, in debug information, the values bound to pointer to
> member function template parameters only when they were NULL or
> virtual member functions, because those can be represented with
> DW_AT_const_value.

> In order to represent the symbolic pointer to member function
> constants for non-virtual member functions, we'd need to be able to
> emit relocations for part of DW_AT_const_value, which we don't.  The
> more viable alternative is to use DW_AT_location to represent such
> values, as slated for inclusion in DWARFv5, according to
> <http://www.dwarfstd.org/ShowIssue.php?issue=130412.1>.

> With this patch, when we can't emit a DW_AT_const_value, we emit each
> "member" of the pointer to member function "record" as a
> DW_OP_stack_value DW_OP_piece, as long as the referenced member
> function is output in the same translation unit, otherwise we'd get
> relocations to external symbols, something to avoid in debug sections.

> Regstrapped on x86_64-linux-gnu and i686-pc-linux-gnu, and manually
> cross-tested with both endiannesses (just to be sure ;-) with a
> mips64-elf target.  Ok to install?

Ping?  (patch attached to gcc.gnu.org/PR49366)

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


[PR63240] generate debug info for defaulted member functions

2016-08-01 Thread Alexandre Oliva
This implements , a
proposal already accepted for inclusion in DWARF-5, but using
DW_AT_GNU_defaulted instead of DW_AT_defaulted as the attribute name,
because the attribute id for DW_AT_defaulted is not yet publicly
available.

Regstrapped on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?

for  include/ChangeLog

PR debug/63240
* dwarf2.def (DW_AT_GNU_defaulted): New.
* dwarf2.h (enum dwarf_defaulted_attribute): New.

for  gcc/ChangeLog

PR debug/63240
* langhooks-def.h
(LANG_HOOKS_FUNCTION_DECL_DEFAULTED_INCLASS_P): Set default.
(LANG_HOOKS_FUNCTION_DECL_DEFAULTED_OUTOFCLASS_P): Likewise.
(LANG_HOOKS_DECLS): Add them.
* langhooks.h (struct lang_hooks_for_decls): Add
function_decl_defaulted_inclass_p and
function_decl_defaulted_outofclass_p.
* dwarf2out.c (gen_subprogram_die): Add DW_AT_GNU_defaulted
attribute.

for  gcc/cp/ChangeLog

PR debug/63240
* cp-objcp-common.c (cp_function_decl_defaulted_inclass_p,
cp_function_decl_defaulted_outofclass_p): New.
* cp-objcp-common.h (cp_function_decl_defaulted_inclass_p,
cp_function_decl_defaulted_outofclass_p): Declare.
(LANG_HOOKS_FUNCTION_DECL_DEFAULTED_INCLASS_P): Redefine.
(LANG_HOOKS_FUNCTION_DECL_DEFAULTED_OUTOFCLASS_P): Redefine.

for  gcc/testsuite/ChangeLog

PR debug/63240
* g++.dg/debug/dwarf2/defaulted-member-function-1.C: New.
* g++.dg/debug/dwarf2/defaulted-member-function-2.C: New.
* g++.dg/debug/dwarf2/defaulted-member-function-3.C: New.
---
 gcc/cp/cp-objcp-common.c   |   22 ++
 gcc/cp/cp-objcp-common.h   |8 +++
 gcc/dwarf2out.c|   24 
 gcc/langhooks-def.h|4 +++
 gcc/langhooks.h|8 +++
 .../debug/dwarf2/defaulted-member-function-1.C |   14 
 .../debug/dwarf2/defaulted-member-function-2.C |   16 +
 .../debug/dwarf2/defaulted-member-function-3.C |   13 +++
 include/dwarf2.def |2 ++
 include/dwarf2.h   |8 +++
 10 files changed, 119 insertions(+)
 create mode 100644 
gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-1.C
 create mode 100644 
gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-2.C
 create mode 100644 
gcc/testsuite/g++.dg/debug/dwarf2/defaulted-member-function-3.C

diff --git a/gcc/cp/cp-objcp-common.c b/gcc/cp/cp-objcp-common.c
index f7ddb00..daa1b7e 100644
--- a/gcc/cp/cp-objcp-common.c
+++ b/gcc/cp/cp-objcp-common.c
@@ -150,6 +150,28 @@ cp_function_decl_deleted_p (tree decl)
  && DECL_DELETED_FN (decl));
 }
 
+/* Return true if DECL is a special member function defaulted within
+   the class body.  */
+
+bool
+cp_function_decl_defaulted_inclass_p (tree decl)
+{
+  return (decl
+ && DECL_LANG_SPECIFIC (STRIP_TEMPLATE (decl))
+ && DECL_DEFAULTED_IN_CLASS_P (decl));
+}
+
+/* Return true if DECL is a special member function defaulted outside
+   the class body.  */
+
+bool
+cp_function_decl_defaulted_outofclass_p (tree decl)
+{
+  return (decl
+ && DECL_LANG_SPECIFIC (STRIP_TEMPLATE (decl))
+ && DECL_DEFAULTED_OUTSIDE_CLASS_P (decl));
+}
+
 /* Stubs to keep c-opts.c happy.  */
 void
 push_file_scope (void)
diff --git a/gcc/cp/cp-objcp-common.h b/gcc/cp/cp-objcp-common.h
index 1bb19ee..7bba00d 100644
--- a/gcc/cp/cp-objcp-common.h
+++ b/gcc/cp/cp-objcp-common.h
@@ -28,6 +28,8 @@ extern tree objcp_tsubst_copy_and_build (tree, tree, 
tsubst_flags_t,
 
 extern bool cp_function_decl_explicit_p (tree decl);
 extern bool cp_function_decl_deleted_p (tree decl);
+extern bool cp_function_decl_defaulted_inclass_p (tree decl);
+extern bool cp_function_decl_defaulted_outofclass_p (tree decl);
 extern void cp_common_init_ts (void);
 
 /* Lang hooks that are shared between C++ and ObjC++ are defined here.  Hooks
@@ -134,6 +136,12 @@ extern void cp_common_init_ts (void);
 #define LANG_HOOKS_FUNCTION_DECL_EXPLICIT_P cp_function_decl_explicit_p
 #undef LANG_HOOKS_FUNCTION_DECL_DELETED_P
 #define LANG_HOOKS_FUNCTION_DECL_DELETED_P cp_function_decl_deleted_p
+#undef LANG_HOOKS_FUNCTION_DECL_DEFAULTED_INCLASS_P
+#define LANG_HOOKS_FUNCTION_DECL_DEFAULTED_INCLASS_P \
+  cp_function_decl_defaulted_inclass_p
+#undef LANG_HOOKS_FUNCTION_DECL_DEFAULTED_OUTOFCLASS_P
+#define LANG_HOOKS_FUNCTION_DECL_DEFAULTED_OUTOFCLASS_P \
+  cp_function_decl_defaulted_outofclass_p
 #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
 #define LANG_HOOKS_OMP_PREDETERMINED_SHARING cxx_omp_predetermined_sharing
 #undef LANG_HOOKS_OMP_CLAUSE_DEFAULT_CTOR
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 8d6eeed..71fa5ad 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c

[PR55641] drop spurious const_type from reference_type variables

2016-08-01 Thread Alexandre Oliva
Although C++ reference types, denoted by DW_TAG_reference_type in
DWARFv2+ debug info, are unchangeable, we output names of reference type
with DW_TAG_const_type, because internally we mark such variables as
TREE_READONLY.  That's an internal implementation detail that shouldn't
leak to debug information.  This patch fixes this.

The testcase is slightly changed from the one attached to the bug
report, so that it runs in C++98 mode too.

Regstrapped on x86_64-linux-gnu and i686-linux-gnu.  Ok to install?

for  gcc/ChangeLog

PR debug/55641
* dwarf2out.c (decl_quals): Don't map TREE_READONLY to
TYPE_QUAL_CONST in reference-typed decls.

for  gcc/testsuite/ChangeLog

PR debug/55641
* g++.dg/debug/dwarf2/ref-1.C: New.
---
 gcc/dwarf2out.c   |4 
 gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C |   19 +++
 2 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C

diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 8d6eeed..103095f 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -11135,6 +11135,10 @@ static int
 decl_quals (const_tree decl)
 {
   return ((TREE_READONLY (decl)
+  /* The C++ front-end correctly marks reference-typed
+ variables as readonly, but from a language (and debug
+ info) standpoint they are not const-qualified.  */
+  && TREE_CODE (TREE_TYPE (decl)) != REFERENCE_TYPE
   ? TYPE_QUAL_CONST : TYPE_UNQUALIFIED)
  | (TREE_THIS_VOLATILE (decl)
 ? TYPE_QUAL_VOLATILE : TYPE_UNQUALIFIED));
diff --git a/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C 
b/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C
new file mode 100644
index 000..75e9fca
--- /dev/null
+++ b/gcc/testsuite/g++.dg/debug/dwarf2/ref-1.C
@@ -0,0 +1,19 @@
+// { dg-do compile }
+// { dg-options "-O -g -dA -gno-strict-dwarf" }
+// { dg-final { scan-assembler-not "DW_TAG_const_type" { xfail { 
powerpc-ibm-aix* } } } }
+
+int x;
+int &y = x;
+
+typedef int &z_t;
+z_t z = x;
+
+void f(int &p) {}
+
+struct foo {
+  int &bar;
+  typedef int &bart;
+  bart fool;
+};
+
+void f3(struct foo &p) {}

-- 
Alexandre Oliva, freedom fighterhttp://FSFLA.org/~lxoliva/
You must be the change you wish to see in the world. -- Gandhi
Be Free! -- http://FSFLA.org/   FSF Latin America board member
Free Software Evangelist|Red Hat Brasil GNU Toolchain Engineer


Re: [patch,avr] PR70677: Use -fno-caller-saves for avr

2016-08-01 Thread Senthil Kumar Selvaraj

Denis Chertykov writes:

> 2016-08-01 15:17 GMT+03:00 Georg-Johann Lay :
>> Problem with -fcaller-saves is that there are situations where it triggers
>> an expensive frame just to store a variable around a function call even
>> though there are plenty of call-saved registers.
>>
>> Example:
>>
>> typedef __UINT8_TYPE__ uint8_t;
>>
>> extern uint8_t uart0_getc (void);
>>
>> void foo (uint8_t *buffer, uint8_t cnt)
>> {
>>   while (--cnt)
>> {
>>   *buffer++ = uart0_getc();
>> }
>> }
>>
>> $ avr-gcc -Os -S -dp -mmcu=atmega8 loop-buf.c
>>
>> $ avr-gcc gcc -B$TV -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c &&
>> avr-size loop-buf.o
>>textdata bss dec hex filename
>>  50   0   0  50  32 loop-buf.o
>>
>> $ avr-gcc -Os -c -save-temps -dp -mmcu=atmega8 loop-buf.c -fno-caller-saves
>> && avr-size loop-buf.o
>>textdata bss dec hex filename
>>  32   0   0  32  20 loop-buf.o
>>
>> I actually never came across a situation where -fcaller-saves improved the
>> code performance, hence this patch proposes to switch off -fcaller-saves per
>> default.

Like you mentioned in the bug report, would fixing the costs be a better
way to fix this rather than a blanket disabling of the option?

Regards
Senthil


Re: Create multiple directory through fixinc.in script.

2016-08-01 Thread mbilal


Hi Jeff,

On 01/08/2016 21:07, Jeff Law wrote:

On 08/01/2016 08:55 AM, mbilal wrote:

Hi,

This patch fixes the mkheaders directory creation. It failed to create
multiple multilib directories in some cases.
e.g
For i686-pc-linux-gnu target config, I have following 'fixinc_list' for
multilibs.

/system32;
/system64;/64
/sgxx-glibc;/sgxx-glibc
/sgxx-glibc;/64/sgxx-glibc
/sgxx-glibc;/x32/sgxx-glibc

Now, mkheaders failed to create fixed include directory for the case of
 '/sgxx-glibc;/x32/sgxx-glibc'.
here fixinc.sh tries to make 'mkdir /x32/sgxx-glibc' and
failed because parent 'x32' directory doesn't exist. (we defined
MULTILIB_EXCEPTIONS for x32 directory)


Following patch fixes the problem.

fixincludes/ChangeLog:
2016-08-01  Muhammad Bilal  

* fixinc.in: Use --parents option to make LIB directory.

OK.  Please install if you have permissions.  If not, let me know and
I'll do the commit.


Please commit this patch as I don't have write permission in gcc repo.

Thanks for the review.




Index: fixincludes/fixinc.in
===
--- fixincludes/fixinc.in(revision 238952)
+++ fixincludes/fixinc.in(working copy)
@@ -41,7 +41,7 @@

 # Make sure it exists.
 if [ ! -d $LIB ]; then
-  mkdir $LIB || {
+  mkdir -p $LIB || {
 echo fixincludes:  output dir '`'$LIB"' cannot be created"
 exit 1
   }


I think 'mkdir -p' option is now portable as I'm seeing that
mkinstalldirs script also uses 'mkdir -p' options.

Right.   I actually double-checked and -p is part of the current posix
specification.

Thanks,
Jeff



Thanks,
-Bilal


Re: [PATCH] c++/60760 - arithmetic on null pointers should not be allowed in constant expressions

2016-08-01 Thread Thomas Schwinge
Hi!

On Wed, 6 Jul 2016 16:20:44 -0600, Martin Sebor  wrote:
> PR c++/60760 - arithmetic on null pointers should not be allowed in constant
>   expressions
> PR c++/71091 - constexpr reference bound to a null pointer dereference
>accepted
> 
> [...]
>   * g++.dg/cpp0x/constexpr-cast.C: New test.

In x86_64 GNU/Linux testing, I see that one FAIL for the -m32 multilib:

+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++11  (test for errors, line 10)
+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++11  (test for errors, line 11)
+PASS: g++.dg/cpp0x/constexpr-cast.C  -std=c++11  (test for errors, line 24)
+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++11 (test for excess errors)
+XFAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++11 bug c++/49171 (test for 
errors, line 8)
+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++14  (test for errors, line 10)
+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++14  (test for errors, line 11)
+PASS: g++.dg/cpp0x/constexpr-cast.C  -std=c++14  (test for errors, line 24)
+FAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++14 (test for excess errors)
+XFAIL: g++.dg/cpp0x/constexpr-cast.C  -std=c++14 bug c++/49171 (test for 
errors, line 8)
+UNSUPPORTED: g++.dg/cpp0x/constexpr-cast.C  -std=c++98

[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:10:22: error: 
'reinterpret_cast(1)' is not a constant-expression
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:11:22: error: 
'reinterpret_cast(1u)' is not a constant-expression
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:24:26:   in 
constexpr expansion of 'f()'
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:24:27: error: 
value '4u' of type 'int*' is not a constant expression

For the -m64 multilib, it looks as follows (all PASSes):

[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:10:47: error: 
value '1u' of type 'void*' is not a constant expression
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:11:22: error: 
'reinterpret_cast(1ul)' is not a constant-expression
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:24:26:   in 
constexpr expansion of 'f()'
[...]/source-gcc/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C:24:27: error: 
value '4u' of type 'int*' is not a constant expression

For reference:

> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-cast.C
> @@ -0,0 +1,24 @@
> +// Test to verify that evaluating reinterpret_cast is diagnosed in
> +// constant expressions.
> +// { dg-do compile { target c++11 } }
> +
> +int i;
> +
> +// The following is accepted due to bug 49171.
> +constexpr void *q = reinterpret_cast(&i);// { dg-error "" "bug 
> c++/49171" { xfail *-*-*-* } }
> +
> +constexpr void *r0 = reinterpret_cast(1);// { dg-error "not a 
> constant expression" }
> +constexpr void *r1 = reinterpret_cast(sizeof 'x');  // { dg-error 
> ".reinterpret_cast\\(1ul\\). is not a constant-expression" }
> +
> +template 
> +constexpr bool f ()
> +{
> +#if __cplusplus > 201103L
> +  T *p = reinterpret_cast(sizeof (T));
> +  return p;
> +#else
> +  return *reinterpret_cast(sizeof (T));
> +#endif
> +}
> +
> +constexpr bool b = f();   // { dg-error "not a constant expression" }


Grüße
 Thomas


signature.asc
Description: PGP signature