Re: [PATCH] Fix PR53773

2012-07-31 Thread Richard Guenther
On Mon, 30 Jul 2012, William J. Schmidt wrote:

> This fixes the de-canonicalization of commutative GIMPLE operations in
> the vectorizer that occurs when processing reductions.  A loop_vec_info
> is flagged for cleanup when a de-canonicalization has occurred in that
> loop, and the cleanup is done when the loop_vec_info is destroyed.
> 
> Bootstrapped on powerpc64-unknown-linux-gnu with no new regressions.  Ok
> for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Bill
> 
> 
> gcc:
> 
> 2012-07-30  Bill Schmidt  
> 
>   PR tree-optimization/53773
>   * tree-vectorizer.h (struct _loop_vec_info): Add operands_swapped.
>   (LOOP_VINFO_OPERANDS_SWAPPED): New macro.
>   * tree-vect-loop.c (new_loop_vec_info): Initialize
>   LOOP_VINFO_OPERANDS_SWAPPED field.
>   (destroy_loop_vec_info): Restore canonical form.
>   (vect_is_slp_reduction): Set LOOP_VINFO_OPERANDS_SWAPPED field.
>   (vect_is_simple_reduction_1): Likewise.
> 
> gcc/testsuite:
> 
> 2012-07-30  Bill Schmidt  
> 
>   PR tree-optimization/53773
>   * testsuite/gcc.dg/vect/pr53773.c: New test.
> 
> 
> Index: gcc/testsuite/gcc.dg/vect/pr53773.c
> ===
> --- gcc/testsuite/gcc.dg/vect/pr53773.c   (revision 0)
> +++ gcc/testsuite/gcc.dg/vect/pr53773.c   (revision 0)
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +
> +int
> +foo (int integral, int decimal, int power_ten)
> +{
> +  while (power_ten > 0)
> +{
> +  integral *= 10;
> +  decimal *= 10;
> +  power_ten--;
> +}
> +
> +  return integral+decimal;
> +}
> +
> +/* Two occurrences in annotations, two in code.  */
> +/* { dg-final { scan-tree-dump-times "\\* 10" 4 "vect" } } */
> +/* { dg-final { cleanup-tree-dump "vect" } } */
> +
> Index: gcc/tree-vectorizer.h
> ===
> --- gcc/tree-vectorizer.h (revision 189938)
> +++ gcc/tree-vectorizer.h (working copy)
> @@ -296,6 +296,12 @@ typedef struct _loop_vec_info {
>   this.  */
>bool peeling_for_gaps;
>  
> +  /* Reductions are canonicalized so that the last operand is the reduction
> + operand.  If this places a constant into RHS1, this decanonicalizes
> + GIMPLE for other phases, so we must track when this has occurred and
> + fix it up.  */
> +  bool operands_swapped;
> +
>  } *loop_vec_info;
>  
>  /* Access Functions.  */
> @@ -326,6 +332,7 @@ typedef struct _loop_vec_info {
>  #define LOOP_VINFO_PEELING_HTAB(L) (L)->peeling_htab
>  #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data
>  #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps
> +#define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped
>  
>  #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
>  VEC_length (gimple, (L)->may_misalign_stmts) > 0
> Index: gcc/tree-vect-loop.c
> ===
> --- gcc/tree-vect-loop.c  (revision 189938)
> +++ gcc/tree-vect-loop.c  (working copy)
> @@ -853,6 +853,7 @@ new_loop_vec_info (struct loop *loop)
>LOOP_VINFO_PEELING_HTAB (res) = NULL;
>LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
>LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
> +  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
>  
>return res;
>  }
> @@ -873,6 +874,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, b
>int j;
>VEC (slp_instance, heap) *slp_instances;
>slp_instance instance;
> +  bool swapped;
>  
>if (!loop_vinfo)
>  return;
> @@ -881,6 +883,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, b
>  
>bbs = LOOP_VINFO_BBS (loop_vinfo);
>nbbs = loop->num_nodes;
> +  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);
>  
>if (!clean_stmts)
>  {
> @@ -905,6 +908,22 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, b
>for (si = gsi_start_bb (bb); !gsi_end_p (si); )
>  {
>gimple stmt = gsi_stmt (si);
> +
> +   /* We may have broken canonical form by moving a constant
> +  into RHS1 of a commutative op.  Fix such occurrences.  */
> +   if (swapped && is_gimple_assign (stmt))
> + {
> +   enum tree_code code = gimple_assign_rhs_code (stmt);
> +
> +   if ((code == PLUS_EXPR
> +|| code == POINTER_PLUS_EXPR
> +|| code == MULT_EXPR)
> +   && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
> + swap_tree_operands (stmt,
> + gimple_assign_rhs1_ptr (stmt),
> + gimple_assign_rhs2_ptr (stmt));
> + }
> +
> /* Free stmt_vec_info.  */
> free_stmt_vec_info (stmt);
>gsi_next (&si);
> @@ -1920,6 +1939,9 @@ vect_is_slp_reduction (loop_vec_info loop_info, gi
> gimple_assign_rhs1_ptr (next_stmt),
>gimple_assign_rhs2_ptr (next_stmt));
> update_s

Commit: XStormy16: Add __clrsbhi2() to libgcc

2012-07-31 Thread Nick Clifton
Hi Guys,

  I am checking in the patch below to add a __clrsbhi2 function to
  libgcc for the XStormy16 port.  This fixes several gcc testsuite
  failures that need this particular function.

Cheers
  Nick

libgcc/ChangeLog
2012-07-31  Nick Clifton  

* config/stormy16/lib2funcs.c (__clrsbhi2): New function.
Implements __clrsb for an HImode argument.
* config/stormy16/clrsbhi2.c: New file.
* config/stormy16/t-stormy16 (LIB2ADD): Add clrsbhi2.c.

Index: libgcc/config/stormy16/clrsbhi2.c
===
--- libgcc/config/stormy16/clrsbhi2.c   (revision 0)
+++ libgcc/config/stormy16/clrsbhi2.c   (working copy)
@@ -0,0 +1,2 @@
+#define XSTORMY16_CLRSBHI2
+#include "lib2funcs.c"
Index: libgcc/config/stormy16/t-stormy16
===
--- libgcc/config/stormy16/t-stormy16   (revision 189996)
+++ libgcc/config/stormy16/t-stormy16   (working copy)
@@ -33,6 +33,7 @@
$(srcdir)/config/stormy16/clzhi2.c \
$(srcdir)/config/stormy16/ctzhi2.c \
$(srcdir)/config/stormy16/ffshi2.c \
+   $(srcdir)/config/stormy16/clrsbhi2.c \
$(srcdir)/config/stormy16/cmpsi2.c \
$(srcdir)/config/stormy16/ucmpsi2.c
 
Index: libgcc/config/stormy16/lib2funcs.c
===
--- libgcc/config/stormy16/lib2funcs.c  (revision 189996)
+++ libgcc/config/stormy16/lib2funcs.c  (working copy)
@@ -311,6 +311,22 @@
 }
 #endif
 
+#ifdef XSTORMY16_CLRSBHI2
+/* Returns the number of leading redundant sign bits in X.
+   I.e. the number of bits following the most significant bit which are
+   identical to it.  There are no special cases for 0 or other values.  */
+
+int
+__clrsbhi2 (HWtype x)
+{
+  if (x < 0)
+x = ~x;
+  if (x == 0)
+return 15;
+  return __builtin_clz (x) - 1;
+}
+#endif
+
 #ifdef XSTORMY16_UCMPSI2
 /* Performs an unsigned comparison of two 32-bit values: A and B.
If A is less than B, then 0 is returned.  If A is greater than B,


Re: [PATCH 0/2] Convert s390 to atomic optabs, v2

2012-07-31 Thread Richard Guenther
On Mon, 30 Jul 2012, Richard Henderson wrote:

> The atomic_load/storedi_1 patterns are fixed to use LM, STM.
> 
> I've had a go at generating better code in the HQImode CAS
> loop for aligned memory, but I don't know that I'd call it
> the most efficient thing ever.  Some of this is due to 
> deficiencies in other parts of the compiler (including the
> s390 backend):
> 
>   (1) MEM_ALIGN can't pass down full align+ofs data that we had
>   during cfgexpand.  This means the opportunities for using
>   the "aligned" path are less than they ought to be.
> 
>   (2) In get_pointer_alignment (used by get_builtin_sync_mem),
>   we don't consider an ADDR_EXPR to return the full alignment
>   that the type is due.  I'm sure this is to work around some
>   other sort of usage via the  builtins, but it's
>   less-than-handy in this case.
> 
>   I wonder if in get_builtin_sync_mem we ought to be using
>   get_object_alignment (build_fold_indirect_ref (addr)) instead?
> 
>   Consider
> 
>   struct S { int x; unsigned short y; } g_s;
>   unsigned short o, n;
>   void good() {
> __builtin_compare_exchange (&g_s.y, &o, n, 0, 0, 0);
>   }
>   void bad(S *p_s) {
> __builtin_compare_exchange (&p_s->y, &o, n, 0, 0, 0);
>   }
> 
>   where GOOD produces the aligned MEM that we need, and BAD doesn't.

You cannot generally use get_object_alignment here.  Once we have
an address in the middle-end we treat it as generic pointer, happily
not caring about the actual pointer types.  But it seems we do

  /* The alignment needs to be at least according to that of the mode.  */
  set_mem_align (mem, MAX (GET_MODE_ALIGNMENT (mode),
   get_pointer_alignment (loc)));

anyway?  What do we expect __builtin_compare_exchange to do for
unaligned inputs?  Like

typedef int uint __attribute__((aligned(8)));
unsigned short o, n;
void very_bad (uint *p) {
  __builtin_compare_exchange (p, &o, n, 0, 0, 0);
}

?  Doesn't the above set a wrong alignment?

Back to using get_object_alignment - you cannot blindly use
build_fold_indirect_ref at least, you could use get_object_alignment
on the operand of an ADDR_EXPR address but I am sure we can construct
a testcase where that would give a wrong answer, too (maybe not
easily without violating the C standards rule that pointers have
to be aligned according to their type ... but nobody in practice
follows this and the middle-end does not require this either).

Thus, the bad news is that it's hard for the middle-end to
recover alignment of a memory access that is represented as
a builtin function call that takes addresses as parameters
(which also makes them address-taken and thus possibly aliased).
Didn't Andrew have some patches to introduce a GIMPLE_ATOMIC
eventually side-stepping this issue (maybe that used addresses, too)?

Richard.

>   (3) Support for IC, and ICM via the insv pattern is lacking.
>   I've added a tiny bit of support here, in the form of using
>   the existing strict_low_part patterns, but most definitely we
>   could do better.
> 
>   (4) The *sethighpartsi and *sethighpartdi_64 patterns ought to be
>   more different.  As is, we can't insert into bits 48-56 of a
>   DImode quantity, because we don't generate ICM for DImode,
>   only ICMH.
> 
>   (5) Missing support for RISBGZ in the form of an extv/z expander.
>   The existing *extv/z splitters probably ought to be conditionalized
>   on !Z10.
> 
>   (6) The strict_low_part patterns should allow registers for at
>   least Z10.  The SImode strict_low_part can use LR everywhere.
> 
>   (7) RISBGZ could be used for a 3-address constant lshrsi3 before
>   srlk is available.
> 
> For the GOOD function above, and this patch set, for -O3 -march=z10:
> 
> larl%r3,s+4
> lhrl%r0,o
> lhi %r2,1
> l   %r1,0(%r3)
> nilh%r1,0
> .L2:
> lr  %r5,%r1
> larl%r12,n
> lr  %r4,%r1
> risbg   %r4,%r0,32,47,16
> icm %r5,3,0(%r12)
> cs  %r4,%r5,0(%r3)
> je  .L3
> lr  %r5,%r4
> nilh%r5,0
> cr  %r5,%r1
> lr  %r1,%r5
> jne .L2
> lhi %r2,0
> .L3:
> srl %r4,16
> sthrl   %r4,o
> 
> Odd things:
> 
>* O is forced into a register before reaching the expander, so we
>  get the RISBG for that.  N is left in a memory and so we commit
>  to using ICM for that.  Further, because of how strict_low_part
>  is implemented we're committed to leaving that in memory.
> 
>* We don't optimize the loop and hoist the LARL of N outside the loop.
> 
>* Given that we're having to zap the mask in %r1 for the second
>  compare anyway, I wonder if RISBG is really beneficial over OR.
>  Is RISBG (or ICM for that matter) any faster (or even smaller)?
> 
> 
> r~
> 
> 
> Richard Henderson (2):

Re: [Patch, Fortran] Update c_funloc/c_f_procpointer for TS29113

2012-07-31 Thread Mikael Morin
On 26/07/2012 16:01, Tobias Burnus wrote:
> TS29113 allows also non interoperable procedures with
> c_funloc/c_f_procpointer; hence, this patch allows them with -std=f2008ts:
> 
> "The function C F PROCPOINTER from the intrinsic module ISO C BINDING
> has the restriction in ISO/IEC 1539-1:2010 that CPTR and FPTR shall not
> be the C address and interface of a noninteroperable Fortran procedure.
> 
> "The function C FUNLOC from the intrinsic module ISO C BINDING has the
> restriction in ISO/IEC 1539-1:2010 that its argument shall be
> interoperable.
> 
> "These restrictions are removed."
> 
> 
> Additionally, I changed "parameter" to "argument" and added a diagnostic
> that the first argument to c_f_pointer/c_f_procpointer is the correct
> one - before both accepted c_ptr and c_funptr.
> 
> Build and regtested on x86-64-linux.
> OK for the trunk?
> 
Yes, thanks

Mikael


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Richard Guenther
On Tue, Jul 31, 2012 at 5:17 AM, Dehao Chen  wrote:
> Hi,
>
> This patch fixed the problem when a LOOP_EXIT edge for the inner loop
> happened to target at the LOOP_LATCH of the outer loop. As the outer
> loop is processed first, the LOOP_BRANCH heuristic is honored
> (first_match), thus the inner loop's trip count is 0. (The attached
> unittest demonstrates this).
>
> Bootstrapped and passed gcc regression test.
>
> Is it ok for trunk?
>
> Thanks,
> Dehao
>
> gcc/ChangeLog
>
> 2012-07-30  Dehao Chen  
>
> * predict.c (predict_loops): Fix the prediction of LOOP_BRANCH.
>
> gcc/testsuite/ChangeLog
>
> 2012-07-31  Dehao Chen  
>
> * gcc.dg/predict-7.c: New test.
>
> Index: gcc/testsuite/gcc.dg/predict-7.c
> ===
> --- gcc/testsuite/gcc.dg/predict-7.c(revision 0)
> +++ gcc/testsuite/gcc.dg/predict-7.c(revision 0)
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
> +
> +extern int global;
> +
> +int bar (int);
> +
> +void foo (int base)
> +{
> +  int i;
> +  while (global < 10)
> +for (i = base; i < 10; i++)
> +  bar (i);
> +}
> +
> +/* { dg-final { scan-tree-dump-times "loop branch heuristics" 0
> "profile_estimate"} } */
> +/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
> Index: gcc/predict.c
> ===
> --- gcc/predict.c   (revision 189835)
> +++ gcc/predict.c   (working copy)
> @@ -1404,7 +1404,7 @@
>
>   /* Loop branch heuristics - predict an edge back to a
>  loop's head as taken.  */
> - if (bb == loop->latch)
> + if (bb == loop->latch && bb->loop_father == loop)
> {
>   e = find_edge (loop->latch, loop->header);
>   if (e)

I think this heuristic should instead move out of the loop iterating over loop
nodes and be done before like

  if (loop->latch)
{
   e = find_edge (loop->latch, loop->header);
   ...
}

which also makes header_found initialized before we visit loop blocks.

Instead the code

  /* Loop exit heuristics - predict an edge exiting the loop if the
 conditional has no loop header successors as not taken.  */
  if (!header_found
  /* If we already used more reliable loop exit predictors, do not
 bother with PRED_LOOP_EXIT.  */
...
  FOR_EACH_EDGE (e, ei, bb->succs)
if (e->dest->index < NUM_FIXED_BLOCKS
|| !flow_bb_inside_loop_p (loop, e->dest))
  predict_edge (e, PRED_LOOP_EXIT, probability);

looks wrong for bb's that are parts of an inner loop of loop - assuming we
only want to predict exits from loop and not exits from an inner loop
that also happen to exit loop (we will do that when predicting the inner loop).

Is that what you experienced?

Thanks,
Richard.


[PATCH]: Fix -Wmissing-format-attribute warnings

2012-07-31 Thread Uros Bizjak
Hello!

This patch removes all -Wmissing-format-attribute warnings on
alphaev68-pc-linux-gnu native build.

gcc/ChangeLog:

2012-07-31  Uros Bizjak  

* collect2.c (collect_execute): Add format string to fatal_error call.
* dwarf2asm.c (dw2_asm_output_vms_delta): Add format string to
dw2_asm_output_delta call.
* dwarf2out.c (output_die): Add format string to
dw2_asm_output_data call.
* tree-ssa-uninit.c (dump_predicates): Add format string to
fprintf call.
* fold-const.c (fold_overflow_warning): Add format string to
warning call.
* gcc.c (execute): Add format string to fatal_error call.
* lto-wrapper.c (collect_execute): Add format strings to
fatal_perror and fatal calls.
* opts.c (print_filtered_help): Add format strings to snprintf calls.
* tree-sra.c (dump_dereferences_table): Add format string to
fprintf call.
* tree-ssa-uninit.c (dump_predicates): Ditto.

gcc/c-family/ChangeLog:

2012-07-31  Uros Bizjak  

* c-common.c (c_parse_error): Add format string to error calls.

gcc/c/ChangeLog:

2012-07-31  Uros Bizjak  

* c-typeck.c (convert_arguments): Add format string to error call.
(error_init): Ditto.
(build_unary_op): Add format string to error_at call.
(build_binary_op): Ditto.
(pedwarn_init): Add format string to pedwarn call.
(warning_init): Add format string to warning call.
* c-decl.c (grokdeclarator): Add format string to error call.
(grokparms): Ditto.

gcc/cp/ChangeLog:

2012-07-31  Uros Bizjak  

* typeck.c (cp_build_binary_op): Add format string to error call.
(cp_build_unary_op): Ditto.
* decl.c (grokdeclarator): Add format string to error call.
(grokparms): Ditto.
* pt.c (tsubst_copy_and_build): Ditto.
* parser.c (cp_parser_check_type_definition): Ditto.
* cvt.c (ocp_convert): Ditto.

Tested on alphaev68-pc-linux-gnu and x86_64-pc-linux-gnu.

OK for mainline?

Uros.
Index: gcc/dwarf2asm.c
===
--- gcc/dwarf2asm.c (revision 189995)
+++ gcc/dwarf2asm.c (working copy)
@@ -163,7 +163,7 @@ dw2_asm_output_vms_delta (int size ATTRIBUTE_UNUSE
 #ifndef ASM_OUTPUT_DWARF_VMS_DELTA
   /* VMS Delta is only special on ia64-vms, but this function also gets
  called on alpha-vms so it has to do something sane.  */
-  dw2_asm_output_delta (size, lab1, lab2, comment);
+  dw2_asm_output_delta (size, lab1, lab2, "%s", comment);
 #else
   ASM_OUTPUT_DWARF_VMS_DELTA (asm_out_file, size, lab1, lab2);
   if (flag_debug_asm && comment)
Index: gcc/tree-ssa-uninit.c
===
--- gcc/tree-ssa-uninit.c   (revision 189995)
+++ gcc/tree-ssa-uninit.c   (working copy)
@@ -599,7 +599,7 @@ dump_predicates (gimple usestmt, size_t num_preds,
 {
   size_t i, j;
   VEC(use_pred_info_t, heap) *one_pred_chain;
-  fprintf (dump_file, msg);
+  fprintf (dump_file, "%s", msg);
   print_gimple_stmt (dump_file, usestmt, 0, 0);
   fprintf (dump_file, "is guarded by :\n");
   /* do some dumping here:  */
Index: gcc/c-family/c-common.c
===
--- gcc/c-family/c-common.c (revision 189995)
+++ gcc/c-family/c-common.c (working copy)
@@ -8938,11 +8938,11 @@ c_parse_error (const char *gmsgid, enum cpp_ttype
   message = NULL;
 }
   else
-error (gmsgid);
+error ("%s", gmsgid);
 
   if (message)
 {
-  error (message);
+  error ("%s", message);
   free (message);
 }
 #undef catenate_messages
Index: gcc/c/c-convert.c
===
--- gcc/c/c-convert.c   (revision 189995)
+++ gcc/c/c-convert.c   (working copy)
@@ -80,7 +80,7 @@ convert (tree type, tree expr)
   if ((invalid_conv_diag
= targetm.invalid_conversion (TREE_TYPE (expr), type)))
 {
-  error (invalid_conv_diag);
+  error ("%s", invalid_conv_diag);
   return error_mark_node;
 }
 
Index: gcc/c/c-typeck.c
===
--- gcc/c/c-typeck.c(revision 189995)
+++ gcc/c/c-typeck.c(working copy)
@@ -3167,7 +3167,7 @@ convert_arguments (tree typelist, VEC(tree,gc) *va
   else if ((invalid_func_diag =
targetm.calls.invalid_arg_for_unprototyped_fn (typelist, 
fundecl, val)))
{
- error (invalid_func_diag);
+ error ("%s", invalid_func_diag);
  return -1;
}
   else
@@ -3452,7 +3452,7 @@ build_unary_op (location_t location,
   if ((invalid_op_diag
= targetm.invalid_unary_op (code, TREE_TYPE (xarg))))
 {
-  error_at (location, invalid_op_diag);
+  error_at (location, "%s", invalid_op_diag);
   return error_mark_node;
 }
 
@@ -5930,7 +5930,7 @@ error_init (const char *gmsgid)
   c

Re: [patch] PR pch/53880

2012-07-31 Thread Steven Bosscher
On Tue, Jul 31, 2012 at 7:04 AM, Laurynas Biveinis
 wrote:
> Steven -
>
>> Bootstrapped&tested on powerpc64-unknown-linux-gnu. OK for trunk?
>
> Thanks for working on this. It looks good, couple of minor comments:

Thanks for the review. I've made the adjustments you suggested and
committed the patch.

Ciao!
Steven


[Patch, Fortran] PR 42418: PROCEDURE: Rejects interface which is both specific and generic procedure

2012-07-31 Thread Janus Weil
Hi all,

here is a patch which does several things:

1) It fixes the original problem in the PR (cf. comments 0 and 6) by
adding code which checks if a generic interface has a specific
procedure of the same name (see resolve_procedure_interface).

2) It fixes other problems found along the way (cf. comment 7), which
are all due to the fact that the checks for interface declarations in
PROCEDURE statements came too early (checks for generics, statement
functions and intrinsics are moved from "match_procedure_interface" to
"resolve_procedure_interface"). For the intrinsics we do the same
trick as for PR51081, by setting the flavor at parsing stage and later
setting the INTRINSIC attribute at resolution stage.

3) It does minor cleanup related to "gfc_is_intrinsic", by moving some
checks into the routine, which were typically done before calling the
routine in several places. (This is also related to the recent patch
for PR51081.) Note that 'use_assoc' is not sufficient to identify a
routine as non-intrinsic (apparently this was wrongly assumed in some
cases).

The patch was regtested successfully on x86_64-unknown-linux-gnu. Ok for trunk?

Cheers,
Janus


2012-07-31  Janus Weil  

PR fortran/42418
* decl.c (match_procedure_interface): Move some checks to
'resolve_procedure_interface'. Set flavor if appropriate.
* expr.c (gfc_check_pointer_assign): Cleanup of 'gfc_is_intrinsic'.
* intrinsic.c (gfc_is_intrinsic): Additional checks for attributes which
identify a procedure as being non-intrinsic.
* resolve.c (resolve_procedure_interface): Checks moved here from
'match_procedure_interface'. Minor cleanup.
(resolve_formal_arglist,resolve_symbol): Cleanup of
'resolve_procedure_interface'
(resolve_actual_arglist,is_external_proc): Cleanup of
'gfc_is_intrinsic'.

2012-07-31  Janus Weil  

PR fortran/42418
* gfortran.dg/proc_decl_29.f90: New.


pr42418_v4.diff
Description: Binary data


proc_decl_29.f90
Description: Binary data


[C++ PATCH] Introduce context struct for cxx_eval_constant_expression

2012-07-31 Thread Florian Weimer
The attached patch adds a context parameter to 
cxx_eval_constant_expression and its subprograms.  This way, we do not 
have to thread all context parameters manually.  This will simplify the 
introduction of additional location information, as suggested by Jason 
in .


Bootstrapped and tested on x86_64-redhat-linux-gnu.

--
Florian Weimer / Red Hat Product Security Team
2012-07-31  Florian Weimer  

	* semantics.c (cxx_eval_builtin_function_call): Introduce
	const cxx_eval_constant_ctx * parameter.
	(cxx_bind_parameters_in_call): Likewise.
	(cxx_eval_call_expression): Likewise.
	(VERIFY_CONSTANT): Likewise.
	(cxx_eval_unary_expression): Likewise.
	(cxx_eval_binary_expression): Likewise.
	(cxx_eval_conditional_expression): Likewise.
	(cxx_eval_array_reference): Likewise.
	(cxx_eval_component_reference): Likewise.
	(cxx_eval_bit_field_ref): Likewise.
	(cxx_eval_logical_expression): Likewise.
	(cxx_eval_bare_aggregate): Likewise.
	(cxx_eval_vec_init_1): Likewise.
	(cxx_eval_vec_init): Likewise.
	(cxx_eval_indirect_ref): Likewise.
	(cxx_eval_vec_perm_expr): Likewise.
	(cxx_eval_constant_expression): Likewise.
	(cxx_eval_outermost_constant_expr): Pass cxx_eval_constant_ctx
	object as parameter.
	(is_sub_constant_expr): Pass cxx_eval_constant_ctx object as
	parameter.

diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index b27e8ab..94a0633 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -6222,10 +6222,19 @@ typedef struct GTY(()) constexpr_call {
 
 static GTY ((param_is (constexpr_call))) htab_t constexpr_call_table;
 
+/* Context passed down from cxx_eval_constant_expression.
+   Called functions can copy this data structure, so if changes need
+   to be passed upwards, pointers must be used. */
+typedef struct {
+  bool allow_non_constant;
+  bool addr;
+  bool *non_constant_p;
+} cxx_eval_constant_ctx;
+
 static tree cxx_eval_constant_expression (const constexpr_call *, tree,
-	  bool, bool, bool *);
-static tree cxx_eval_vec_perm_expr (const constexpr_call *, tree, bool, bool,
-bool *);
+	  const cxx_eval_constant_ctx *);
+static tree cxx_eval_vec_perm_expr (const constexpr_call *, tree,
+const cxx_eval_constant_ctx *ctx);
 
 
 /* Compute a hash value for a constexpr call representation.  */
@@ -6351,8 +6360,7 @@ lookup_parameter_binding (const constexpr_call *call, tree t)
 
 static tree
 cxx_eval_builtin_function_call (const constexpr_call *call, tree t,
-bool allow_non_constant, bool addr,
-bool *non_constant_p)
+const cxx_eval_constant_ctx *ctx)
 {
   const int nargs = call_expr_nargs (t);
   tree *args = (tree *) alloca (nargs * sizeof (tree));
@@ -6360,13 +6368,11 @@ cxx_eval_builtin_function_call (const constexpr_call *call, tree t,
   int i;
   for (i = 0; i < nargs; ++i)
 {
-  args[i] = cxx_eval_constant_expression (call, CALL_EXPR_ARG (t, i),
-	  allow_non_constant, addr,
-	  non_constant_p);
-  if (allow_non_constant && *non_constant_p)
+  args[i] = cxx_eval_constant_expression (call, CALL_EXPR_ARG (t, i), ctx);
+  if (ctx->allow_non_constant && *ctx->non_constant_p)
 	return t;
 }
-  if (*non_constant_p)
+  if (*ctx->non_constant_p)
 return t;
   new_call = build_call_array_loc (EXPR_LOCATION (t), TREE_TYPE (t),
CALL_EXPR_FN (t), nargs, args);
@@ -6397,8 +6403,7 @@ adjust_temp_type (tree type, tree temp)
 static void
 cxx_bind_parameters_in_call (const constexpr_call *old_call, tree t,
  constexpr_call *new_call,
-			 bool allow_non_constant,
-			 bool *non_constant_p)
+			 const cxx_eval_constant_ctx *ctx)
 {
   const int nargs = call_expr_nargs (t);
   tree fun = new_call->fundef->decl;
@@ -6408,22 +6413,23 @@ cxx_bind_parameters_in_call (const constexpr_call *old_call, tree t,
 {
   tree x, arg;
   tree type = parms ? TREE_TYPE (parms) : void_type_node;
+  cxx_eval_constant_ctx ctx1;
   /* For member function, the first argument is a pointer to the implied
  object.  And for an object contruction, don't bind `this' before
  it is fully constructed.  */
   if (i == 0 && DECL_CONSTRUCTOR_P (fun))
 goto next;
   x = get_nth_callarg (t, i);
-  arg = cxx_eval_constant_expression (old_call, x, allow_non_constant,
-	  TREE_CODE (type) == REFERENCE_TYPE,
-	  non_constant_p);
+  ctx1 = *ctx;
+  ctx1.addr = TREE_CODE (type) == REFERENCE_TYPE;
+  arg = cxx_eval_constant_expression (old_call, x, &ctx1);
   /* Don't VERIFY_CONSTANT here.  */
-  if (*non_constant_p && allow_non_constant)
+  if (*ctx->non_constant_p && ctx->allow_non_constant)
 	return;
   /* Just discard ellipsis args after checking their constantitude.  */
   if (!parms)
 	continue;
-  if (*non_constant_p)
+  if (*ctx->non_constant_p)
 	/* Don't try to adjust the type of non-constant args.  */
 	goto next;
 

Commit: FRV: Fix typo in definition of QUAD macro

2012-07-31 Thread Nick Clifton
Hi Guys,

  I am checking in the patch below as an obvious fix for the definition
  of the QUAD macro in the FRV backend.

Cheers
  Nick

gcc/ChangeLog
2012-07-31  Nick Clifton  

* config/frv/frv.c (QUAD): Fix typo.

Index: gcc/config/frv/frv.c
===
--- gcc/config/frv/frv.c(revision 189996)
+++ gcc/config/frv/frv.c(working copy)
@@ -8435,7 +8435,7 @@
   build_function_type_list (RET, T1, T2, T3, NULL_TREE)
 
 #define QUAD(RET, T1, T2, T3, T4) \
-  build_function_type_list (RET, T1, T2, T3, NULL_TREE)
+  build_function_type_list (RET, T1, T2, T3, T4, NULL_TREE)
 
   tree void_ftype_void = build_function_type_list (voidt, NULL_TREE);
 


[patch] Dump slim RTL to assembly file if the final dump is slim

2012-07-31 Thread Steven Bosscher
Hello,

For me, the slim RTL dumps are much easier to read than the default
lisp-like representation. I use the -dAP option frequently, to see
where an assembly instruction came from. This patch scratches an itch:
The insns dumped to the assembly file are always dumped as lisp-like,
ignoring the "slim" in the -fdump-rtl-final-slim flag. With the patch
applied, final will dump slim RTL if the final dump is asked as slim
RTL.

Bootstrapped&tested on x86_64-unknown-linux-gnu. OK for trunk?

Ciao!
Steven



* sched-vis.c (dump_insn_slim): Print print_rtx_head at the
start of each new line.
* final.c (final_scan_insn): If the final dump is requested as
slim RTL, dump slim RTL to the assembly file also.

Index: sched-vis.c
===
--- sched-vis.c (revision 189997)
+++ sched-vis.c (working copy)
@@ -767,11 +767,13 @@ dump_insn_slim (FILE *f, const_rtx x)
   rtx note;

   print_insn (t, x, 1);
+  fputs (print_rtx_head, f);
   fputs (t, f);
   putc ('\n', f);
   if (INSN_P (x) && REG_NOTES (x))
 for (note = REG_NOTES (x); note; note = XEXP (note, 1))
   {
+   fputs (print_rtx_head, f);
 print_value (t, XEXP (note, 0), 1);
fprintf (f, "  %s: %s\n",
 GET_REG_NOTE_NAME (REG_NOTE_KIND (note)), t);
Index: final.c
===
--- final.c (revision 189997)
+++ final.c (working copy)
@@ -2747,11 +2747,16 @@ final_scan_insn (rtx insn, FILE *file, int optimiz
insn_code_number = recog_memoized (insn);
cleanup_subreg_operands (insn);

-   /* Dump the insn in the assembly for debugging.  */
+   /* Dump the insn in the assembly for debugging (-dAP).
+  If the final dump is requested as slim RTL, dump slim
+  RTL to the assembly file also.  */
if (flag_dump_rtl_in_asm)
  {
print_rtx_head = ASM_COMMENT_START;
-   print_rtl_single (asm_out_file, insn);
+   if (! (dump_flags & TDF_SLIM))
+ print_rtl_single (asm_out_file, insn);
+   else
+ dump_insn_slim (asm_out_file, insn);


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Dehao Chen
Are you suggesting a patch like this:

Index: gcc/predict.c
===
--- gcc/predict.c   (revision 189835)
+++ gcc/predict.c   (working copy)
@@ -1319,6 +1319,7 @@
   tree loop_bound_var = NULL;
   tree loop_iv_base = NULL;
   gimple stmt = NULL;
+  int header_found = 0;

   exits = get_loop_exit_edges (loop);
   n_exits = VEC_length (edge, exits);
@@ -1387,9 +1388,20 @@

   bbs = get_loop_body (loop);

+  /* Loop branch heuristics - predict an edge back to a
+loop's head as taken.  */
+  if (loop->latch && loop->latch->loop_father == loop)
+   {
+ edge e = find_edge (loop->latch, loop->header);
+ if (e)
+   {
+ header_found = 1;
+ predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
+   }
+   }
+
   for (j = 0; j < loop->num_nodes; j++)
{
- int header_found = 0;
  edge e;
  edge_iterator ei;

@@ -1402,21 +1414,9 @@
  if (predicted_by_p (bb, PRED_CONTINUE))
continue;

- /* Loop branch heuristics - predict an edge back to a
-loop's head as taken.  */
- if (bb == loop->latch)
-   {
- e = find_edge (loop->latch, loop->header);
- if (e)
-   {
- header_found = 1;
- predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
-   }
-   }
-
  /* Loop exit heuristics - predict an edge exiting the loop if the
 conditional has no loop header successors as not taken.  */
- if (!header_found
+ if (!(header_found && bb == loop->latch)
  /* If we already used more reliable loop exit predictors, do not
 bother with PRED_LOOP_EXIT.  */
  && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)

On Tue, Jul 31, 2012 at 5:18 PM, Richard Guenther
 wrote:
> On Tue, Jul 31, 2012 at 5:17 AM, Dehao Chen  wrote:
>> Hi,
>>
>> This patch fixed the problem when a LOOP_EXIT edge for the inner loop
>> happened to target at the LOOP_LATCH of the outer loop. As the outer
>> loop is processed first, the LOOP_BRANCH heuristic is honored
>> (first_match), thus the inner loop's trip count is 0. (The attached
>> unittest demonstrates this).
>>
>> Bootstrapped and passed gcc regression test.
>>
>> Is it ok for trunk?
>>
>> Thanks,
>> Dehao
>>
>> gcc/ChangeLog
>>
>> 2012-07-30  Dehao Chen  
>>
>> * predict.c (predict_loops): Fix the prediction of LOOP_BRANCH.
>>
>> gcc/testsuite/ChangeLog
>>
>> 2012-07-31  Dehao Chen  
>>
>> * gcc.dg/predict-7.c: New test.
>>
>> Index: gcc/testsuite/gcc.dg/predict-7.c
>> ===
>> --- gcc/testsuite/gcc.dg/predict-7.c(revision 0)
>> +++ gcc/testsuite/gcc.dg/predict-7.c(revision 0)
>> @@ -0,0 +1,17 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
>> +
>> +extern int global;
>> +
>> +int bar (int);
>> +
>> +void foo (int base)
>> +{
>> +  int i;
>> +  while (global < 10)
>> +for (i = base; i < 10; i++)
>> +  bar (i);
>> +}
>> +
>> +/* { dg-final { scan-tree-dump-times "loop branch heuristics" 0
>> "profile_estimate"} } */
>> +/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
>> Index: gcc/predict.c
>> ===
>> --- gcc/predict.c   (revision 189835)
>> +++ gcc/predict.c   (working copy)
>> @@ -1404,7 +1404,7 @@
>>
>>   /* Loop branch heuristics - predict an edge back to a
>>  loop's head as taken.  */
>> - if (bb == loop->latch)
>> + if (bb == loop->latch && bb->loop_father == loop)
>> {
>>   e = find_edge (loop->latch, loop->header);
>>   if (e)
>
> I think this heuristic should instead move out of the loop iterating over loop
> nodes and be done before like
>
>   if (loop->latch)
> {
>e = find_edge (loop->latch, loop->header);
>...
> }
>
> which also makes header_found initialized before we visit loop blocks.
>
> Instead the code
>
>   /* Loop exit heuristics - predict an edge exiting the loop if the
>  conditional has no loop header successors as not taken.  */
>   if (!header_found
>   /* If we already used more reliable loop exit predictors, do not
>  bother with PRED_LOOP_EXIT.  */
> ...
>   FOR_EACH_EDGE (e, ei, bb->succs)
> if (e->dest->index < NUM_FIXED_BLOCKS
> || !flow_bb_inside_loop_p (loop, e->dest))
>   predict_edge (e, PRED_LOOP_EXIT, probability);
>
> looks wrong for bb's that are parts of an inner loop of loop - assuming we
> only want to predicate exits from loop and not exits from an inner loop
> that also happen to exit loop (we will do that when predicating the inner 
> l

Re: [patch] Dump slim RTL to assembly file if the final dump is slim

2012-07-31 Thread Richard Guenther
On Tue, Jul 31, 2012 at 12:13 PM, Steven Bosscher  wrote:
> Hello,
>
> For me, the slim RTL dumps are much easier to read than the default
> lisp-like representation. I use the -dAP option frequently, to see
> where an assembly instruction came from. This patch scratches an itch:
> The insns dumped to the assembly file are always dumped as lisp-like,
> ignoring the "slim" in the -fdump-rtl-final-slim flag. With the patch
> applied, final will dump slim RTL if the final dump is asked as slim
> RTL.
>
> Bootstrapped&tested on x86_64-unknown-linux-gnu. OK for trunk?

Ok.

Thanks,
Richard.

> Ciao!
> Steven
>
>
>
> * sched-vis.c (dump_insn_slim): Print print_rtx_head at the
> start of each new line.
> * final.c (final_scan_insn): If the final dump is requested as
> slim RTL, dump slim RTL to the assembly file also.
>
> Index: sched-vis.c
> ===
> --- sched-vis.c (revision 189997)
> +++ sched-vis.c (working copy)
> @@ -767,11 +767,13 @@ dump_insn_slim (FILE *f, const_rtx x)
>rtx note;
>
>print_insn (t, x, 1);
> +  fputs (print_rtx_head, f);
>fputs (t, f);
>putc ('\n', f);
>if (INSN_P (x) && REG_NOTES (x))
>  for (note = REG_NOTES (x); note; note = XEXP (note, 1))
>{
> +   fputs (print_rtx_head, f);
>  print_value (t, XEXP (note, 0), 1);
> fprintf (f, "  %s: %s\n",
>  GET_REG_NOTE_NAME (REG_NOTE_KIND (note)), t);
> Index: final.c
> ===
> --- final.c (revision 189997)
> +++ final.c (working copy)
> @@ -2747,11 +2747,16 @@ final_scan_insn (rtx insn, FILE *file, int optimiz
> insn_code_number = recog_memoized (insn);
> cleanup_subreg_operands (insn);
>
> -   /* Dump the insn in the assembly for debugging.  */
> +   /* Dump the insn in the assembly for debugging (-dAP).
> +  If the final dump is requested as slim RTL, dump slim
> +  RTL to the assembly file also.  */
> if (flag_dump_rtl_in_asm)
>   {
> print_rtx_head = ASM_COMMENT_START;
> -   print_rtl_single (asm_out_file, insn);
> +   if (! (dump_flags & TDF_SLIM))
> + print_rtl_single (asm_out_file, insn);
> +   else
> + dump_insn_slim (asm_out_file, insn);


Commit: FRV: Add support for -fstack-usage

2012-07-31 Thread Nick Clifton
Hi Guys,

  I am applying the patch below to add support for the -fstack-usage
  option to the FRV backend.

Cheers
  Nick

gcc/ChangeLog
2012-07-31  Nick Clifton  

* config/frv/frv.c (frv_expand_prologue): Report stack usage.

gcc/testsuite/ChangeLog
2012-07-31  Nick Clifton  

* gcc.dg/stack-usage-1.c (SIZE): Define for FRV.

Index: gcc/config/frv/frv.c
===
--- gcc/config/frv/frv.c(revision 190002)
+++ gcc/config/frv/frv.c(working copy)
@@ -1761,6 +1761,9 @@
   if (TARGET_DEBUG_STACK)
 frv_debug_stack (info);
 
+  if (flag_stack_usage_info)
+current_function_static_stack_size = info->total_size;
+
   if (info->total_size == 0)
 return;
 
Index: gcc/testsuite/gcc.dg/stack-usage-1.c
===
--- gcc/testsuite/gcc.dg/stack-usage-1.c(revision 189996)
+++ gcc/testsuite/gcc.dg/stack-usage-1.c(working copy)
@@ -62,6 +62,8 @@
 #  define SIZE 254
 #elif defined (__sh__)
 #  define SIZE 252
+#elif defined (__frv__)
+#  define SIZE 248
 #else
 #  define SIZE 256
 #endif


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Jan Hubicka
> Are you suggesting a patch like this:
> 
> Index: gcc/predict.c
> ===
> --- gcc/predict.c (revision 189835)
> +++ gcc/predict.c (working copy)
> @@ -1319,6 +1319,7 @@
>tree loop_bound_var = NULL;
>tree loop_iv_base = NULL;
>gimple stmt = NULL;
> +  int header_found = 0;

We should use bool these days.

> 
>exits = get_loop_exit_edges (loop);
>n_exits = VEC_length (edge, exits);
> @@ -1387,9 +1388,20 @@
> 
>bbs = get_loop_body (loop);
> 
> +  /* Loop branch heuristics - predict an edge back to a
> +  loop's head as taken.  */
> +  if (loop->latch && loop->latch->loop_father == loop)
> + {
> +   edge e = find_edge (loop->latch, loop->header);
> +   if (e)
> + {
> +   header_found = 1;
> +   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
> + }
> + }
> +
>for (j = 0; j < loop->num_nodes; j++)
>   {
> -   int header_found = 0;
> edge e;
> edge_iterator ei;
> 
> @@ -1402,21 +1414,9 @@
> if (predicted_by_p (bb, PRED_CONTINUE))
>   continue;
> 
> -   /* Loop branch heuristics - predict an edge back to a
> -  loop's head as taken.  */
> -   if (bb == loop->latch)
> - {
> -   e = find_edge (loop->latch, loop->header);
> -   if (e)
> - {
> -   header_found = 1;
> -   predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
> - }
> - }
> -
> /* Loop exit heuristics - predict an edge exiting the loop if the
>conditional has no loop header successors as not taken.  */
> -   if (!header_found
> +   if (!(header_found && bb == loop->latch)

Yes, this seems reasonable to me.

> > which also makes header_found initialized before we visit loop blocks.
> >
> > Instead the code
> >
> >   /* Loop exit heuristics - predict an edge exiting the loop if the
> >  conditional has no loop header successors as not taken.  */
> >   if (!header_found
> >   /* If we already used more reliable loop exit predictors, do 
> > not
> >  bother with PRED_LOOP_EXIT.  */
> > ...
> >   FOR_EACH_EDGE (e, ei, bb->succs)
> > if (e->dest->index < NUM_FIXED_BLOCKS
> > || !flow_bb_inside_loop_p (loop, e->dest))
> >   predict_edge (e, PRED_LOOP_EXIT, probability);
> >
> > looks wrong for bb's that are parts of an inner loop of loop - assuming we
> > only want to predicate exits from loop and not exits from an inner loop
> > that also happen to exit loop (we will do that when predicating the inner 
> > loop).
> 
> You are right. And if we want to change this, we'd also need to modify
> get_loop_exit_edges to only count edges whose src is in the same loop
> level. However, this is relatively minor issue because it only
> predicts inaccurate bias ratio, while in the testcase I gave,
> LOOP_BRANCH is predicting in the wrong direction.

Indeed, it is not the most important thing around. 

The patch seems OK.  We should get the statistics about branch prediction
effectiveness working again.
They have been broken since we moved tree-profile early.  I will try to look into it.

Honza

> 
> Thanks,
> Dehao
> 
> 
> >
> > Is that what you experienced?
> >
> > Thanks,
> > Richard.


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Richard Guenther
On Tue, Jul 31, 2012 at 12:20 PM, Dehao Chen  wrote:
> Are you suggesting a patch like this:
>
> Index: gcc/predict.c
> ===
> --- gcc/predict.c   (revision 189835)
> +++ gcc/predict.c   (working copy)
> @@ -1319,6 +1319,7 @@
>tree loop_bound_var = NULL;
>tree loop_iv_base = NULL;
>gimple stmt = NULL;
> +  int header_found = 0;
>
>exits = get_loop_exit_edges (loop);
>n_exits = VEC_length (edge, exits);
> @@ -1387,9 +1388,20 @@
>
>bbs = get_loop_body (loop);
>
> +  /* Loop branch heuristics - predict an edge back to a
> +loop's head as taken.  */
> +  if (loop->latch && loop->latch->loop_father == loop)

Hmm, so the issue is that loop->latch does not belong to loop?  That looks
like a bogus loop structure.  Indeed we have the loop header of the inner
loop as latch of the outer loop.  It still looks ok to predict this as unlikely
as the edge is not only the latch edge of the outer loop but also an exit
of the inner loop.

Easier for profile would be to force canonicalization via

  loop_optimizer_init (LOOPS_NORMAL);

instead of

  loop_optimizer_init (0);
  if (dump_file && (dump_flags & TDF_DETAILS))
flow_loops_dump (dump_file, NULL, 0);

  mark_irreducible_loops ();


> +   {
> + edge e = find_edge (loop->latch, loop->header);
> + if (e)
> +   {
> + header_found = 1;
> + predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
> +   }
> +   }
> +
>for (j = 0; j < loop->num_nodes; j++)
> {
> - int header_found = 0;
>   edge e;
>   edge_iterator ei;
>
> @@ -1402,21 +1414,9 @@
>   if (predicted_by_p (bb, PRED_CONTINUE))
> continue;
>
> - /* Loop branch heuristics - predict an edge back to a
> -loop's head as taken.  */
> - if (bb == loop->latch)
> -   {
> - e = find_edge (loop->latch, loop->header);
> - if (e)
> -   {
> - header_found = 1;
> - predict_edge_def (e, PRED_LOOP_BRANCH, TAKEN);
> -   }
> -   }
> -

Yes until here,

>   /* Loop exit heuristics - predict an edge exiting the loop if the
>  conditional has no loop header successors as not taken.  */
> - if (!header_found
> + if (!(header_found && bb == loop->latch)

here instead

  !header_found
  && bb->loop_father == loop

>   /* If we already used more reliable loop exit predictors, do not
>  bother with PRED_LOOP_EXIT.  */
>   && !predicted_by_p (bb, PRED_LOOP_ITERATIONS_GUESSED)
>
> On Tue, Jul 31, 2012 at 5:18 PM, Richard Guenther
>  wrote:
>> On Tue, Jul 31, 2012 at 5:17 AM, Dehao Chen  wrote:
>>> Hi,
>>>
>>> This patch fixed the problem when a LOOP_EXIT edge for the inner loop
>>> happened to target at the LOOP_LATCH of the outer loop. As the outer
>>> loop is processed first, the LOOP_BRANCH heuristic is honored
>>> (first_match), thus the inner loop's trip count is 0. (The attached
>>> unittest demonstrates this).
>>>
>>> Bootstrapped and passed gcc regression test.
>>>
>>> Is it ok for trunk?
>>>
>>> Thanks,
>>> Dehao
>>>
>>> gcc/ChangeLog
>>>
>>> 2012-07-30  Dehao Chen  
>>>
>>> * predict.c (predict_loops): Fix the prediction of LOOP_BRANCH.
>>>
>>> gcc/testsuite/ChangeLog
>>>
>>> 2012-07-31  Dehao Chen  
>>>
>>> * gcc.dg/predict-7.c: New test.
>>>
>>> Index: gcc/testsuite/gcc.dg/predict-7.c
>>> ===
>>> --- gcc/testsuite/gcc.dg/predict-7.c(revision 0)
>>> +++ gcc/testsuite/gcc.dg/predict-7.c(revision 0)
>>> @@ -0,0 +1,17 @@
>>> +/* { dg-do compile } */
>>> +/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
>>> +
>>> +extern int global;
>>> +
>>> +int bar (int);
>>> +
>>> +void foo (int base)
>>> +{
>>> +  int i;
>>> +  while (global < 10)
>>> +for (i = base; i < 10; i++)
>>> +  bar (i);
>>> +}
>>> +
>>> +/* { dg-final { scan-tree-dump-times "loop branch heuristics" 0
>>> "profile_estimate"} } */
>>> +/* { dg-final { cleanup-tree-dump "profile_estimate" } } */
>>> Index: gcc/predict.c
>>> ===
>>> --- gcc/predict.c   (revision 189835)
>>> +++ gcc/predict.c   (working copy)
>>> @@ -1404,7 +1404,7 @@
>>>
>>>   /* Loop branch heuristics - predict an edge back to a
>>>  loop's head as taken.  */
>>> - if (bb == loop->latch)
>>> + if (bb == loop->latch && bb->loop_father == loop)
>>> {
>>>   e = find_edge (loop->latch, loop->header);
>>>   if (e)
>>
>> I think this heuristic should instead move out of the loop iterating over 
>> loop
>> nodes and be done before like
>>
>>   if (loop->latch)
>> {
>>e = find_edge (

Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Jan Hubicka
> On Tue, Jul 31, 2012 at 12:20 PM, Dehao Chen  wrote:
> > Are you suggesting a patch like this:
> >
> > Index: gcc/predict.c
> > ===
> > --- gcc/predict.c   (revision 189835)
> > +++ gcc/predict.c   (working copy)
> > @@ -1319,6 +1319,7 @@
> >tree loop_bound_var = NULL;
> >tree loop_iv_base = NULL;
> >gimple stmt = NULL;
> > +  int header_found = 0;
> >
> >exits = get_loop_exit_edges (loop);
> >n_exits = VEC_length (edge, exits);
> > @@ -1387,9 +1388,20 @@
> >
> >bbs = get_loop_body (loop);
> >
> > +  /* Loop branch heuristics - predict an edge back to a
> > +loop's head as taken.  */
> > +  if (loop->latch && loop->latch->loop_father == loop)
> 
> Hmm, so the issue is that loop->latch does not belong to loop?  That looks
> like a bogus loop structure.  Indeed we have the loop header of the inner
> loop as latch of the outer loop.  It still looks ok to predict this as 
> unlikely
> as the edge is not only the latch edge of the outer loop but also an exit
> of the inner loop.
> 
> Easier for profile would be to force canonicalization via
> 
>   loop_optimizer_init (LOOPS_NORMAL);
> 
> instead of
> 
>   loop_optimizer_init (0);
>   if (dump_file && (dump_flags & TDF_DETAILS))
> flow_loops_dump (dump_file, NULL, 0);
> 
>   mark_irreducible_loops ();

Yeah, this may also work.  The reason it is not done is that
 1) it seemed expensive to force CFG changes just to compute profile decade ago
 2) cfgcleanup afterwards will anyway remove the headers again.
So I originally hoped to do the right thing without normalization.

Honza


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Richard Guenther
On Tue, Jul 31, 2012 at 12:38 PM, Jan Hubicka  wrote:
>> On Tue, Jul 31, 2012 at 12:20 PM, Dehao Chen  wrote:
>> > Are you suggesting a patch like this:
>> >
>> > Index: gcc/predict.c
>> > ===
>> > --- gcc/predict.c   (revision 189835)
>> > +++ gcc/predict.c   (working copy)
>> > @@ -1319,6 +1319,7 @@
>> >tree loop_bound_var = NULL;
>> >tree loop_iv_base = NULL;
>> >gimple stmt = NULL;
>> > +  int header_found = 0;
>> >
>> >exits = get_loop_exit_edges (loop);
>> >n_exits = VEC_length (edge, exits);
>> > @@ -1387,9 +1388,20 @@
>> >
>> >bbs = get_loop_body (loop);
>> >
>> > +  /* Loop branch heuristics - predict an edge back to a
>> > +loop's head as taken.  */
>> > +  if (loop->latch && loop->latch->loop_father == loop)
>>
>> Hmm, so the issue is that loop->latch does not belong to loop?  That looks
>> like a bogus loop structure.  Indeed we have the loop header of the inner
>> loop as latch of the outer loop.  It still looks ok to predict this as 
>> unlikely
>> as the edge is not only the latch edge of the outer loop but also an exit
>> of the inner loop.
>>
>> Easier for profile would be to force canonicalization via
>>
>>   loop_optimizer_init (LOOPS_NORMAL);
>>
>> instead of
>>
>>   loop_optimizer_init (0);
>>   if (dump_file && (dump_flags & TDF_DETAILS))
>> flow_loops_dump (dump_file, NULL, 0);
>>
>>   mark_irreducible_loops ();
>
> Yeah, this may also work.  The reason it is not done is that
>  1) it seemed expensive to force CFG changes just to compute profile decade 
> ago
>  2) cfgcleanup afterwards will anyway remove the headers again.
> So I originally hoped to do the right thing without normalization.

Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
prepared for odd situations like this ;)

> Honza


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Richard Guenther
On Tue, Jul 31, 2012 at 12:43 PM, Richard Guenther
 wrote:
> On Tue, Jul 31, 2012 at 12:38 PM, Jan Hubicka  wrote:
>>> On Tue, Jul 31, 2012 at 12:20 PM, Dehao Chen  wrote:
>>> > Are you suggesting a patch like this:
>>> >
>>> > Index: gcc/predict.c
>>> > ===
>>> > --- gcc/predict.c   (revision 189835)
>>> > +++ gcc/predict.c   (working copy)
>>> > @@ -1319,6 +1319,7 @@
>>> >tree loop_bound_var = NULL;
>>> >tree loop_iv_base = NULL;
>>> >gimple stmt = NULL;
>>> > +  int header_found = 0;
>>> >
>>> >exits = get_loop_exit_edges (loop);
>>> >n_exits = VEC_length (edge, exits);
>>> > @@ -1387,9 +1388,20 @@
>>> >
>>> >bbs = get_loop_body (loop);
>>> >
>>> > +  /* Loop branch heuristics - predict an edge back to a
>>> > +loop's head as taken.  */
>>> > +  if (loop->latch && loop->latch->loop_father == loop)
>>>
>>> Hmm, so the issue is that loop->latch does not belong to loop?  That looks
>>> like a bogus loop structure.  Indeed we have the loop header of the inner
>>> loop as latch of the outer loop.  It still looks ok to predict this as 
>>> unlikely
>>> as the edge is not only the latch edge of the outer loop but also an exit
>>> of the inner loop.
>>>
>>> Easier for profile would be to force canonicalization via
>>>
>>>   loop_optimizer_init (LOOPS_NORMAL);
>>>
>>> instead of
>>>
>>>   loop_optimizer_init (0);
>>>   if (dump_file && (dump_flags & TDF_DETAILS))
>>> flow_loops_dump (dump_file, NULL, 0);
>>>
>>>   mark_irreducible_loops ();
>>
>> Yeah, this may also work.  The reason it is not done is that
>>  1) it seemed expensive to force CFG changes just to compute profile decade 
>> ago
>>  2) cfgcleanup afterwards will anyway remove the headers again.
>> So I originally hoped to do the right thing without normalization.
>
> Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
> prepared for odd situations like this ;)

In which case the bug looks like that we predict the inner loop exit as unlikely
but not the outer loop exit which should compensate things and not end up
predicting zero iterations?  That is, all patches seem to paper over a
real issue
elsewhere.

Richard.

>> Honza


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Jan Hubicka
> >
> > Yeah, this may also work.  The reason it is not done is that
> >  1) it seemed expensive to force CFG changes just to compute profile decade 
> > ago
> >  2) cfgcleanup afterwards will anyway remove the headers again.
> > So I originally hoped to do the right thing without normalization.
> 
> Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
> prepared for odd situations like this ;)

Well, I guess we could do the extra work to avoid strange side cases like this.
Does normalization fix the testcase, too?

Honza
> 
> > Honza


RFA: M32C: Add __clrsbhi2 to libgcc

2012-07-31 Thread Nick Clifton
Hi DJ,

  The patch below adds an implementation of the __clrsbhi2 function to
  libgcc for the M32C port.  Implementing this function resolves several
  gcc testsuite failures that rely upon the function being present.

  Ok to apply ?

Cheers
  Nick

libgcc/ChangeLog
2012-07-31  Nick Clifton  

* config/m32c/lib2funcs.c (__clrsbhi2): New function.

Index: libgcc/config/m32c/lib2funcs.c
===
--- libgcc/config/m32c/lib2funcs.c  (revision 189996)
+++ libgcc/config/m32c/lib2funcs.c  (working copy)
@@ -132,3 +132,17 @@
 {
   return udivmodsi4 (a, b, 1);
 }
+
+/* Returns the number of leading redundant sign bits in X.
+   I.e. the number of bits following the most significant bit which are
+   identical to it.  There are no special cases for 0 or other values.  */
+
+int
+__clrsbhi2 (word_type x)
+{
+  if (x < 0)
+x = ~x;
+  if (x == 0)
+return 15;
+  return __builtin_clz (x) - 1;
+}




[PATCH] PR 53975

2012-07-31 Thread Andrey Belevantsev

Hello,

This PR is about the selective scheduler wrongly speculating an insn that
doesn't support storing NaT bits (more details in the PR audit trail).
The cause is an incorrect one-line patch committed last year; the fix is
to revert that patch and to clarify the comment before the patched code.


Bootstrapped and tested on ia64, approved by Alexander offline.  No test as 
I don't know how to check whether an insn got moved through another insn.


Andrey

PR target/53975

* sel-sched-ir.c (has_dependence_note_reg_use): Clarify comment.

Revert
2011-08-04  Sergey Grechanik  

* sel-sched-ir.c (has_dependence_note_reg_use): Call ds_full_merge
only if producer writes to the register given by regno.
Index: gcc/sel-sched-ir.c
===
*** gcc/sel-sched-ir.c	(revision 190004)
--- gcc/sel-sched-ir.c	(revision 190005)
*** has_dependence_note_reg_use (int regno)
*** 3228,3234 
if (reg_last->clobbers)
  	*dsp = (*dsp & ~SPECULATIVE) | DEP_ANTI;
  
!   /* Handle BE_IN_SPEC.  */
if (reg_last->uses)
  	{
  	  ds_t pro_spec_checked_ds;
--- 3228,3238 
if (reg_last->clobbers)
  	*dsp = (*dsp & ~SPECULATIVE) | DEP_ANTI;
  
!   /* Merge BE_IN_SPEC bits into *DSP when the dependency producer
! 	 is actually a check insn.  We need to do this for any register
! 	 read-read dependency with the check unless we track properly
! 	 all registers written by BE_IN_SPEC-speculated insns, as
! 	 we don't have explicit dependence lists.  See PR 53975.  */
if (reg_last->uses)
  	{
  	  ds_t pro_spec_checked_ds;
*** has_dependence_note_reg_use (int regno)
*** 3236,3244 
  	  pro_spec_checked_ds = INSN_SPEC_CHECKED_DS (has_dependence_data.pro);
  	  pro_spec_checked_ds = ds_get_max_dep_weak (pro_spec_checked_ds);
  
! 	  if (pro_spec_checked_ds != 0
! 	  && bitmap_bit_p (INSN_REG_SETS (has_dependence_data.pro), regno))
! 	/* Merge BE_IN_SPEC bits into *DSP.  */
  	*dsp = ds_full_merge (*dsp, pro_spec_checked_ds,
    NULL_RTX, NULL_RTX);
  	}
--- 3240,3246 
  	  pro_spec_checked_ds = INSN_SPEC_CHECKED_DS (has_dependence_data.pro);
  	  pro_spec_checked_ds = ds_get_max_dep_weak (pro_spec_checked_ds);
  
! 	  if (pro_spec_checked_ds != 0)
  	*dsp = ds_full_merge (*dsp, pro_spec_checked_ds,
    NULL_RTX, NULL_RTX);
  	}


[C++ Patch] Mini int -> bool clean up

2012-07-31 Thread Paolo Carlini

Hi,

noticed yesterday, booted & tested x86_64-linux. Ok?

Thanks,
Paolo.

//
2012-07-31  Paolo Carlini  

* pt.c (check_default_tmpl_args): Change is_primary and is_partial
parameters to bool type, adjust.
(push_template_decl_real): Tidy.
* parser.c (cp_parser_init_declarator): Adjust.
* decl.c (redeclaration_error_message): Likewise.
* cp-tree.h (check_default_tmpl_args): Update prototype.
Index: decl.c
===
--- decl.c  (revision 19)
+++ decl.c  (working copy)
@@ -2480,7 +2480,8 @@ redeclaration_error_message (tree newdecl, tree ol
   if ((cxx_dialect != cxx98) 
   && TREE_CODE (ot) == FUNCTION_DECL && DECL_FRIEND_P (ot)
   && !check_default_tmpl_args (nt, DECL_TEMPLATE_PARMS (newdecl), 
-   /*is_primary=*/1, /*is_partial=*/0,
+   /*is_primary=*/true,
+  /*is_partial=*/false,
/*is_friend_decl=*/2))
 return G_("redeclaration of friend %q#D "
  "may not have default template arguments");
Index: pt.c
===
--- pt.c(revision 190001)
+++ pt.c(working copy)
@@ -4210,10 +4210,9 @@ process_partial_specialization (tree decl)
 
 /* Check that a template declaration's use of default arguments and
parameter packs is not invalid.  Here, PARMS are the template
-   parameters.  IS_PRIMARY is nonzero if DECL is the thing declared by
-   a primary template.  IS_PARTIAL is nonzero if DECL is a partial
+   parameters.  IS_PRIMARY is true if DECL is the thing declared by
+   a primary template.  IS_PARTIAL is true if DECL is a partial
specialization.
-   
 
IS_FRIEND_DECL is nonzero if DECL is a friend function template
declaration (but not a definition); 1 indicates a declaration, 2
@@ -4223,8 +4222,8 @@ process_partial_specialization (tree decl)
Returns TRUE if there were no errors found, FALSE otherwise. */
 
 bool
-check_default_tmpl_args (tree decl, tree parms, int is_primary, 
- int is_partial, int is_friend_decl)
+check_default_tmpl_args (tree decl, tree parms, bool is_primary,
+ bool is_partial, int is_friend_decl)
 {
   const char *msg;
   int last_level_to_check;
@@ -4456,8 +4455,8 @@ push_template_decl_real (tree decl, bool is_friend
   tree args;
   tree info;
   tree ctx;
-  int primary;
-  int is_partial;
+  bool is_primary;
+  bool is_partial;
   int new_template_p = 0;
   /* True if the template is a member template, in the sense of
  [temp.mem].  */
@@ -4499,11 +4498,11 @@ push_template_decl_real (tree decl, bool is_friend
 /* A friend template that specifies a class context, i.e.
  template  friend void A::f();
is not primary.  */
-primary = 0;
+is_primary = false;
   else
-primary = template_parm_scope_p ();
+is_primary = template_parm_scope_p ();
 
-  if (primary)
+  if (is_primary)
 {
   if (DECL_CLASS_SCOPE_P (decl))
member_template_p = true;
@@ -4556,7 +4555,7 @@ push_template_decl_real (tree decl, bool is_friend
   /* Check to see that the rules regarding the use of default
  arguments are not being violated.  */
   check_default_tmpl_args (decl, current_template_parms,
-  primary, is_partial, /*is_friend_decl=*/0);
+  is_primary, is_partial, /*is_friend_decl=*/0);
 
   /* Ensure that there are no parameter packs in the type of this
  declaration that have not been expanded.  */
@@ -4773,7 +4772,7 @@ template arguments to %qD do not match original te
}
 }
 
-  if (primary)
+  if (is_primary)
 {
   tree parms = DECL_TEMPLATE_PARMS (tmpl);
   int i;
@@ -4815,7 +4814,7 @@ template arguments to %qD do not match original te
 SET_TYPE_TEMPLATE_INFO (TREE_TYPE (tmpl), info);
   else
 {
-  if (primary && !DECL_LANG_SPECIFIC (decl))
+  if (is_primary && !DECL_LANG_SPECIFIC (decl))
retrofit_lang_decl (decl);
   if (DECL_LANG_SPECIFIC (decl))
DECL_TEMPLATE_INFO (decl) = info;
Index: parser.c
===
--- parser.c(revision 19)
+++ parser.c(working copy)
@@ -15849,8 +15849,8 @@ cp_parser_init_declarator (cp_parser* parser,
 /* Core issue #226 (C++0x only): A default template-argument
shall not be specified in a friend class template
declaration. */
-check_default_tmpl_args (decl, current_template_parms, /*is_primary=*/1, 
- /*is_partial=*/0, /*is_friend_decl=*/1);
+check_default_tmpl_args (decl, current_template_parms, 
/*is_primary=*/true, 
+ /*is_partial=*/false, /*is_friend_decl=*/1);
 
   if (!friend_p && pushed_sco

Re: [PATCH]: Fix -Wmissing-format-attribute warnings

2012-07-31 Thread Uros Bizjak
On Tue, Jul 31, 2012 at 11:20 AM, Uros Bizjak  wrote:

> This patch removes all -Wmissing-format-attribute warnings on
> alphaev68-pc-linux-gnu native build.

Ouch, I didn't notice that some of these strings may be format
strings containing %< and %>, etc., and should not be printed with "%s".
Is there a way to suppress the warning and also process the strings in
the right way?

Patch is retracted.

Uros.


[PATCH] Intrinsics for ADCX

2012-07-31 Thread Michael Zolotukhin
Hi guys,
Here is a third part of patch, refactored by Kirill. This one adds
_addcarryx_u[32|64]  intrinsics.

Is it ok?

Changelog entry:
2012-07-31 Michael Zolotukhin 

* common/config/i386/i386-common.c (OPTION_MASK_ISA_ADX_SET): New.
(OPTION_MASK_ISA_ADX_UNSET): Likewise.
(ix86_handle_option): Handle madx option.
* config.gcc (i[34567]86-*-*): Add adxintrin.h.
(x86_64-*-*): Likewise.
* config/i386/adxintrin.h: New header.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect ADCX/ADOX
support.
* config/i386/i386-builtin-types.def
(UCHAR_FTYPE_UCHAR_UINT_UINT_PINT): New function type.
(UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PINT): Likewise.
* config/i386/i386-c.c: Define __ADX__ if needed.
* config/i386/i386.c (ix86_target_string): Define -madx option.
(PTA_ADX): New.
(ix86_option_override_internal): Handle new option.
(ix86_valid_target_attribute_inner_p): Add OPT_madx.
(ix86_builtins): Add IX86_BUILTIN_ADDCARRYX32,
IX86_BUILTIN_ADDCARRYX64.
(ix86_init_mmx_sse_builtins): Define corresponding built-ins.
(ix86_expand_builtin): Handle these built-ins.
(ix86_expand_args_builtin): Handle new function types.
* config/i386/i386.h (TARGET_ADX): New.
* config/i386/i386.md (adcx): New define_expand.
(adcx_carry): New define_insn.
* config/i386/i386.opt (madx): New.
* config/i386/x86intrin.h: Include adxintrin.h.

testsuite/Changelog entry:
2012-07-31 Michael Zolotukhin 

* gcc.target/i386/adx-addcarryx32-1.c: New.
* gcc.target/i386/adx-addcarryx32-2.c: New.
* gcc.target/i386/adx-addcarryx64-1.c: New.
* gcc.target/i386/adx-addcarryx64-2.c: New.
* gcc.target/i386/adx-check.h: New.
* gcc.target/i386/i386.exp (check_effective_target_adx): New.
* gcc.target/i386/sse-12.c: Add -madx.
* gcc.target/i386/sse-13.c: Ditto.
* gcc.target/i386/sse-14.c: Ditto.
* gcc.target/i386/sse-22.c: Ditto.
* gcc.target/i386/sse-23.c: Ditto.
* g++.dg/other/i386-2.C: Ditto.
* g++.dg/other/i386-3.C: Ditto.


Bootstrap and new tests are passing, other testing is in progress.


-- 
---
Best regards,
Michael V. Zolotukhin,
Software Engineer
Intel Corporation.


bdw-adx-1.gcc.patch
Description: Binary data


[RS6000] Fix PR54131, ICE building 416.gamess

2012-07-31 Thread Alan Modra
This cures the 'Y' constraint of being overly restrictive with lo_sum
offsets.  I've added a comment that explains why it is wrong to limit
the range of lo_sum offsets.  Bootstrapped and regression tested on
powerpc-linux.  OK to apply?

PR target/54131
* config/rs6000/rs6000.c (mem_operand_gpr): Don't limit range
of lo_sum offsets.  Comment.  Assert mode at least word size
rather than bypassing powerpc64 word offset check.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 189996)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -5008,24 +5008,38 @@
 
Offsetting a lo_sum should not be allowed, except where we know by
alignment that a 32k boundary is not crossed, but see the ???
-   comment in rs6000_legitimize_reload_address.  */
+   comment in rs6000_legitimize_reload_address.  Note that by
+   "offsetting" here we mean a further offset to access parts of the
+   MEM.  It's fine to have a lo_sum where the inner address is offset
+   from a sym, since the same sym+offset will appear in the high part
+   of the address calculation.  */
 
 bool
 mem_operand_gpr (rtx op, enum machine_mode mode)
 {
   unsigned HOST_WIDE_INT offset;
   int extra;
+  rtx addr = XEXP (op, 0);
 
-  op = address_offset (XEXP (op, 0));
+  op = address_offset (addr);
   if (op == NULL_RTX)
 return true;
 
   offset = INTVAL (op);
+  if (TARGET_POWERPC64 && (offset & 3) != 0)
+return false;
+
+  if (GET_CODE (addr) == LO_SUM)
+/* We know by alignment that ABI_AIX medium/large model toc refs
+   will not cross a 32k boundary, since all entries in the
+   constant pool are naturally aligned and we check alignment for
+   other medium model toc-relative addresses.  For ABI_V4 and
+   ABI_DARWIN lo_sum addresses, we just check that 64-bit
+   offsets are 4-byte aligned.  */
+return true;
+
   extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
-  if (extra < 0)
-extra = 0;
-  else if (TARGET_POWERPC64 && (offset & 3) != 0)
-return false;
+  gcc_assert (extra >= 0);
   return offset + 0x8000 < 0x1u - extra;
 }
 


-- 
Alan Modra
Australia Development Lab, IBM


[PATCH][RFC][6/n] into-SSA TLC

2012-07-31 Thread Richard Guenther

This makes rewriting decls into SSA form via SSA updating fully
transparent and no longer requiring mark_sym_for_renaming calls.
Much like the operand scanner marks virtual SSA form for renaming
automagically if it adds new virtual operands the following patch
makes sure it does so for real operands as well.  Previously
if you forgot to mark such operands for renaming manually the
first SSA verification phase would have complained.  So this
patch removes burden from the programmer and in turn even makes
the compiler faster.

I did not yet touch the remaining mark_sym_for_renaming calls
which are still necessary if you want to update the virtual SSA
web in some cases.  A followup will merge the still existing
very many variants of code that do so and unify those calls
(maybe removing a few unnecessary ones).

Bootstrap and regtest is running on x86_64-unknown-linux-gnu.

Scary code removed in matrix-reorg.c reminds me of removing that
useless beast ...

Thanks,
Richard.

2012-07-31  Richard Guenther  

* tree-flow.h (struct gimple_df): Remove syms_to_rename member,
add ssa_renaming_needed and rename_vops flags.
(SYMS_TO_RENAME): Remove.
(symbol_marked_for_renaming): Likewise.
(mark_virtual_ops_for_renaming): Likewise.
* tree-into-ssa.c (symbols_to_rename_set, symbols_to_rename):
New globals.
(mark_for_renaming, marked_for_renaming): New functions.
(symbol_marked_for_renaming): Remove.
(dump_currdefs): Adjust.
(maybe_replace_use): Likewise.
(maybe_replace_use_in_debug_stmt): Likewise.
(maybe_register_def): Likewise.
(rewrite_update_phi_arguments): Likewise.
(rewrite_update_enter_block): Likewise.
(fini_ssa_renamer): Clear update SSA status here ...
(delete_update_ssa): ... not here.  Free rename set.
(prepare_block_for_update): Compute which decls need renaming.
(dump_update_ssa): Adjust.
(mark_sym_for_renaming): Adjust update SSA status for virtuals.
(need_ssa_update_p): Adjust.
(insert_updated_phi_nodes_for): Likewise.
(update_ssa): Likewise.
* tree-ssa-operands.c (finalize_ssa_defs): Adjust update SSA status
for virtual and real operands.
(finalize_ssa_uses): Likewise.
* tree-ssanames.c (init_ssanames): Adjust.
* tree-ssa.c (maybe_rewrite_mem_ref_base, maybe_optimize_var,
execute_update_addresses_taken): Add bitmap to keep track of which
candidates are suitable for rewriting and later renaming by SSA
update.
* matrix-reorg.c (transform_access_sites): Do not rename all defs.
* tree-dfa.c (make_rename_temp): Do not mark real operands for renaming.
* cgraphunit.c (assemble_thunk): Likewise.
* gimplify.c (gimple_regimplify_operands): Likewise.
(force_gimple_operand_1): Likewise.
* ipa-prop.c (ipa_modify_formal_parameters): Likewise.
* tree-inline.c (declare_return_variable): Likewise.
* tree-parloops.c (separate_decls_in_region_stmt): Do not call
mark_virtual_ops_for_renaming.
(create_stores_for_reduction): Likewise.
(create_loads_and_stores_for_name): Likewise.
* tree-predcom.c (mark_virtual_ops_for_renaming): Remove.
(initialize_root_vars_lm): Do not call mark_virtual_ops_for_renaming.
(execute_load_motion): Likewise.
(remove_stmt): Likewise.
(execute_pred_commoning_chain): Likewise.
* tree-sra.c (create_access_replacement): Do not rename real
operands.
(get_unrenamed_access_replacement): Unify with ...
(get_access_replacement): ... this.
(get_repl_default_def_ssa_name): Adjust.
* tree-ssa-loop-im.c (move_computations_stmt): Manually update
virtual SSA form.
(rewrite_mem_ref_loc): Do not call mark_virtual_ops_for_renaming.
* tree-ssa-loop-prefetch.c (emit_mfence_after_loop): Likewise.

Index: trunk/gcc/tree-flow.h
===
*** trunk.orig/gcc/tree-flow.h  2012-07-31 11:36:00.0 +0200
--- trunk/gcc/tree-flow.h   2012-07-31 13:13:18.190881662 +0200
*** struct GTY(()) gimple_df {
*** 77,85 
   for this variable with an empty defining statement.  */
htab_t GTY((param_is (union tree_node))) default_defs;
  
!   /* Symbols whose SSA form needs to be updated or created for the first
!  time.  */
!   bitmap syms_to_rename;
  
/* True if the code is in ssa form.  */
unsigned int in_ssa_p : 1;
--- 77,87 
   for this variable with an empty defining statement.  */
htab_t GTY((param_is (union tree_node))) default_defs;
  
!   /* True if there are any symbols that need to be renamed.  */
!   unsigned int ssa_renaming_needed : 1;
! 
!   /* True if all virtual operands need to be renamed.  */
!   unsigned int rename_vops : 1;
  
/* True if the code is in ssa for

Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Dehao Chen
On Tue, Jul 31, 2012 at 6:56 PM, Jan Hubicka  wrote:
>> >
>> > Yeah, this may also work.  The reason it is not done is that
>> >  1) it seemed expensive to force CFG changes just to compute profile 
>> > decade ago
>> >  2) cfgcleanup afterwards will anyway remove the headers again.
>> > So I originally hoped to do the right thing without normalization.
>>
>> Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
>> prepared for odd situations like this ;)
>
> Well, I guess we could do the extra work to avoid strange side cases like 
> this.
> Does normalization fix the testcase, too?

Normalization indeed fixed this issue too. So what shall we do about
this patch? Shall we simply change to use normalization instead?

Thanks,
Dehao

>
> Honza
>>
>> > Honza


Re: PR middle-end/53321: [4.8 Regression] LTO bootstrap failed with bootstrap-profiled

2012-07-31 Thread Jan Hubicka
> Hi,
> 
> 2012-07-06  H.J. Lu  
> 
>   PR middle-end/53321
>   PR middle-end/53865
>   * Makefile.in (tree-profile.o): Depend on ipa-inline.h.
> 
>   * ipa.c (symtab_remove_unreachable_nodes): Restore
>   cgraph_propagate_frequency call when something was changed.
> 
>   * tree-profile.c: Include "ipa-inline.h".
>   (gimple_gen_ic_func_profiler): Return bool.
>   (tree_profiling): Call inline_free_summary to clear stale inline
>   summary if gimple_gen_ic_func_profiler returns true.
> 
>   * value-prof.h (gimple_gen_ic_func_profiler): Change return
>   type to bool.

Hi,
My local tree is profiledbootstrapping since I had a local patch to
prevent its recomputation before the ipa_profile pass, but I agree it is better
not to have stale inline summaries around.

> +  /* Clear stale inline summary.  */
> +  if (gen_ic_func_profiler)
> +inline_free_summary ();

The inline summary is completely dead here. It is computed for purposes of
early inlining and the real inliner will trash it and recompute soon.
(because the one computed by the real inliner is more detailed).

matrix_reorg, emutls and tm passes will also suffer from the problem of
invalidating the inlining summary. It is better to free it early.

Can you simply add a micro pass just after pass_early_local_passes calling
inline_free_summary unconditionally?

Patch is preapproved (or I will make it tomorrow unless you beat me to it).

Thank you!
Honza


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Jan Hubicka
> On Tue, Jul 31, 2012 at 6:56 PM, Jan Hubicka  wrote:
> >> >
> >> > Yeah, this may also work.  The reason it is not done is that
> >> >  1) it seemed expensive to force CFG changes just to compute profile 
> >> > decade ago
> >> >  2) cfgcleanup afterwards will anyway remove the headers again.
> >> > So I originally hoped to do the right thing without normalization.
> >>
> >> Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
> >> prepared for odd situations like this ;)
> >
> > Well, I guess we could do the extra work to avoid strange side cases like 
> > this.
> > Does normalization fix the testcase, too?
> 
> Normalization indeed fixed this issue too. So what shall we do about
> this patch? Shall we simply change to use normalization instead?

Yes, I think it is not _that_ expensive and we do some relatively tricky CFG
analysis there these days that should also get a bit better.
(The code was written at a time when the CFG was a new citizen in GCC and changes
to the CFG were hard and considered harmful :)

Honza
> 
> Thanks,
> Dehao
> 
> >
> > Honza
> >>
> >> > Honza


Commit: XStormy16: Add support for -fstack-usage

2012-07-31 Thread Nick Clifton
Hi Guys,

  I am checking in the patch below to add support for reporting stack
  usage by the XStormy16 backend.

Cheers
  Nick

gcc/ChangeLog
Index: gcc/ChangeLog
2012-07-31  Nick Clifton  
 
* config/stormy16/stormy16.c (xstormy16_expand_prologue): Add
support for reporting stack usage.

testsuite/ChangeLog
2012-07-31  Nick Clifton  
 
* gcc.dg/stack-usage-1.c (SIZE): Define for FRV,
and for XStormy16.

Index: gcc/config/stormy16/stormy16.c
===
--- gcc/config/stormy16/stormy16.c  (revision 190007)
+++ gcc/config/stormy16/stormy16.c  (working copy)
@@ -1035,6 +1035,9 @@
   if (layout.locals_size >= 32768)
 error ("local variable memory requirements exceed capacity");
 
+  if (flag_stack_usage_info)
+current_function_static_stack_size = layout.frame_size;
+
   /* Save the argument registers if necessary.  */
   if (layout.stdarg_save_size)
 for (regno = FIRST_ARGUMENT_REGISTER;
Index: gcc/testsuite/gcc.dg/stack-usage-1.c
===
--- gcc/testsuite/gcc.dg/stack-usage-1.c(revision 190007)
+++ gcc/testsuite/gcc.dg/stack-usage-1.c(working copy)
@@ -64,6 +64,8 @@
 #  define SIZE 252
 #elif defined (__frv__)
 #  define SIZE 248
+#elif defined (xstormy16)
+#  define SIZE 254
 #else
 #  define SIZE 256
 #endif


Re: [PATCH] Intrinsics for ADCX

2012-07-31 Thread Uros Bizjak
On Tue, Jul 31, 2012 at 1:33 PM, Michael Zolotukhin
 wrote:
> Hi guys,
> Here is a third part of patch, refactored by Kirill. This one adds
> _addcarryx_u[32|64]  intrinsics.
>
> Is it ok?
>
> Changelog entry:
> 2012-07-31 Michael Zolotukhin 
>
> * common/config/i386/i386-common.c (OPTION_MASK_ISA_ADX_SET): New.
> (OPTION_MASK_ISA_ADX_UNSET): Likewise.
> (ix86_handle_option): Handle madx option.
> * config.gcc (i[34567]86-*-*): Add adxintrin.h.
> (x86_64-*-*): Likewise.
> * config/i386/adxintrin.h: New header.
> * config/i386/driver-i386.c (host_detect_local_cpu): Detect ADCX/ADOX
> support.
> * config/i386/i386-builtin-types.def
> (UCHAR_FTYPE_UCHAR_UINT_UINT_PINT): New function type.
> (UCHAR_FTYPE_UCHAR_ULONGLONG_ULONGLONG_PINT): Likewise.
> * config/i386/i386-c.c: Define __ADX__ if needed.
> * config/i386/i386.c (ix86_target_string): Define -madx option.
> (PTA_ADX): New.
> (ix86_option_override_internal): Handle new option.
> (ix86_valid_target_attribute_inner_p): Add OPT_madx.
> (ix86_builtins): Add IX86_BUILTIN_ADDCARRYX32,
> IX86_BUILTIN_ADDCARRYX64.
> (ix86_init_mmx_sse_builtins): Define corresponding built-ins.
> (ix86_expand_builtin): Handle these built-ins.
> (ix86_expand_args_builtin): Handle new function types.
> * config/i386/i386.h (TARGET_ADX): New.
> * config/i386/i386.md (adcx): New define_expand.
> (adcx_carry): New define_insn.
> * config/i386/i386.opt (madx): New.
> * config/i386/x86intrin.h: Include adxintrin.h.
>
> testsuite/Changelog entry:
> 2012-07-31 Michael Zolotukhin 
>
> * gcc.target/i386/adx-addcarryx32-1.c: New.
> * gcc.target/i386/adx-addcarryx32-2.c: New.
> * gcc.target/i386/adx-addcarryx64-1.c: New.
> * gcc.target/i386/adx-addcarryx64-2.c: New.
> * gcc.target/i386/adx-check.h: New.
> * gcc.target/i386/i386.exp (check_effective_target_adx): New.
> * gcc.target/i386/sse-12.c: Add -madx.
> * gcc.target/i386/sse-13.c: Ditto.
> * gcc.target/i386/sse-14.c: Ditto.
> * gcc.target/i386/sse-22.c: Ditto.
> * gcc.target/i386/sse-23.c: Ditto.
> * g++.dg/other/i386-2.C: Ditto.
> * g++.dg/other/i386-3.C: Ditto.
>
>
> Bootstrap and new tests are passing, other testing is in progress.

Following is the correct definition of new insn:

--cut here--
Index: i386.md
===
--- i386.md (revision 190005)
+++ i386.md (working copy)
@@ -6604,6 +6604,27 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "")])

+(define_insn "adcx3"
+  [(set (reg:CCC FLAGS_REG)
+   (compare
+ (plus:SWI48
+   (match_operand:SWI48 1 "nonimmediate_operand" "%0")
+   (plus:SWI48
+ (match_operator 4 "ix86_carry_flag_operator"
+  [(match_operand 3 "flags_reg_operand") (const_int 0)])
+ (match_operand:SWI48 2 "nonimmediate_operand" "rm")))
+ (const_int 0)))
+   (set (match_operand:SWI48 0 "register_operand" "=r")
+   (plus:SWI48 (match_dup 1)
+   (plus:SWI48 (match_op_dup 4
+[(match_dup 3) (const_int 0)])
+   (match_dup 2]
+  "TARGET_ADX && ix86_binary_operator_ok (PLUS, mode, operands)"
+  "adcx\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "use_carry" "1")
+   (set_attr "mode" "")])
+
 (define_insn "*addsi3_carry_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
--cut here--

You don't need expander to emit insns via emit_insn (gen_).
Please put the code from the expander back to i386.c and rewrite the
sequence according to new insn pattern.

+  /* Generate CF from input operand.  */
+  emit_insn (gen_addqi3_cc (gen_reg_rtx (QImode), operands[2], constm1_rtx));

This insn should be in correct mode, you can make the pattern public if needed.

+  if (!REG_P (op1))
+   op1 = copy_to_mode_reg (QImode, op1);
+  else
+   op1 = gen_rtx_SUBREG (QImode, op1, 0);

This is not needed, just pass the register in the correct mode. You
should use something like:

  if (!insn_data[icode].operand[2].predicate (op1, mode1))
op1 = copy_to_mode_reg (mode1, op1);

Uros.


[Patch, Fortran, committed] PR 54134: [OOP] ICE overriding derived type bound function with allocatable character as result

2012-07-31 Thread Janus Weil
Hi all,

I have just committed as obvious a small patch for the PR in the subject line:

http://gcc.gnu.org/viewcvs?view=revision&revision=190010

Cheers,
Janus


Re: [PATCH][5/n] into-SSA TLC

2012-07-31 Thread Richard Guenther
On Mon, 30 Jul 2012, Michael Matz wrote:

> Hi,
> 
> On Mon, 30 Jul 2012, Richard Guenther wrote:
> 
> > 
> > This makes into-SSA no longer rely on variable annotations and instead
> > uses on-the-side information local to into/update-SSA.  Lookups can
> > probably be avoided in some places if we pass around the auxiliar
> > information instead of looking it up all the time.
> > 
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, queued for now.
> > The remaining var-ann users are remove_unused_locals (the used flag)
> > and cfgexpands out-of-SSA.
> 
> I have both removed locally.  There's one more implicit use of var_ann, 
> namely as flag "in-referenced-vars", I'm currently working to remove that
> too.

Great.  Meanwhile 6/n depends on this as well so I have gone ahead
and checked this in.

Richard.


Re: [C++ Patch] Mini int -> bool clean up

2012-07-31 Thread Jason Merrill

OK.

Jason


Re: [patch, fortran] Fix PR 54033, problems with -I, with test cases

2012-07-31 Thread Tobias Burnus

On 07/29/2012 11:24 AM, Thomas Koenig wrote:

here is an updated patch for PR 54033, this time with test cases.
Thanks to Janis for pointing me in the right direction with these.
Regression-tested. OK for trunk?


Ok. Thanks for the patch - and to Janis for the first review.

Can you eliminate the extra line before "else if"?

Tobias


2012-07-29 Thomas König  

PR fortran/54033
* scanner.c (add_path_to_list): Emit warning if an error occurs
for an include path, if it is not present or if it is not a
directory.  Do not add the path in these cases.

2012-07-29  Thomas König  

PR fortran/54033
* gfortran.dg/include_6.f90:  New test case.
* gfortran.dg/include_7.f90:  New test case.
* gfortran.dg/include_3.f90:  Add dg-warning for missing 
directory.







Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Dehao Chen
Thanks, Honza,

Then shall I check in the following patch to trunk (after testing)?

Dehao

Index: gcc/testsuite/gcc.dg/predict-7.c
===
--- gcc/testsuite/gcc.dg/predict-7.c(revision 0)
+++ gcc/testsuite/gcc.dg/predict-7.c(revision 0)
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-profile_estimate" } */
+
+extern int global;
+
+int bar (int);
+
+void foo (int base)
+{
+  int i;
+  while (global < 10)
+for (i = base; i < 10; i++)
+  bar (i);
+}
+
+/* { dg-final { scan-tree-dump-times "loop branch heuristics" 0
"profile_estimate"} } */
+/* { dg-final { cleanup-tree-dump "profile_estimate" } } */

Index: gcc/predict.c
===
--- gcc/predict.c   (revision 189835)
+++ gcc/predict.c   (working copy)
@@ -2177,7 +2177,7 @@
 {
   unsigned nb_loops;

-  loop_optimizer_init (0);
+  loop_optimizer_init (LOOPS_NORMAL);
   if (dump_file && (dump_flags & TDF_DETAILS))
 flow_loops_dump (dump_file, NULL, 0);


On Tue, Jul 31, 2012 at 8:43 PM, Jan Hubicka  wrote:
>> On Tue, Jul 31, 2012 at 6:56 PM, Jan Hubicka  wrote:
>> >> >
>> >> > Yeah, this may also work.  The reason it is not done is that
>> >> >  1) it seemed expensive to force CFG changes just to compute profile 
>> >> > decade ago
>> >> >  2) cfgcleanup afterwards will anyway remove the headers again.
>> >> > So I originally hoped to do the right thing without normalization.
>> >>
>> >> Ok ... then you should pass AVOID_CFG_MODIFICATIONS instead.  And be
>> >> prepared for odd situations like this ;)
>> >
>> > Well, I guess we could do the extra work to avoid strange side cases like 
>> > this.
>> > Does normalization fix the testcase, too?
>>
>> Normalization indeed fixed this issue too. So what shall we do about
>> this patch? Shall we simply change to use normalization instead?
>
> Yes, I think it is not _that_ expensive and we do some relatively tricky CFG
> analysis there these days that should also get bit better.
> (the code was written at a time CFG was new citizen to GCC and changes in CFG
> was hard and considered harmful :)
>
> Honza
>>
>> Thanks,
>> Dehao
>>
>> >
>> > Honza
>> >>
>> >> > Honza


Re: [RS6000] Fix PR54131, ICE building 416.gamess

2012-07-31 Thread David Edelsohn
On Tue, Jul 31, 2012 at 7:51 AM, Alan Modra  wrote:
> This cures the 'Y' constraint of being overly restrictive with lo_sum
> offsets.  I've added a comment that explains why it is wrong to limit
> the range of lo_sum offsets.  Bootstrapped and regressiotn tested
> powerpc-linux.  OK to apply?
>
> PR target/54131
> * config/rs6000/rs6000.c (mem_operand_gpr): Don't limit range
> of lo_sum offsets.  Comment.  Assert mode at least word size
> rather than bypassing powerpc64 word offset check.

Okay.

Thanks, David


Re: [PATCH] Fix the LOOP_BRANCH prediction

2012-07-31 Thread Jan Hubicka
> Thanks, Honza,
> 
> Then shall I check in the following patch to trunk (after testing)?

Yes, this is OK (with a changelog).
Thanks!
Honza


Re: [PATCH]: Fix -Wmissing-format-attribute warnings

2012-07-31 Thread Joseph S. Myers
On Tue, 31 Jul 2012, Uros Bizjak wrote:

> On Tue, Jul 31, 2012 at 11:20 AM, Uros Bizjak  wrote:
> 
> > This patch removes all -Wmissing-format-attribute warnings on
> > alphaev68-pc-linux-gnu native build.
> 
> Ouch, I didn't notice that some of these strings may be a format
> string with %< and %>, etc and should not be printed with "%s". Is
> there a way to supress warning and also process strings in the right
> way?

I've previously suggested adding functions such as error_no_args taking a 
format string that is checked to have only no-argument formats such as %< 
and %>.  (Well, error_at_no_args would be better than adding more legacy 
functions using an implicit location.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 0/2] Convert s390 to atomic optabs, v2

2012-07-31 Thread Andrew MacLeod

On 07/31/2012 05:09 AM, Richard Guenther wrote:

On
Thus, the bad news is that it's hard for the middle-end to
recover alignment of a memory access that is represented as
a builtin function call that takes addresses as parameters
(which also makes them address-taken and thus possibly aliased).
Didn't Andrew have some patches to introduce a GIMPLE_ATOMIC
eventually side-stepping this issue (maybe that used addresses, too)?

Yes, but I'm not sure I'm going to be able to get gimple atomic in for
4.8...  I'm trying to make sure the C11 stuff gets in, then it's back to
gimple atomic...  you never know tho...  I'm just concerned about 
introducing it that late in the cycle...  I'll reconsider the plan in 
the next week or two.. maybe I can get gimple atomic in first then the 
C11 stuff... put on a mega-push in august...



Andrew


Re: [Patch, Fortran] PR 42418: PROCEDURE: Rejects interface which is both specific and generic procedure

2012-07-31 Thread Tobias Burnus

On 07/31/2012 11:50 AM, Janus Weil wrote:

The patch was regtested successfully on x86_64-unknown-linux-gnu. Ok for trunk?


The patch looks okay. I assume the patch doesn't help to get PR 54035
fixed, does it? (That PR is about TBP binding to the generic instead of
the specific name.)


Tobias


2012-07-31  Janus Weil  

PR fortran/42418
* decl.c (match_procedure_interface): Move some checks to
'resolve_procedure_interface'. Set flavor if appropriate.
* expr.c (gfc_check_pointer_assign): Cleanup of 'gfc_is_intrinsic'.
* intrinsic.c (gfc_is_intrinsic): Additional checks for attributes which
identify a procedure as being non-intrinsic.
* resolve.c (resolve_procedure_interface): Checks moved here from
'match_procedure_interface'. Minor cleanup.
(resolve_formal_arglist,resolve_symbol): Cleanup of
'resolve_procedure_interface'
(resolve_actual_arglist,is_external_proc): Cleanup of
'gfc_is_intrinsic'.

2012-07-31  Janus Weil  

PR fortran/42418
* gfortran.dg/proc_decl_29.f90: New.




Re: [Patch, fortran] Remove gfc_array_ref::offset field

2012-07-31 Thread Tobias Burnus

On 07/27/2012 06:06 PM, Mikael Morin wrote:

The offset field is never set; this patch removes it.
Regression tested on x86_64-unknown-linux-gnu. OK for trunk?


OK. Thanks for the nice clean-up patch.

Tobias


Re: [PATCH 0/2] Convert s390 to atomic optabs, v2

2012-07-31 Thread Richard Henderson
On 2012-07-31 02:09, Richard Guenther wrote:
> What do we expect __builtin_compare_exchange to do for
> unaligned inputs?

At the moment we expect it to SIGBUS, as a rule.

We'd *like* to defer to the library routine for unaligned,
but we don't do that yet.

Too bad about not being able to query addresses/ssa_names
for their alignment; I thought we could do that already.


r~


Re: [PATCH] Intrinsics for ADCX

2012-07-31 Thread Richard Henderson
On 2012-07-31 04:33, Michael Zolotukhin wrote:
> Here is a third part of patch, refactored by Kirill. This one adds
> _addcarryx_u[32|64]  intrinsics.

Frankly I don't understand the point of these instructions
being added to the ISA at all.  I would have understood an
add-with-carry that did *not* modify the flags at all, but
two separate ones that modify C and O separately is just
downright strange.

But to the point: I don't understand the point of having
this as a builtin.  Is the code generated by this builtin
any better than plain C?

And if you're going to have the builtin, why is this restricted
to adx anyway?  You obviously can produce the same results with
the good old fashioned adc instruction as well.

Which begs the question of why you've got a separate pattern
for the adx anyway.  If the insn is so much better, it ought to
be used in the same pattern we use for adc now.


r~


Re: TPF: disable discriminators

2012-07-31 Thread Richard Henderson
On 2012-07-30 21:07, DJ Delorie wrote:
> +
> +/* GAS supports it, but the debuggers don't, so avoid it.  */
> +#define SUPPORTS_DISCRIMINATOR 0

Then you shouldn't be fiddling this, but rather dwarf_strict.
See e.g. darwin and vxworks ports.


r~




[patch] sched-vis fixes

2012-07-31 Thread Steven Bosscher
Hello,

The pretty-printer in sched-vis.c hasn't been updated for all new RTX
codes that have been added over time. The attached patch resolves all
things that needed fixing for pretty -dAP -fdump-rtl-final-slim prints
on powerpc64 (and probably everywhere else). It also changes things
such that new RTL expressions are always automatically printed as
pseudo-functions. For FMA I didn't like that, so I added explicit
pretty-printing for it.

Will commit as obvious after the usual testing here and there...

Ciao!
Steven


sched-vis-fixes.diff
Description: Binary data


Re: [Patch, Fortran] PR 42418: PROCEDURE: Rejects interface which is both specific and generic procedure

2012-07-31 Thread Janus Weil
>> The patch was regtested successfully on x86_64-unknown-linux-gnu. Ok for
>> trunk?
>
>
> The patch looks  okay.

Thanks!

> I assume, the patch doesn't help to get PR 54035
> fixed,does it? (That PR is about TBP binding to the generic instead of the
> specific name.)

Not directly. Although - since they're not completely unrelated - one
might think about applying a similar strategy for that one. However, I
expect it will be a bit harder to fix. Maybe I'll have a look soon ...

In general I don't really like the sort of special-case treatment that
is needed, e.g. in this PR, to handle generics and specifics with the
same name. In the long run, it might be worth thinking about a separate
symtree for generics in each namespace. That would also help to
simplify the treatment of 'constructors' etc, but it's surely not a
quick'n'easy project (and I'm not sure if there are any pitfalls
lurking).

Cheers,
Janus



>> 2012-07-31  Janus Weil  
>>
>> PR fortran/42418
>> * decl.c (match_procedure_interface): Move some checks to
>> 'resolve_procedure_interface'. Set flavor if appropriate.
>> * expr.c (gfc_check_pointer_assign): Cleanup of
>> 'gfc_is_intrinsic'.
>> * intrinsic.c (gfc_is_intrinsic): Additional checks for attributes
>> which
>> identify a procedure as being non-intrinsic.
>> * resolve.c (resolve_procedure_interface): Checks moved here from
>> 'match_procedure_interface'. Minor cleanup.
>> (resolve_formal_arglist,resolve_symbol): Cleanup of
>> 'resolve_procedure_interface'
>> (resolve_actual_arglist,is_external_proc): Cleanup of
>> 'gfc_is_intrinsic'.
>>
>> 2012-07-31  Janus Weil  
>>
>> PR fortran/42418
>> * gfortran.dg/proc_decl_29.f90: New.
>
>


[gimplefe] Patch for recognizing function declarations

2012-07-31 Thread Sandeep Soni
Hi Diego,

The following patch recognizes function declarations. I am now trying
to create a gimple sequence of all the statements within the function
body.
The changelog is as follows:

2012-07-31   Sandeep Soni 

* parser.c (gl_token_starts_decl): Modify. Matches function decls.
(gp_parse_parm_decl): New.
(gp_parse_return_type): New.
(gp_parse_function_decl): New.
(gp_parse_decl): Modify. Adds case for function decls.

Index: gcc/gimple/parser.c
===
--- gcc/gimple/parser.c (revision 188546)
+++ gcc/gimple/parser.c (working copy)
@@ -234,7 +271,7 @@
 gl_token_starts_decl (gimple_token *token)
 {
   enum tree_code code = gl_tree_code_for_token (token);
-  return code == VAR_DECL;
+  return code == VAR_DECL || code == FUNCTION_DECL;
 }


@@ -746,6 +817,24 @@
 }


+/* Helper for gp_parse_function_decl. The parm_decl's
+   are read from gimple_parser PARSER.  */
+
+static void
+gp_parse_parm_decl (gimple_parser *parser)
+{
+  gl_consume_expected_token (parser->lexer, CPP_LESS);
+  gl_consume_expected_token (parser->lexer, CPP_NAME);
+  gl_consume_expected_token (parser->lexer, CPP_COMMA);
+  gl_consume_expected_token (parser->lexer, CPP_NAME);
+  gl_consume_expected_token (parser->lexer, CPP_LESS);
+  gl_consume_expected_token (parser->lexer, CPP_NUMBER);
+  gl_consume_expected_token (parser->lexer, CPP_GREATER);
+  gl_consume_expected_token (parser->lexer, CPP_GREATER);
+
+}
+
+
 /* Helper for gp_parse_expect_record_type and
gp_parse_expect_union_type. The field_decl's
are read from gimple_parser PARSER.  */
@@ -939,6 +1028,27 @@
 }


+/* Recognizes the return type of the function. The tuple is read from
+   PARSER.  */
+
+static void
+gp_parse_return_type (gimple_parser *parser)
+{
+  gimple_token *next_token = gl_consume_token (parser->lexer);
+  enum tree_code code = gl_tree_code_for_token (next_token);
+
+  switch (code)
+  {
+  case INTEGER_TYPE:
+  case REAL_TYPE:
+gl_consume_expected_token (parser->lexer, CPP_LESS);
+gl_consume_expected_token (parser->lexer, CPP_NUMBER);
+gl_consume_expected_token (parser->lexer, CPP_GREATER);
+break;
+  }
+}
+
+
 /* The Declaration section within a .gimple file can consist of
a) Declaration of variables.
b) Declaration of functions.
@@ -1070,6 +1180,103 @@
 }


+/* The syntax of a function declaration is as follows:
+
+   FUNCTION_DECL
+   <
+  function body
+   >
+
+   Here, each of the PARMS in itself is a parameter declaration similar to a
+   variable declaration, TYPE is the type of the variable that this
+   function returns and FUNCTION BODY is the series of statements that define
+   the behavior of the function.
+
+   Following are some of the examples for which the syntax of the function
+   declarations are described.
+
+   1. C-like function as
+  void foo (int first, float second)
+{
+  first = second;
+}
+
+   The corresponding gimple syntax is:
+ FUNCTION_DECL >,
+PARM_DECL>>
+ <
+   GIMPLE_ASSIGN 
+ >
+
+   2. C-like function as
+  int foo (int first, float second)
+   {
+  int local_first;
+  float local_second;
+   
+  local_first = first;
+  local_second = second;
+
+  return local_first;
+   }
+
+   The corresponding gimple syntax is:
+ FUNCTION_DECL ,
+PARM_DECL>,
+PARM_DECL>>
+ <
+VAR_DECL >
+   VAR_DECL >
+   VAR_DECL >
+
+   gimple_assign 
+   gimple_assign 
+   gimple_assign 
+   gimple_return 
+ >
+
+   Note: 1) The syntax closely mimics the -fdump-tree-gimple-raw option.
+2) The function declaration tuple needs to be checked against the
+   call of the function for order and the number of arguments.
+3) A symbol table entry for the function should be made. The
+   variables defined within the function should be made to have
+   function scope.  */
+
+/* Recognizer function for function declarations. The declaration tuple is read
+   from gimple_parser PARSER.  */
+
+static void
+gp_parse_function_decl (gimple_parser *parser)
+{
+  const gimple_token *next_token;
+
+  gl_consume_expected_token (parser->lexer, CPP_LESS);
+  gl_consume_expected_token (parser->lexer, CPP_NAME);
+  gl_consume_expected_token (parser->lexer, CPP_COMMA);
+  gp_parse_return_type (parser);
+
+  next_token = gl_consume_token (parser->lexer);
+  while (!gl_at_eof(parser->lexer))
+{
+  if (next_token->type == CPP_COMMA)
+   {
+ next_token = gl_consume_token (parser->lexer);
+ if (gl_tree_code_for_token (next_token) == PARM_DECL)
+gp_parse_parm_decl (parser);
+ else
+   error_at (next_token->location,
+ "Unsupported declaration of parameters");
+  else if (next_token->type == CPP_GREATER)
+   break;
+}
+
+  

Re: [PATCH 0/2] Convert s390 to atomic optabs, v2

2012-07-31 Thread Ulrich Weigand
Richard Henderson wrote:

> I've had a go at generating better code in the HQImode CAS
> loop for aligned memory, but I don't know that I'd call it
> the most efficient thing ever.

Thanks for having a look at this!

>   (3) Support for IC, and ICM via the insv pattern is lacking.
>   I've added a tiny bit of support here, in the form of using
>   the existing strict_low_part patterns, but most definitely we
>   could do better.

This doesn't look correct:
+  /* Emit a strict_low_part pattern if possible.  */
+  if (bitpos == 0 && GET_MODE_BITSIZE (smode) == bitsize)

With bitpos == 0 we need to insert into the *high* part, not
the low part on a big-endian platform.  This probably causes
this incorrect code below:
 icm %r5,3,0(%r12)
We'd need icm mask 12, not 3, to load into the two upper bytes.

[ This is also probably causing the testing failures I'm seeing
with the patch as-is.  I haven't looked into them in detail yet.  ]

>   (4) The *sethighpartsi and *sethighpartdi_64 patterns ought to be
>   more different.  As is, we can't insert into bits 48-56 of a
>   DImode quantity, because we don't generate ICM for DImode,
>   only ICMH.
> 
>   (5) Missing support for RISBGZ in the form of an extv/z expander.
>   The existing *extv/z splitters probably ought to be conditionalized
>   on !Z10.
> 
>   (6) The strict_low_part patterns should allow registers for at
>   least Z10.  The SImode strict_low_part can use LR everywhere.
> 
>   (7) RISBGZ could be used for a 3-address constant lshrsi3 before
>   srlk is available.

Good points, agreed with all of that.  None of that ought to be
a prerequisite for the atomic patch, of course ...

>* Given that we're having to zap the mask in %r1 for the second
>  compare anyway, I wonder if RISBG is really beneficial over OR.
>  Is RISBG (or ICM for that matter) any faster (or even smaller)?

Just a plain OR is preferable to a RISBG.  I guess the point of the
RISBG is that you can avoid the extra shift ...  Now, if that shift
can be moved ahead of the loop, that may not be all that big of a
win.  On the other hand, these loops hopefully don't loop very often
if we don't have a lot of contention ...

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU Toolchain for Linux on System z and Cell BE
  ulrich.weig...@de.ibm.com



Re: [Patch, Fortran] PR 42418: PROCEDURE: Rejects interface which is both specific and generic procedure

2012-07-31 Thread Janus Weil
>>> The patch was regtested successfully on x86_64-unknown-linux-gnu. Ok for
>>> trunk?
>>
>>
>> The patch looks  okay.
>
> Thanks!

Committed as r190017.

Cheers,
Janus



>>> 2012-07-31  Janus Weil  
>>>
>>> PR fortran/42418
>>> * decl.c (match_procedure_interface): Move some checks to
>>> 'resolve_procedure_interface'. Set flavor if appropriate.
>>> * expr.c (gfc_check_pointer_assign): Cleanup of
>>> 'gfc_is_intrinsic'.
>>> * intrinsic.c (gfc_is_intrinsic): Additional checks for attributes
>>> which
>>> identify a procedure as being non-intrinsic.
>>> * resolve.c (resolve_procedure_interface): Checks moved here from
>>> 'match_procedure_interface'. Minor cleanup.
>>> (resolve_formal_arglist,resolve_symbol): Cleanup of
>>> 'resolve_procedure_interface'
>>> (resolve_actual_arglist,is_external_proc): Cleanup of
>>> 'gfc_is_intrinsic'.
>>>
>>> 2012-07-31  Janus Weil  
>>>
>>> PR fortran/42418
>>> * gfortran.dg/proc_decl_29.f90: New.
>>
>>


Re: RFA: M32C: Add __clrsbhi2 to libgcc

2012-07-31 Thread DJ Delorie

Ok.


Re: [PATCH 0/2] Convert s390 to atomic optabs, v2

2012-07-31 Thread Richard Henderson
On 2012-07-31 11:17, Ulrich Weigand wrote:
> This doesn't look correct:
> +  /* Emit a strict_low_part pattern if possible.  */
> +  if (bitpos == 0 && GET_MODE_BITSIZE (smode) == bitsize)
> 
> With bitpos == 0 we need to insert into the *high* part, not
> the low part on a big-endian platform.  This probably causes
> this incorrect code below:
>  icm %r5,3,0(%r12)

Oops, yes.  I forget about bits-big-endian.

> Just a plain OR is preferable to a RISBG.  I guess the point of the
> RISBG is that you can avoid the extra shift ...  Now, if that shift
> can be moved ahead of the loop, that may not be all that big of a
> win.  On the other hand, these loops hopefully don't loop very often
> if we don't have a lot of contention ...

Indeed.  So it's mostly about minimizing size.

I guess with RISBG available we can always implement with an input in memory 
with two insns -- full addressing mode on a load plus the risbg to shift and 
insert.

If we use ICM, we might get away with 1 insn, but might need a second to reload 
the address into an s_operand.


r~


[PATCH, rs6000] Vectorizer heuristic

2012-07-31 Thread William J. Schmidt
Now that the vectorizer cost model is set up to facilitate per-target
heuristics, I'm revisiting the "density" heuristic I submitted
previously.  This allows the vec_permute and vec_promote_demote costs to
be set to their natural values, but inhibits vectorization in cases like
sphinx3 where vectorizing a loop leads to issue stalls from
overcommitted resources.

Bootstrapped on powerpc64-unknown-linux-gnu with no new regressions.
Measured performance on cpu2000 and cpu2006 with no significant changes
in performance.  Ok for trunk?

Thanks,
Bill


2012-07-31  Bill Schmidt  

* config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Revise
costs for vec_perm and vec_promote_demote down to more natural values.
(struct _rs6000_cost_data): New data structure.
(rs6000_density_test): New function.
(rs6000_init_cost): Change to use rs6000_cost_data.
(rs6000_add_stmt_cost): Likewise.
(rs6000_finish_cost): Perform density test when vectorizing a loop.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 189845)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -60,6 +60,7 @@
 #include "params.h"
 #include "tm-constrs.h"
 #include "opts.h"
+#include "tree-vectorizer.h"
 #if TARGET_XCOFF
 #include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
 #endif
@@ -3378,13 +3379,13 @@ rs6000_builtin_vectorization_cost (enum vect_cost_
 
   case vec_perm:
if (TARGET_VSX)
- return 4;
+ return 3;
else
  return 1;
 
   case vec_promote_demote:
 if (TARGET_VSX)
-  return 5;
+  return 4;
 else
   return 1;
 
@@ -3520,14 +3521,71 @@ rs6000_preferred_simd_mode (enum machine_mode mode
   return word_mode;
 }
 
+typedef struct _rs6000_cost_data
+{
+  struct loop *loop_info;
+  unsigned cost[3];
+} rs6000_cost_data;
+
+/* Test for likely overcommitment of vector hardware resources.  If a
+   loop iteration is relatively large, and too large a percentage of
+   instructions in the loop are vectorized, the cost model may not
+   adequately reflect delays from unavailable vector resources.
+   Penalize the loop body cost for this case.  */
+
+static void
+rs6000_density_test (rs6000_cost_data *data)
+{
+  const int DENSITY_PCT_THRESHOLD = 85;
+  const int DENSITY_SIZE_THRESHOLD = 70;
+  const int DENSITY_PENALTY = 10;
+  struct loop *loop = data->loop_info;
+  basic_block *bbs = get_loop_body (loop);
+  int nbbs = loop->num_nodes;
+  int vec_cost = data->cost[vect_body], not_vec_cost = 0;
+  int i, density_pct;
+
+  for (i = 0; i < nbbs; i++)
+{
+  basic_block bb = bbs[i];
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+   {
+ gimple stmt = gsi_stmt (gsi);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_IN_PATTERN_P (stmt_info))
+   not_vec_cost++;
+   }
+}
+
+  density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
+
+  if (density_pct > DENSITY_PCT_THRESHOLD
+  && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
+{
+  data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
+  if (vect_print_dump_info (REPORT_DETAILS))
+   fprintf (vect_dump,
+"density %d%%, cost %d exceeds threshold, penalizing "
+"loop body cost by %d%%", density_pct, 
+vec_cost + not_vec_cost, DENSITY_PENALTY);
+}
+}
+
 /* Implement targetm.vectorize.init_cost.  */
 
 static void *
-rs6000_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
+rs6000_init_cost (struct loop *loop_info)
 {
-  unsigned *cost = XNEWVEC (unsigned, 3);
-  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
-  return cost;
+  rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
+  data->loop_info = loop_info;
+  data->cost[vect_prologue] = 0;
+  data->cost[vect_body] = 0;
+  data->cost[vect_epilogue] = 0;
+  return data;
 }
 
 /* Implement targetm.vectorize.add_stmt_cost.  */
@@ -3537,7 +3595,7 @@ rs6000_add_stmt_cost (void *data, int count, enum
  struct _stmt_vec_info *stmt_info, int misalign,
  enum vect_cost_model_location where)
 {
-  unsigned *cost = (unsigned *) data;
+  rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
   unsigned retval = 0;
 
   if (flag_vect_cost_model)
@@ -3552,7 +3610,7 @@ rs6000_add_stmt_cost (void *data, int count, enum
count *= 50;  /* FIXME.  */
 
   retval = (unsigned) (count * stmt_cost);
-  cost[where] += retval;
+  cost_data->cost[where] += retval;
 }
 
   return retval;
@@ -3564,10 +3622,14 @@ static void
 rs6000_finish_cost (void *data, unsigned *prologue_cost,
unsigned *body_cost, unsigned *epilogue_cost)
 {
-  unsigned *cost 

Re: [PATCH, rs6000] Vectorizer heuristic

2012-07-31 Thread David Edelsohn
On Tue, Jul 31, 2012 at 3:54 PM, William J. Schmidt
 wrote:
> Now that the vectorizer cost model is set up to facilitate per-target
> heuristics, I'm revisiting the "density" heuristic I submitted
> previously.  This allows the vec_permute and vec_promote_demote costs to
> be set to their natural values, but inhibits vectorization in cases like
> sphinx3 where vectorizing a loop leads to issue stalls from
> overcommitted resources.
>
> Bootstrapped on powerpc64-unknown-linux-gnu with no new regressions.
> Measured performance on cpu2000 and cpu2006 with no significant changes
> in performance.  Ok for trunk?
>
> Thanks,
> Bill
>
>
> 2012-07-31  Bill Schmidt  
>
> * config/rs6000/rs6000.c (rs6000_builtin_vectorization_cost): Revise
> costs for vec_perm and vec_promote_demote down to more natural values.
> (struct _rs6000_cost_data): New data structure.
> (rs6000_density_test): New function.
> (rs6000_init_cost): Change to use rs6000_cost_data.
> (rs6000_add_stmt_cost): Likewise.
> (rs6000_finish_cost): Perform density test when vectorizing a loop.

Okay.

Thanks, David


Re: TPF: disable discriminators

2012-07-31 Thread DJ Delorie

Ah, the original complaint was for a gcc branch which doesn't have
your strict-dwarf/discriminator patch.

How's this?

Index: gcc/config/s390/s390.c
===
--- gcc/config/s390/s390.c  (revision 190017)
+++ gcc/config/s390/s390.c  (working copy)
@@ -1651,12 +1651,21 @@ s390_option_override (void)
 flag_prefetch_loop_arrays = 1;
 
   /* Use the alternative scheduling-pressure algorithm by default.  */
   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
  global_options.x_param_values,
  global_options_set.x_param_values);
+
+#ifdef TARGET_TPF
+  /* Don't emit DWARF3/4 unless specifically selected.  The TPF
+ debuggers do not yet support DWARF 3/4.  */
+  if (!global_options_set.x_dwarf_strict) 
+dwarf_strict = 1;
+  if (!global_options_set.x_dwarf_version)
+dwarf_version = 2;
+#endif
 }
 
 /* Map for smallest class containing reg regno.  */
 
 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,



Re: Commit: XStormy16: Add __clrsbhi2() to libgcc

2012-07-31 Thread Georg-Johann Lay

Nick Clifton schrieb:

Hi Guys,

  I am checking in the patch below to add a __clrsbhi2 function to
  libgcc for the XStormy16 port.  This fixes several gcc testsuite
  failures that need this particular function.

Cheers
  Nick


Hi, just out of curiosity: Is there a special reason to add a
stormy16-specific implementation instead of using the code in
libgcc2.c? t-avr for example uses the C implementation in libgcc2.c
for HI. Are there disadvantages?

Johann


libgcc/ChangeLog
2012-07-31  Nick Clifton  

* config/stormy16/lib2funcs.c (__clrsbhi2): New function.
Implements __clrsb for an HImode argument.
* config/stormy16/clrsbhi2.c: New file.
* config/stormy16/t-stormy16 (LIB2ADD): Add clrsbhi2.c.

Index: libgcc/config/stormy16/clrsbhi2.c
===
--- libgcc/config/stormy16/clrsbhi2.c   (revision 0)
+++ libgcc/config/stormy16/clrsbhi2.c   (working copy)
@@ -0,0 +1,2 @@
+#define XSTORMY16_CLRSBHI2
+#include "lib2funcs.c"
Index: libgcc/config/stormy16/t-stormy16
===
--- libgcc/config/stormy16/t-stormy16   (revision 189996)
+++ libgcc/config/stormy16/t-stormy16   (working copy)
@@ -33,6 +33,7 @@
$(srcdir)/config/stormy16/clzhi2.c \
$(srcdir)/config/stormy16/ctzhi2.c \
$(srcdir)/config/stormy16/ffshi2.c \
+   $(srcdir)/config/stormy16/clrsbhi2.c \
$(srcdir)/config/stormy16/cmpsi2.c \
$(srcdir)/config/stormy16/ucmpsi2.c
 
Index: libgcc/config/stormy16/lib2funcs.c

===
--- libgcc/config/stormy16/lib2funcs.c  (revision 189996)
+++ libgcc/config/stormy16/lib2funcs.c  (working copy)
@@ -311,6 +311,22 @@
 }
 #endif
 
+#ifdef XSTORMY16_CLRSBHI2

+/* Returns the number of leading redundant sign bits in X.
+   I.e. the number of bits following the most significant bit which are
+   identical to it.  There are no special cases for 0 or other values.  */
+
+int
+__clrsbhi2 (HWtype x)
+{
+  if (x < 0)
+x = ~x;
+  if (x == 0)
+return 15;
+  return __builtin_clz (x) - 1;
+}
+#endif
+
 #ifdef XSTORMY16_UCMPSI2
 /* Performs an unsigned comparison of two 32-bit values: A and B.
If A is less than B, then 0 is returned.  If A is greater than B,





Re: [PATCH, 4.7] Enable the libstdc++ prettyprinters test suite

2012-07-31 Thread Michael Hope

cc'ed the libstdc++ list.  Ping?

-- Michael

On 25/07/12 12:12, Michael Hope wrote:

The libstdc++ prettyprinters test suite isn't called out in Makefile.am.  
Backport Andreas's
patch from mainline so that a sequential test run gives the same results as a 
parallel test.

Tested with no regressions on x86_64-linux-gnu and arm-linux-gnueabi.  The 
prettyprinter
tests are enabled as expected.

OK for 4.7?

-- Michael

2012-07-25  Michael Hope  

 Backport from mainline r186389:
 2012-04-12  Andreas Schwab  

 * testsuite/Makefile.am (check_DEJAGNUnormal0): Run
 prettyprinters.exp.
 * testsuite/Makefile.in: Regenerated.

diff --git a/libstdc++-v3/testsuite/Makefile.am 
b/libstdc++-v3/testsuite/Makefile.am
index 7094ad5..0cf8de5 100644
--- a/libstdc++-v3/testsuite/Makefile.am
+++ b/libstdc++-v3/testsuite/Makefile.am
@@ -134,7 +134,7 @@ check-DEJAGNU $(check_DEJAGNU_normal_targets): 
check-DEJAGNU%: site.exp
normal0) \
  if $(SHELL) -c "$$runtest --version" > /dev/null 2>&1; then \
$$runtest $(AM_RUNTESTFLAGS) $(RUNTESTDEFAULTFLAGS) \
-$(RUNTESTFLAGS) abi.exp; \
+$(RUNTESTFLAGS) abi.exp prettyprinters.exp; \
  else echo "WARNING: could not find \`runtest'" 1>&2; :;\
  fi; \
  dirs="`cd $$srcdir; echo [013-9][0-9]_*/*`";; \
diff --git a/libstdc++-v3/testsuite/Makefile.in 
b/libstdc++-v3/testsuite/Makefile.in
index e433bb9..bb077d1 100644
--- a/libstdc++-v3/testsuite/Makefile.in
+++ b/libstdc++-v3/testsuite/Makefile.in
@@ -572,7 +572,7 @@ check-DEJAGNU $(check_DEJAGNU_normal_targets): 
check-DEJAGNU%: site.exp
normal0) \
  if $(SHELL) -c "$$runtest --version" > /dev/null 2>&1; then \
$$runtest $(AM_RUNTESTFLAGS) $(RUNTESTDEFAULTFLAGS) \
-$(RUNTESTFLAGS) abi.exp; \
+$(RUNTESTFLAGS) abi.exp prettyprinters.exp; \
  else echo "WARNING: could not find \`runtest'" 1>&2; :;\
  fi; \
  dirs="`cd $$srcdir; echo [013-9][0-9]_*/*`";; \




Re: TPF: disable discriminators

2012-07-31 Thread Richard Henderson
On 2012-07-31 14:09, DJ Delorie wrote:
> +#ifdef TARGET_TPF
> +  /* Don't emit DWARF3/4 unless specifically selected.  The TPF
> + debuggers do not yet support DWARF 3/4.  */
> +  if (!global_options_set.x_dwarf_strict) 
> +dwarf_strict = 1;
> +  if (!global_options_set.x_dwarf_version)
> +dwarf_version = 2;
> +#endif

TARGET_TPF is always defined.  Just use a C if.
Otherwise ok.


r~


Re: TPF: disable discriminators

2012-07-31 Thread DJ Delorie

> TARGET_TPF is always defined.  Just use a C if.
> Otherwise ok.

Thanks, checked in as attached.

What about older branches?  4.7 needs this patch, 4.6 needs my
original patch.

2012-07-31  DJ Delorie  

* config/s390/s390.c (s390_option_override): Disable DWARF 3/4
extensions for TPF, unless specifically selected.

Index: gcc/config/s390/s390.c
===
--- gcc/config/s390/s390.c  (revision 190022)
+++ gcc/config/s390/s390.c  (working copy)
@@ -1651,12 +1651,22 @@ s390_option_override (void)
 flag_prefetch_loop_arrays = 1;
 
   /* Use the alternative scheduling-pressure algorithm by default.  */
   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
  global_options.x_param_values,
  global_options_set.x_param_values);
+
+  if (TARGET_TPF)
+{
+  /* Don't emit DWARF3/4 unless specifically selected.  The TPF
+debuggers do not yet support DWARF 3/4.  */
+  if (!global_options_set.x_dwarf_strict) 
+   dwarf_strict = 1;
+  if (!global_options_set.x_dwarf_version)
+   dwarf_version = 2;
+}
 }
 
 /* Map for smallest class containing reg regno.  */
 
 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS,


Re: TPF: disable discriminators

2012-07-31 Thread Richard Henderson
On 2012-07-31 15:21, DJ Delorie wrote:
> What about older branches?  4.7 needs this patch, 4.6 needs my
> original patch.

I don't see that 4.6 requires a different patch.
Otherwise ok to backport.


r~


PING [PATCH] Fix PR libstdc++/54036, problem negating DFP NaNs

2012-07-31 Thread Peter Bergner
I'd like to ping the following libstdc++ DFP patch that fixes PR54036:

  http://gcc.gnu.org/ml/gcc-patches/2012-07/msg00959.html

Peter



Re: TPF: disable discriminators

2012-07-31 Thread Richard Henderson
On 2012-07-31 15:51, DJ Delorie wrote:
>> I don't see that 4.6 requires a different patch.
> 
> 4.6 is missing this:
> 
> 2011-04-01  Richard Henderson  
> 
>   PR 48400
>   * dwarf2out.c (dwarf2out_source_line): Disable discriminators
>   in strict mode before dwarf4.  Re-order tests to early out
>   before switching sections.
> 
> So either *that* patch needs to be back-ported, or the tpf-specific
> workaround does.
> 

In 4.6 we do not emit discriminators except with DWARF2_ASM_LINE_DEBUG_INFO,
and in that section we have

  if (SUPPORTS_DISCRIMINATOR && discriminator != 0
  && (dwarf_version >= 4 || !dwarf_strict))
fprintf (asm_out_file, " discriminator %d", discriminator);

and sure enough dwarf_strict is honored.

That patch is only required if you have the other patches that introduce
discriminators to the non-dwarf2_asm_line path.


r~


Re: TPF: disable discriminators

2012-07-31 Thread DJ Delorie

> I don't see that 4.6 requires a different patch.

4.6 is missing this:

2011-04-01  Richard Henderson  

PR 48400
* dwarf2out.c (dwarf2out_source_line): Disable discriminators
in strict mode before dwarf4.  Re-order tests to early out
before switching sections.

So either *that* patch needs to be back-ported, or the tpf-specific
workaround does.


Re: PR middle-end/53321: [4.8 Regression] LTO bootstrap failed with bootstrap-profiled

2012-07-31 Thread H.J. Lu
On Tue, Jul 31, 2012 at 5:39 AM, Jan Hubicka  wrote:
>> Hi,
>>
>> 2012-07-06  H.J. Lu  
>>
>>   PR middle-end/53321
>>   PR middle-end/53865
>>   * Makefile.in (tree-profile.o): Depend on ipa-inline.h.
>>
>>   * ipa.c (symtab_remove_unreachable_nodes): Restore
>>   cgraph_propagate_frequency call when something was changed.
>>
>>   * tree-profile.c: Include "ipa-inline.h".
>>   (gimple_gen_ic_func_profiler): Return bool.
>>   (tree_profiling): Call inline_free_summary to clear stale inline
>>   summary if gimple_gen_ic_func_profiler returns true.
>>
>>   * value-prof.h (gimple_gen_ic_func_profiler): Change return
>>   type to bool.
>
> Hi,
> My local tree is profiledbootstrapping since I had a local patch to
> prevent recomputing it before the ipa_profile pass, but I agree it is better
> to not have stale inline summaries around.
>
>> +  /* Clear stale inline summary.  */
>> +  if (gen_ic_func_profiler)
>> +inline_free_summary ();
>
> The inline summary is completely dead here. It is computed for purposes of
> early inlining and the real inliner will trash it and recompute soon.
> (because the one computed by the real inliner is more detailed).
>
> matrix_reorg, emutls and tm passes will also suffer from the problem of
> invalidating the inlining summary. It is better to free it this early.
>
> Can you simply add a micro pass just after pass_early_local_passes calling
> inline_free_summary unconditionally?
>
> Patch is preapproved (or I will make it tomorrow unless you beat me).
>

This patch passed profiledbootstrap with LTO as well as LTO -O3
on 176.gcc in SPEC CPU 2000.  I have to add 2 inline_edge_summary_vec
checks to avoid ICE.  OK to install?

Thanks.

-- 
H.J.
---
2012-07-06  H.J. Lu  

PR middle-end/53321
PR middle-end/53865
* ipa-inline-analysis.c (inline_free_summary): Return if
inline_edge_summary_vec is NULL.

* ipa-split.c (execute_split_functions): Check if a function
is inlinable only if inline_edge_summary_vec != NULL.

* ipa.c (symtab_remove_unreachable_nodes): Restore
cgraph_propagate_frequency call when something was changed.
(free_inline_summary): New function.
(pass_ipa_free_inline_summary): New pass.

* passes.c (init_optimization_passes): Add
pass_ipa_free_inline_summary before pass_ipa_tree_profile.

* timevar.def (TV_IPA_FREE_INLINE_SUMMARY): New.

* tree-pass.h (pass_ipa_free_inline_summary): New.

diff --git a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c
index ae32131..970be1e 100644
--- a/gcc/ipa-inline-analysis.c
+++ b/gcc/ipa-inline-analysis.c
@@ -3242,6 +3242,8 @@ void
 inline_free_summary (void)
 {
   struct cgraph_node *node;
+  if (inline_edge_summary_vec == NULL)
+return;
   FOR_EACH_DEFINED_FUNCTION (node)
 reset_inline_summary (node);
   if (function_insertion_hook_holder)
diff --git a/gcc/ipa-split.c b/gcc/ipa-split.c
index 33cf7d2..7a8844f 100644
--- a/gcc/ipa-split.c
+++ b/gcc/ipa-split.c
@@ -1415,7 +1415,7 @@ execute_split_functions (void)
 }
   /* This can be relaxed; function might become inlinable after splitting
  away the uninlinable part.  */
-  if (!inline_summary (node)->inlinable)
+  if (inline_edge_summary_vec && !inline_summary (node)->inlinable)
 {
   if (dump_file)
fprintf (dump_file, "Not splitting: not inlinable.\n");
diff --git a/gcc/ipa.c b/gcc/ipa.c
index 9329d9b..e270591 100644
--- a/gcc/ipa.c
+++ b/gcc/ipa.c
@@ -448,6 +448,11 @@ symtab_remove_unreachable_nodes (bool
before_inlining_p, FILE *file)
   verify_symtab ();
 #endif

+  /* If we removed something, perhaps profile could be improved.  */
+  if (changed && optimize && inline_edge_summary_vec)
+FOR_EACH_DEFINED_FUNCTION (node)
+  cgraph_propagate_frequency (node);
+
   return changed;
 }

@@ -960,6 +965,34 @@ struct simple_ipa_opt_pass
pass_ipa_function_and_variable_visibility =
  }
 };

+/* Free inline summary.  */
+
+static unsigned
+free_inline_summary (void)
+{
+  inline_free_summary ();
+  return 0;
+}
+
+struct simple_ipa_opt_pass pass_ipa_free_inline_summary =
+{
+ {
+  SIMPLE_IPA_PASS,
+  "*free_inline_summary",  /* name */
+  NULL,/* gate */
+  free_inline_summary, /* execute */
+  NULL,/* sub */
+  NULL,/* next */
+  0,   /* static_pass_number */
+  TV_IPA_FREE_INLINE_SUMMARY,  /* tv_id */
+  0,   /* properties_required */
+  0,   /* properties_provided */
+  0,   /* properties_destroyed */
+  0,   /* todo_flags_start */
+  TODO_ggc_collect /* todo_flags_finish */
+ }
+};
+
 /* Do not re-run on ltrans stage.  */

 static bool
diff --git a/gcc/passes.c 

Re: TPF: disable discriminators

2012-07-31 Thread DJ Delorie

Ah, ok.


[RFC] Fix pr34548 -- unnecessary alignment from alloca

2012-07-31 Thread Richard Henderson
I've bootstrapped this on both ppc64 and x86_64.  I'll leave some time
for comment, but I can't immediately see how this could go wrong anywhere.


r~

* function.h (struct rtl_data): Add max_dynamic_stack_alignment.
* cfgexpand.c (gimple_expand_cfg): Initialise it.
* explow.c (allocate_dynamic_stack_space): Set it.  Simplify
alignment requirements given the known alignment of dynamic_offset.
* function.c (instantiate_virtual_regs): Align dynamic_offset.


diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 96c2e2e..1f16534 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -4363,6 +4363,7 @@ gimple_expand_cfg (void)
   crtl->max_used_stack_slot_alignment = STACK_BOUNDARY;
   crtl->stack_alignment_estimated = 0;
   crtl->preferred_stack_boundary = STACK_BOUNDARY;
+  crtl->max_dynamic_stack_alignment = 0;
   cfun->cfg->max_jumptable_ents = 0;
 
   /* Resovle the function section.  Some targets, like ARM EABI rely on 
knowledge
diff --git a/gcc/explow.c b/gcc/explow.c
index 1cfe93b..c7581b0 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -1173,7 +1173,6 @@ allocate_dynamic_stack_space (rtx size, unsigned 
size_align,
 {
   HOST_WIDE_INT stack_usage_size = -1;
   rtx final_label, final_target, target;
-  unsigned extra_align = 0;
   bool must_align;
 
   /* If we're asking for zero bytes, it doesn't matter what we point
@@ -1237,58 +1236,40 @@ allocate_dynamic_stack_space (rtx size, unsigned 
size_align,
   else if (size_align < BITS_PER_UNIT)
 size_align = BITS_PER_UNIT;
 
-  /* We can't attempt to minimize alignment necessary, because we don't
- know the final value of preferred_stack_boundary yet while executing
- this code.  */
-  if (crtl->preferred_stack_boundary < PREFERRED_STACK_BOUNDARY)
-crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
-
   /* We will need to ensure that the address we return is aligned to
- REQUIRED_ALIGN.  If STACK_DYNAMIC_OFFSET is defined, we don't
- always know its final value at this point in the compilation (it
- might depend on the size of the outgoing parameter lists, for
- example), so we must align the value to be returned in that case.
- (Note that STACK_DYNAMIC_OFFSET will have a default nonzero value if
- STACK_POINTER_OFFSET or ACCUMULATE_OUTGOING_ARGS are defined).
- We must also do an alignment operation on the returned value if
- the stack pointer alignment is less strict than REQUIRED_ALIGN.
-
- If we have to align, we must leave space in SIZE for the hole
- that might result from the alignment operation.  */
-
-  must_align = (crtl->preferred_stack_boundary < required_align);
-  if (must_align)
+ REQUIRED_ALIGN.  If that alignment is no larger than
+ PREFERRED_STACK_BOUNDARY, we can handle everything without an
+ explicit alignment.  */
+  if (required_align <= PREFERRED_STACK_BOUNDARY)
 {
-  if (required_align > PREFERRED_STACK_BOUNDARY)
-   extra_align = PREFERRED_STACK_BOUNDARY;
-  else if (required_align > STACK_BOUNDARY)
-   extra_align = STACK_BOUNDARY;
-  else
-   extra_align = BITS_PER_UNIT;
+  if (crtl->preferred_stack_boundary < required_align)
+   crtl->preferred_stack_boundary = required_align;
+  if (crtl->max_dynamic_stack_alignment < required_align)
+   crtl->max_dynamic_stack_alignment = required_align;
+  must_align = false;
 }
+  else
+{
+  unsigned extra, extra_align;
 
-  /* ??? STACK_POINTER_OFFSET is always defined now.  */
-#if defined (STACK_DYNAMIC_OFFSET) || defined (STACK_POINTER_OFFSET)
-  must_align = true;
-  extra_align = BITS_PER_UNIT;
-#endif
+  crtl->preferred_stack_boundary = PREFERRED_STACK_BOUNDARY;
+  crtl->max_dynamic_stack_alignment = PREFERRED_STACK_BOUNDARY;
 
-  if (must_align)
-{
-  unsigned extra = (required_align - extra_align) / BITS_PER_UNIT;
+  extra_align = PREFERRED_STACK_BOUNDARY;
+  extra = (required_align - extra_align) / BITS_PER_UNIT;
 
   size = plus_constant (Pmode, size, extra);
   size = force_operand (size, NULL_RTX);
 
   if (flag_stack_usage_info)
stack_usage_size += extra;
-
   if (extra && size_align > extra_align)
size_align = extra_align;
+  must_align = true;
 }
 
   /* Round the size to a multiple of the required stack alignment.
- Since the stack if presumed to be rounded before this allocation,
+ Since the stack is presumed to be rounded before this allocation,
  this will maintain the required alignment.
 
  If the stack grows downward, we could save an insn by subtracting
diff --git a/gcc/function.c b/gcc/function.c
index f1e0b2d..827f687 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -1892,7 +1892,14 @@ instantiate_virtual_regs (void)
   /* Compute the offsets to use for this function.  */
   in_arg_offset = FIRST_PARM_OFFSET (current_function_decl);
   var_offset = STARTING_FRAME_OFFSET;
+
   dynamic_offset = STACK_DYNAMIC_OFFS

[PATCH, MIPS] Add 34Kn cpu

2012-07-31 Thread Sandra Loosemore
This patch makes GCC know about -march=34kn.  MIPS asked us to add 
support for this processor to our local source base a couple of years 
ago; it's basically a 34Kc without the DSP ASE.  OK for mainline?


I have posted the corresponding assembler patch here:
http://sourceware.org/ml/binutils/2012-08/msg8.html

-Sandra


2012-08-01  Catherine Moore  
Sandra Loosemore  

gcc/
* config/mips/mips-cpus.def (34kn): New.
* config/mips/mips.h (MIPS_ARCH_FLOAT_SPEC): Add 34kn.
(BASE_DRIVER_SELF_SPECS): Do not imply -mdsp for the 34kn.
Index: config/mips/mips-cpus.def
===
--- config/mips/mips-cpus.def	(revision 189988)
+++ config/mips/mips-cpus.def	(working copy)
@@ -120,6 +120,7 @@ MIPS_CPU ("34kf", PROCESSOR_24KF2_1, 33,
 MIPS_CPU ("34kf1_1", PROCESSOR_24KF1_1, 33, 0)
 MIPS_CPU ("34kfx", PROCESSOR_24KF1_1, 33, 0)
 MIPS_CPU ("34kx", PROCESSOR_24KF1_1, 33, 0)
+MIPS_CPU ("34kn", PROCESSOR_24KC, 33, 0)  /* 34K with MT but no DSP.  */
 
 MIPS_CPU ("74kc", PROCESSOR_74KC, 33, 0) /* 74K with DSPr2.  */
 MIPS_CPU ("74kf2_1", PROCESSOR_74KF2_1, 33, 0)
Index: config/mips/mips.h
===
--- config/mips/mips.h	(revision 189989)
+++ config/mips/mips.h	(working copy)
@@ -715,7 +715,7 @@ struct mips_cpu_info {
 #define MIPS_ARCH_FLOAT_SPEC \
   "%{mhard-float|msoft-float|mno-float|march=mips*:; \
  march=vr41*|march=m4k|march=4k*|march=24kc|march=24kec \
- |march=34kc|march=74kc|march=1004kc|march=5kc \
+ |march=34kc|march=34kn|march=74kc|march=1004kc|march=5kc \
  |march=octeon|march=xlr: -msoft-float;		  \
  march=*: -mhard-float}"
 
@@ -763,7 +763,7 @@ struct mips_cpu_info {
 /* A spec that infers the -mdsp setting from an -march argument.  */
 #define BASE_DRIVER_SELF_SPECS \
   "%{!mno-dsp: \
- %{march=24ke*|march=34k*|march=1004k*: -mdsp} \
+ %{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k*: -mdsp} \
  %{march=74k*:%{!mno-dspr2: -mdspr2 -mdsp}}}"
 
 #define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS