Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Dimitrios Apostolou

On Fri, 8 Jul 2011, Steven Bosscher wrote:

On Fri, Jul 8, 2011 at 5:20 AM, Dimitrios Apostolou  wrote:

The attached patch does two things for df_get_call_refs():


How did you test this patch?

Normally, a patch submission comes with text like, "Bootstrapped &
tested on ..., no regressions.". Also, you should write a ChangeLog
entry, best included in your mail somewhere at the end ;-)


Hi Steven, thanks for the instructions. I've not run the mandatory tests 
you have told me about, only done some minor testing due to lack of time. 
I'm not yet posting patches for inclusion, but more as an RFC. Should such 
patches be sent to gcc instead of gcc-patches?



Thanks,
Dimitris



Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Paolo Bonzini

On 07/08/2011 05:51 AM, Dimitrios Apostolou wrote:

+ /* first write DF_REF_BASE */


This is not necessary.  These uses are written to use_vec, while the 
uses from REG_EQUIV and REG_EQUAL are written to eq_use_vec (see 
df_ref_create_structure).


Also, anyway this wouldn't work because you would have to split the loop 
in two.  I'll attribute that to the time of day when you were writing 
the message. :)



+case REG_NON_LOCAL_GOTO:
+  /* The frame ptr is used by a non-local goto.  */
+  df_ref_record (DF_REF_BASE, collection_rec,
+ regno_reg_rtx[FRAME_POINTER_REGNUM],
+ NULL, bb, insn_info,
+ DF_REF_REG_USE, 0);
+#if !HARD_FRAME_POINTER_IS_FRAME_POINTER
+  df_ref_record (DF_REF_BASE, collection_rec,
+ regno_reg_rtx[HARD_FRAME_POINTER_REGNUM],
+ NULL, bb, insn_info,
+ DF_REF_REG_USE, 0);
+#endif
+  break;


Also note that you have to check which of FRAME_POINTER_REGNUM and 
HARD_FRAME_POINTER_REGNUM comes first here, if you want to ensure the 
DF_REF_BASE refs are created sorted.  But it's likely better to _not_ 
create them sorted and just replace qsort with an insertion sort, as 
discussed offlist.  It will cost a single swap in a pretty rare case.


Paolo


what can be in a group set?

2011-07-08 Thread Paolo Bonzini

df-scan.c has this code to deal with group sets:

  /* It is legal to have a set destination be a parallel. */
  if (GET_CODE (dst) == PARALLEL)
{
  int i;

  for (i = XVECLEN (dst, 0) - 1; i >= 0; i--)
{
  rtx temp = XVECEXP (dst, 0, i);
  if (GET_CODE (temp) == EXPR_LIST || GET_CODE (temp) == CLOBBER
  || GET_CODE (temp) == SET)
df_def_record_1 (collection_rec,
 temp, bb, insn_info,
 GET_CODE (temp) == CLOBBER
 ? flags | DF_REF_MUST_CLOBBER : flags);
}
  return;
}

It seems to me that the case of (set (parallel [(set ...)])) and (set 
(parallel [(clobber ...)])) is bogus.  I would like to simplify it to 
the following:


  /* It is legal to have a set destination be a parallel. */
  if (GET_CODE (dst) == PARALLEL)
{
  int i;

  for (i = XVECLEN (dst, 0) - 1; i >= 0; i--)
{
  rtx temp = XVECEXP (dst, 0, i);
  assert (GET_CODE (temp) == EXPR_LIST);
  df_def_record_1 (collection_rec, temp, bb, insn_info, flags);
}
  return;
}

Does this make sense?  See the attached patch for the overall thing I 
was thinking of.


Paolo
* df-scan.c (df_def_record_1): Assert a parallel must contain
an EXPR_LIST at this point.  Receive the LOC and move its
extraction...
(df_defs_record): ... here.  Remove superfluous braces.

Index: df-scan.c
===
--- df-scan.c	(revision 169877)
+++ df-scan.c	(working copy)
@@ -111,7 +111,7 @@ static void df_ref_record (enum df_ref_c
 			   rtx, rtx *,
 			   basic_block, struct df_insn_info *,
 			   enum df_ref_type, int ref_flags);
-static void df_def_record_1 (struct df_collection_rec *, rtx,
+static void df_def_record_1 (struct df_collection_rec *, rtx *,
 			 basic_block, struct df_insn_info *,
 			 int ref_flags);
 static void df_defs_record (struct df_collection_rec *, rtx,
@@ -2922,19 +2922,10 @@ df_read_modify_subreg_p (rtx x)
 
 static void
 df_def_record_1 (struct df_collection_rec *collection_rec,
- rtx x, basic_block bb, struct df_insn_info *insn_info,
+ rtx *loc, basic_block bb, struct df_insn_info *insn_info,
 		 int flags)
 {
-  rtx *loc;
-  rtx dst;
-
- /* We may recursively call ourselves on EXPR_LIST when dealing with PARALLEL
- construct.  */
-  if (GET_CODE (x) == EXPR_LIST || GET_CODE (x) == CLOBBER)
-loc = &XEXP (x, 0);
-  else
-loc = &SET_DEST (x);
-  dst = *loc;
+  rtx dst = *loc;
 
   /* It is legal to have a set destination be a parallel. */
   if (GET_CODE (dst) == PARALLEL)
@@ -2944,12 +2935,9 @@ df_def_record_1 (struct df_collection_re
   for (i = XVECLEN (dst, 0) - 1; i >= 0; i--)
 	{
 	  rtx temp = XVECEXP (dst, 0, i);
-	  if (GET_CODE (temp) == EXPR_LIST || GET_CODE (temp) == CLOBBER
-	  || GET_CODE (temp) == SET)
-	df_def_record_1 (collection_rec,
- temp, bb, insn_info,
-			 GET_CODE (temp) == CLOBBER
-			 ? flags | DF_REF_MUST_CLOBBER : flags);
+	  gcc_assert (GET_CODE (temp) == EXPR_LIST);
+	  df_def_record_1 (collection_rec, &XEXP (temp, 0),
+			   bb, insn_info, flags);
 	}
   return;
 }
@@ -3003,18 +2991,16 @@ df_defs_record (struct df_collection_rec
 {
   RTX_CODE code = GET_CODE (x);
 
-  if (code == SET || code == CLOBBER)
-{
-  /* Mark the single def within the pattern.  */
-  int clobber_flags = flags;
-  clobber_flags |= (code == CLOBBER) ? DF_REF_MUST_CLOBBER : 0;
-  df_def_record_1 (collection_rec, x, bb, insn_info, clobber_flags);
-}
+  if (code == SET)
+df_def_record_1 (collection_rec, &SET_DEST (x), bb, insn_info, flags);
+  else if (code == CLOBBER)
+{
+  flags |= DF_REF_MUST_CLOBBER;
+  df_def_record_1 (collection_rec, &XEXP (x, 0), bb, insn_info, flags);
+}
   else if (code == COND_EXEC)
-{
-  df_defs_record (collection_rec, COND_EXEC_CODE (x),
-		  bb, insn_info, DF_REF_CONDITIONAL);
-}
+df_defs_record (collection_rec, COND_EXEC_CODE (x),
+		bb, insn_info, DF_REF_CONDITIONAL);
   else if (code == PARALLEL)
 {
   int i;


Re: Fix PR 49014

2011-07-08 Thread Andrey Belevantsev

On 07.07.2011 20:18, Vladimir Makarov wrote:

On 07/01/2011 10:50 AM, Andrey Belevantsev wrote:

On 26.05.2011 17:32, Andrey Belevantsev wrote:

On 25.05.2011 19:31, Bernd Schmidt wrote:

On 05/25/2011 03:29 PM, Andrey Belevantsev wrote:

I think the hook is a better idea than the attribute because nobody will
care to mark all offending insns with an attribute.


I don't know. IIRC when I looked at sh or whatever the broken port was,
it was only two insns - there would still be some value in being able to
assert that all other insns have a reservation.

OK, I will take a look on x86-64 and will get back with more information.

Andrey

So, I have made an attempt to bootstrap on x86-64 with the extra assert
in selective scheduling that assumes the DFA state always changes when
issuing a recog_memoized >=0 insn (patch attached). Indeed, there are
just a few general insns that don't have proper reservations. However, it
was a surprise to me to see that almost any insn with SSE registers fails
this assert and thus does not get properly scheduled.

Overall, the work on fixing those seems doable, it took just a day to get
the compiler bootstrapped (of course, the testsuite may bring many more
issues). So, if there is an agreement on marking a few offending insns
with the new attribute, we can proceed with the help of somebody from the
x86 land on fixing those and researching for other targets.


The changes in sel-sched.c are ok for me. i386.md changes look ok for me too
but you should ask an x86 maintainer to get approval for the change.

I think you should describe the attribute in the documentation because it
is common for all targets.

I can not approve the common.opt changes because they make the selective scheduler
the default for the 2nd insn scheduling for all targets. Such a change should
be justified by thorough testing and benchmarking (compilation speed, code
size, performance improvements) on several platforms (at least on major ones).
I didn't intend to enable sel-sched for all targets, the patch was just an 
RFC to see whether there is an agreement about usefulness of such 
attribute, and the common.opt change was to show how I tested the patch.  I 
am sorry for not making it clear in the mail.


I am planning to check Bernd's thought about whether I selected the right 
-mcpu switch for testing, as I was under the impression that nowadays this 
should be autodetected by configure.  I will also modify the attribute as 
suggested.  Then we can discuss further.  I am going to leave on vacation 
soon though so I don't know when exactly I can proceed with this.


Thanks!

Andrey


Re: [PATCH 4/6] Shrink-wrapping

2011-07-08 Thread Richard Earnshaw
On 07/07/11 21:08, Richard Sandiford wrote:
> Richard Earnshaw  writes:
>> On 07/07/11 15:34, Richard Sandiford wrote:
>>> It seems a shame to have both (return) and (simple_return).  You said
>>> that we need the distinction in order to cope with targets like ARM,
>>> whose (return) instruction actually performs some of the epilogue too.
>>> It feels like the load of the saved registers should really be expressed
>>> in rtl, in parallel with the return.  I realise that'd prevent
>>> conditional returns though.  Maybe there's no elegant way out...
>>
>> You'd still need to deal with distinct returns for shrink-wrapped code
>> when the full (return) expands to
>>
>>  ldm sp, {regs..., pc}
>>
>> The shrink wrapped version would always be
>>  bx  lr
> 
> Sure, I understand that returns does more than return on ARM.
> What I meant was: we'd normally want that other stuff to be
> expressed in rtl alongside the (return) rtx.  E.g. something like:
> 
>   (parallel
> [(return)
>  (set (reg r4) (mem (plus (reg sp) (const_int ...
>  (set (reg r5) (mem (plus (reg sp) (const_int ...
>  (set (reg sp) (plus (reg sp) (const_int ...)))])
> 
> And what I meant was: the reason we can't do that is that it would make
> conditional execution harder.  But the downside is that (return) and
> (simple_return) will appear to do the same thing to register r4
> (i.e. nothing).  I.e. we are to some extent going to be lying to
> the rtl optimisers.
>

Hmm, yes, that would certainly help in terms of ensuring the compiler
knew the liveness correctly.  But as you say, that doesn't match a
simple-jump and that could lead to other problems.

R.



Re: [PATCH, SRA] Dump that a structure is too big for total scalarization

2011-07-08 Thread Richard Guenther
On Thu, 7 Jul 2011, Martin Jambor wrote:

> Hi,
> 
> in order to better analyze what SRA is or is not doing, it is
> sometimes advantageous to have in the dump information that a
> structure was not subject to total scalarization because it was too
> big - if we have detailed dumping on, that is.
> 
> This is accomplished by the patch below.  It is currently undergoing a
> bootstrap and testsuite run on x86_64-linux.  OK if it passes?

Ok.

Thanks,
Richard.

> Thanks,
> 
> Martin
> 
> 
> 2011-07-07  Martin Jambor  
> 
>   * tree-sra.c (analyze_all_variable_accesses): Dump that a structure
>   is too big for total scalarization.
> 
> Index: src/gcc/tree-sra.c
> ===
> *** src.orig/gcc/tree-sra.c
> --- src/gcc/tree-sra.c
> *** analyze_all_variable_accesses (void)
> *** 2312,2327 
>   tree var = referenced_var (i);
>   
>   if (TREE_CODE (var) == VAR_DECL
> - && ((unsigned) tree_low_cst (TYPE_SIZE (TREE_TYPE (var)), 1)
> - <= max_total_scalarization_size)
>   && type_consists_of_records_p (TREE_TYPE (var)))
> {
> ! completely_scalarize_var (var);
> ! if (dump_file && (dump_flags & TDF_DETAILS))
> {
> ! fprintf (dump_file, "Will attempt to totally scalarize ");
>   print_generic_expr (dump_file, var, 0);
> ! fprintf (dump_file, " (UID: %u): \n", DECL_UID (var));
> }
> }
> }
> --- 2312,2335 
>   tree var = referenced_var (i);
>   
>   if (TREE_CODE (var) == VAR_DECL
>   && type_consists_of_records_p (TREE_TYPE (var)))
> {
> ! if ((unsigned) tree_low_cst (TYPE_SIZE (TREE_TYPE (var)), 1)
> ! <= max_total_scalarization_size)
> !   {
> ! completely_scalarize_var (var);
> ! if (dump_file && (dump_flags & TDF_DETAILS))
> !   {
> ! fprintf (dump_file, "Will attempt to totally scalarize ");
> ! print_generic_expr (dump_file, var, 0);
> ! fprintf (dump_file, " (UID: %u): \n", DECL_UID (var));
> !   }
> !   }
> ! else if (dump_file && (dump_flags & TDF_DETAILS))
> {
> ! fprintf (dump_file, "Too big to totally scalarize: ");
>   print_generic_expr (dump_file, var, 0);
> ! fprintf (dump_file, " (UID: %u)\n", DECL_UID (var));
> }
> }
> }
> 
> 

-- 
Richard Guenther 
Novell / SUSE Labs
SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
GF: Jeff Hawn, Jennifer Guild, Felix Imendörffer

Re: [PATCH 0/3] Fix PR47654 and PR49649

2011-07-08 Thread Richard Guenther
On Thu, 7 Jul 2011, Sebastian Pop wrote:

> Hi,
> 
> First there are two cleanup patches independent of the fix:
> 
>   Start counting nesting level from 0.
>   Do not compute twice type, lb, and ub.
> 
> Then the patch that fixes PR47654:
> 
>   Fix PR47654: Compute LB and UB of a CLAST expression.
> 
> One of the reasons we cannot determine the IV type only from the
> polyhedral representation is that as in the testcase of PR47654, we
> are asked to generate an induction variable going from 0 to 127.  That
> could be represented with a "char".  However the upper bound
> expression of the loop generated by CLOOG is min (127, 51*scat_1 + 50)
> and that would overflow if we use a "char" type.  To evaluate a type
> in which the expression 51*scat_1 + 50 does not overflow, we have to
> compute an upper and lower bound for the expression.
> 
> To fix the problem exposed by Tobias:
> 
> > for (i = 0 ; i < 2; i++)
> >  for (j = i ; j < i + 1; j++)
> >for (k = j ; k < j + 1; k++)
> >  for (m = k ; m < k + 1; m++)
> >for (n = m ; n < m + 1; n++)
> >  A[0] += A[n];
> > 
> > I am a little bit afraid that we will increase the type size by an
> > order of magnitude (or at least one bit) for each nesting level.
> 
> instead of computing the lb and ub of scat_1 in "51*scat_1 + 50" based
> on the type of scat_1 (that we already code generated when building
> the outer loop), we use the polyhedral representation to get an
> accurate lb and ub for scat_1.
> 
> When translating the substitutions of a user statement using this
> precise method, like for example S5 in vect-pr43423.c:
> 
>   for (scat_1=0;scat_1<=min(T_3-1,T_4-1);scat_1++) {
> S5(scat_1);
> 
> we get a type that is too precise: based on the interval [0,99] we get
> the type "unsigned char" when the type of scat_1 is "int", misleading
> the vectorizer due to the insertion of spurious casts:
> 
> #  Access function 0: (int) {() graphite_IV.7_56, +, 1}_3;
> #)
> affine dependence test not usable: access function not affine or constant.
> 
> So we have to keep around the previous code gcc_type_for_clast_* that
> computes the type of an expression as the max precision of the
> components of that expression, and use that when computing the types
> of substitution expressions.
> 
> The patches passed together a full bootstrap and test on amd64-linux.
> Ok for trunk?

The idea sounds good to me and the middle-end-like looking pieces
look good.  I'd appreciate a 2nd look from Tobias.

Thanks,
Richard.


Re: Generic hwloop support library

2011-07-08 Thread Richard Sandiford
Bernd Schmidt  writes:
> On 07/05/11 21:25, Richard Sandiford wrote:
>> A C bootstrap only should be fine of course, since the code isn't
>> going to be run.)
>> 
>>> +  hwloop_info loops = NULL;
>> 
>> Unnecessary initialisation (or at least, it should be).
>
> ? The value is used inside the loop to initialize "next" of the first loop.

Sorry, I probably cut too much context.  I meant the initialisation here:

+  hwloop_info loops = NULL;
+  hwloop_info loop;
+  bitmap_obstack stack;
+
+  df_live_add_problem ();
+  df_live_set_all_dirty ();
+  df_analyze ();
+
+  bitmap_obstack_initialize (&stack);
+
+  if (dump_file)
+fprintf (dump_file, ";; Find loops, first pass\n\n");
+
+  loops = discover_loops (&stack, hooks);

Richard


Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 5:20 AM, Dimitrios Apostolou  wrote:
> Hello list,
>
> The attached patch does two things for df_get_call_refs():
> * First it uses HARD_REG_SETs for defs_generated and
> regs_invalidated_by_call, instead of bitmaps. Replacing in total more than
> 400K calls (for my testcase) to bitmap_bit_p() with the much faster
> TEST_HARD_REG_BIT, reduces the total instruction count from about 13M to
> 1.5M.
> * Second it produces the REFs in REGNO order, which is important to keep the
> collection_rec sorted most times, and avoid expensive calls to qsort().
> Thanks to Paolo Bonzini for idea and mentoring.
>
> The second part makes a big difference if accompanied with another patch in
> df_insn_refs_collect(). I'll post a followup patch, that is unfortunately
> unstable for some of my tests, so I'd appreciate any comments.

Did you check the impact on memory usage?  I suppose on targets
with not many hard registers it should even improve, but do we expect
memory usage to be worse in any case?

Thanks,
Richard.

>
> Thanks,
> Dimitris
>


[PATCH] Fix PR49662, XFAIL testcases

2011-07-08 Thread Richard Guenther

This XFAILs the loop interchange testcases that are now confused
by better PRE.

Tested on x86_64-unknown-linux-gnu, applied.

Richard.

2011-07-08  Richard Guenther  

PR tree-optimization/49662
* gcc.dg/graphite/interchange-14.c: XFAIL.
* gcc.dg/graphite/interchange-15.c: Likewise.
* gcc.dg/graphite/interchange-mvt.c: Likewise.

Index: gcc/testsuite/gcc.dg/graphite/interchange-14.c
===
--- gcc/testsuite/gcc.dg/graphite/interchange-14.c  (revision 176030)
+++ gcc/testsuite/gcc.dg/graphite/interchange-14.c  (working copy)
@@ -54,5 +54,6 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { 
xfail *-*-* } } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
Index: gcc/testsuite/gcc.dg/graphite/interchange-15.c
===
--- gcc/testsuite/gcc.dg/graphite/interchange-15.c  (revision 176030)
+++ gcc/testsuite/gcc.dg/graphite/interchange-15.c  (working copy)
@@ -48,6 +48,7 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { 
xfail *-*-* } } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
 
Index: gcc/testsuite/gcc.dg/graphite/interchange-mvt.c
===
--- gcc/testsuite/gcc.dg/graphite/interchange-mvt.c (revision 176030)
+++ gcc/testsuite/gcc.dg/graphite/interchange-mvt.c (working copy)
@@ -58,6 +58,7 @@ main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" } } */
+/* PRE destroys the perfect nest and we can't cope with that yet.  */
+/* { dg-final { scan-tree-dump-times "will be interchanged" 1 "graphite" { 
xfail *-*-* } } } */
 /* { dg-final { cleanup-tree-dump "graphite" } } */
 


[PATCH] Remove call_expr_arg and call_expr_argp

2011-07-08 Thread Romain Geissler
Hello,

This patch simply removes legacy call_expr_arg and call_expr_argp
declarations since these functions no longer have a defined body. Use
CALL_EXPR_ARG and CALL_EXPR_ARGP macros instead.

Builds fine with no regressions on x86_64.

2011-07-08  Romain Geissler  

   * tree.h: Remove call_expr_arg and call_expr_argp



Index: gcc/tree.h
===
--- gcc/tree.h  (revision 175907)
+++ gcc/tree.h  (working copy)
@@ -5130,8 +5130,6 @@ extern bool commutative_ternary_tree_cod
 extern tree upper_bound_in_type (tree, tree);
 extern tree lower_bound_in_type (tree, tree);
 extern int operand_equal_for_phi_arg_p (const_tree, const_tree);
-extern tree call_expr_arg (tree, int);
-extern tree *call_expr_argp (tree, int);
 extern tree create_artificial_label (location_t);
 extern const char *get_name (tree);
 extern bool stdarg_p (const_tree);


Re: [PATCH] Remove call_expr_arg and call_expr_argp

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 10:44 AM, Romain Geissler
 wrote:
> Hello,
>
> This patch simply removes legacy call_expr_arg and call_expr_argp
> declarations since these functions no longer have a defined body. Use
> CALL_EXPR_ARG and CALL_EXPR_ARGP macros instead.
>
> Build fine and no regression on x86_64.

Ok.

Thanks,
Richard.

> 2011-07-08  Romain Geissler  
>
>       * tree.h: Remove call_expr_arg and call_expr_argp
>
>
>
> Index: gcc/tree.h
> ===
> --- gcc/tree.h  (revision 175907)
> +++ gcc/tree.h  (working copy)
> @@ -5130,8 +5130,6 @@ extern bool commutative_ternary_tree_cod
>  extern tree upper_bound_in_type (tree, tree);
>  extern tree lower_bound_in_type (tree, tree);
>  extern int operand_equal_for_phi_arg_p (const_tree, const_tree);
> -extern tree call_expr_arg (tree, int);
> -extern tree *call_expr_argp (tree, int);
>  extern tree create_artificial_label (location_t);
>  extern const char *get_name (tree);
>  extern bool stdarg_p (const_tree);
>


Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Dimitrios Apostolou

On Fri, 8 Jul 2011, Jakub Jelinek wrote:

On Fri, Jul 08, 2011 at 06:20:04AM +0300, Dimitrios Apostolou wrote:

The attached patch does two things for df_get_call_refs():
* First it uses HARD_REG_SETs for defs_generated and
regs_invalidated_by_call, instead of bitmaps. Replacing in total
more than 400K calls (for my testcase) to bitmap_bit_p() with the
much faster TEST_HARD_REG_BIT, reduces the total instruction count
from about 13M to 1.5M.


Have you verified that collection_rec->def_vec never contains pseudo
register references?  Otherwise you couldn't use
HARD_REG_SET... gcc_checking_assert might be useful.



Hi Jakub, Steve pointed me to the following from GCC Internals Manual:

call_insn insns have the same extra fields as insn insns, accessed in the 
same way and in addition contain a field CALL_INSN_FUNCTION_USAGE, which 
contains a list (chain of expr_list expressions) containing use and 
clobber expressions that denote hard registers and MEMs used or clobbered 
by the called function.



So doesn't that mean that for CALL insns it should contain only HARD_REG 
DEFs? I will of course use an assert to be sure.



Thanks,
Dimitris



Re: [patch tree-optimization]: [1 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Thu, Jul 7, 2011 at 6:06 PM, Kai Tietz  wrote:
> Hello,
>
> This patch - first of series - adds to fold and some helper routines support
> for one-bit precision bitwise folding and detection.
> This patch is necessary for - next patch of series - boolification of
> comparisons.
>
> Bootstrapped and regression tested for all standard-languages (plus
> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>
> Ok for apply?

Factoring out fold_truth_andor to a function should be done separately.
A patch that does just that is pre-approved.

Otherwise the patch globs too many changes and lacks reasoning.
Why do we want to handle all this in fold when the boolification
happens only after gimplification?

Thanks,
Richard.

> Regards,
> Kai
>
> ChangeLog
>
> 2011-07-07  Kai Tietz  
>
>        * fold-const.c (fold_truth_not_expr): Handle
>        one bit precision bitwise operations.
>        (fold_range_test): Likewise.
>        (fold_truthop): Likewise.
>        (fold_binary_loc): Likewise.
>        (fold_truth_andor): Function replaces truth_andor
>        label.
>        (fold_ternary_loc): Use truth_value_type_p instead
>        of truth_value_p.
>        * gimple.c (canonicalize_cond_expr_cond): Likewise.
>        * gimplify.c (gimple_boolify): Likewise.
>        * tree-ssa-structalias.c (find_func_aliases): Likewise.
>        * tree-ssa-forwprop.c (truth_valued_ssa_name): Likewise.
>        * tree.h (truth_value_type_p): New function.
>        (truth_value_p): Implemented as macro via truth_value_type_p.
>
>
> Index: gcc-head/gcc/fold-const.c
> ===
> --- gcc-head.orig/gcc/fold-const.c
> +++ gcc-head/gcc/fold-const.c
> @@ -3074,20 +3074,35 @@ fold_truth_not_expr (location_t loc, tre
>     case INTEGER_CST:
>       return constant_boolean_node (integer_zerop (arg), type);
>
> +    case BIT_AND_EXPR:
> +      if (integer_onep (TREE_OPERAND (arg, 1)))
> +       return build2_loc (loc, EQ_EXPR, type, arg, build_int_cst (type, 0));
> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
> +        return NULL_TREE;
> +      /* fall through */
>     case TRUTH_AND_EXPR:
>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
> -      return build2_loc (loc, TRUTH_OR_EXPR, type,
> +      return build2_loc (loc, (code == BIT_AND_EXPR ? BIT_IOR_EXPR
> +                                                   : TRUTH_OR_EXPR), type,
>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 1)));
>
> +    case BIT_IOR_EXPR:
> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
> +        return NULL_TREE;
> +      /* fall through.  */
>     case TRUTH_OR_EXPR:
>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
> -      return build2_loc (loc, TRUTH_AND_EXPR, type,
> +      return build2_loc (loc, (code == BIT_IOR_EXPR ? BIT_AND_EXPR
> +                                                   : TRUTH_AND_EXPR), type,
>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 1)));
> -
> +    case BIT_XOR_EXPR:
> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
> +        return NULL_TREE;
> +      /* fall through.  */
>     case TRUTH_XOR_EXPR:
>       /* Here we can invert either operand.  We invert the first operand
>         unless the second operand is a TRUTH_NOT_EXPR in which case our
> @@ -3095,10 +3110,14 @@ fold_truth_not_expr (location_t loc, tre
>         negation of the second operand.  */
>
>       if (TREE_CODE (TREE_OPERAND (arg, 1)) == TRUTH_NOT_EXPR)
> -       return build2_loc (loc, TRUTH_XOR_EXPR, type, TREE_OPERAND (arg, 0),
> +       return build2_loc (loc, code, type, TREE_OPERAND (arg, 0),
> +                          TREE_OPERAND (TREE_OPERAND (arg, 1), 0));
> +      else if (TREE_CODE (TREE_OPERAND (arg, 1)) == BIT_NOT_EXPR
> +              && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 1))) == 1)
> +       return build2_loc (loc, code, type, TREE_OPERAND (arg, 0),
>                           TREE_OPERAND (TREE_OPERAND (arg, 1), 0));
>       else
> -       return build2_loc (loc, TRUTH_XOR_EXPR, type,
> +       return build2_loc (loc, code, type,
>                           invert_truthvalue_loc (loc, TREE_OPERAND (arg, 0)),
>                           TREE_OPERAND (arg, 1));
>
> @@ -3116,6 +3135,11 @@ fold_truth_not_expr (location_t loc, tre
>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 1)));
>
> +
> +    case BIT_NOT_EXPR:
> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
> +        return NULL_TREE;
> +      /* fall through */
>     case TRUTH_NOT_

Re: PATCH: Support -mx32 in GCC tests

2011-07-08 Thread Uros Bizjak
On Fri, Jul 8, 2011 at 1:03 AM, H.J. Lu  wrote:

>> Here is the updated patch.  I will wait for Uros's comments.
>>
>
> I attached the wrong file.  Here is the updated patch.

--- a/gcc/testsuite/g++.dg/abi/bitfield3.C
+++ b/gcc/testsuite/g++.dg/abi/bitfield3.C
@@ -4,7 +4,7 @@
 // Cygwin and mingw32 default to MASK_ALIGN_DOUBLE. Override to ensure
 // 4-byte alignment.
 // { dg-options "-mno-align-double" { target i?86-*-cygwin* i?86-*-mingw* } }
-// { dg-require-effective-target ilp32 }
+// { dg-require-effective-target ia32 }

Please rather change dg-do run command to:

+// { dg-do <...> { target { { i?86-*-* x86_64-*-* } && ia32 } } }

and remove dg-require-effective-target entirely. This will ease
grepping for certain target considerably.

+++ b/gcc/testsuite/g++.dg/ext/attrib8.C
+++ b/gcc/testsuite/g++.dg/ext/tmplattr1.C
+++ b/gcc/testsuite/g++.dg/inherit/override-attribs.C
+++ b/gcc/testsuite/g++.dg/opt/life1.C
+++ b/gcc/testsuite/g++.dg/opt/nrv12.C
+++ b/gcc/testsuite/g++.old-deja/g++.ext/attrib1.C
+++ b/gcc/testsuite/g++.old-deja/g++.ext/attrib2.C
+++ b/gcc/testsuite/g++.old-deja/g++.ext/attrib3.C
+++ b/gcc/testsuite/g++.old-deja/g++.pt/asm2.C
+++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-28.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/prefetch-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/prefetch-4.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/prefetch-5.c
... and many more.

Same here.

--- a/gcc/testsuite/gcc.dg/20020103-1.c
+++ b/gcc/testsuite/gcc.dg/20020103-1.c
@@ -1,6 +1,6 @@
 /* Verify that constant equivalences get reloaded properly, either by being
spilled to the stack, or regenerated, but not dropped to memory.  */
-/* { dg-do compile { target { { i?86-*-* rs6000-*-* alpha*-*-*
x86_64-*-* } || { powerpc*-*-* && ilp32 } } } } */
+/* { dg-do compile { target { { i?86-*-* rs6000-*-* alpha*-*-*
x86_64-*-* } || { powerpc*-*-* && ia32 } } } } */

Wrong change.

--- a/gcc/testsuite/gcc.dg/pr25023.c
+++ b/gcc/testsuite/gcc.dg/pr25023.c
@@ -1,7 +1,7 @@
 /* PR debug/25023 */
 /* { dg-do compile } */
 /* { dg-options "-O2" } */
-/* { dg-options "-O2 -mtune=i686" { target { { i?86-*-* || x86_64-*-*
} && ilp32 } } } */
+/* { dg-options "-O2 -mtune=i686" { target { { i?86-*-* || x86_64-*-*
} && ia32 } } } */

Please also remove || in the target string.

--- a/gcc/testsuite/gcc.dg/lower-subreg-1.c
+++ b/gcc/testsuite/gcc.dg/lower-subreg-1.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { { { ! mips64 } && { ! ia64-*-* } } && {
! spu-*-* } } } } */
+/* { dg-do compile { target { { { { ! mips64 } && { ! ia64-*-* } } &&
{ ! spu-*-* } } && { ! { { i?86-*-* x86_64-*-* } && x32 } } } } } */
 /* { dg-options "-O -fdump-rtl-subreg1" } */
 /* { dg-require-effective-target ilp32 } */

This change is still present in updated patch, please change according
to Mike's comments. I'd prefer skip-if there, BTW.


BTW: What about using ... && { ! ia32 } instead of ... &&  { x32 || lp64 }  in

+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && { x32 ||
lp64 } } } } */

This will IMO future-proof the testcases.

Otherwise, the patch looks OK to me.

Uros.


Re: [patch tree-optimization]: [2 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Thu, Jul 7, 2011 at 6:07 PM, Kai Tietz  wrote:
> Hello,
>
> This patch - second of series - adds boolification of comparisons in
> gimplifier.  For this
> casts from/to boolean are marked as not-useless. And in fold_unary_loc
> casts to non-boolean integral types are preserved.
> The hunk in tree-ssa-forwprop.c in combine_cond-expr_cond is not strictly
> necessary - as long as fold-const handles 1-bit precision bitwise-expression
> with truth-logic - but it has been shown to short-cut some more expensive folding. So
> I kept it within this patch.

Please split it out.  Also ...

>
> The adjusted testcase gcc.dg/uninit-15.c indicates that due to
> optimization we lose
> the variable's declaration in this case.  But this might be expected.
>
> In vectorization we have a regression in gcc.dg/vect/vect-cond-3.c
> test-case.  It's caused
> by always having boolean-type on conditions.  So vectorizer sees
> different types, which
> aren't handled by vectorizer right now.  Maybe this issue could be
> special-cased for
> boolean-types in tree-vect-loop, by making operand for used condition
> equal to vector-type.
> But this is a subject for a different patch and not addressed by this series.
>
> There is a regression in tree-ssa/vrp47.c, and the fix is addressed
> by the 3rd patch of this
> series.
>
> Bootstrapped and regression tested for all standard-languages (plus
> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>
> Ok for apply?
>
> Regards,
> Kai
>
>
> ChangeLog
>
> 2011-07-07  Kai Tietz  
>
>        * fold-const.c (fold_unary_loc): Preserve
>        non-boolean-typed casts.
>        * gimplify.c (gimple_boolify): Handle boolification
>        of comparisons.
>        (gimplify_expr): Boolifiy non aggregate-typed
>        comparisons.
>        * tree-cfg.c (verify_gimple_comparison): Check result
>        type of comparison expression.
>        * tree-ssa.c (useless_type_conversion_p): Preserve incompatible
>        casts from/to boolean,
>        * tree-ssa-forwprop.c (combine_cond_expr_cond): Add simplification
>        support for one-bit-precision typed X for cases X != 0 and X == 0.
>        (forward_propagate_comparison): Adjust test of condition
>        result.
>
>
>        * gcc.dg/tree-ssa/builtin-expect-5.c: Adjusted.
>        * gcc.dg/tree-ssa/pr21031.c: Likewise.
>        * gcc.dg/tree-ssa/pr30978.c: Likewise.
>        * gcc.dg/tree-ssa/ssa-fre-6.c: Likewise.
>        * gcc.dg/binop-xor1.c: Mark it as expected fail.
>        * gcc.dg/binop-xor3.c: Likewise.
>        * gcc.dg/uninit-15.c: Adjust reported message.
>
> Index: gcc-head/gcc/fold-const.c
> ===
> --- gcc-head.orig/gcc/fold-const.c
> +++ gcc-head/gcc/fold-const.c
> @@ -7665,11 +7665,11 @@ fold_unary_loc (location_t loc, enum tre
>             non-integral type.
>             Do not fold the result as that would not simplify further, also
>             folding again results in recursions.  */
> -         if (INTEGRAL_TYPE_P (type))
> +         if (TREE_CODE (type) == BOOLEAN_TYPE)
>            return build2_loc (loc, TREE_CODE (op0), type,
>                               TREE_OPERAND (op0, 0),
>                               TREE_OPERAND (op0, 1));
> -         else
> +         else if (!INTEGRAL_TYPE_P (type))
>            return build3_loc (loc, COND_EXPR, type, op0,
>                               fold_convert (type, boolean_true_node),
>                               fold_convert (type, boolean_false_node));
> Index: gcc-head/gcc/gimplify.c
> ===
> --- gcc-head.orig/gcc/gimplify.c
> +++ gcc-head/gcc/gimplify.c
> @@ -2842,18 +2842,23 @@ gimple_boolify (tree expr)
>
>     case TRUTH_NOT_EXPR:
>       TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0));
> -      /* FALLTHRU */
>
> -    case EQ_EXPR: case NE_EXPR:
> -    case LE_EXPR: case GE_EXPR: case LT_EXPR: case GT_EXPR:
>       /* These expressions always produce boolean results.  */
> -      TREE_TYPE (expr) = boolean_type_node;
> +      if (TREE_CODE (type) != BOOLEAN_TYPE)
> +       TREE_TYPE (expr) = boolean_type_node;
>       return expr;
>
>     default:
> +      if (COMPARISON_CLASS_P (expr))
> +       {
> +         /* There expressions always prduce boolean results.  */
> +         if (TREE_CODE (type) != BOOLEAN_TYPE)
> +           TREE_TYPE (expr) = boolean_type_node;
> +         return expr;
> +       }
>       /* Other expressions that get here must have boolean values, but
>         might need to be converted to the appropriate mode.  */
> -      if (type == boolean_type_node)
> +      if (TREE_CODE (type) == BOOLEAN_TYPE)
>        return expr;
>       return fold_convert_loc (loc, boolean_type_node, expr);
>     }
> @@ -6763,7 +6768,7 @@ gimplify_expr (tree *expr_p, gimple_seq
>            tree org_type = TREE_TYPE (*expr_p);
>
>            *expr_p = gimple_boolify (*expr_p);
> -           if (org_type != boolean_type_node)
> +          

Re: [patch tree-optimization]: [1 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:06 PM, Kai Tietz  wrote:
>> Hello,
>>
>> This patch - first of series - adds to fold and some helper routines support
>> for one-bit precision bitwise folding and detection.
>> This patch is necessary for - next patch of series - boolification of
>> comparisons.
>>
>> Bootstrapped and regression tested for all standard-languages (plus
>> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>>
>> Ok for apply?
>
> Factoring out fold_truth_andor to a function should be done separately.
> A patch that does just that is pre-approved.

Ok, I will send a separate patch for this. But in fact it only makes
sense together with the 1-bit precision bitwise support.

> Otherwise the patch globs too many changes and lacks reasoning.
> Why do we want to handle all this in fold when the boolification
> happens only after gimplification?

We still rely on truth/bitwise folding on fold-const.  Also we need to
handle this for passes, which are using fold_binary to optimize  and
handle boolified operations - like tree-ssa-reassoc, or tree-vect*.
This support in fold-const is necessary when we are preserving casts
from/to boolean, as otherwise we don't fold bitwise-binary with
compares properly anymore.  Additionally we have to take care that we
don't enter TRUTH_(AND|OR|XOR) expressions on boolified trees, as
otherwise tree-cfg will barf. Also we need to take care that types of
comparisons and TRUTH_NOT expressions are boolean one, as otherwise
again tree-cfg will detect incompatible types for those expressions.

> Thanks,
> Richard.
>
>> Regards,
>> Kai
>>
>> ChangeLog
>>
>> 2011-07-07  Kai Tietz  
>>
>>        * fold-const.c (fold_truth_not_expr): Handle
>>        one bit precision bitwise operations.
>>        (fold_range_test): Likewise.
>>        (fold_truthop): Likewise.
>>        (fold_binary_loc): Likewise.
>>        (fold_truth_andor): Function replaces truth_andor
>>        label.
>>        (fold_ternary_loc): Use truth_value_type_p instead
>>        of truth_value_p.
>>        * gimple.c (canonicalize_cond_expr_cond): Likewise.
>>        * gimplify.c (gimple_boolify): Likewise.
>>        * tree-ssa-structalias.c (find_func_aliases): Likewise.
>>        * tree-ssa-forwprop.c (truth_valued_ssa_name): Likewise.
>>        * tree.h (truth_value_type_p): New function.
>>        (truth_value_p): Implemented as macro via truth_value_type_p.
>>
>>
>> Index: gcc-head/gcc/fold-const.c
>> ===
>> --- gcc-head.orig/gcc/fold-const.c
>> +++ gcc-head/gcc/fold-const.c
>> @@ -3074,20 +3074,35 @@ fold_truth_not_expr (location_t loc, tre
>>     case INTEGER_CST:
>>       return constant_boolean_node (integer_zerop (arg), type);
>>
>> +    case BIT_AND_EXPR:
>> +      if (integer_onep (TREE_OPERAND (arg, 1)))
>> +       return build2_loc (loc, EQ_EXPR, type, arg, build_int_cst (type, 0));
>> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
>> +        return NULL_TREE;
>> +      /* fall through */
>>     case TRUTH_AND_EXPR:
>>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
>> -      return build2_loc (loc, TRUTH_OR_EXPR, type,
>> +      return build2_loc (loc, (code == BIT_AND_EXPR ? BIT_IOR_EXPR
>> +                                                   : TRUTH_OR_EXPR), type,
>>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 1)));
>>
>> +    case BIT_IOR_EXPR:
>> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
>> +        return NULL_TREE;
>> +      /* fall through.  */
>>     case TRUTH_OR_EXPR:
>>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
>> -      return build2_loc (loc, TRUTH_AND_EXPR, type,
>> +      return build2_loc (loc, (code == BIT_IOR_EXPR ? BIT_AND_EXPR
>> +                                                   : TRUTH_AND_EXPR), type,
>>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 1)));
>> -
>> +    case BIT_XOR_EXPR:
>> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
>> +        return NULL_TREE;
>> +      /* fall through.  */
>>     case TRUTH_XOR_EXPR:
>>       /* Here we can invert either operand.  We invert the first operand
>>         unless the second operand is a TRUTH_NOT_EXPR in which case our
>> @@ -3095,10 +3110,14 @@ fold_truth_not_expr (location_t loc, tre
>>         negation of the second operand.  */
>>
>>       if (TREE_CODE (TREE_OPERAND (arg, 1)) == TRUTH_NOT_EXPR)
>> -       return build2_loc (loc, TRUTH_XOR_EXPR, type, TREE_OPERAND (arg, 0),
>> +       return build2_loc (loc, code, type, TREE_OPERAND (arg, 0),
>> +                          TREE_OPERAND (TREE_OPERAND (arg, 1)

Re: [wwwdocs] Buildstat update for 4.4

2011-07-08 Thread Gerald Pfeifer
On Wed, 6 Jul 2011, Tom G. Christensen wrote:
> Latest results for 4.4.x.

Thanks, Tom.

Gerald


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Thu, Jul 7, 2011 at 6:07 PM, Kai Tietz  wrote:
> Hello,
>
> This patch - third of series - fixes vrp to handle bitwise one-bit
> precision typed operations.
> And it introduces a second - limited to non-switch-statement range - vrp
> pass.

Err - please split this patch.  I agree with Paolo, this 2nd
substitute_and_fold call is bogus.  More comments inline.

>
> Bootstrapped and regression tested for all standard-languages (plus
> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>
> Ok for apply?
>
> Regards,
> Kai
>
> ChangeLog
>
> 2011-07-07  Kai Tietz  
>
>        * tree-vrp.c (in_second_pass): New static variable.
>        (extract_range_from_binary_expr): Add handling for
>        BIT_IOR_EXPR, BIT_AND_EXPR, and BIT_NOT_EXPR.
>        (register_edge_assert_for_1): Add handling for 1-bit
>        BIT_IOR_EXPR and BIT_NOT_EXPR.
>        (register_edge_assert_for): Add handling for 1-bit
>        BIT_IOR_EXPR.
>        (ssa_name_get_inner_ssa_name_p): New helper function.
>        (ssa_name_get_cast_to_p): New helper function.
>        (simplify_truth_ops_using_ranges): Handle prefixed
>        cast instruction for result, and add support for one
>        bit precision BIT_IOR_EXPR, BIT_AND_EXPR, BIT_XOR_EXPR,
>        , and BIT_NOT_EXPR.
>        (simplify_stmt_using_ranges): Add handling for one bit
>        precision BIT_IOR_EXPR, BIT_AND_EXPR, BIT_XOR_EXPR,
>        and BIT_NOT_EXPR.
>        (vrp_finalize): Do substitute and fold pass a second
>        time for vrp_stmt and preserve switch-edge simplification
>        on second run.
>        (simplify_switch_using_ranges): Preserve rerun of function
>        in second pass.
>
> Index: gcc-head/gcc/tree-vrp.c
> ===
> --- gcc-head.orig/gcc/tree-vrp.c
> +++ gcc-head/gcc/tree-vrp.c
> @@ -74,6 +74,9 @@ struct value_range_d
>
>  typedef struct value_range_d value_range_t;
>
> +/* This flag indicates that we are doing a second pass of VRP.  */
> +static bool in_second_pass = false;
> +
>  /* Set of SSA names found live during the RPO traversal of the function
>    for still active basic-blocks.  */
>  static sbitmap *live;
> @@ -2232,6 +2235,7 @@ extract_range_from_binary_expr (value_ra
>      some cases.  */
>   if (code != BIT_AND_EXPR
>       && code != TRUTH_AND_EXPR
> +      && code != BIT_IOR_EXPR

Huh?  So how would VARYING | x ever produce something better
than VARYING?

>       && code != TRUTH_OR_EXPR
>       && code != TRUNC_DIV_EXPR
>       && code != FLOOR_DIV_EXPR
> @@ -2291,6 +2295,8 @@ extract_range_from_binary_expr (value_ra
>          else
>            set_value_range_to_varying (vr);
>        }
> +      else if (code == BIT_IOR_EXPR)
> +        set_value_range_to_varying (vr);

err - BIT_IOR_EXPR on pointers?

>       else
>        gcc_unreachable ();
>
> @@ -2300,11 +2306,13 @@ extract_range_from_binary_expr (value_ra
>   /* For integer ranges, apply the operation to each end of the
>      range and see what we end up with.  */
>   if (code == TRUTH_AND_EXPR
> -      || code == TRUTH_OR_EXPR)
> +      || code == TRUTH_OR_EXPR
> +      || ((code == BIT_AND_EXPR || code == BIT_IOR_EXPR)
> +          && TYPE_PRECISION (TREE_TYPE (op1)) == 1))

Rather than adding code to handle BIT_*_EXPR this patch should
transform the TRUTH_*_EXPR handling to appropriate BIT_*_EXPR
handling as we no longer have TRUTH_*_EXPR in our IL.

In fact I would say the existing BIT_*_EXPR handling should already
cover all the TRUTH_*_CASES, so this patch patches the wrong
spot if it is necessary at all.

>     {
>       /* If one of the operands is zero, we know that the whole
>         expression evaluates zero.  */
> -      if (code == TRUTH_AND_EXPR
> +      if ((code == TRUTH_AND_EXPR || code == BIT_AND_EXPR)
>          && ((vr0.type == VR_RANGE
>               && integer_zerop (vr0.min)
>               && integer_zerop (vr0.max))
> @@ -2317,7 +2325,7 @@ extract_range_from_binary_expr (value_ra
>        }
>       /* If one of the operands is one, we know that the whole
>         expression evaluates one.  */
> -      else if (code == TRUTH_OR_EXPR
> +      else if ((code == TRUTH_OR_EXPR || code == BIT_IOR_EXPR)
>               && ((vr0.type == VR_RANGE
>                    && integer_onep (vr0.min)
>                    && integer_onep (vr0.max))
> @@ -2809,7 +2817,7 @@ extract_range_from_unary_expr (value_ran
>      cannot easily determine a resulting range.  */
>   if (code == FIX_TRUNC_EXPR
>       || code == FLOAT_EXPR
> -      || code == BIT_NOT_EXPR
> +      || (code == BIT_NOT_EXPR && TYPE_PRECISION (type) != 1)
>       || code == CONJ_EXPR)
>     {
>       /* We can still do constant propagation here.  */
> @@ -3976,7 +3984,9 @@ build_assert_expr_for (tree cond, tree v
>       tree a = build2 (ASSERT_EXPR, TREE_TYPE (v), v, cond);
>       assertion = gimple_build_assign (n, a);
>     }
> -  else if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
> +  else if (TREE_CODE (cond) == TRUTH_NOT_EXPR
> +  

Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
> 2011/7/7 Paolo Bonzini :
>> On 07/07/2011 06:07 PM, Kai Tietz wrote:
>>>
>>> +  /* We redo folding here one time for allowing to inspect more
>>> +     complex reductions.  */
>>> +  substitute_and_fold (op_with_constant_singleton_value_range,
>>> +                      vrp_fold_stmt, false);
>>> +  /* We need to mark this second pass to avoid re-entering of same
>>> +     edges for switch statments.  */
>>> +  in_second_pass = true;
>>>    substitute_and_fold (op_with_constant_singleton_value_range,
>>>                       vrp_fold_stmt, false);
>>> +  in_second_pass = false;
>>
>> This needs a much better explanation.
>>
>> Paolo
>
> Well, I can work on a better comment.  The complex reduction I mean
> here are cases like
>
> int x;
> int y;
> _Bool D1;
> _Bool D2;
> _Bool D3;
> int R;
>
> D1 = x[0..1] != 0;
> D2 = y[0..1] != 0;
> D3 = D1 & D2
> R = (int) D3
>
> (testcase is already present. See tree-ssa/vrp47.c).
>
> As VRP in first pass produces (and replaces) to:
>
> D1 = (_Bool) x[0..1];
> D2 = (_Bool) y[0..1];
> D3 = D1 & D2
> R = (int) D3
>
> Just in the second pass the reduction
>
> R = x[0..1] & y[0..1]

So why wouldn't that happen during the first pass?  The first
pass could change the IL to

 D1 = x[0..1] != 0;
 D2 = y[0..1] != 0;
 D3 = D1 & D2;
 R = x & y;

if D3 only has a single use.

> can happen.  In general it is sad that VRP can't insert new statements
> during the pass right now.  This would cause issues in range-tables,
> which aren't designed for insertions.  Otherwise, we could
> also simplify things like
>
> D1 = x[0..1] != 0;
> D2 = y[0..1] == 0;
> D3 = D1 & D2
> R = (int) D3
>
> to
> R = x[0..1] & (y[0..1] ^ 1)

Why that ^ 1?  And why does that confuse the range tables
if you re-use R?

> Regards,
> Kai
>


Re: [patch tree-optimization]: [1 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 11:28 AM, Kai Tietz  wrote:
> 2011/7/8 Richard Guenther :
>> On Thu, Jul 7, 2011 at 6:06 PM, Kai Tietz  wrote:
>>> Hello,
>>>
>>> This patch - first of series - adds to fold and some helper routines support
>>> for one-bit precision bitwise folding and detection.
>>> This patch is necessary for - next patch of series - boolification of
>>> comparisons.
>>>
>>> Bootstrapped and regression tested for all standard-languages (plus
>>> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>>>
>>> Ok for apply?
>>
>> Factoring out fold_truth_andor to a function should be done separately.
>> A patch that does just that is pre-approved.
>
> Ok I will sent for this a separate patch. But in fact it makes just
> sense together with the 1-bit precision bitwise support, too.

No, it makes sense anyway to get rid of that goto.  Note _only_ factoring
out the function, not changing anything in it.

>> Otherwise the patch globs too many changes and lacks reasoning.
>> Why do we want to handle all this in fold when the boolification
>> happens only after gimplification?
>
> We still rely on truth/bitwise folding on fold-const.  Also we need to
> handle this for passes, which are using fold_binary to optimize  and
> handle boolified operations - like tree-ssa-reassoc, of tree-vect*.
> This support in fold-const is necessary when we are preserving casts
> from/to boolean, as otherwise we don't fold bitwise-binary with
> compares proper anymore.  Additionally we have to take care that we
> don't enter TRUTH_(AND|OR|XOR) expressions on boolified trees, as
> otherwise tree-cfg will barf. Also we need to take care that types of
> comparisons and TRUTH_NOT expressions are boolean one, as otherwise
> again tree-cfg will detect incompatible types for those expressions.

Sounds like many different things for many individual patches.  Btw,
I'd rather have the tree passes that rely on fold call a gimple specific
wrapper where we can add such things (and also use gimple/SSA
specific optimizations, like less strict typing), like
gimple_fold_binary (), see also my gimple folding proposal from
earlier this year. http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01099.html

Richard.

>> Thanks,
>> Richard.
>>
>>> Regards,
>>> Kai
>>>
>>> ChangeLog
>>>
>>> 2011-07-07  Kai Tietz  
>>>
>>>        * fold-const.c (fold_truth_not_expr): Handle
>>>        one bit precision bitwise operations.
>>>        (fold_range_test): Likewise.
>>>        (fold_truthop): Likewise.
>>>        (fold_binary_loc): Likewise.
>>>        (fold_truth_andor): Function replaces truth_andor
>>>        label.
>>>        (fold_ternary_loc): Use truth_value_type_p instead
>>>        of truth_value_p.
>>>        * gimple.c (canonicalize_cond_expr_cond): Likewise.
>>>        * gimplify.c (gimple_boolify): Likewise.
>>>        * tree-ssa-structalias.c (find_func_aliases): Likewise.
>>>        * tree-ssa-forwprop.c (truth_valued_ssa_name): Likewise.
>>>        * tree.h (truth_value_type_p): New function.
>>>        (truth_value_p): Implemented as macro via truth_value_type_p.
>>>
>>>
>>> Index: gcc-head/gcc/fold-const.c
>>> ===
>>> --- gcc-head.orig/gcc/fold-const.c
>>> +++ gcc-head/gcc/fold-const.c
>>> @@ -3074,20 +3074,35 @@ fold_truth_not_expr (location_t loc, tre
>>>     case INTEGER_CST:
>>>       return constant_boolean_node (integer_zerop (arg), type);
>>>
>>> +    case BIT_AND_EXPR:
>>> +      if (integer_onep (TREE_OPERAND (arg, 1)))
>>> +       return build2_loc (loc, EQ_EXPR, type, arg, build_int_cst (type, 
>>> 0));
>>> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
>>> +        return NULL_TREE;
>>> +      /* fall through */
>>>     case TRUTH_AND_EXPR:
>>>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>>>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
>>> -      return build2_loc (loc, TRUTH_OR_EXPR, type,
>>> +      return build2_loc (loc, (code == BIT_AND_EXPR ? BIT_IOR_EXPR
>>> +                                                   : TRUTH_OR_EXPR), type,
>>>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>>>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 
>>> 1)));
>>>
>>> +    case BIT_IOR_EXPR:
>>> +      if (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (arg, 0))) != 1)
>>> +        return NULL_TREE;
>>> +      /* fall through.  */
>>>     case TRUTH_OR_EXPR:
>>>       loc1 = expr_location_or (TREE_OPERAND (arg, 0), loc);
>>>       loc2 = expr_location_or (TREE_OPERAND (arg, 1), loc);
>>> -      return build2_loc (loc, TRUTH_AND_EXPR, type,
>>> +      return build2_loc (loc, (code == BIT_IOR_EXPR ? BIT_AND_EXPR
>>> +                                                   : TRUTH_AND_EXPR), type,
>>>                         invert_truthvalue_loc (loc1, TREE_OPERAND (arg, 0)),
>>>                         invert_truthvalue_loc (loc2, TREE_OPERAND (arg, 
>>> 1)));
>>> -
>>> +    case BIT_XOR_EXPR:

Re: [ARM] Deprecate -mwords-little-endian

2011-07-08 Thread Gerald Pfeifer
On Thu, 7 Jul 2011, Richard Earnshaw wrote:
> Looks fine to me, but please allow 24 hours for the web maintainers to
> comment if they wish.

Thanks, this looks good.

Gerald


Re: [Patch, AVR]: Fix PR46779

2011-07-08 Thread Georg-Johann Lay
CCed Eric and Bernd.

Denis Chertykov wrote:
>> Did you decide about the fix for PR46779?
>>
>> http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00810.html
>>
>> Is it ok to commit?
> 
> I forgot about testsuite regressions for this patch.
> 
> Denis.


There were no new regressions:
  http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00747.html

However, with the actual trunk (SVN 175991), I get two more
spill fails for following sources:

./gcc.c-torture/compile/pr30338.c -O1 -mmcu=atmega128

 pr30338.c: In function 'testload_func':
pr30338.c:13:1: error: unable to find a register to spill in class
'POINTER_REGS'
pr30338.c:13:1: error: this is the insn:
(insn 14 13 15 2 (set (reg:QI 24 r24 [orig:73 *D.1963_37 ] [73])
(mem:QI (subreg:HI (reg:SI 71) 0) [0 *D.1963_37+0 S1 A8]))
pr30338.c:9 4 {*movqi}
 (expr_list:REG_DEAD (reg:SI 71)
(nil)))
pr30338.c:13:1: internal compiler error: in spill_failure, at
reload1.c:2120



./gcc.c-torture/compile/pr32349.c -S -O3 -funroll-loops

pr32349.c: In function 'foo':
pr32349.c:26:1: error: unable to find a register to spill in class
'POINTER_REGS'
pr32349.c:26:1: error: this is the insn:
(insn 175 197 177 10 (set (reg/v:SI 234 [ m ])
(mem:SI (post_inc:HI (reg:HI 16 r16 [orig:192 ivtmp.18 ]
[192])) [3 MEM[base: D.1996_74, offset: 0B]+0 S4 A8])) pr32349.c:18 12
{*movsi}
 (expr_list:REG_INC (reg:HI 16 r16 [orig:192 ivtmp.18 ] [192])
(nil)))
pr32349.c:26:1: internal compiler error: in spill_failure, at
reload1.c:2120


(1)
I can fix *both* fails with additional test in avr_hard_regno_mode_ok:

+   if (GET_MODE_SIZE (mode) >= 4
+   && regno >= REG_X)
+ return 0;

(2)
I can fix the first fail but *not* the second by not allow SUBREGs in
avr_legitimate_address_p:

-   if (!strict && GET_CODE (x) == SUBREG) */
-   x = SUBREG_REG (x); */


(2) Looks very reasonable, Eric Botcazou proposed it because he ran
into problems:
   http://gcc.gnu.org/ml/gcc-patches/2011-04/msg01367.html

(1) Appears to be hackish, but it should be ok.  If code breaks
because of that it's *definitely* a reload bug (e.g. SI-subreg of DI).

Even the original avr_hard_regno_mode_ok is ok IMO because if a
machine says "I can hold HI in 28 but not QI in 29" reload has to
handle it (except a machine must allow word_mode in *all* its
GENERAL_REGS, don't know if that's a must).

I made a patch for reload, too:
   http://gcc.gnu.org/ml/gcc/2011-06/msg5.html

Because IRA generates SUBREG of hardreg (which old lreg/greg handled
ok) and reload does not handle it correctly.  It generates a spill but
without the needed input reload so that one part of the register is
missing.

reload blames IRA or BE, IRA blames reload, BE blames IRA, etc...


I didn't rerun the testsuite with (1) or/and (2), I'd like both (1)
and (2) in the compiler.  What do you think?

For reference, I attached the patch again.  It's like the original
patch, just with some comment change.

Johann


PR target/46779
* config/avr/avr.c (avr_hard_regno_mode_ok): Rewrite.
In particular, allow 8-bit values in r28 and r29.
(avr_hard_regno_scratch_ok): Disallow any register that might be
part of the frame pointer.
(avr_hard_regno_rename_ok): Same.

Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 175991)
+++ config/avr/avr.c	(working copy)
@@ -6118,26 +6118,21 @@ jump_over_one_insn_p (rtx insn, rtx dest
 int
 avr_hard_regno_mode_ok (int regno, enum machine_mode mode)
 {
-  /* Disallow QImode in stack pointer regs.  */
-  if ((regno == REG_SP || regno == (REG_SP + 1)) && mode == QImode)
-return 0;
-
-  /* The only thing that can go into registers r28:r29 is a Pmode.  */
-  if (regno == REG_Y && mode == Pmode)
-return 1;
-
-  /* Otherwise disallow all regno/mode combinations that span r28:r29.  */
-  if (regno <= (REG_Y + 1) && (regno + GET_MODE_SIZE (mode)) >= (REG_Y + 1))
-return 0;
-
-  if (mode == QImode)
+  /* NOTE: 8-bit values must not be disallowed for R28 or R29.
+Disallowing QI et al. in these regs might lead to code like
+(set (subreg:QI (reg:HI 28) n) ...)
+which will result in wrong code because reload does not
+handle SUBREGs of hard regsisters like this.
+This could be fixed in reload.  However, it appears
+that fixing reload is not wanted by reload people.  */
+  
+  /* Any GENERAL_REGS register can hold 8-bit values.  */
+  
+  if (GET_MODE_SIZE (mode) == 1)
 return 1;
-
-  /* Modes larger than QImode occupy consecutive registers.  */
-  if (regno + GET_MODE_SIZE (mode) > FIRST_PSEUDO_REGISTER)
-return 0;
-
-  /* All modes larger than QImode should start in an even register.  */
+  
+  /* All modes larger than 8 bits should start in an even register.  */
+  
   return !(regno & 1);
 }
 
@@ -6410,13 +6405,23 @@ avr_hard_regno_scratch_ok (unsigned int
   && !df_regs_ever_live_p (regno))
 ret

Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Dimitrios Apostolou

On Fri, 8 Jul 2011, Richard Guenther wrote:

On Fri, Jul 8, 2011 at 5:20 AM, Dimitrios Apostolou  wrote:

Hello list,

The attached patch does two things for df_get_call_refs():
* First it uses HARD_REG_SETs for defs_generated and
regs_invalidated_by_call, instead of bitmaps. Replacing in total more than
400K calls (for my testcase) to bitmap_bit_p() with the much faster
TEST_HARD_REG_BIT, reduces the total instruction count from about 13M to
1.5M.
* Second it produces the REFs in REGNO order, which is important to keep the
collection_rec sorted most times, and avoid expensive calls to qsort().
Thanks to Paolo Bonzini for idea and mentoring.

The second part makes a big difference if accompanied with another patch in
df_insn_refs_collect(). I'll post a followup patch, that is unfortunately
unstable for some of my tests, so I'd appreciate any comments.


Did you check the impact on memory usage?  I suppose on targets
with not many hard registers it should even improve, but do we expect
memory usage to be worse in any case?


Hi Richard, I didn't check memory usage, is that important? Since the 
struct bitmap is fairly bulky, it should take an arch with lots of hard 
regs (which one has the most?).


But still, wouldn't a tradeoff of a few bytes be acceptable for a much faster 
type? And IMHO it makes the code easier to understand, since once you see 
HARD_REG_SET you know you can't expect anything else. FWIW I'm now in the process 
of converting all other bitmap uses for hard regs to HARD_REG_SETs, at 
least within DF. I'm not sure whether performance gains will be visible, 
however, not much code is as hot as df_get_call_refs().



Thanks,
Dimitris



Re: [Patch, AVR]: Fix PR46779

2011-07-08 Thread Denis Chertykov
2011/7/8 Georg-Johann Lay :
> CCed Eric and Bernd.
>
> Denis Chertykov wrote:
>>> Did you decide about the fix for PR46779?
>>>
>>> http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00810.html
>>>
>>> Is it ok to commit?
>>
>> I forgot about testsuite regressions for this patch.
>>
>> Denis.
>
>
> There were no new regressions:
>  http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00747.html
>
> However, with the actual trunk (SVN 175991), I get two more
> spill fails for following sources:
>
> ./gcc.c-torture/compile/pr32349.c -O1 -mmcu=atmega128
>
>  pr30338.c: In function 'testload_func':
> pr30338.c:13:1: error: unable to find a register to spill in class
> 'POINTER_REGS'
> pr30338.c:13:1: error: this is the insn:
> (insn 14 13 15 2 (set (reg:QI 24 r24 [orig:73 *D.1963_37 ] [73])
>        (mem:QI (subreg:HI (reg:SI 71) 0) [0 *D.1963_37+0 S1 A8]))
> pr30338.c:9 4 {*movqi}
>     (expr_list:REG_DEAD (reg:SI 71)
>        (nil)))
> pr30338.c:13:1: internal compiler error: in spill_failure, at
> reload1.c:2120
>
>
>
> ./gcc.c-torture/compile/pr32349.c -S -O3 -funroll-loops
>
> pr32349.c: In function 'foo':
> pr32349.c:26:1: error: unable to find a register to spill in class
> 'POINTER_REGS'
> pr32349.c:26:1: error: this is the insn:
> (insn 175 197 177 10 (set (reg/v:SI 234 [ m ])
>        (mem:SI (post_inc:HI (reg:HI 16 r16 [orig:192 ivtmp.18 ]
> [192])) [3 MEM[base: D.1996_74, offset: 0B]+0 S4 A8])) pr32349.c:18 12
> {*movsi}
>     (expr_list:REG_INC (reg:HI 16 r16 [orig:192 ivtmp.18 ] [192])
>        (nil)))
> pr32349.c:26:1: internal compiler error: in spill_failure, at
> reload1.c:2120
>
>
> (1)
> I can fix *both* fails with additional test in avr_hard_regno_mode_ok:
>
> +   if (GET_MODE_SIZE (mode) >= 4
> +       && regno >= REG_X)
> +     return 0;
>
> (2)
> I can fix the first fail but *not* the second by not allow SUBREGs in
> avr_legitimate_address_p:
>
> -   if (!strict && GET_CODE (x) == SUBREG) */
> -       x = SUBREG_REG (x); */
>
>
> (2) Looks very reasonble, Eric Botcazou proposed it because he ran
> into problems:
>   http://gcc.gnu.org/ml/gcc-patches/2011-04/msg01367.html
>
> (1) Appears to be hackish, but it should be ok.  If code breaks
> because of that is's *definitely* a reload bug (e.g. SI-subreg of DI).
>
> Even the original avr_hard_regno_mode_ok is ok IMO because if a
> machine says "I can hold HI in 28 but not QI in 29" reload has to
> handle it (except a machine must allow word_mode in *all* it's
> GENERAL_REGS, don't know if that's a must).
>
> I made a patch for reload, too:
>   http://gcc.gnu.org/ml/gcc/2011-06/msg5.html
>
> Because IRA generates SUBREG of hardreg (which old lreg/greg handled
> ok) and reload does not handle it correctly.  It generates a spill but
> without the needed input reload so that one part of the register is
> missing.
>
> reload blames IRA or BE, IRA blames reload, BE blames IRA, etc...
>
>
> I didn't rerun the testsuite with (1) or/and (2), I'd like both (1)
> and (2) in the compiler.  What do you think?

I think that AVR is a stress test for GCC core. We are on the edge.
IMHO your patch just exchanges one tweak for another.
It's not needed if it adds regressions.

Denis.


[testsuite] Fix g++.dg/opt/devirt2.C on 32-bit Solaris/SPARC with Sun as (PR testsuite/48727)

2011-07-08 Thread Rainer Orth
As described in the PR, unless the assembler supports -relax, SPARC gcc
can emit sethi+jmp instead of just call, so the pattern has to be more
specific.

This patch implements this, also having to account for the fact that
subexpressions in Tcl regexps double the match count.

Tested with the appropriate runtest invocation on sparc-sun-solaris2.11
with both Sun and GNU as, both multilibs, and i386-pc-solaris2.10.

Installed on mainline.

Rainer



2011-07-08  Rainer Orth  

PR testsuite/48727
* g++.dg/opt/devirt2.C: Use specific pattern for sparc*-*-*.

Index: gcc/testsuite/g++.dg/opt/devirt2.C
===
--- gcc/testsuite/g++.dg/opt/devirt2.C  (revision 176033)
+++ gcc/testsuite/g++.dg/opt/devirt2.C  (working copy)
@@ -2,9 +2,14 @@
 // { dg-options "-O2" }
 // The IA64 and HPPA compilers generate external declarations in addition
 // to the call so those scans need to be more specific.
-// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { hppa*-*-* 
ia64*-*-hpux* } } } } }
-// { dg-final { scan-assembler-times "br\[^\n\]*xyzzy"  2 { target 
ia64*-*-hpux* } } }
-// { dg-final { scan-assembler-times "xyzzy\[^\n\]*,%r"  2 { target hppa*-*-* 
} } }
+// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { hppa*-*-* 
ia64*-*-hpux* sparc*-*-* } } } } }
+// { dg-final { scan-assembler-times "br\[^\n\]*xyzzy" 2 { target 
ia64*-*-hpux* } } }
+// { dg-final { scan-assembler-times "xyzzy\[^\n\]*,%r" 2 { target hppa*-*-* } 
} }
+// Unless the assembler supports -relax, the 32-bit SPARC compiler generates
+// sethi/jmp instead of just call, so the scans need to be more specific.
+// With subexpressions, Tcl regexp -inline -all returns both the complete
+// match and the subexpressions, so double the count.
+// { dg-final { scan-assembler-times "\(jmp|call\)\[^\n\]*xyzzy" 4 { target 
sparc*-*-* } } }
 
 struct S { S(); virtual void xyzzy(); };
 struct R { int a; S s; R(); };

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH, testsuite] Fix for PR49519, miscompiled 447.dealII in SPEC CPU 2006

2011-07-08 Thread Kirill Yukhin
Eric, Jakub,
Thanks for your inputs, I've applied them. Updated patch is attached.

So, one more time:

ChangeLog entry:
2011-07-06  Kirill Yukhin  

PR middle-end/49519
* calls.c (mem_overlaps_already_clobbered_arg_p): Additional
check if address is stored in register. If so - give up.
(check_sibcall_argument_overlap_1): Do not perform check of
overlapping when it is call to address.

testsuite/ChangeLog entry:
2011-07-06  Kirill Yukhin  

* g++.dg/torture/pr49519.C: New test.

Bootstrapped, new test fails without the patch, passes when it is applied.
This fixes the problem with SPEC2006/447.dealII miscompile

Ok for trunk?

Thanks, K


pr49519-2.gcc.patch
Description: Binary data


[build] Move fixed-bit support to toplevel libgcc

2011-07-08 Thread Rainer Orth
The next patch in the line of toplevel libgcc moves proved to be
completely trivial: fixed-bit.[ch] is moved over with corresponding
include and Makefile changes.

Tested with a C-only --enable-fixed-point non-bootstrap build/test on
mips-sgi-irix6.5.  Without --enable-fixed-point, fixed-point support is
omitted on that target since it overflows the default command line
length limit, but I've raised that to the max. on my test machine.

The build succeeded, and libgcc.a/libgcc_s.so.1 grew by a factor 10
resp. 6 in size :-)

All fixed-point tests succeeded for both multilibs.

Ok for mainline?

Thanks.
Rainer


2011-06-22  Rainer Orth  

gcc:
* config/fixed-bit.c, config/fixed-bit.h: Move to ../libgcc.

libgcc:
* fixed-bit.c, fixed-bit.h: New files.
* fixed-obj.mk ($(o)_s$(objext)): Use $(srcdir) to refer to fixed-bit.c.

diff --git a/gcc/config/fixed-bit.c b/libgcc/fixed-bit.c
rename from gcc/config/fixed-bit.c
rename to libgcc/fixed-bit.c
--- a/gcc/config/fixed-bit.c
+++ b/libgcc/fixed-bit.c
@@ -51,7 +51,7 @@ see the files COPYING3 and COPYING.RUNTI
 #define MIN_UNITS_PER_WORD UNITS_PER_WORD
 #endif
 
-#include "config/fixed-bit.h"
+#include "fixed-bit.h"
 
 #if defined(FIXED_ADD) && defined(L_add)
 FIXED_C_TYPE
diff --git a/gcc/config/fixed-bit.h b/libgcc/fixed-bit.h
rename from gcc/config/fixed-bit.h
rename to libgcc/fixed-bit.h
diff --git a/libgcc/fixed-obj.mk b/libgcc/fixed-obj.mk
--- a/libgcc/fixed-obj.mk
+++ b/libgcc/fixed-obj.mk
@@ -22,10 +22,10 @@ endif
 
 #$(info $o$(objext): -DL$($o-label) $($o-opt))
 
-$o$(objext): %$(objext): $(gcc_srcdir)/config/fixed-bit.c
-   $(gcc_compile) -DL$($*-label) $($*-opt) -c 
$(gcc_srcdir)/config/fixed-bit.c $(vis_hide)
+$o$(objext): %$(objext): $(srcdir)/fixed-bit.c
+   $(gcc_compile) -DL$($*-label) $($*-opt) -c $(srcdir)/fixed-bit.c 
$(vis_hide)
 
 ifeq ($(enable_shared),yes)
-$(o)_s$(objext): %_s$(objext): $(gcc_srcdir)/config/fixed-bit.c
-   $(gcc_s_compile) -DL$($*-label) $($*-opt) -c 
$(gcc_srcdir)/config/fixed-bit.c
+$(o)_s$(objext): %_s$(objext): $(srcdir)/fixed-bit.c
+   $(gcc_s_compile) -DL$($*-label) $($*-opt) -c $(srcdir)/fixed-bit.c
 endif

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Make VRP optimize useless conversions

2011-07-08 Thread Richard Guenther
On Thu, 7 Jul 2011, Michael Matz wrote:

> Hi,
> 
> On Thu, 7 Jul 2011, Richard Guenther wrote:
> 
> > +   tree rhs1 = gimple_assign_rhs1 (stmt);
> > +   gimple def_stmt = SSA_NAME_DEF_STMT (rhs1);
> > +   value_range_t *final, *inner;
> > + 
> > +   /* Obtain final and inner value-ranges for a conversion
> > +  sequence (final-type)(intermediate-type)inner-type.  */
> > +   final = get_value_range (gimple_assign_lhs (stmt));
> > +   if (final->type != VR_RANGE)
> > + return false;
> > +   if (!is_gimple_assign (def_stmt)
> > +   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
> > + return false;
> > +   rhs1 = gimple_assign_rhs1 (def_stmt);
> > +   if (TREE_CODE (rhs1) != SSA_NAME)
> > + return false;
> > +   inner = get_value_range (rhs1);
> > +   if (inner->type != VR_RANGE)
> > + return false;
> > +   if (!tree_int_cst_equal (final->min, inner->min)
> > +   || !tree_int_cst_equal (final->max, inner->max))
> > + return false;
> 
> I think that's a bit too conservative.  Granted in current VRP it might 
> work, but think about an intermediate truncation plus widening:
> 
>   short s;
>   short d = (short)(signed char)s;
> 
> It wouldn't be wrong for VRP to assign d the range [-16384,16383], 
> suboptimal but correct.  That would trigger your function in removing the 
> truncation, and _that_ would be incorrect.  The bounds of VRP aren't 
> reliably tight.  You probably want to recheck if the intermediate 
> conversion isn't truncating the known input range of rhs1.

It should be indeed safe with the current handling of conversions,
but better be safe.  So, like the following?

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Thanks,
Richard.

2011-07-08  Richard Guenther  

* tree-vrp.c (simplify_conversion_using_ranges): Also check
the intermediate value-range.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c  (revision 176030)
+++ gcc/tree-vrp.c  (working copy)
@@ -7348,14 +7348,22 @@ static bool
 simplify_conversion_using_ranges (gimple stmt)
 {
   tree rhs1 = gimple_assign_rhs1 (stmt);
-  gimple def_stmt = SSA_NAME_DEF_STMT (rhs1);
-  value_range_t *final, *inner;
+  gimple def_stmt;
+  value_range_t *final, *intermediate, *inner;
 
-  /* Obtain final and inner value-ranges for a conversion
+  /* Obtain final, intermediate and inner value-ranges for a conversion
  sequence (final-type)(intermediate-type)inner-type.  */
   final = get_value_range (gimple_assign_lhs (stmt));
   if (final->type != VR_RANGE)
 return false;
+  intermediate = get_value_range (rhs1);
+  if (intermediate->type != VR_RANGE)
+return false;
+  if (!tree_int_cst_equal (final->min, intermediate->min)
+  || !tree_int_cst_equal (final->max, intermediate->max))
+return false;
+
+  def_stmt = SSA_NAME_DEF_STMT (rhs1);
   if (!is_gimple_assign (def_stmt)
   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
 return false;
@@ -7365,11 +7373,12 @@ simplify_conversion_using_ranges (gimple
   inner = get_value_range (rhs1);
   if (inner->type != VR_RANGE)
 return false;
-  /* If the value-range is preserved by the conversion sequence strip
- the intermediate conversion.  */
   if (!tree_int_cst_equal (final->min, inner->min)
   || !tree_int_cst_equal (final->max, inner->max))
 return false;
+
+  /* The value-range is preserved by the conversion sequence; strip
+ the intermediate conversion.  */
   gimple_assign_set_rhs1 (stmt, rhs1);
   update_stmt (stmt);
   return true;


Re: what can be in a group set?

2011-07-08 Thread Richard Sandiford
Paolo Bonzini  writes:
> df-scan.c has this code to deal with group sets:
>
>/* It is legal to have a set destination be a parallel. */
>if (GET_CODE (dst) == PARALLEL)
>  {
>int i;
>
>for (i = XVECLEN (dst, 0) - 1; i >= 0; i--)
>  {
>rtx temp = XVECEXP (dst, 0, i);
>if (GET_CODE (temp) == EXPR_LIST || GET_CODE (temp) == CLOBBER
>|| GET_CODE (temp) == SET)
>  df_def_record_1 (collection_rec,
>   temp, bb, insn_info,
>   GET_CODE (temp) == CLOBBER
>   ? flags | DF_REF_MUST_CLOBBER : flags);
>  }
>return;
>  }
>
> It seems to me that the case of (set (parallel [(set ...)])) and (set 
> (parallel [(clobber ...)])) is bogus.  I would like to simplify it to 
> the following:
>
>/* It is legal to have a set destination be a parallel. */
>if (GET_CODE (dst) == PARALLEL)
>  {
>int i;
>
>for (i = XVECLEN (dst, 0) - 1; i >= 0; i--)
>  {
>rtx temp = XVECEXP (dst, 0, i);
>assert (GET_CODE (temp) == EXPR_LIST);
>df_def_record_1 (collection_rec, temp, bb, insn_info, flags);
>  }
>return;
>  }
>
> Does this make sense?

Yeah, the docs seem pretty certain that expr_list is the only valid choice.

The docs also say that the first expr_list can be null:

  If @var{lval} is a @code{parallel}, it is used to represent the case of
  a function returning a structure in multiple registers.  Each element
  of the @code{parallel} is an @code{expr_list} whose first operand is a
  @code{reg} and whose second operand is a @code{const_int} representing the
  offset (in bytes) into the structure at which the data in that register
  corresponds.  The first element may be null to indicate that the structure
  is also passed partly in memory.

but I can't see any code to handle that.  Am I missing something,
or does the lack of a crash here mean that we can remove the last
sentence?

(It might have been added for symmetry with argument passing, where this
sort of thing is needed.  But if it isn't actually used or implemented for
returns, it might be less confusing to remove it.)

> See the attached patch for the overall thing I 
> was thinking of.
>
> Paolo
>
> * df-scan.c (df_def_record_1): Assert a parallel must contain
> an EXPR_LIST at this point.  Receive the LOC and move its
> extraction...
> (df_defs_record): ... here.  Remove superfluous braces.

Looks good.

Richard


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
>> 2011/7/7 Paolo Bonzini :
>>> On 07/07/2011 06:07 PM, Kai Tietz wrote:

 +  /* We redo folding here one time for allowing to inspect more
 +     complex reductions.  */
 +  substitute_and_fold (op_with_constant_singleton_value_range,
 +                      vrp_fold_stmt, false);
 +  /* We need to mark this second pass to avoid re-entering of same
 +     edges for switch statments.  */
 +  in_second_pass = true;
    substitute_and_fold (op_with_constant_singleton_value_range,
                       vrp_fold_stmt, false);
 +  in_second_pass = false;
>>>
>>> This needs a much better explanation.
>>>
>>> Paolo
>>
>> Well, I can work on a better comment.  The complex reduction I mean
>> here are cases like
>>
>> int x;
>> int y;
>> _Bool D1;
>> _Bool D2;
>> _Bool D3;
>> int R;
>>
>> D1 = x[0..1] != 0;
>> D2 = y[0..1] != 0;
>> D3 = D1 & D2
>> R = (int) D3
>>
>> (testcase is already present. See tree-ssa/vrp47.c).
>>
>> As VRP in first pass produces (and replaces) to:
>>
>> D1 = (_Bool) x[0..1];
>> D2 = (_Bool) y[0..1];
>> D3 = D1 & D2
>> R = (int) D3
>>
>> Just in the second pass the reduction
>>
>> R = x[0..1] & y[0..1]
>
> So why wouldn't that happen during the first pass?  The first
> pass could change the IL to
>
>  D1 = x[0..1] != 0;
>  D2 = y[0..1] != 0;
>  D3 = D1 & D2;
>  R = x & y;
>
> if D3 only has a single use.
>
>> can happen.  In general it is sad that VRP can't insert during pass
>> new statements right now.  This would cause issues in range-tables,
>> which aren't designed for insertions.  As otherwise, we could do
>> also simplify things like
>>
>> D1 = x[0..1] != 0;
>> D2 = y[0..1] == 0;
>> D3 = D1 & D2
>> R = (int) D3
>>
>> to
>> R = x[0..1] & (y[0..1] ^ 1)
>
> Why that ^ 1?  And why does that confuse the range tables
> if you re-use R?

Because (y[0..1] ^1) has a type change.  All present SSA-nodes have
boolean type, but (y[0..1] ^ 1) is an integer one.  We have just the
cast def, which has final type. See code of vrp_stmt truth and you
will notice that for X with range 0..1 it converts X == 0 -> X ^ 1.
But here we have possible type change as a comparison is boolean and X
might not.

Kai

>> Regards,
>> Kai
>>
>


Re: [build] Move fixed-bit support to toplevel libgcc

2011-07-08 Thread Paolo Bonzini

On 07/08/2011 12:33 PM, Rainer Orth wrote:

The next patch in the line of toplevel libgcc moves proved to be
completely trivial: fixed-bit.[ch] is moved over with corresponding
include and Makefile changes.

Tested with a C-only --enable-fixed-point non-bootstrap build/test on
mips-sgi-irix6.5.  Without --enable-fixed-point, fixed-point support is
omitted on that target since it overflows the default command line
length limit, but I've raised that to the max. on my test machine.

The build succeeded, and libgcc.a/libgcc_s.so.1 grew by a factor 10
resp. 6 in size :-)

All fixed-point tests succeeded for both multilibs.

Ok for mainline?

Thanks.
Rainer


2011-06-22  Rainer Orth

gcc:
* config/fixed-bit.c, config/fixed-bit.h: Move to ../libgcc.

libgcc:
* fixed-bit.c, fixed-bit.h: New files.
* fixed-obj.mk ($(o)_s$(objext)): Use $(srcdir) to refer to fixed-bit.c.

diff --git a/gcc/config/fixed-bit.c b/libgcc/fixed-bit.c
rename from gcc/config/fixed-bit.c
rename to libgcc/fixed-bit.c
--- a/gcc/config/fixed-bit.c
+++ b/libgcc/fixed-bit.c
@@ -51,7 +51,7 @@ see the files COPYING3 and COPYING.RUNTI
  #define MIN_UNITS_PER_WORD UNITS_PER_WORD
  #endif

-#include "config/fixed-bit.h"
+#include "fixed-bit.h"

  #if defined(FIXED_ADD)&&  defined(L_add)
  FIXED_C_TYPE
diff --git a/gcc/config/fixed-bit.h b/libgcc/fixed-bit.h
rename from gcc/config/fixed-bit.h
rename to libgcc/fixed-bit.h
diff --git a/libgcc/fixed-obj.mk b/libgcc/fixed-obj.mk
--- a/libgcc/fixed-obj.mk
+++ b/libgcc/fixed-obj.mk
@@ -22,10 +22,10 @@ endif

  #$(info $o$(objext): -DL$($o-label) $($o-opt))

-$o$(objext): %$(objext): $(gcc_srcdir)/config/fixed-bit.c
-   $(gcc_compile) -DL$($*-label) $($*-opt) -c 
$(gcc_srcdir)/config/fixed-bit.c $(vis_hide)
+$o$(objext): %$(objext): $(srcdir)/fixed-bit.c
+   $(gcc_compile) -DL$($*-label) $($*-opt) -c $(srcdir)/fixed-bit.c 
$(vis_hide)

  ifeq ($(enable_shared),yes)
-$(o)_s$(objext): %_s$(objext): $(gcc_srcdir)/config/fixed-bit.c
-   $(gcc_s_compile) -DL$($*-label) $($*-opt) -c 
$(gcc_srcdir)/config/fixed-bit.c
+$(o)_s$(objext): %_s$(objext): $(srcdir)/fixed-bit.c
+   $(gcc_s_compile) -DL$($*-label) $($*-opt) -c $(srcdir)/fixed-bit.c
  endif



Ok.

Paolo


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
>> 2011/7/7 Paolo Bonzini :
>>> On 07/07/2011 06:07 PM, Kai Tietz wrote:

 +  /* We redo folding here one time for allowing to inspect more
 +     complex reductions.  */
 +  substitute_and_fold (op_with_constant_singleton_value_range,
 +                      vrp_fold_stmt, false);
 +  /* We need to mark this second pass to avoid re-entering of same
 +     edges for switch statments.  */
 +  in_second_pass = true;
    substitute_and_fold (op_with_constant_singleton_value_range,
                       vrp_fold_stmt, false);
 +  in_second_pass = false;
>>>
>>> This needs a much better explanation.
>>>
>>> Paolo
>>
>> Well, I can work on a better comment.  The complex reduction I mean
>> here are cases like
>>
>> int x;
>> int y;
>> _Bool D1;
>> _Bool D2;
>> _Bool D3;
>> int R;
>>
>> D1 = x[0..1] != 0;
>> D2 = y[0..1] != 0;
>> D3 = D1 & D2
>> R = (int) D3
>>
>> (testcase is already present. See tree-ssa/vrp47.c).
>>
>> As VRP in first pass produces (and replaces) to:
>>
>> D1 = (_Bool) x[0..1];
>> D2 = (_Bool) y[0..1];
>> D3 = D1 & D2
>> R = (int) D3
>>
>> Just in the second pass the reduction
>>
>> R = x[0..1] & y[0..1]
>
> So why wouldn't that happen during the first pass?  The first
> pass could change the IL to

The issue is that substitute_and_fold runs within BBs statements
folding from last to first.  So most simplifications are done too late
to recognize dependent ones. Maybe it would be another way here to have
a flag for substitute_and_fold to indicate that folding pass shall run
first -> last or last->first?

>  D1 = x[0..1] != 0;
>  D2 = y[0..1] != 0;
>  D3 = D1 & D2;
>  R = x & y;
>
> if D3 only has a single use.

Well, to change type of an SSA-name, if it has single-use might be
another way here.  To have the ability to enter new temp-registers
would be better and avoids the dependency of single use, but well,
range tables don't support that now.


Re: [PATCH, testsuite] Fix for PR49519, miscompiled 447.dealII in SPEC CPU 2006

2011-07-08 Thread Eric Botcazou
> So, one more time:

You didn't really need to re-submit but...

> ChangeLog entry:
> 2011-07-06  Kirill Yukhin  
>
> PR middle-end/49519
> * calls.c (mem_overlaps_already_clobbered_arg_p): Additional
> check if address is stored in register. If so - give up.
> (check_sibcall_argument_overlap_1): Do not perform check of
> overlapping when it is call to address.
>
> testsuite/ChangeLog entry:
> 2011-07-06  Kirill Yukhin  
>
> * g++.dg/torture/pr49519.C: New test.

OK, thanks.

-- 
Eric Botcazou


Re: [PATCH] Un-obsolete Interix

2011-07-08 Thread Gerald Pfeifer
Hi Doug,

On Wed, 29 Jun 2011, Douglas Rupp wrote:
> An update to the IA-32/x86-64 section seems the right place. I confess 
> to being ignorant about how to update this html page. Please advise.

at http://gcc.gnu.org/cvs.html I tried to summarize things a bit.
If you have any questions, don't hesitate to let me know!

If things turn out to be tricky, you're also very welcome to download
the current page using a browser, just send me a diff, and I'll take
care of the rest.

Gerald


Re: [build] Move dfp-bit support to toplevel libgcc

2011-07-08 Thread Rainer Orth
Paolo Bonzini  writes:

> Patch is okay if you take care of committing both, but please wait 48 hours
> or so, and please post the updated patch with config/dfp-bit.c moved to
> dfp-bit.c (config/t-dfprules should stay there).

Here's the revised version.  Bootstrapped without regressions on
x86_64-unknown-linux-gnu.

Will commit on monday.

Rainer


2011-06-22  Rainer Orth  

gcc:
* config/dfp-bit.c, config/dfp-bit.h: Move to ../libgcc.
* config/t-dfprules: Move to ../libgcc/config.
* config.gcc (i[34567]86-*-linux*, i[34567]86-*-kfreebsd*-gnu,
i[34567]86-*-knetbsd*-gnu, i[34567]86-*-gnu*,
i[34567]86-*-kopensolaris*-gnu): Remove t-dfprules from tmake_file.
(x86_64-*-linux*, x86_64-*-kfreebsd*-gnu, x86_64-*-knetbsd*-gnu):
Likewise.
(i[34567]86-*-cygwin*): Likewise.
(i[34567]86-*-mingw*,  x86_64-*-mingw*): Likewise.
(powerpc-*-linux*, powerpc64-*-linux*): Likewise.
* Makefile.in (D32PBIT_FUNCS, D64PBIT_FUNCS, D128PBIT_FUNCS): Remove.
(libgcc.mvars): Remove DFP_ENABLE, DFP_CFLAGS, D32PBIT_FUNCS,
D64PBIT_FUNCS, D128PBIT_FUNCS.

libgcc:
* dfp-bit.c, dfp-bit.h: New files.
* Makefile.in (D32PBIT_FUNCS, D64PBIT_FUNCS, D128PBIT_FUNCS): New
variables.
($(d32pbit-o)): Use $(srcdir) to refer to dfp-bit.c
($(d64pbit-o)): Likewise.
($(d128pbit-o)): Likewise.
* config/t-dfprules: New file.
* config.host (i[34567]86-*-linux*): Add t-dfprules to tmake_file.
(i[34567]86-*-kfreebsd*-gnu, i[34567]86-*-knetbsd*-gnu,
i[34567]86-*-gnu*, i[34567]86-*-kopensolaris*-gnu): Likewise.
(x86_64-*-linux*): Likewise.
(x86_64-*-kfreebsd*-gnu, x86_64-*-knetbsd*-gnu): Likewise.
(i[34567]86-*-cygwin*): Likewise.
(i[34567]86-*-mingw*,  x86_64-*-mingw*): Likewise.
(powerpc-*-linux*, powerpc64-*-linux*): Likewise.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1547,30 +1547,6 @@ TPBIT_FUNCS = _pack_tf _unpack_tf _addsu
 _lt_tf _le_tf _unord_tf _si_to_tf _tf_to_si _negate_tf _make_tf \
 _tf_to_df _tf_to_sf _thenan_tf _tf_to_usi _usi_to_tf
 
-D32PBIT_FUNCS = _addsub_sd _div_sd _mul_sd _plus_sd _minus_sd \
-   _eq_sd _ne_sd _lt_sd _gt_sd _le_sd _ge_sd \
-   _sd_to_si _sd_to_di _sd_to_usi _sd_to_udi \
-   _si_to_sd _di_to_sd _usi_to_sd _udi_to_sd \
-   _sd_to_sf _sd_to_df _sd_to_xf _sd_to_tf \
-   _sf_to_sd _df_to_sd _xf_to_sd _tf_to_sd \
-   _sd_to_dd _sd_to_td _unord_sd _conv_sd
-
-D64PBIT_FUNCS = _addsub_dd _div_dd _mul_dd _plus_dd _minus_dd \
-   _eq_dd _ne_dd _lt_dd _gt_dd _le_dd _ge_dd \
-   _dd_to_si _dd_to_di _dd_to_usi _dd_to_udi \
-   _si_to_dd _di_to_dd _usi_to_dd _udi_to_dd \
-   _dd_to_sf _dd_to_df _dd_to_xf _dd_to_tf \
-   _sf_to_dd _df_to_dd _xf_to_dd _tf_to_dd \
-   _dd_to_sd _dd_to_td _unord_dd _conv_dd
-
-D128PBIT_FUNCS = _addsub_td _div_td _mul_td _plus_td _minus_td \
-   _eq_td _ne_td _lt_td _gt_td _le_td _ge_td \
-   _td_to_si _td_to_di _td_to_usi _td_to_udi \
-   _si_to_td _di_to_td _usi_to_td _udi_to_td \
-   _td_to_sf _td_to_df _td_to_xf _td_to_tf \
-   _sf_to_td _df_to_td _xf_to_td _tf_to_td \
-   _td_to_sd _td_to_dd _unord_td _conv_td
-
 # These might cause a divide overflow trap and so are compiled with
 # unwinder info.
 LIB2_DIVMOD_FUNCS = _divdi3 _moddi3 _udivdi3 _umoddi3 _udiv_w_sdiv _udivmoddi4
@@ -1928,14 +1904,6 @@ libgcc.mvars: config.status Makefile $(L
echo DPBIT_FUNCS = '$(DPBIT_FUNCS)' >> tmp-libgcc.mvars
echo TPBIT = '$(TPBIT)' >> tmp-libgcc.mvars
echo TPBIT_FUNCS = '$(TPBIT_FUNCS)' >> tmp-libgcc.mvars
-   echo DFP_ENABLE = '$(DFP_ENABLE)' >> tmp-libgcc.mvars
-   echo DFP_CFLAGS='$(DFP_CFLAGS)' >> tmp-libgcc.mvars
-   echo D32PBIT='$(D32PBIT)' >> tmp-libgcc.mvars
-   echo D32PBIT_FUNCS='$(D32PBIT_FUNCS)' >> tmp-libgcc.mvars
-   echo D64PBIT='$(D64PBIT)' >> tmp-libgcc.mvars
-   echo D64PBIT_FUNCS='$(D64PBIT_FUNCS)' >> tmp-libgcc.mvars
-   echo D128PBIT='$(D128PBIT)' >> tmp-libgcc.mvars
-   echo D128PBIT_FUNCS='$(D128PBIT_FUNCS)' >> tmp-libgcc.mvars
echo GCC_EXTRA_PARTS = '$(GCC_EXTRA_PARTS)' >> tmp-libgcc.mvars
echo SHLIB_LINK = '$(subst 
$(GCC_FOR_TARGET),$$(GCC_FOR_TARGET),$(SHLIB_LINK))' >> tmp-libgcc.mvars
echo SHLIB_INSTALL = '$(SHLIB_INSTALL)' >> tmp-libgcc.mvars
diff --git a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1305,7 +1305,7 @@ i[34567]86-*-linux* | i[34567]86-*-kfree
i[34567]86-*-kopensolaris*-gnu) tm_file="${tm_file} i386/gnu-user.h 
kopensolaris-gnu.h i386/kopensolaris-gnu.h" ;;
i[34567]86-*-gnu*) tm_file="$tm_file i386/gnu-user.h gnu.h i386/gnu.h";;
esac
-   tmake_file="${tmake_file} i386/t-crtstuff i386/t-crtpc i386/t-crtfm 
t-dfprules"
+   tmake_file="${tmake_file} i386/t-crts

[build] Move libgcov support to toplevel libgcc

2011-07-08 Thread Rainer Orth
And another easy one: moving libgcov over to libgcc.

Bootstrapped without regressions on i386-pc-solaris2.11 and
x86_64-unknown-linux-gnu.

Ok for mainline?

After this one, and once the problems with the unwinder move are sorted
out, I've got a few more:

* gthr is just in the planning stage

* fp-bit is almost ready

* soft-fp just started to investigate

* libgcc_tm_file only an idea

Rainer


2011-07-02  Rainer Orth  

gcc:
* Makefile.in (LIBGCOV): Remove.
(libgcc.mvars): Remove LIBGCOV.
* libgcov.c: Move to ../libgcc.

libgcc:
* Makefile.in (LIBGCOV): New variable.
($(libgcov-objects)): Use $(srcdir) to refer to libgcov.c.
* libgcov.c: New file.

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1524,14 +1524,6 @@ MOSTLYCLEANFILES = insn-flags.h insn-con
 # Defined in libgcc2.c, included only in the static library.
 LIB2FUNCS_ST = _eprintf __gcc_bcmp
 
-# Defined in libgcov.c, included only in gcov library
-LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
-_gcov_fork _gcov_execl _gcov_execlp _gcov_execle \
-_gcov_execv _gcov_execvp _gcov_execve \
-_gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
-_gcov_indirect_call_profiler _gcov_average_profiler _gcov_ior_profiler \
-_gcov_merge_ior
-
 # These might cause a divide overflow trap and so are compiled with
 # unwinder info.
 LIB2_DIVMOD_FUNCS = _divdi3 _moddi3 _udivdi3 _umoddi3 _udiv_w_sdiv _udivmoddi4
@@ -1877,7 +1869,6 @@ libgcc.mvars: config.status Makefile $(L
echo LIB1ASMSRC = '$(LIB1ASMSRC)' >> tmp-libgcc.mvars
echo LIB2FUNCS_ST = '$(LIB2FUNCS_ST)' >> tmp-libgcc.mvars
echo LIB2FUNCS_EXCLUDE = '$(LIB2FUNCS_EXCLUDE)' >> tmp-libgcc.mvars
-   echo LIBGCOV = '$(LIBGCOV)' >> tmp-libgcc.mvars
echo LIB2ADD = '$(call srcdirify,$(LIB2ADD))' >> tmp-libgcc.mvars
echo LIB2ADD_ST = '$(call srcdirify,$(LIB2ADD_ST))' >> tmp-libgcc.mvars
echo LIB2_SIDITI_CONV_FUNCS = '$(LIB2_SIDITI_CONV_FUNCS)' >> 
tmp-libgcc.mvars
diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -793,9 +793,19 @@ iter-items := $(LIBUNWIND)
 include $(iterator)
 
 # Build libgcov components.
+
+# Defined in libgcov.c, included only in gcov library
+LIBGCOV = _gcov _gcov_merge_add _gcov_merge_single _gcov_merge_delta \
+_gcov_fork _gcov_execl _gcov_execlp _gcov_execle \
+_gcov_execv _gcov_execvp _gcov_execve \
+_gcov_interval_profiler _gcov_pow2_profiler _gcov_one_value_profiler \
+_gcov_indirect_call_profiler _gcov_average_profiler _gcov_ior_profiler \
+_gcov_merge_ior
+
 libgcov-objects = $(patsubst %,%$(objext),$(LIBGCOV))
-$(libgcov-objects): %$(objext): $(gcc_srcdir)/libgcov.c
-   $(gcc_compile) -DL$* -c $(gcc_srcdir)/libgcov.c
+
+$(libgcov-objects): %$(objext): $(srcdir)/libgcov.c
+   $(gcc_compile) -DL$* -c $(srcdir)/libgcov.c
 
 
 # Static libraries.
diff --git a/gcc/libgcov.c b/libgcc/libgcov.c
rename from gcc/libgcov.c
rename to libgcc/libgcov.c

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [patch tree-optimization]: [2 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:07 PM, Kai Tietz  wrote:
>> Hello,
>>
>> This patch - second of series - adds boolification of comparisons in
>> gimplifier.  For this
>> casts from/to boolean are marked as not-useless. And in fold_unary_loc
>> casts to non-boolean integral types are preserved.
>> The hunk in tree-ssa-forwprop.c in combine_cond-expr_cond is not strictly
>> necessary - as long as fold-const handles 1-bit precision bitwise-expression
>> with truth-logic - but it has shown to short-cut some more expensive folding. So
>> I kept it within this patch.
>
> Please split it out.  Also ...
>
>>
>> The adjusted testcase gcc.dg/uninit-15.c indicates that due
>> optimization we lose
>> in this case variables declaration.  But this might be to be expected.
>>
>> In vectorization we have a regression in gcc.dg/vect/vect-cond-3.c
>> test-case.  It's caused
>> by always having boolean-type on conditions.  So vectorizer sees
>> different types, which
>> aren't handled by vectorizer right now.  Maybe this issue could be
>> special-cased for
>> boolean-types in tree-vect-loop, by making operand for used condition
>> equal to vector-type.
>> But this is a subject for a different patch and not addressed by this series.
>>
>> There is a regression in tree-ssa/vrp47.c, and the fix is addressed
>> by the 3rd patch of this
>> series.
>>
>> Bootstrapped and regression tested for all standard-languages (plus
>> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>>
>> Ok for apply?
>>
>> Regards,
>> Kai
>>
>>
>> ChangeLog
>>
>> 2011-07-07  Kai Tietz  
>>
>>        * fold-const.c (fold_unary_loc): Preserve
>>        non-boolean-typed casts.
>>        * gimplify.c (gimple_boolify): Handle boolification
>>        of comparisons.
>>        (gimplify_expr): Boolifiy non aggregate-typed
>>        comparisons.
>>        * tree-cfg.c (verify_gimple_comparison): Check result
>>        type of comparison expression.
>>        * tree-ssa.c (useless_type_conversion_p): Preserve incompatible
>>        casts from/to boolean,
>>        * tree-ssa-forwprop.c (combine_cond_expr_cond): Add simplification
>>        support for one-bit-precision typed X for cases X != 0 and X == 0.
>>        (forward_propagate_comparison): Adjust test of condition
>>        result.
>>
>>
>>        * gcc.dg/tree-ssa/builtin-expect-5.c: Adjusted.
>>        * gcc.dg/tree-ssa/pr21031.c: Likewise.
>>        * gcc.dg/tree-ssa/pr30978.c: Likewise.
>>        * gcc.dg/tree-ssa/ssa-fre-6.c: Likewise.
>>        * gcc.dg/binop-xor1.c: Mark it as expected fail.
>>        * gcc.dg/binop-xor3.c: Likewise.
>>        * gcc.dg/uninit-15.c: Adjust reported message.
>>
>> Index: gcc-head/gcc/fold-const.c
>> ===
>> --- gcc-head.orig/gcc/fold-const.c
>> +++ gcc-head/gcc/fold-const.c
>> @@ -7665,11 +7665,11 @@ fold_unary_loc (location_t loc, enum tre
>>             non-integral type.
>>             Do not fold the result as that would not simplify further, also
>>             folding again results in recursions.  */
>> -         if (INTEGRAL_TYPE_P (type))
>> +         if (TREE_CODE (type) == BOOLEAN_TYPE)
>>            return build2_loc (loc, TREE_CODE (op0), type,
>>                               TREE_OPERAND (op0, 0),
>>                               TREE_OPERAND (op0, 1));
>> -         else
>> +         else if (!INTEGRAL_TYPE_P (type))
>>            return build3_loc (loc, COND_EXPR, type, op0,
>>                               fold_convert (type, boolean_true_node),
>>                               fold_convert (type, boolean_false_node));
>> Index: gcc-head/gcc/gimplify.c
>> ===
>> --- gcc-head.orig/gcc/gimplify.c
>> +++ gcc-head/gcc/gimplify.c
>> @@ -2842,18 +2842,23 @@ gimple_boolify (tree expr)
>>
>>     case TRUTH_NOT_EXPR:
>>       TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0));
>> -      /* FALLTHRU */
>>
>> -    case EQ_EXPR: case NE_EXPR:
>> -    case LE_EXPR: case GE_EXPR: case LT_EXPR: case GT_EXPR:
>>       /* These expressions always produce boolean results.  */
>> -      TREE_TYPE (expr) = boolean_type_node;
>> +      if (TREE_CODE (type) != BOOLEAN_TYPE)
>> +       TREE_TYPE (expr) = boolean_type_node;
>>       return expr;
>>
>>     default:
>> +      if (COMPARISON_CLASS_P (expr))
>> +       {
>> +         /* There expressions always prduce boolean results.  */
>> +         if (TREE_CODE (type) != BOOLEAN_TYPE)
>> +           TREE_TYPE (expr) = boolean_type_node;
>> +         return expr;
>> +       }
>>       /* Other expressions that get here must have boolean values, but
>>         might need to be converted to the appropriate mode.  */
>> -      if (type == boolean_type_node)
>> +      if (TREE_CODE (type) == BOOLEAN_TYPE)
>>        return expr;
>>       return fold_convert_loc (loc, boolean_type_node, expr);
>>     }
>> @@ -6763,7 +6768,7 @@ gimplify_expr (tree *expr_p, gimple_seq
>>         

Re: [build] Move libgcov support to toplevel libgcc

2011-07-08 Thread Paolo Bonzini

On 07/08/2011 01:31 PM, Rainer Orth wrote:

And another easy one: moving libgcov over to libgcc.

Bootstrapped without regressions on i386-pc-solaris2.11 and
x86_64-unknown-linux-gnu.

Ok for mainline?

After this one, and once the problems with the unwinder move are sorted
out, I've got a few more:

* gthr is just in the planning stage

* fp-bit is almost ready

* soft-fp just started to investigate

* libgcc_tm_file only an idea

Rainer



Ok!

Paolo


Re: [patch tree-optimization]: [2 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 1:32 PM, Kai Tietz  wrote:
> 2011/7/8 Richard Guenther :
>> On Thu, Jul 7, 2011 at 6:07 PM, Kai Tietz  wrote:
>>> Hello,
>>>
>>> This patch - second of series - adds boolification of comparisons in
>>> gimplifier.  For this
>>> casts from/to boolean are marked as not-useless. And in fold_unary_loc
>>> casts to non-boolean integral types are preserved.
>>> The hunk in tree-ssa-forwprop.c in combine_cond-expr_cond is not strictly
>>> necessary - as long as fold-const handles 1-bit precision bitwise-expression
>>> with truth-logic - but it has shown to short-cut some more expensive folding. So
>>> I kept it within this patch.
>>
>> Please split it out.  Also ...
>>
>>>
>>> The adjusted testcase gcc.dg/uninit-15.c indicates that due to
>>> optimization we lose
>>> in this case the variable's declaration.  But this might be expected.
>>>
>>> In vectorization we have a regression in gcc.dg/vect/vect-cond-3.c
>>> test-case.  It's caused
>>> by always having boolean-type on conditions.  So vectorizer sees
>>> different types, which
>>> aren't handled by vectorizer right now.  Maybe this issue could be
>>> special-cased for
>>> boolean-types in tree-vect-loop, by making operand for used condition
>>> equal to vector-type.
>>> But this is a subject for a different patch and not addressed by this 
>>> series.
>>>
>>> There is a regression in tree-ssa/vrp47.c, and the fix is addressed
>>> by the 3rd patch of this
>>> series.
>>>
>>> Bootstrapped and regression tested for all standard-languages (plus
>>> Ada and Obj-C++) on host x86_64-pc-linux-gnu.
>>>
>>> Ok for apply?
>>>
>>> Regards,
>>> Kai
>>>
>>>
>>> ChangeLog
>>>
>>> 2011-07-07  Kai Tietz  
>>>
>>>        * fold-const.c (fold_unary_loc): Preserve
>>>        non-boolean-typed casts.
>>>        * gimplify.c (gimple_boolify): Handle boolification
>>>        of comparisons.
>>>        (gimplify_expr): Boolifiy non aggregate-typed
>>>        comparisons.
>>>        * tree-cfg.c (verify_gimple_comparison): Check result
>>>        type of comparison expression.
>>>        * tree-ssa.c (useless_type_conversion_p): Preserve incompatible
>>>        casts from/to boolean,
>>>        * tree-ssa-forwprop.c (combine_cond_expr_cond): Add simplification
>>>        support for one-bit-precision typed X for cases X != 0 and X == 0.
>>>        (forward_propagate_comparison): Adjust test of condition
>>>        result.
>>>
>>>
>>>        * gcc.dg/tree-ssa/builtin-expect-5.c: Adjusted.
>>>        * gcc.dg/tree-ssa/pr21031.c: Likewise.
>>>        * gcc.dg/tree-ssa/pr30978.c: Likewise.
>>>        * gcc.dg/tree-ssa/ssa-fre-6.c: Likewise.
>>>        * gcc.dg/binop-xor1.c: Mark it as expected fail.
>>>        * gcc.dg/binop-xor3.c: Likewise.
>>>        * gcc.dg/uninit-15.c: Adjust reported message.
>>>
>>> Index: gcc-head/gcc/fold-const.c
>>> ===
>>> --- gcc-head.orig/gcc/fold-const.c
>>> +++ gcc-head/gcc/fold-const.c
>>> @@ -7665,11 +7665,11 @@ fold_unary_loc (location_t loc, enum tre
>>>             non-integral type.
>>>             Do not fold the result as that would not simplify further, also
>>>             folding again results in recursions.  */
>>> -         if (INTEGRAL_TYPE_P (type))
>>> +         if (TREE_CODE (type) == BOOLEAN_TYPE)
>>>            return build2_loc (loc, TREE_CODE (op0), type,
>>>                               TREE_OPERAND (op0, 0),
>>>                               TREE_OPERAND (op0, 1));
>>> -         else
>>> +         else if (!INTEGRAL_TYPE_P (type))
>>>            return build3_loc (loc, COND_EXPR, type, op0,
>>>                               fold_convert (type, boolean_true_node),
>>>                               fold_convert (type, boolean_false_node));
>>> Index: gcc-head/gcc/gimplify.c
>>> ===
>>> --- gcc-head.orig/gcc/gimplify.c
>>> +++ gcc-head/gcc/gimplify.c
>>> @@ -2842,18 +2842,23 @@ gimple_boolify (tree expr)
>>>
>>>     case TRUTH_NOT_EXPR:
>>>       TREE_OPERAND (expr, 0) = gimple_boolify (TREE_OPERAND (expr, 0));
>>> -      /* FALLTHRU */
>>>
>>> -    case EQ_EXPR: case NE_EXPR:
>>> -    case LE_EXPR: case GE_EXPR: case LT_EXPR: case GT_EXPR:
>>>       /* These expressions always produce boolean results.  */
>>> -      TREE_TYPE (expr) = boolean_type_node;
>>> +      if (TREE_CODE (type) != BOOLEAN_TYPE)
>>> +       TREE_TYPE (expr) = boolean_type_node;
>>>       return expr;
>>>
>>>     default:
>>> +      if (COMPARISON_CLASS_P (expr))
>>> +       {
>>> +         /* There expressions always prduce boolean results.  */
>>> +         if (TREE_CODE (type) != BOOLEAN_TYPE)
>>> +           TREE_TYPE (expr) = boolean_type_node;
>>> +         return expr;
>>> +       }
>>>       /* Other expressions that get here must have boolean values, but
>>>         might need to be converted to the appropriate mode.  */
>>> -      if (type == boolean_type_node)
>>> +      if (TREE_CODE (type) == BOOLEAN_TYPE

Re: [Patch, AVR]: Fix PR46779

2011-07-08 Thread Georg-Johann Lay
Denis Chertykov wrote:
> 2011/7/8 Georg-Johann Lay :
>> CCed Eric and Bernd.
>>
>> Denis Chertykov wrote:
 Did you decide about the fix for PR46779?

 http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00810.html

 Is it ok to commit?
>>> I forgot about testsuite regressions for this patch.
>>>
>>> Denis.
>>
>> There were no new regressions:
>>  http://gcc.gnu.org/ml/gcc-patches/2011-06/msg00747.html
>>
>> However, with the actual trunk (SVN 175991), I get two more
>> spill fails for following sources:
>>
>> ./gcc.c-torture/compile/pr32349.c -O1 -mmcu=atmega128
>>
>>  pr30338.c: In function 'testload_func':
>> pr30338.c:13:1: error: unable to find a register to spill in class
>> 'POINTER_REGS'
>> pr30338.c:13:1: error: this is the insn:
>> (insn 14 13 15 2 (set (reg:QI 24 r24 [orig:73 *D.1963_37 ] [73])
>>(mem:QI (subreg:HI (reg:SI 71) 0) [0 *D.1963_37+0 S1 A8]))
>> pr30338.c:9 4 {*movqi}
>> (expr_list:REG_DEAD (reg:SI 71)
>>(nil)))
>> pr30338.c:13:1: internal compiler error: in spill_failure, at
>> reload1.c:2120
>>
>>
>>
>> ./gcc.c-torture/compile/pr32349.c -S -O3 -funroll-loops
>>
>> pr32349.c: In function 'foo':
>> pr32349.c:26:1: error: unable to find a register to spill in class
>> 'POINTER_REGS'
>> pr32349.c:26:1: error: this is the insn:
>> (insn 175 197 177 10 (set (reg/v:SI 234 [ m ])
>>(mem:SI (post_inc:HI (reg:HI 16 r16 [orig:192 ivtmp.18 ]
>> [192])) [3 MEM[base: D.1996_74, offset: 0B]+0 S4 A8])) pr32349.c:18 12
>> {*movsi}
>> (expr_list:REG_INC (reg:HI 16 r16 [orig:192 ivtmp.18 ] [192])
>>(nil)))
>> pr32349.c:26:1: internal compiler error: in spill_failure, at
>> reload1.c:2120
>>
>>
>> (1)
>> I can fix *both* fails with additional test in avr_hard_regno_mode_ok:
>>
>> +   if (GET_MODE_SIZE (mode) >= 4
>> +   && regno >= REG_X)
>> + return 0;
>>
>> (2)
>> I can fix the first fail but *not* the second by not allow SUBREGs in
>> avr_legitimate_address_p:
>>
>> -   if (!strict && GET_CODE (x) == SUBREG) */
>> -   x = SUBREG_REG (x); */
>>
>>
>> (2) Looks very reasonable, Eric Botcazou proposed it because he ran
>> into problems:
>>   http://gcc.gnu.org/ml/gcc-patches/2011-04/msg01367.html
>>
>> (1) Appears to be hackish, but it should be ok.  If code breaks
>> because of that it's *definitely* a reload bug (e.g. SI-subreg of DI).
>>
>> Even the original avr_hard_regno_mode_ok is ok IMO because if a
>> machine says "I can hold HI in 28 but not QI in 29" reload has to
>> handle it (except a machine must allow word_mode in *all* its
>> GENERAL_REGS, don't know if that's a must).
>>
>> I made a patch for reload, too:
>>   http://gcc.gnu.org/ml/gcc/2011-06/msg5.html
>>
>> Because IRA generates SUBREG of hardreg (which old lreg/greg handled
>> ok) and reload does not handle it correctly.  It generates a spill but
>> without the needed input reload so that one part of the register is
>> missing.
>>
>> reload blames IRA or BE, IRA blames reload, BE blames IRA, etc...
>>
>>
>> I didn't rerun the testsuite with (1) or/and (2), I'd like both (1)
>> and (2) in the compiler.  What do you think?
> 
> I think that AVR is a stress test for GCC core. We are on the edge.
> IMHO your patch is a change one tweaks to another.
> It's not needed if it adds regressions.
> 
> Denis.

Reran testsuite against newer version (175991).

First the good news.

Following tests pass, that's the reason for the patch:

* gcc.target/avr/pr46779-1.c
* gcc.target/avr/pr46779-2.c

These tests now pass, too.  They all came up with spill fail both with
and without original patch, but pass with (1) and (2) added:

* gcc.c-torture/execute/pr38051.c (-Os)
* gcc.dg/20030324-1.c (-O -fstrict-aliasing -fgcse)
* gcc.dg/pr43670.c (-O -ftree-vrp -fcompare-debug)

And here the not-so-good news. There's additional ICE in reload:

* gcc.dg/pr32912-2.c (-Os)

pr32912-2.c:23:1: internal compiler error: in find_valid_class, at
reload.c:708
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.
compiler exited with status 1
output is:
pr32912-2.c: In function 'bar':
pr32912-2.c:23:1: internal compiler error: in find_valid_class, at
reload.c:708


But look at the source!

#if(__SIZEOF_INT__ >= 4)
typedef int __m128i __attribute__ ((__vector_size__ (16)));
#else
typedef long __m128i __attribute__ ((__vector_size__ (16)));
#endif

That's not sensible on AVR at all!
The stack trace:

Breakpoint 1, fancy_abort (file=0x8896f38
"../../../gcc.gnu.org/trunk/gcc/reload.c", line=708,
function=0x8896fa8 "find_valid_class") at
../../../gcc.gnu.org/trunk/gcc/diagnostic.c:893
(gdb) bt
#0  fancy_abort (file=0x8896f38
"../../../gcc.gnu.org/trunk/gcc/reload.c", line=708,
function=0x8896fa8 "find_valid_class") at
../../../gcc.gnu.org/trunk/gcc/diagnostic.c:893
#1  0x0845a9d4 in find_valid_class (outer=SImode, inner=TImode, n=12,
dest_regno=16) at ../../../gcc.gnu.org/trunk/gcc/reload.c:708
#2  0x0845bfd5 in push_reload (in=0x0, out=

Re: Provide 64-bit default Solaris/x86 configuration (PR target/39150)

2011-07-08 Thread Rainer Orth
Ian Lance Taylor  writes:

> Rainer Orth  writes:
>
>> All bootstraps have completed without regressions, so I've installed the
>> patch as is, after verifying that the libgo parts aren't present in the
>> upstream Go repo.
>
> I committed the libgo patch to the upstream repository.

I completely missed that: at least in https://go.googlecode.com/hg/
libtool.m4 isn't present.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] Make VRP optimize useless conversions

2011-07-08 Thread Michael Matz
Hi,

On Fri, 8 Jul 2011, Richard Guenther wrote:

> It should be indeed safe with the current handling of conversions, but 
> better be safe.  So, like the following?

No.  The point is that you can't compare the bounds that VRP computes with 
each other when the outcome affects correctness.  Think about a very 
trivial and stupid VRP, that assigns the range [WIDEST_INT_MIN .. 
WIDEST_UINT_MAX] to each and every SSA name without looking at types and 
operations at all (assuming that this reflects the largest int type on the 
target).  It's useless but correct.  Of course we wouldn't implement such 
useless range discovery, but similar situations can arise when some VRP 
algorithms give up for certain reasons, or computation of tight bounds 
merely isn't implemented for some operations.

Your routines need to work also in the presence of such imprecise ranges.

Hence, the check that the intermediate conversion is useless needs to take 
into account the input value range (that's conservatively correct), and 
the precision and signedness of the target type (if it can represent all 
values of the input range the conversion was useless).  It must not look at 
the suspected value range of the destination, precisely because it is 
conservative only.


Ciao,
Michael.


[PATCH] Fix rs6000 vector select RTL patterns (PR target/49621)

2011-07-08 Thread Jakub Jelinek
Hi!

As mentioned in the PR, IMNSHO the rs6000 vector_select_* patterns
are invalid RTL, they compare a vector register in some vector mode
to const0_rtx instead of corresponding vector zero constant.

The "Comparison Operations" section of RTL docs says:
"The mode of the comparison is determined by the operands; they
must both be valid for a common machine mode."

Having one vector mode and one VOIDmode confuses simplify-rtx.c enough
to create simplifications which lead to ICEs.

The following patch fixes that by using CONST0_RTX (mode) instead
in the patterns.

Bootstrapped/regtested on powerpc64-linux --with-cpu=default32 and
powerpc64-linux, ok for trunk/4.6?

2011-07-08  Jakub Jelinek  

PR target/49621
* config/rs6000/rs6000.c (rs6000_emit_vector_cond_expr): Use
CONST0_RTX (dest_mode) instead of const0_rtx as second operand
of NE.
* config/rs6000/vector.md (vector_select_,
vector_select__uns): Change second operand of NE to
CONST0_RTX (mode) instead of const0_rtx.
* config/rs6000/altivec.md (*altivec_vsel,
*altivec_vsel_uns): Expect second operand of NE to be
zero_constant of the corresponding vector mode.
* config/rs6000/vsx.md (*vsx_xxsel, *vsx_xxsel_uns):
Likewise.

* gcc.target/powerpc/altivec-34.c: New test.

--- gcc/config/rs6000/rs6000.c.jj   2011-07-07 13:25:49.0 +0200
+++ gcc/config/rs6000/rs6000.c  2011-07-08 09:28:30.0 +0200
@@ -16888,7 +16888,7 @@ rs6000_emit_vector_cond_expr (rtx dest, 
   op_false = tmp;
 }
 
-  cond2 = gen_rtx_fmt_ee (NE, cc_mode, mask, const0_rtx);
+  cond2 = gen_rtx_fmt_ee (NE, cc_mode, mask, CONST0_RTX (dest_mode));
   emit_insn (gen_rtx_SET (VOIDmode,
  dest,
  gen_rtx_IF_THEN_ELSE (dest_mode,
--- gcc/config/rs6000/vector.md.jj  2011-05-02 18:39:25.0 +0200
+++ gcc/config/rs6000/vector.md 2011-07-07 13:50:37.0 +0200
@@ -465,21 +465,21 @@ (define_expand "vector_select_"
   [(set (match_operand:VEC_L 0 "vlogical_operand" "")
(if_then_else:VEC_L
 (ne:CC (match_operand:VEC_L 3 "vlogical_operand" "")
-   (const_int 0))
+   (match_dup 4))
 (match_operand:VEC_L 2 "vlogical_operand" "")
 (match_operand:VEC_L 1 "vlogical_operand" "")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
-  "")
+  "operands[4] = CONST0_RTX (mode);")
 
 (define_expand "vector_select__uns"
   [(set (match_operand:VEC_L 0 "vlogical_operand" "")
(if_then_else:VEC_L
 (ne:CCUNS (match_operand:VEC_L 3 "vlogical_operand" "")
-  (const_int 0))
+  (match_dup 4))
 (match_operand:VEC_L 2 "vlogical_operand" "")
 (match_operand:VEC_L 1 "vlogical_operand" "")))]
   "VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)"
-  "")
+  "operands[4] = CONST0_RTX (mode);")
 
 ;; Expansions that compare vectors producing a vector result and a predicate,
 ;; setting CR6 to indicate a combined status
--- gcc/config/rs6000/altivec.md.jj 2011-04-01 23:09:21.0 +0200
+++ gcc/config/rs6000/altivec.md2011-07-07 13:50:37.0 +0200
@@ -487,7 +487,7 @@ (define_insn "*altivec_vsel"
   [(set (match_operand:VM 0 "altivec_register_operand" "=v")
(if_then_else:VM
 (ne:CC (match_operand:VM 1 "altivec_register_operand" "v")
-   (const_int 0))
+   (match_operand:VM 4 "zero_constant" ""))
 (match_operand:VM 2 "altivec_register_operand" "v")
 (match_operand:VM 3 "altivec_register_operand" "v")))]
   "VECTOR_MEM_ALTIVEC_P (mode)"
@@ -498,7 +498,7 @@ (define_insn "*altivec_vsel_uns"
   [(set (match_operand:VM 0 "altivec_register_operand" "=v")
(if_then_else:VM
 (ne:CCUNS (match_operand:VM 1 "altivec_register_operand" "v")
-  (const_int 0))
+  (match_operand:VM 4 "zero_constant" ""))
 (match_operand:VM 2 "altivec_register_operand" "v")
 (match_operand:VM 3 "altivec_register_operand" "v")))]
   "VECTOR_MEM_ALTIVEC_P (mode)"
--- gcc/config/rs6000/vsx.md.jj 2011-05-02 18:39:25.0 +0200
+++ gcc/config/rs6000/vsx.md2011-07-07 13:50:37.0 +0200
@@ -674,7 +674,7 @@ (define_insn "*vsx_xxsel"
   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa")
(if_then_else:VSX_L
 (ne:CC (match_operand:VSX_L 1 "vsx_register_operand" ",wa")
-   (const_int 0))
+   (match_operand:VSX_L 4 "zero_constant" ""))
 (match_operand:VSX_L 2 "vsx_register_operand" ",wa")
 (match_operand:VSX_L 3 "vsx_register_operand" ",wa")))]
   "VECTOR_MEM_VSX_P (mode)"
@@ -685,7 +685,7 @@ (define_insn "*vsx_xxsel_uns"
   [(set (match_operand:VSX_L 0 "vsx_register_operand" "=,?wa")
(if_then_else:VSX_L
 (ne:CCUNS (match_operand:VSX_L 1 "vsx_register_operand" ",wa")
-  (const_int 0))
+  (match_operand:VSX_L 4 "zero_constant" ""))
   

Re: [PATCH (3/7)] Widening multiply-and-accumulate pattern matching

2011-07-08 Thread Andrew Stubbs

On 07/07/11 13:37, Richard Guenther wrote:

I'll cook up a quick patch for VRP.


Like the attached.  I'll finish and properly test it.


Your patch appears to do the wrong thing for this test case:

int
foo (int a, short b, short c)
{
  int bc = b * c;
  return a + (short)bc;
}

With your patch, the input to the widening-mult pass now looks like this:

foo (int a, short int b, short int c)
{
  int bc;
  int D.2016;
  int D.2015;
  int D.2014;

<bb 2>:
  D.2014_2 = (int) b_1(D);
  D.2015_4 = (int) c_3(D);
  bc_5 = D.2014_2 * D.2015_4;
  D.2016_9 = bc_5 + a_8(D);
  return D.2016_9;

}

It looks like when the user tries to deliberately break the maths your 
patch seems to unbreak it.


Andrew


[PATCH] gcc.dg/20020312-2.c: pass -Wl,-no_pie for darwin11

2011-07-08 Thread Jack Howarth
   Currently the gcc.dg/20020312-2.c testcase fails on darwin11 with...

FAIL: gcc.dg/20020312-2.c (test for excess errors)

at -m32 due to the fact that the absolute addressing (-mdynamic-no-pic) is not 
allowed with the
default -pie linkage of darwin11 and later which produces a warning. The 
attached patch solves this by passing
-Wl,-no_pie for darwin10 and later. This form of the target match was selected 
because the linker
in darwin9 and earlier doesn't understand -no_pie. Tested on 
x86_64-apple-darwin10/11. Okay for
gcc trunk and gcc-4_6-branch/gcc-4_5-branch/gcc-4_4-branch?
 Jack

2011-07-08  Jack Howarth 

* gcc.dg/20020312-2.c: Pass -Wl,-no_pie at -m32 for darwin10 and later.

Index: gcc.dg/20020312-2.c
===
--- gcc.dg/20020312-2.c (revision 176025)
+++ gcc.dg/20020312-2.c (working copy)
@@ -7,6 +7,7 @@
 
 /* { dg-do run } */
 /* { dg-options "-O -fno-pic" } */
+/* { dg-options "-O -fno-pic -Wl,-no_pie" { target { { i?86-*-darwin1* 
x86_64-*-darwin1* } && ilp32 } } } */
 
 extern void abort (void);
 


Re: what can be in a group set?

2011-07-08 Thread Paolo Bonzini

On 07/08/2011 12:43 PM, Richard Sandiford wrote:

The docs also say that the first expr_list can be null:

   If @var{lval} is a @code{parallel}, it is used to represent the case of
   a function returning a structure in multiple registers.  Each element
   of the @code{parallel} is an @code{expr_list} whose first operand is a
   @code{reg} and whose second operand is a @code{const_int} representing the
   offset (in bytes) into the structure at which the data in that register
   corresponds.  The first element may be null to indicate that the structure
   is also passed partly in memory.

but I can't see any code to handle that.  Am I missing something,
or does the lack of a crash here mean that we can remove the last
sentence?

(It might have been added for symmetry with argument passing, where this
sort of thing is needed.  But if it isn't actually used or implemented for
returns, it might be less confusing to remove it.)


Indeed.  Dimitrios, can you pick up the patch since it will somewhat 
simplify your work to eliminate defs_generated?


Paolo


Re: what can be in a group set?

2011-07-08 Thread Dimitrios Apostolou

On Fri, 8 Jul 2011, Paolo Bonzini wrote:

On 07/08/2011 12:43 PM, Richard Sandiford wrote:

The docs also say that the first expr_list can be null:

   If @var{lval} is a @code{parallel}, it is used to represent the case of
   a function returning a structure in multiple registers.  Each element
   of the @code{parallel} is an @code{expr_list} whose first operand is a
   @code{reg} and whose second operand is a @code{const_int} representing 
the

   offset (in bytes) into the structure at which the data in that register
   corresponds.  The first element may be null to indicate that the 
structure

   is also passed partly in memory.

but I can't see any code to handle that.  Am I missing something,
or does the lack of a crash here mean that we can remove the last
sentence?

(It might have been added for symmetry with argument passing, where this
sort of thing is needed.  But if it isn't actually used or implemented for
returns, it might be less confusing to remove it.)


Indeed.  Dimitrios, can you pick up the patch since it will somewhat simplify 
your work to eliminate defs_generated?


I'll certainly try :-)

Paolo, something else, in df_mark_reg() is it ever possible for regno to 
be >= FIRST_PSEUDO_REGISTER? An assert I've put doesn't trigger for my 
simple test :-)



Thanks,
Dimitris



Re: [PATCH, testsuite] Fix for PR49519, miscompiled 447.dealII in SPEC CPU 2006

2011-07-08 Thread H.J. Lu
On Fri, Jul 8, 2011 at 3:58 AM, Eric Botcazou  wrote:
>> So, one more time:
>
> You didn't really need to re-submit but...
>
>> ChangeLog entry:
>> 2011-07-06  Kirill Yukhin  
>>
>>         PR middle-end/49519
>>         * calls.c (mem_overlaps_already_clobbered_arg_p): Additional
>>         check if address is stored in register. If so - give up.
>>         (check_sibcall_argument_overlap_1): Do not perform check of
>>         overlapping when it is call to address.
>>
>> testsuite/ChangeLog entry:
>> 2011-07-06  Kirill Yukhin  
>>
>>         * g++.dg/torture/pr49519.C: New test.
>
> OK, thanks.
>

I checked it in for Kirill.

Thanks.

-- 
H.J.


[PATCH] darwin-segaddr.c: pass -Wl,-no_pie for darwin11

2011-07-08 Thread Jack Howarth
   Currently, gcc.dg/darwin-segaddr.c fails its execution test at both -m32
and -m64 on darwin11. This is due to the default -pie linkage on darwin11 
allowing
ASLR to randomize the segaddr on each individual execution of the resulting 
binary. The
attached patch eliminates this failure by passing -Wl,-no_pie for darwin10 and 
later.
This form of the match was selected because darwin9 and earlier doesn't support
-no_pie in the linker. Tested on x86_64-apple-darwin10/11. Okay for gcc trunk
and gcc-4_6-branch/gcc-4_5-branch/gcc-4_4-branch?
Jack

2011-07-08  Jack Howarth 

* gcc.dg/darwin-segaddr.c: Pass -Wl,-no_pie for darwin10 and later.


Index: gcc.dg/darwin-segaddr.c
===
--- gcc.dg/darwin-segaddr.c (revision 176025)
+++ gcc.dg/darwin-segaddr.c (working copy)
@@ -2,6 +2,8 @@
 /* { dg-do run { target *-*-darwin* } } */
 /* { dg-options "-O0 -segaddr __TEST 0x20" { target { *-*-darwin* && { ! 
lp64 } } } } */
 /* { dg-options "-O0 -segaddr __TEST 0x11000" { target { *-*-darwin* && 
lp64 } } } */
+/* { dg-options "-O0 -segaddr __TEST 0x20 -Wl,-no_pie" { target { 
*-*-darwin1* && { ! lp64 } } } } */
+/* { dg-options "-O0 -segaddr __TEST 0x11000 -Wl,-no_pie" { target { 
*-*-darwin1* && lp64 } } } */
 
 extern void abort ();
 


[Patch,Committed,AVR]: Don't use 'magic' register number.

2011-07-08 Thread Georg-Johann Lay
http://gcc.gnu.org/viewcvs?view=revision&revision=176041

Committed this patchlet as pre-approved by Denis.

Johann

* config/avr/avr.c (output_reload_insisf): Use 'REG_Z+1' instead
of magic '31'.


--- trunk/gcc/config/avr/avr.c  2011/07/08 13:03:38 176040
+++ trunk/gcc/config/avr/avr.c  2011/07/08 13:09:07 176041
@@ -6239,7 +6239,7 @@
  That's cheaper than loading from constant pool.  */

   cooked_clobber_p = true;
-  clobber_reg = gen_rtx_REG (QImode, 31);
+  clobber_reg = gen_rtx_REG (QImode, REG_Z + 1);
   avr_asm_len ("mov __tmp_reg__,%0", &clobber_reg, len, 1);
   break;
 }



Re: [PATCH (3/7)] Widening multiply-and-accumulate pattern matching

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 2:44 PM, Andrew Stubbs  wrote:
> On 07/07/11 13:37, Richard Guenther wrote:
>>>
>>> I'll cook up a quick patch for VRP.
>>
>> Like the attached.  I'll finish and properly test it.
>
> Your patch appears to do the wrong thing for this test case:
>
> int
> foo (int a, short b, short c)
> {
>  int bc = b * c;
>  return a + (short)bc;
> }
>
> With your patch, the input to the widening-mult pass now looks like this:
>
> foo (int a, short int b, short int c)
> {
>  int bc;
>  int D.2016;
>  int D.2015;
>  int D.2014;
>
> <bb 2>:
>  D.2014_2 = (int) b_1(D);
>  D.2015_4 = (int) c_3(D);
>  bc_5 = D.2014_2 * D.2015_4;
>  D.2016_9 = bc_5 + a_8(D);
>  return D.2016_9;
>
> }
>
> It looks like when the user tries to deliberately break the maths your patch
> seems to unbreak it.

Yeah, I fixed that in the checked in version.

Richard.

> Andrew
>


Re: what can be in a group set?

2011-07-08 Thread Paolo Bonzini
On 07/08/2011 03:05 PM, Dimitrios Apostolou wrote:
> 
> Paolo, something else, in df_mark_reg() is it ever possible for regno to 
> be >= FIRST_PSEUDO_REGISTER? An assert I've put doesn't trigger for my 
> simple test :-)

From reading the docs of EH_RETURN_STACKADJ_RTX and EH_RETURN_HANDLER_RTX,
it seems you're safe.

This in df-problems.c also suggests the same:

  if (bb_index == EXIT_BLOCK)
{
  unsigned regno;
  bitmap_iterator bi;
  EXECUTE_IF_SET_IN_BITMAP (df->exit_block_uses, FIRST_PSEUDO_REGISTER,
regno, bi)
gcc_unreachable ();
}

A more solid reasoning is that a pseudo cannot be considered live at exit or
at entry to a function, because the caller would not know where it lives.

That said, changing exit_block_uses and entry_block_defs to HARD_REG_SET would
be a nice cleanup, but it would also touch target code due to

  targetm.extra_live_on_entry (entry_block_defs);

I wouldn't bother for now until you're a bit more experienced.  Unlike
invalidated_by_call it shouldn't show up in profiles, or does it?

Paolo


[patch tree-optimize]: Factor out label truth_andor as function

2011-07-08 Thread Kai Tietz
Hello,

This patch factors out the label truth_andor in fold_binary_loc into a
separate function.

2011-07-08  Kai Tietz  

* fold-const.c (fold_truth_andor): Factored out truth_andor
label from fold_binary as function.
(fold_binary_loc): Replace truth_andor label
by function fold_truth_andor.

Tested for x86_64-pc-linux-gnu. As pre-approved by Richi, applied to
trunk at revision 176043.

Regards,
Kai


Re: [PATCH] Make VRP optimize useless conversions

2011-07-08 Thread Richard Guenther
On Fri, 8 Jul 2011, Michael Matz wrote:

> Hi,
> 
> On Fri, 8 Jul 2011, Richard Guenther wrote:
> 
> > It should be indeed safe with the current handling of conversions, but 
> > better be safe.  So, like the following?
> 
> No.  The point is that you can't compare the bounds that VRP computes with 
> each other when the outcome affects correctness.  Think about a very 
> trivial and stupid VRP, that assigns the range [WIDEST_INT_MIN .. 
> WIDEST_UINT_MAX] to each and every SSA name without looking at types and 
> operations at all (assuming that this reflects the largest int type on the 
> target).  It's useless but correct.  Of course we wouldn't implement such 
> useless range discovery, but similar situations can arise when some VRP 
> algorithms give up for certain reasons, or computation of tight bounds 
> merely isn't implemented for some operations.
> 
> Your routines need to work also in the presence of such imprecise ranges.
> 
> Hence, the check that the intermediate conversion is useless needs to take 
> into account the input value range (that's conservatively correct), and 
> the precision and signedness of the target type (if it can represent all 
> values of the input range the conversion was useless).  It must not look at 
> the suspected value range of the destination, precisely because it is 
> conservative only.

Ok, indeed conservative is different for what VRP does and for what
a transformation must assess.  So the following patch makes
a conservative attempt at checking the transformation (which of
course non-surprisingly matches what the VRP part does).

So, more like the following?

Bootstrap & regtest in progress.

Thanks,
Richard.

2011-07-08  Richard Guenther  

* tree-vrp.c (simplify_conversion_using_ranges): Manually
translate the source value-range through the conversion chain.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c  (revision 176030)
+++ gcc/tree-vrp.c  (working copy)
@@ -7347,30 +7347,56 @@ simplify_switch_using_ranges (gimple stm
 static bool
 simplify_conversion_using_ranges (gimple stmt)
 {
-  tree rhs1 = gimple_assign_rhs1 (stmt);
-  gimple def_stmt = SSA_NAME_DEF_STMT (rhs1);
-  value_range_t *final, *inner;
+  tree innerop, middleop, finaltype;
+  gimple def_stmt;
+  value_range_t *innervr;
+  double_int innermin, innermax, middlemin, middlemax;
 
-  /* Obtain final and inner value-ranges for a conversion
- sequence (final-type)(intermediate-type)inner-type.  */
-  final = get_value_range (gimple_assign_lhs (stmt));
-  if (final->type != VR_RANGE)
-return false;
+  finaltype = TREE_TYPE (gimple_assign_lhs (stmt));
+  middleop = gimple_assign_rhs1 (stmt);
+  def_stmt = SSA_NAME_DEF_STMT (middleop);
   if (!is_gimple_assign (def_stmt)
   || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
 return false;
-  rhs1 = gimple_assign_rhs1 (def_stmt);
-  if (TREE_CODE (rhs1) != SSA_NAME)
+  innerop = gimple_assign_rhs1 (def_stmt);
+  if (TREE_CODE (innerop) != SSA_NAME)
+return false;
+
+  /* Do not allow changing a zero- to a sign-extension or vice versa.  */
+  if (TYPE_UNSIGNED (finaltype)
+  != TYPE_UNSIGNED (TREE_TYPE (middleop)))
 return false;
-  inner = get_value_range (rhs1);
-  if (inner->type != VR_RANGE)
+
+  /* Get the value-range of the inner operand.  */
+  innervr = get_value_range (innerop);
+  if (innervr->type != VR_RANGE
+  || TREE_CODE (innervr->min) != INTEGER_CST
+  || TREE_CODE (innervr->max) != INTEGER_CST)
 return false;
-  /* If the value-range is preserved by the conversion sequence strip
- the intermediate conversion.  */
-  if (!tree_int_cst_equal (final->min, inner->min)
-  || !tree_int_cst_equal (final->max, inner->max))
+
+  /* Simulate the conversion chain to check if the result is equal if
+ the middle conversion is removed.  */
+  innermin = tree_to_double_int (innervr->min);
+  innermax = tree_to_double_int (innervr->max);
+  middlemin = double_int_ext (innermin, TYPE_PRECISION (TREE_TYPE (middleop)),
+ TYPE_UNSIGNED (TREE_TYPE (middleop)));
+  middlemax = double_int_ext (innermax, TYPE_PRECISION (TREE_TYPE (middleop)),
+ TYPE_UNSIGNED (TREE_TYPE (middleop)));
+  if (!double_int_equal_p (double_int_ext (middlemin,
+  TYPE_PRECISION (finaltype),
+  TYPE_UNSIGNED (finaltype)),
+  double_int_ext (innermin,
+  TYPE_PRECISION (finaltype),
+  TYPE_UNSIGNED (finaltype)))
+  || !double_int_equal_p (double_int_ext (middlemax,
+ TYPE_PRECISION (finaltype),
+ TYPE_UNSIGNED (finaltype)),
+ double_int_ext (innermax,
+

Re: what can be in a group set?

2011-07-08 Thread Dimitrios Apostolou

Thanks Paolo for the detailed explanation!

On Fri, 8 Jul 2011, Paolo Bonzini wrote:


That said, changing exit_block_uses and entry_block_defs to HARD_REG_SET would
be a nice cleanup, but it would also touch target code due to

 targetm.extra_live_on_entry (entry_block_defs);



I've already done that :-p


I wouldn't bother for now until you're a bit more experienced.  Unlike
invalidated_by_call it shouldn't show up in profiles, or does it?


Indeed it doesn't show, I just wanted to do it as a clean-up for 
transitioning to HARD_REG_SET all relevant sets in struct df_d.


The only problem remaining is I need a bitmap_copy_from_hard_reg_set() 
function for df_lr_local_compute(), where the bb_info->use bitmap is 
initialised from the exit_block_uses HARD_REG_SET.



Dimitris



Re: [PATCH] __builtin_assume_aligned

2011-07-08 Thread Gerald Pfeifer
On Tue, 28 Jun 2011, Richard Guenther wrote:
>> Thanks, here is what I've committed after bootstrapping/regtesting
>> again on x86_64-linux and i686-linux.
> Thanks Jakub.  Probably worth an entry in changes.html.

Yes, please. :-)

Gerald


Re: [PATCH 4/6] Shrink-wrapping

2011-07-08 Thread Bernd Schmidt
On 07/07/11 22:08, Richard Sandiford wrote:
> Sure, I understand that returns does more than return on ARM.
> What I meant was: we'd normally want that other stuff to be
> expressed in rtl alongside the (return) rtx.  E.g. something like:
> 
>   (parallel
> [(return)
>  (set (reg r4) (mem (plus (reg sp) (const_int ...
>  (set (reg r5) (mem (plus (reg sp) (const_int ...
>  (set (reg sp) (plus (reg sp) (const_int ...)))])

I've thought about it some more. Isn't this just a question of
definitions? Much like we implicitly clobber call-used registers for a
CALL rtx, we might as well define RETURN to restore the intersection
between regs_ever_live and call-saved regs? This is what its current
usage implies, but I guess it's never been necessary to spell it out
explicitly since we don't optimize across branches to the exit block.


Bernd


Re: [PATCH] Address lowering [1/3] Main patch

2011-07-08 Thread William J. Schmidt
On Mon, 2011-07-04 at 17:30 +0200, Michael Matz wrote:
> Hi,
> 
> On Mon, 4 Jul 2011, Richard Guenther wrote:
> 
> > I still do not like the implementation of yet another CSE machinery
> > given that we already have two.
> 
> From reading it it really seems to be a normal block-local CSE, without 
> anything fancy.  Hence, moving the pass just a little earlier (before 
> pass_vrp/pass_dominator) should already provide for all optimizations.  If 
> not those should be improved.
> 
> I see that it is used for also getting rid of the zero-offset statements 
> in case non-zero-offsets follow.  I think that's generally worthwhile so 
> probably should be done in one of the above optimizers.

Just FYI, I've verified that this works as expected; the zero-offset
optimization can be moved into the dom2 pass without too much
difficulty, and the CSE in the dom2 pass is sufficient for what I've
seen in my limited testing.  This reduces the size and complexity of the
patch considerably -- thanks!

If it turns out that we end up going this route, it will mean modifying
at least 11 more scan tests whose expected output out of dom2 changes.

However, I'll be turning my attention now to some of the alternate
implementations that Richard suggested, to see how much of the gains can
be obtained by other means.

Thanks,
Bill




Re: [alpha] Don't force MIPS debugging on dwarf2.

2011-07-08 Thread Joseph S. Myers
On Thu, 7 Jul 2011, Richard Henderson wrote:

> alpha-elf: Disable stabs debugging, and the mips sdb extensions.
> 
> In particular, the mips sdb extensions accidentally implied the
> irix dwarf2 extensions and restrictions.  Disable stabs because
> it's almost certainly untested for years.

With this in, the only alpha target that ends up with MIPS_DEBUGGING_INFO 
defined is alpha*-dec-osf5.1*; all others use alpha/elf.h or alpha/vms.h.  
And the only other targets defining MIPS_DEBUGGING_INFO are Irix and MIPS 
OpenBSD - that is, all targets defining this macro are on the deprecation 
list.  So once we remove the deprecated targets after 4.7 branches we can 
remove all support for MIPS_DEBUGGING_INFO and poison the macro.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [df-scan.c] Optimise DF_REFs ordering in collection_rec, use HARD_REG_SETs instead of bitmaps

2011-07-08 Thread Paolo Bonzini

On 07/08/2011 11:05 AM, Dimitrios Apostolou wrote:

On Fri, 8 Jul 2011, Jakub Jelinek wrote:

On Fri, Jul 08, 2011 at 06:20:04AM +0300, Dimitrios Apostolou wrote:

The attached patch does two things for df_get_call_refs():
* First it uses HARD_REG_SETs for defs_generated and
regs_invalidated_by_call, instead of bitmaps. Replacing in total
more than 400K calls (for my testcase) to bitmap_bit_p() with the
much faster TEST_HARD_REG_BIT, reduces the total instruction count
from about 13M to 1.5M.


Have you verified that collection_rec->def_vec never contains pseudo
register references? Otherwise you couldn't use
HARD_REG_SET... gcc_checking_assert might be useful.



Hi Jakub, Steve pointed me to the following from GCC Internals Manual:

call_insn insns have the same extra fields as insn insns, accessed in
the same way and in addition contain a field CALL_INSN_FUNCTION_USAGE,
which contains a list (chain of expr_list expressions) containing use
and clobber expressions that denote hard registers and MEMs used or
clobbered by the called function.

So doesn't that mean that for CALL insns it should contain only HARD_REG
DEFs? I will of course use an assert to be sure.


That part is only for CALL_INSN_FUNCTION_USAGE, which is what 
df_get_call_refs handles.  However, if you rewrite the handling of 
defs_generated as required by your second patch, you'll then be sure 
that you will only have hard registers.


BTW, what testcase are you using?  I suggest that you try building 
stage1 with CFLAGS=--save-temps, and get some of the largest 
preprocessed .i files from there (combine and fold-const for example). 
You can then time them very easily from the old and new build 
directories, with "./cc1 /path/to/file.i -O2".


Paolo


C++ PATCH for c++/49673 (constexpr init should go in rodata)

2011-07-08 Thread Jason Merrill
Now that we have constexpr constructors, having a non-trivial 
constructor no longer precludes a variable being TREE_READONLY.  The 
front end will clear TREE_READONLY if the variable requires non-constant 
initialization.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit bd0343de0277ae5d66f60a42d6479df0161fc075
Author: Jason Merrill 
Date:   Fri Jul 8 09:32:45 2011 -0400

	PR c++/49673
gcc/c-family/
	* c-common.c (c_apply_type_quals_to_decl): Don't check
	TYPE_NEEDS_CONSTRUCTING.
gcc/cp/
	* typeck.c (cp_apply_type_quals_to_decl): Don't check
	TYPE_NEEDS_CONSTRUCTING.

diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index 67291de..f61b9cc 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -4058,14 +4058,11 @@ c_apply_type_quals_to_decl (int type_quals, tree decl)
   if (type == error_mark_node)
 return;
 
-  if (((type_quals & TYPE_QUAL_CONST)
-   || (type && TREE_CODE (type) == REFERENCE_TYPE))
-  /* An object declared 'const' is only readonly after it is
-	 initialized.  We don't have any way of expressing this currently,
-	 so we need to be conservative and unset TREE_READONLY for types
-	 with constructors.  Otherwise aliasing code will ignore stores in
-	 an inline constructor.  */
-  && !(type && TYPE_NEEDS_CONSTRUCTING (type)))
+  if ((type_quals & TYPE_QUAL_CONST)
+  || (type && TREE_CODE (type) == REFERENCE_TYPE))
+/* We used to check TYPE_NEEDS_CONSTRUCTING here, but now a constexpr
+   constructor can produce constant init, so rely on the front end to
+   clear TREE_READONLY if the variable has non-constant init.  */
 TREE_READONLY (decl) = 1;
   if (type_quals & TYPE_QUAL_VOLATILE)
 {
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 2acb18e..f0d68c3 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -8127,12 +8127,12 @@ cp_apply_type_quals_to_decl (int type_quals, tree decl)
 		&& type_quals != TYPE_UNQUALIFIED));
 
   /* Avoid setting TREE_READONLY incorrectly.  */
-  if (/* If the object has a constructor, the constructor may modify
-	 the object.  */
-  TYPE_NEEDS_CONSTRUCTING (type)
-  /* If the type isn't complete, we don't know yet if it will need
+  /* We used to check TYPE_NEEDS_CONSTRUCTING here, but now a constexpr
+ constructor can produce constant init, so rely on cp_finish_decl to
+ clear TREE_READONLY if the variable has non-constant init.  */
+  if (/* If the type isn't complete, we don't know yet if it will need
 	 constructing.  */
-  || !COMPLETE_TYPE_P (type)
+  !COMPLETE_TYPE_P (type)
   /* If the type has a mutable component, that component might be
 	 modified.  */
   || TYPE_HAS_MUTABLE_P (type))
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
new file mode 100644
index 000..e2edb2e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
@@ -0,0 +1,11 @@
+// PR c++/49673: check that test_data goes into .rodata
+// { dg-options -std=c++0x }
+// { dg-final { scan-assembler "rodata" } }
+
+struct Data
+{
+  int i;
+  constexpr Data(int i = 0) : i(i+1) {}
+};
+
+extern const Data test_data = { 1 };


[Patch,testsuite]: Skip -fschedule-insns if no scheduling available.

2011-07-08 Thread Georg-Johann Lay
There are tests in suite that set -fschedule-insns[2] and FAIL because of

warning: instruction scheduling not supported on this target machine
[enabled by default]

As there is no individual switch to disable the warning, the patch
disables some test cases to reduce the overall "noise of false test
fails".

There is no proc for that yet, so I wrote one. Lightly tested with my
testsuite: I now get UNSUPPORTED instead of FAIL.

Ok to commit?

Johann

* lib/target-supports.exp (check_effective_target_scheduling):
New Function.
* gcc.dg/pr46614.c: Add dg-require-effective-target scheduling.
* gcc.dg/pr45055.c: Ditto.
* gcc.dg/pr45353.c: Ditto.
* g++.dg/pr45056.C: Ditto.

Index: lib/target-supports.exp
===
--- lib/target-supports.exp	(revision 176044)
+++ lib/target-supports.exp	(working copy)
@@ -712,6 +712,14 @@ proc check_effective_target_function_sec
 } "-ffunction-sections"]
 }
 
+# Return 1 if instruction scheduling is available, 0 otherwise.
+
+proc check_effective_target_scheduling {} {
+return [check_no_compiler_messages scheduling object {
+	void foo (void) { }
+} "-fschedule-insns"]
+}
+
 # Return 1 if compilation with -fgraphite is error-free for trivial 
 # code, 0 otherwise.
 
Index: gcc.dg/pr46614.c
===
--- gcc.dg/pr46614.c	(revision 176044)
+++ gcc.dg/pr46614.c	(working copy)
@@ -1,6 +1,7 @@
 /* PR rtl-optimization/46614 */
 /* { dg-do run } */
 /* { dg-options "-O -fno-rename-registers -fsched2-use-superblocks -fschedule-insns2 -funroll-loops" } */
+/* { dg-require-effective-target scheduling } */
 
 extern void abort (void);
 
Index: gcc.dg/pr45055.c
===
--- gcc.dg/pr45055.c	(revision 176044)
+++ gcc.dg/pr45055.c	(working copy)
@@ -1,6 +1,7 @@
 /* PR debug/45055 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -ftracer -fsched-pressure -funroll-loops -fschedule-insns -fcompare-debug" } */
+/* { dg-require-effective-target scheduling } */
 
 int colormap[10];
 
Index: gcc.dg/pr45353.c
===
--- gcc.dg/pr45353.c	(revision 176044)
+++ gcc.dg/pr45353.c	(working copy)
@@ -1,6 +1,7 @@
 /* PR rtl-optimization/45353 */
 /* { dg-do compile } */
 /* { dg-options "-O2 -fschedule-insns -fselective-scheduling" } */
+/* { dg-require-effective-target scheduling } */
 
 void
 foo ()
Index: g++.dg/pr45056.C
===
--- g++.dg/pr45056.C	(revision 176044)
+++ g++.dg/pr45056.C	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O -fschedule-insns2 -fschedule-insns -g" } */
+/* { dg-require-effective-target scheduling } */
 
 template < class _T1, class _T2 > struct pair
 {


Re: Provide 64-bit default Solaris/x86 configuration (PR target/39150)

2011-07-08 Thread Ian Lance Taylor
Rainer Orth  writes:

> Ian Lance Taylor  writes:
>
>> Rainer Orth  writes:
>>
>>> All bootstraps have completed without regressions, so I've installed the
>>> patch as is, after verifying that the libgo parts aren't present in the
>>> upstream Go repo.
>>
>> I committed the libgo patch to the upstream repository.
>
> I completely missed that: at least in https://go.googlecode.com/hg/
> libtool.m4 isn't present.

The immediate upstream project for libgo is
https://gofrontend.googlecode.com/hg/ .  Most of the files in the libgo
directory in that project come from https://go.googlecode.com/hg/ , but
not the ones at top level, or in the config or syscalls directory.

Ian


Re: [build] Move libgcov support to toplevel libgcc

2011-07-08 Thread Joseph S. Myers
On Fri, 8 Jul 2011, Rainer Orth wrote:

> And another easy one: moving libgcov over to libgcc.

Do you have any specific plans regarding gcov-io.c and gcov-io.h?  Because 
they are genuinely used on both the host and the target they are a 
trickier case; I wonder if they should end up in their own toplevel 
directory like libdecnumber, building a proper library that exports 
different functions when configured for the host than for the target, that 
for the target gets included in libgcov and that for the host gets linked 
into cc1, gcov and gcov-dump rather than having files do the present 
'#include "gcov-io.c"'.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
>> 2011/7/7 Paolo Bonzini :
>>> On 07/07/2011 06:07 PM, Kai Tietz wrote:

 +  /* We redo folding here one time for allowing to inspect more
 +     complex reductions.  */
 +  substitute_and_fold (op_with_constant_singleton_value_range,
 +                      vrp_fold_stmt, false);
 +  /* We need to mark this second pass to avoid re-entering of same
 +     edges for switch statments.  */
 +  in_second_pass = true;
    substitute_and_fold (op_with_constant_singleton_value_range,
                       vrp_fold_stmt, false);
 +  in_second_pass = false;
>>>
>>> This needs a much better explanation.
>>>
>>> Paolo
>>
>> Well, I can work on a better comment.  The complex reduction I mean
>> here are cases like
>>
>> int x;
>> int y;
>> _Bool D1;
>> _Bool D2;
>> _Bool D3;
>> int R;
>>
>> D1 = x[0..1] != 0;
>> D2 = y[0..1] != 0;
>> D3 = D1 & D2
>> R = (int) D3
>>
>> (testcase is already present. See tree-ssa/vrp47.c).
>>
>> As VRP in first pass produces (and replaces) to:
>>
>> D1 = (_Bool) x[0..1];
>> D2 = (_Bool) y[0..1];
>> D3 = D1 & D2
>> R = (int) D3
>>
>> Just in the second pass the reduction
>>
>> R = x[0..1] & y[0..1]
>
> So why wouldn't that happen during the first pass?  The first
> pass could change the IL to
>
>  D1 = x[0..1] != 0;
>  D2 = y[0..1] != 0;
>  D3 = D1 & D2;
>  R = x & y;
>
> if D3 only has a single use.
No, as D3 would need a type change, and this isn't possible.  If it
wasn't absolutely clear, this patch to VRP is necessary after patch 2,
as here D1, D2, and D3 have bool-type, and just R is of type int.

>> can happen.  In general it is sad that VRP can't insert during pass
>> new statements right now.  This would cause issues in range-tables,
>> which aren't designed for insertations.  As otherwise, we could do
>> also simplify things like
>>
>> D1 = x[0..1] != 0;
>> D2 = y[0..1] == 0;
>> D3 = D1 & D2
>> R = (int) D3
>>
>> to
>> R = x[0..1] & (y[0..1] ^ 1)
>
> Why that ^ 1?  And why does that confuse the range tables
> if you re-use R?
Because we would need to insert a new statement and this isn't allowed
in VRP. See the comments in VRP and substitute_and_fold.  VRP
disallows to remove statements or to insert new ones.

>> Regards,
>> Kai


Re: C++ PATCH for c++/49673 (constexpr init should go in rodata)

2011-07-08 Thread Jakub Jelinek
On Fri, Jul 08, 2011 at 10:22:16AM -0400, Jason Merrill wrote:
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
> @@ -0,0 +1,11 @@
> +// PR c++/49673: check that test_data goes into .rodata
> +// { dg-options -std=c++0x }
> +// { dg-final { scan-assembler "rodata" } }

That will match only on ELF and perhaps a few other targets, but I'm certain
many targets put read-only data elsewhere.  I'd just guard this
with a few most common target triplets that are known to use .rodata
section.

> +
> +struct Data
> +{
> +  int i;
> +  constexpr Data(int i = 0) : i(i+1) {}
> +};
> +
> +extern const Data test_data = { 1 };

Jakub


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Richard Guenther
On Fri, Jul 8, 2011 at 4:35 PM, Kai Tietz  wrote:
> 2011/7/8 Richard Guenther :
>> On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
>>> 2011/7/7 Paolo Bonzini :
 On 07/07/2011 06:07 PM, Kai Tietz wrote:
>
> +  /* We redo folding here one time for allowing to inspect more
> +     complex reductions.  */
> +  substitute_and_fold (op_with_constant_singleton_value_range,
> +                      vrp_fold_stmt, false);
> +  /* We need to mark this second pass to avoid re-entering of same
> +     edges for switch statments.  */
> +  in_second_pass = true;
>    substitute_and_fold (op_with_constant_singleton_value_range,
>                       vrp_fold_stmt, false);
> +  in_second_pass = false;

 This needs a much better explanation.

 Paolo
>>>
>>> Well, I can work on a better comment.  The complex reduction I mean
>>> here are cases like
>>>
>>> int x;
>>> int y;
>>> _Bool D1;
>>> _Bool D2;
>>> _Bool D3;
>>> int R;
>>>
>>> D1 = x[0..1] != 0;
>>> D2 = y[0..1] != 0;
>>> D3 = D1 & D2
>>> R = (int) D3
>>>
>>> (testcase is already present. See tree-ssa/vrp47.c).
>>>
>>> As VRP in first pass produces (and replaces) to:
>>>
>>> D1 = (_Bool) x[0..1];
>>> D2 = (_Bool) y[0..1];
>>> D3 = D1 & D2
>>> R = (int) D3
>>>
>>> Just in the second pass the reduction
>>>
>>> R = x[0..1] & y[0..1]
>>
>> So why wouldn't that happen during the first pass?  The first
>> pass could change the IL to
>>
>>  D1 = x[0..1] != 0;
>>  D2 = y[0..1] != 0;
>>  D3 = D1 & D2;
>>  R = x & y;
>>
>> if D3 only has a single use.
> No, as D3 would need a type change, and this isn't possible.  If it
> wasn't absolutely clear, this patch to VRP is necessary after patch 2,
> as here D1, D2, and D3 have bool-type, and just R is of type int.

In your example x,y and R are int, so it works with re-using R.

>>> can happen.  In general it is sad that VRP can't insert during pass
>>> new statements right now.  This would cause issues in range-tables,
>>> which aren't designed for insertations.  As otherwise, we could do
>>> also simplify things like
>>>
>>> D1 = x[0..1] != 0;
>>> D2 = y[0..1] == 0;
>>> D3 = D1 & D2
>>> R = (int) D3
>>>
>>> to
>>> R = x[0..1] & (y[0..1] ^ 1)
>>
>> Why that ^ 1?  And why does that confuse the range tables
>> if you re-use R?
> Because we would need to insert a new statement and this isn't allowed
> in VRP. See the comments in VRP and substitute_and_fold.  VRP
> disallows to remove statements or to insert new ones.

That's not a hard limitation.

>>> Regards,
>>> Kai
>


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Fri, Jul 8, 2011 at 4:35 PM, Kai Tietz  wrote:
>> 2011/7/8 Richard Guenther :
>>> On Thu, Jul 7, 2011 at 6:28 PM, Kai Tietz  wrote:
 2011/7/7 Paolo Bonzini :
> On 07/07/2011 06:07 PM, Kai Tietz wrote:
>>
>> +  /* We redo folding here one time for allowing to inspect more
>> +     complex reductions.  */
>> +  substitute_and_fold (op_with_constant_singleton_value_range,
>> +                      vrp_fold_stmt, false);
>> +  /* We need to mark this second pass to avoid re-entering of same
>> +     edges for switch statments.  */
>> +  in_second_pass = true;
>>    substitute_and_fold (op_with_constant_singleton_value_range,
>>                       vrp_fold_stmt, false);
>> +  in_second_pass = false;
>
> This needs a much better explanation.
>
> Paolo

 Well, I can work on a better comment.  The complex reduction I mean
 here are cases like

 int x;
 int y;
 _Bool D1;
 _Bool D2;
 _Bool D3;
 int R;

 D1 = x[0..1] != 0;
 D2 = y[0..1] != 0;
 D3 = D1 & D2
 R = (int) D3

 (testcase is already present. See tree-ssa/vrp47.c).

 As VRP in first pass produces (and replaces) to:

 D1 = (_Bool) x[0..1];
 D2 = (_Bool) y[0..1];
 D3 = D1 & D2
 R = (int) D3

 Just in the second pass the reduction

 R = x[0..1] & y[0..1]
>>>
>>> So why wouldn't that happen during the first pass?  The first
>>> pass could change the IL to
>>>
>>>  D1 = x[0..1] != 0;
>>>  D2 = y[0..1] != 0;
>>>  D3 = D1 & D2;
>>>  R = x & y;
>>>
>>> if D3 only has a single use.
>> No, as D3 would need a type change, and this isn't possible.  If it
>> wasn't absolutely clear, this patch to VRP is necessary after patch 2,
>> as here D1, D2, and D3 have bool-type, and just R is of type int.
>
> In your example x,y and R are int, so it works with re-using R.
Well, if we add a pattern match with a prefixed cast, it works. This
is actually what my patch does, as it finds out that D1's and D2's
operands are of kind int, which matches R's type. So it uses R to
simplify.  This pattern is better than nothing, but more complex
operations can't be handled without introducing new statements.

Eg:

int foo (int a, int b, int c)
{
  if (a < 0 || a > 1 || b < 0 || b > 1 || c < 0 || c > 1)
return -1;
  return (a != 0 | b != 0) | c != 0;
}

Here we get:

int a; int b; int c;
_Bool D1, D2, D3, D4;
int R;

...

D1 = (bool) a;
D2 = (bool) b;
D3 = (bool) c;
D4 = D1 | D2;
D5 = D4 | D3
R = (int) D5;

This can't be simplified by VRP without inserting new statement.

 can happen.  In general it is sad that VRP can't insert during pass
 new statements right now.  This would cause issues in range-tables,
 which aren't designed for insertations.  As otherwise, we could do
 also simplify things like

 D1 = x[0..1] != 0;
 D2 = y[0..1] == 0;
 D3 = D1 & D2
 R = (int) D3

 to
 R = x[0..1] & (y[0..1] ^ 1)
>>>
>>> Why that ^ 1?  And why does that confuse the range tables
>>> if you re-use R?
>> Because we would need to insert a new statement and this isn't allowed
>> in VRP. See the comments in VRP and substitute_and_fold.  VRP
>> disallows to remove statements or to insert new ones.
>
> That's not a hard limitation.

Hmm, ok. I played with it for a while to add this, but some later passes
like switch-range analysis and jump-threading (IIRC) get confused
by this.  AFAIU the major culprits here are the inserted ASSERTs, but
maybe I am wrong about this.

Kai


[Patch,testsuite]: Skip AVR if .text overflows

2011-07-08 Thread Georg-Johann Lay
These tests are too big for AVR: .text (128 KiB) overflows and ld
complains.

Ok to commit?

Johann

* gcc.dg/compat/struct-by-value-16_main.c: Skip AVR.
* gcc.dg/compat/struct-by-value-17_main.c: Skip AVR.
* gcc.dg/compat/struct-by-value-18_main.c: Skip AVR.
* g++.dg/torture/pr31863.C: Skip AVR.
Index: gcc.dg/compat/struct-by-value-18_main.c
===
--- gcc.dg/compat/struct-by-value-18_main.c	(revision 175991)
+++ gcc.dg/compat/struct-by-value-18_main.c	(working copy)
@@ -1,6 +1,7 @@
 /* Test structures passed by value, including to a function with a
variable-length argument lists.  All struct members are of type
_Complex long double.  */
+/* { dg-skip-if "Program too big" { "avr-*-*" } { "*" } { "" } } */
 
 extern void struct_by_value_18_x (void);
 extern void exit (int);
Index: gcc.dg/compat/struct-by-value-17_main.c
===
--- gcc.dg/compat/struct-by-value-17_main.c	(revision 175991)
+++ gcc.dg/compat/struct-by-value-17_main.c	(working copy)
@@ -1,6 +1,7 @@
 /* Test structures passed by value, including to a function with a
variable-length argument lists.  All struct members are of type
_Complex double.  */
+/* { dg-skip-if "Program too big" { "avr-*-*" } { "*" } { "" } } */
 
 extern void struct_by_value_17_x (void);
 extern void exit (int);
Index: gcc.dg/compat/struct-by-value-16_main.c
===
--- gcc.dg/compat/struct-by-value-16_main.c	(revision 175991)
+++ gcc.dg/compat/struct-by-value-16_main.c	(working copy)
@@ -1,6 +1,7 @@
 /* Test structures passed by value, including to a function with a
variable-length argument lists.  All struct members are of type
_Complex float.  */
+/* { dg-skip-if "Program too big" { "avr-*-*" } { "*" } { "" } } */
 
 extern void struct_by_value_16_x (void);
 extern void exit (int);
Index: g++.dg/torture/pr31863.C
===
--- g++.dg/torture/pr31863.C	(revision 175991)
+++ g++.dg/torture/pr31863.C	(working copy)
@@ -1,5 +1,6 @@
 /* { dg-do link } */
 /* { dg-timeout-factor 2.0 } */
+/* { dg-skip-if "Program too big" { "avr-*-*" } { "*" } { "" } } */
 
 namespace Loki
 {


New template for 'cpplib' made available

2011-07-08 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.  (If you have
any questions, send them to .)

A new POT file for textual domain 'cpplib' has been made available
to the language teams for translation.  It is archived as:

http://translationproject.org/POT-files/cpplib-4.6.1.pot

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

Below is the URL which has been provided to the translators of your
package.  Please inform the translation coordinator, at the address
at the bottom, if this information is not current:

ftp://ftp.gnu.org/gnu/gcc/gcc-4.6.1/gcc-4.6.1.tar.bz2

Translated PO files will later be automatically e-mailed to you.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [PATCH] Fix rs6000 vector select RTL patterns (PR target/49621)

2011-07-08 Thread David Edelsohn
On Fri, Jul 8, 2011 at 8:40 AM, Jakub Jelinek  wrote:
> Hi!
>
> As mentioned in the PR, IMNSHO the rs6000 vector_select_* patterns
> are invalid RTL, they compare a vector register in some vector mode
> to const0_rtx instead of corresponding vector zero constant.
>
> The "Comparison Operations" section of RTL docs says:
> "The mode of the comparison is determined by the operands; they
> must both be valid for a common machine mode."
>
> Having one vector mode and one VOIDmode confuses simplify-rtx.c enough
> to create simplifications which lead to ICEs.
>
> The following patch fixes that by using CONST0_RTX (mode) instead
> in the patterns.
>
> Bootstrapped/regtested on powerpc64-linux --with-cpu=default32 and
> powerpc64-linux, ok for trunk/4.6?
>
> 2011-07-08  Jakub Jelinek  
>
>        PR target/49621
>        * config/rs6000/rs6000.c (rs6000_emit_vector_cond_expr): Use
>        CONST0_RTX (dest_mode) instead of const0_rtx as second operand
>        of NE.
>        * config/rs6000/vector.md (vector_select_,
>        vector_select__uns): Change second operand of NE to
>        CONST0_RTX (mode) instead of const0_rtx.
>        * config/rs6000/altivec.md (*altivec_vsel,
>        *altivec_vsel_uns): Expect second operand of NE to be
>        zero_constant of the corresponding vector mode.
>        * config/rs6000/vsx.md (*vsx_xxsel, *vsx_xxsel_uns):
>        Likewise.
>
>        * gcc.target/powerpc/altivec-34.c: New test.

Okay.

I remember Geoff fixing something similar for FP a while ago.  Sorry I
missed this when this series of patches was merged.

Thanks, David


Re: Fix PR 49014

2011-07-08 Thread Vladimir Makarov

On 11-07-08 3:25 AM, Andrey Belevantsev wrote:

On 07.07.2011 20:18, Vladimir Makarov wrote:


The changes in sel-sched.c are ok for me. The i386.md changes look ok for
me too, but you should ask an x86 maintainer to get an approval for the
change.

I think you should describe the attribute in the documentation because it
is common for all targets.

I can not approve the common.opt changes because they make the selective
scheduler the default for the 2nd insn scheduling for all targets. Such a
change should be justified by thorough testing and benchmarking
(compilation speed, code size, performance improvements) on several
platforms (at least on major ones).
I didn't intend to enable sel-sched for all targets, the patch was 
just an RFC to see whether there is an agreement about usefulness of 
such attribute, and the common.opt change was to show how I tested the 
patch.  I am sorry for not making it clear in the mail.


Sorry, for my misunderstanding.  The patch itself with some work could 
be submitted because the check is in the selective scheduling and it is 
used as default just for few targets.


If the check were in haifa-scheduler, we would have a lot of troubles 
and broken targets. Many targets have a lot of subtargets and I am sure 
a lot of their descriptions are not full.  To be honest, I have no idea 
how to solve the problem of absence of some insn dfa descriptions with a 
small pain.  In any case, a big target maintainers involvement will be 
required.


I guess, if we made the check optional, it could help.  The target 
maintainers could switch on the check and fix the insn description 
absence if they want and when they want.
I am planning to check Bernd's thought about whether I selected the 
right -mcpu switch for testing, as I was under impression that 
nowadays this should be autodetected by configure.  I will also modify 
the attribute as suggested.  Then we can discuss further.  I am going 
to leave on vacation soon though so I don't know when exactly I can 
proceed with this.



Ok. Have a nice vacation.






Re: [build] Move libgcov support to toplevel libgcc

2011-07-08 Thread Rainer Orth
"Joseph S. Myers"  writes:

> On Fri, 8 Jul 2011, Rainer Orth wrote:
>
>> And another easy one: moving libgcov over to libgcc.
>
> Do you have any specific plans regarding gcov-io.c and gcov-io.h?  Because 

None so far: the issues outlined in the libgcov submission are currently
the end of what I plan to do on this front.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: Provide 64-bit default Solaris/x86 configuration (PR target/39150)

2011-07-08 Thread Rainer Orth
Ian,

>>> I committed the libgo patch to the upstream repository.
>>
>> I completely missed that: at least in https://go.googlecode.com/hg/
>> libtool.m4 isn't present.
>
> The immediate upstream project for libgo is
> https://gofrontend.googlecode.com/hg/ .  Most of the files in the libgo
> directory in that project come from https://go.googlecode.com/hg/ , but
> not the ones at top level, or in the config or syscalls directory.

didn't know about that.  Love you're using hg there :-)

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Thu, Jul 7, 2011 at 6:07 PM, Kai Tietz  wrote:
>> Index: gcc-head/gcc/tree-vrp.c
>> @@ -2232,6 +2235,7 @@ extract_range_from_binary_expr (value_ra
>>      some cases.  */
>>   if (code != BIT_AND_EXPR
>>       && code != TRUTH_AND_EXPR
>> +      && code != BIT_IOR_EXPR
>
> Huh?  So how would VARYING | x ever produce something better
> than VARYING?

Because BIT_IOR_EXPR might be a 1-bit precision operation and so
equivalent to TRUTH_OR_EXPR. It might be that BIT_XOR_EXPR is worth
adding here too, as for a one-bit precision typed expression it is
equivalent to TRUTH_XOR_EXPR.

Kai


[PATCH 0/3] Merge from graphite branch to trunk.

2011-07-08 Thread Sebastian Pop
Hi, there was only one patch committed to the graphite branch that
matters to also have in trunk:

  Remove continue from loop, add one more assert.

The other patches are squashed into the two commits:

  Only update ChangeLog.graphite.

these are related to the graphite-opencl infrastructure and that is
not yet ready to be committed to trunk.

I am testing this patchset on amd64-linux and I will commit to trunk
once it passes regstrap.

Sebastian


[PATCH 3/3] Only update ChangeLog.graphite.

2011-07-08 Thread Sebastian Pop
---
 gcc/ChangeLog.graphite |   22 ++
 1 files changed, 22 insertions(+), 0 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 628fb7e..b2c1b1c 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,3 +1,25 @@
+2011-05-26  Alexey Kravets  
+
+   PR middle-end/49147
+   * graphite-opencl-codegen.c
+   (opencl_get_perfect_nested_loop_depth): Use
+   enabled_dependency_in_clast_loop_p instead of
+   dependency_in_clast_loop_p.
+   * graphite-opencl.c (opencl_transform_stmt_list): Ditto.
+   (enabled_dependency_in_clast_loop_p): New.
+   * graphite-opencl.h (enabled_dependency_in_clast_loop_p): Export.
+
+   * gcc.dg/graphite/pr49147.c: New test.
+
+2011-05-24  Sebastian Pop  
+
+   * graphite-opencl-codegen.c (opencl_print_local_vars): Fix typo.
+
+2011-05-24  Sebastian Pop  
+
+   * graphite-dependences.c (graphite_outer_subscript_bound): Remove
+   memory leaks, add one more assert.
+
 2011-05-24  Sebastian Pop  
 
* graphite-dependences.c (build_alias_set_powerset): Remove
-- 
1.7.4.1



[PATCH 1/3] Only update ChangeLog.graphite.

2011-07-08 Thread Sebastian Pop
---
 gcc/ChangeLog.graphite |   27 +++
 1 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index c3fd009..2a14fc4 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,3 +1,30 @@
+2011-05-24  Sebastian Pop  
+
+   * graphite-dependences.c (graphite_outer_subscript_bound): Remove
+   unused parameter.
+   * graphite-dependences.h (graphite_outer_subscript_bound): Same.
+   * graphite-opencl-meta-clast.c (opencl_set_meta_rw_flags): Update
+   call to graphite_outer_subscript_bound.
+   * graphite-opencl.c (opencl_get_indirect_size): Same.
+
+2011-05-24  Alexey Kravets  
+
+   * graphite-opencl.c (opencl_private_var_name_p): Removed.
+   (graphite_artificial_array_p): Removed.
+   (opencl_data_create): Use zero_dim_array_p instead of
+   graphite_artificial_array_p.
+
+2011-01-25  Sebastian Pop  
+
+   * graphite-opencl.c (opencl_create_function_call): Use cl_khr_fp64
+   pragma extension only when it is defined.  Use cl_amd_fp64 when it
+   is defined.
+
+2011-01-23  Tobias Grosser  
+
+   * doc/install.texi: Replace '/' by an or and give the exact
+   version number of cloog-0.16.1
+
 2011-01-20  Sebastian Pop  
 
* graphite-sese-to-poly.c (dr_indices_valid_in_loop): New.
-- 
1.7.4.1



[PATCH 2/3] Remove continue from loop, add one more assert.

2011-07-08 Thread Sebastian Pop
2011-07-08  Sebastian Pop  

* graphite-dependences.c (build_alias_set_powerset): Remove
continue from loop, add one more assert.
---
 gcc/ChangeLog  |5 +
 gcc/ChangeLog.graphite |5 +
 gcc/graphite-dependences.c |   15 ++-
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1cb2ce0..d207316 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2011-07-08  Sebastian Pop  
+
+   * graphite-dependences.c (build_alias_set_powerset): Remove
+   continue from loop, add one more assert.
+
 2011-07-08  Richard Guenther  
 
* fold-const.c (fold_binary_loc): Remove index +p PTR -> PTR +p index
diff --git a/gcc/ChangeLog.graphite b/gcc/ChangeLog.graphite
index 2a14fc4..628fb7e 100644
--- a/gcc/ChangeLog.graphite
+++ b/gcc/ChangeLog.graphite
@@ -1,5 +1,10 @@
 2011-05-24  Sebastian Pop  
 
+   * graphite-dependences.c (build_alias_set_powerset): Remove
+   continue from loop, add one more assert.
+
+2011-05-24  Sebastian Pop  
+
* graphite-dependences.c (graphite_outer_subscript_bound): Remove
unused parameter.
* graphite-dependences.h (graphite_outer_subscript_bound): Same.
diff --git a/gcc/graphite-dependences.c b/gcc/graphite-dependences.c
index b9b1d1b..081aa69 100644
--- a/gcc/graphite-dependences.c
+++ b/gcc/graphite-dependences.c
@@ -171,19 +171,16 @@ build_alias_set_powerset 
(ppl_Pointset_Powerset_C_Polyhedron_t alias_powerset,
 {
   ppl_dimension_type *ds;
   ppl_dimension_type access_dim;
-  unsigned i, pos = 0;
+  unsigned i, pos;
 
   ppl_Pointset_Powerset_C_Polyhedron_space_dimension (alias_powerset,
  &access_dim);
-  ds = XNEWVEC (ppl_dimension_type, access_dim-1);
-  for (i = 0; i < access_dim; i++)
-{
-  if (i == alias_dim)
-   continue;
+  ds = XNEWVEC (ppl_dimension_type, access_dim - 1);
+  gcc_assert (alias_dim < access_dim);
 
-  ds[pos] = i;
-  pos++;
-}
+  for (pos = 0, i = 0; i < access_dim; i++)
+if (i != alias_dim)
+  ds[pos++] = i;
 
   ppl_Pointset_Powerset_C_Polyhedron_remove_space_dimensions (alias_powerset,
  ds,
-- 
1.7.4.1



New template for 'gcc' made available

2011-07-08 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.  (If you have
any questions, send them to .)

A new POT file for textual domain 'gcc' has been made available
to the language teams for translation.  It is archived as:

http://translationproject.org/POT-files/gcc-4.6.1.pot

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

Below is the URL which has been provided to the translators of your
package.  Please inform the translation coordinator, at the address
at the bottom, if this information is not current:

ftp://ftp.gnu.org/gnu/gcc/gcc-4.6.1/gcc-4.6.1.tar.bz2

Translated PO files will later be automatically e-mailed to you.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
Hello,

This is the reworked patch.  It fixes vrp to handle bitwise one-bit
precision typed operations and to handle some type hoisting cases.
Some cases can't be handled as long as vrp doesn't allow inserting
new statements in the folding pass.
To get a better match in the first pass, VRP now steps first -> last
within each BB for stmt-folding.  For this I extended the
substitute_and_fold function by a new argument, which indicates
whether scanning within a BB shall be done from first to last, or
from last to first.  In this new patch I removed the part that re-did
the stmt-fold pass, as this is no longer necessary after changing the
folding direction within a BB.

This modification of the scanning direction plus the type-cast handling
allows removing the dom dump from the testcase tree-ssa/vrp47.c, as all
cases are now handled within vrp itself.

Bootstrapped and regression tested for all standard-languages (plus
Ada and Obj-C++) on host x86_64-pc-linux-gnu.

Ok for apply?

Regards,
Kai

ChangeLog gcc/

2011-07-08  Kai Tietz  

* tree-ssa-ccp.c (ccp_finalize): Add new
argument for substitute_and_fold.
* tree-ssa-copy.c (fini_copy_prop): Likewise.
* tree-ssa-propagate.h (substitute_and_fold):
Likewise.
* tree-ssa-propagate.c (substitute_and_fold):
Likewise.
* tree-vrp.c (vrp_finalize): Likewise.
(extract_range_from_binary_expr): Add handling
for BIT_IOR_EXPR, BIT_AND_EXPR, and BIT_NOT_EXPR.
(register_edge_assert_for_1): Add handling for 1-bit
BIT_IOR_EXPR and BIT_NOT_EXPR.
(register_edge_assert_for): Add handling for 1-bit
BIT_IOR_EXPR.
(ssa_name_get_inner_ssa_name_p): New helper function.
(ssa_name_get_cast_to_p): New helper function.
(simplify_truth_ops_using_ranges): Handle prefixed
cast instruction for result, and add support for one
bit precision BIT_IOR_EXPR, BIT_AND_EXPR, BIT_XOR_EXPR,
and BIT_NOT_EXPR.
(simplify_stmt_using_ranges): Add handling for one bit
precision BIT_IOR_EXPR, BIT_AND_EXPR, BIT_XOR_EXPR,
and BIT_NOT_EXPR.

ChangeLog gcc/testsuite

2011-07-08  Kai Tietz  

* gcc.dg/tree-ssa/vrp47.c: Remove dom-output
and adjust testcase for vrp output analysis.

Index: gcc/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c
===
--- gcc.orig/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c  2011-01-11
20:36:16.0 +0100
+++ gcc/gcc/testsuite/gcc.dg/tree-ssa/vrp47.c   2011-07-08
17:49:55.016847200 +0200
@@ -4,7 +4,7 @@
jumps when evaluating an && condition.  VRP is not able to optimize
this.  */
 /* { dg-do compile { target { ! "mips*-*-* s390*-*-*  avr-*-*
mn10300-*-*" } } } */
-/* { dg-options "-O2 -fdump-tree-vrp -fdump-tree-dom" } */
+/* { dg-options "-O2 -fdump-tree-vrp" } */
 /* { dg-options "-O2 -fdump-tree-vrp -fdump-tree-dom -march=i586" {
target { i?86-*-* && ilp32 } } } */

 int h(int x, int y)
@@ -36,13 +36,10 @@ int f(int x)
0 or 1.  */
 /* { dg-final { scan-tree-dump-times "\[xy\]\[^ \]* !=" 0 "vrp1" } } */

-/* This one needs more copy propagation that only happens in dom1.  */
-/* { dg-final { scan-tree-dump-times "x\[^ \]* & y" 1 "dom1" } } */
-/* { dg-final { scan-tree-dump-times "x\[^ \]* & y" 1 "vrp1" { xfail
*-*-* } } } */
+/* { dg-final { scan-tree-dump-times "x\[^ \]* & y" 1 "vrp1" } } */

 /* These two are fully simplified by VRP.  */
 /* { dg-final { scan-tree-dump-times "x\[^ \]* \[|\] y" 1 "vrp1" } } */
 /* { dg-final { scan-tree-dump-times "x\[^ \]* \\^ 1" 1 "vrp1" } } */

 /* { dg-final { cleanup-tree-dump "vrp\[0-9\]" } } */
-/* { dg-final { cleanup-tree-dump "dom\[0-9\]" } } */
Index: gcc/gcc/tree-ssa-ccp.c
===
--- gcc.orig/gcc/tree-ssa-ccp.c 2011-06-30 11:30:12.0 +0200
+++ gcc/gcc/tree-ssa-ccp.c  2011-07-08 17:20:22.378750800 +0200
@@ -880,7 +880,8 @@ ccp_finalize (void)

   /* Perform substitutions based on the known constant values.  */
   something_changed = substitute_and_fold (get_constant_value,
-  ccp_fold_stmt, true);
+  ccp_fold_stmt, true,
+  true);

   free (const_val);
   const_val = NULL;
Index: gcc/gcc/tree-ssa-copy.c
===
--- gcc.orig/gcc/tree-ssa-copy.c2011-06-17 11:52:51.0 +0200
+++ gcc/gcc/tree-ssa-copy.c 2011-07-08 17:19:32.464412500 +0200
@@ -778,7 +778,7 @@ fini_copy_prop (void)

   /* Don't do DCE if we have loops.  That's the simplest way to not
  destroy the scev cache.  */
-  substitute_and_fold (get_value, NULL, !current_loops);
+  substitute_and_fold (get_value, NULL, !current_loops, true);

   free (copy_of);
 }
Index: gcc/gcc/tree-ssa-propagate.c
===
--- gcc.orig/gcc/

Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Michael Matz
Hi,

On Fri, 8 Jul 2011, Kai Tietz wrote:

> This is the reworked patch, It fixes vrp to handle bitwise one-bit 
> precision typed operations and to handle some type hoisting cases, Some 
> cases can't be handled as long as vrp doesn't allows to insert new 
> statements in folding pass. To have in first pass better match, VRP uses 
> for stmt-folding now for each BB first -> last stepping.  I extended for 
> this function substitute_and_fold function by an new argument, which 
> indicates if scanning within BB shall be done from first to last, or 
> from last to first. I removed in this new patch the part of re-doing 
> stmt-fold pass, as this is no longer necessary by changing folding 
> direction within BB.

You still add BIT_IOR_EXPR for POINTER_TYPE_P, which seems strange.  All 
these test for TYPE_PRECISION being 1 (and then handling BIT_IOR/AND_EXPR 
like TRUTH_IOR/AND_EXPR) aren't necessary if you extend the general 
handling for BIT_IOR_EXPR (for instance) to deal with not only constant 
1, but simply handling all-ones constants specially.  That is replace 
integer_onep with integer_all_onesp at certain places.

Because also for wider than 1-bit precision it's the case that we can 
infer useful ranges out of "VARYING | all-ones".

Certainly the special casing on 1-bit is ugly.  Work towards making 
tree-vrp more lean and handling cases more general instead of piling 
special case over special case.


Ciao,
Michael.


Re: __sync_swap* with acq/rel/full memory barrier semantics

2011-07-08 Thread Aldy Hernandez



Some names include "sync" and some don't?


Well, I was going to blame Aldy :-) but then I went to look at this, and
that's the same way *all* the other __sync instructions seem to be.

ie:

builtins.c:expand_builtin_lock_test_and_set (enum machine_mode mode,
tree exp,
builtins.c: case BUILT_IN_LOCK_TEST_AND_SET_1:
builtins.c: case BUILT_IN_LOCK_TEST_AND_SET_2:
builtins.c: case BUILT_IN_LOCK_TEST_AND_SET_4:

whereas everything else is 'sync_lock_test_and_set'..

So i guess it falls to prior art... I assume Aldy just cut-and-pasted
for his new routine and just changed the names in the same format.


Correct, this was the way all the other sync builtins were implemented. 
 I found it odd as well, but wanted to keep my changes to a minimum.


Re: [patch tree-optimization]: [3 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Michael Matz :
> Hi,
>
> On Fri, 8 Jul 2011, Kai Tietz wrote:
>
>> This is the reworked patch, It fixes vrp to handle bitwise one-bit
>> precision typed operations and to handle some type hoisting cases, Some
>> cases can't be handled as long as vrp doesn't allows to insert new
>> statements in folding pass. To have in first pass better match, VRP uses
>> for stmt-folding now for each BB first -> last stepping.  I extended for
>> this function substitute_and_fold function by an new argument, which
>> indicates if scanning within BB shall be done from first to last, or
>> from last to first. I removed in this new patch the part of re-doing
>> stmt-fold pass, as this is no longer necessary by changing folding
>> direction within BB.
>
> You still add BIT_IOR_EXPR for POINTER_TYPE_P, which seems strange.
Yes, I am aware of that. I added old behavior for BIT_IOR_EXPR here as otherwise
it would run into the gcc_unreachable case. As here we want to say
varying ... Well, even this
is not necessarily true.  As a bitwise binary op with different widths
on its two argument sides might
still have a smaller range than the type itself.
E.g.: x[0..255] | y[0..1024] has a limited range in result of max.

As we handle here value-ranges and not bit-masks for VR-inspection,
there are some limitations, too.
Eg: (x[mask:0xf0] | y[mask:0x7]) & 8 is for sure zero.

>  All
> these test for TYPE_PRECISION being 1 (and then handling BIT_IOR/AND_EXPR
> like TRUTH_IOR/AND_EXPR) aren't necessary if you extend the general
> handling for BIT_IOR_EXPR (for instance) to deal with not only constant
> 1, but simply handling all-ones constants specially.  That is replace
> integer_onep with integer_all_onesp at certain places.
Well, in some cases this is true, but checking for precision has the
advantages that signed cases are covered here and we need not to
compare range min/max.  Nevertheless some assumptions on combinations
are only true for one-bit precision types.

> Because also for wider than 1-bit precision it's the case that we can
> infer usefull ranges out of "VARYING | all-ones".
Yes, this might have advantages on some inspections.

Kai


Re: [PATCH] New IPA-CP with real function cloning

2011-07-08 Thread Martin Jambor
Hi,

On Thu, Jul 07, 2011 at 06:03:07PM +0200, Jan Hubicka wrote:
> Hi,
> patch is long, so let me review it in more passes.

Fair enough.

> > 
> > 
> > 2011-06-22  Martin Jambor  
> > 
> > * ipa-prop.h: Include alloc-pool.h.
> > (ipa_lattice_type): Removed.
> > (ipcp_value_source): New type.
> > (ipcp_value): Likewise.
> > (ipcp_values_pool): Declare.
> > (ipcp_sources_pool): Likewise.
> > (ipa_param_descriptor): Removed.
> > (ipcp_lattice): Removed fields type and constant. Added fields decl,
> > values, values_count, contains_variable, bottom, used and virt_call.
> > (ipa_node_params): New fields lattices, known_vals,
> > clone_for_all_contexts and node_dead, removed fields params and
> > count_scale.
> > (ipa_get_param): Updated.
> > (ipa_param_cannot_devirtualize_p): Removed.
> > (ipa_param_types_vec_empty): Likewise.
> > (ipa_edge_args): New field next_edge_clone.
> > (ipa_func_list): Removed.
> > (ipa_init_func_list): Removed declaration.
> > (ipa_push_func_to_list_1): Likewise.
> > (ipa_pop_func_from_list): Likewise.
> > (ipa_push_func_to_list): Removed.
> > (ipa_lattice_from_jfunc): Remove declaration.
> > (ipa_get_jf_pass_through_result): Declare.
> > (ipa_get_jf_ancestor_result): Likewise.
> > (ipa_value_from_jfunc): Likewise.
> > (ipa_get_lattice): Update.
> > (ipa_lat_is_single_const): New function.
> > * ipa-prop.c (ipa_push_func_to_list_1): Removed.
> > (ipa_init_func_list): Likewise.
> > (ipa_pop_func_from_list): Likewise.
> > (ipa_get_param_decl_index): Fix coding style.
> > (ipa_populate_param_decls): Update to use new lattices.
> > (ipa_initialize_node_params): Likewise.
> > (visit_ref_for_mod_analysis): Likewise.
> > (ipa_analyze_params_uses): Likewise.
> > (ipa_free_node_params_substructures): Likewise.
> > (ipa_edge_duplication_hook): Add the new edge to the list of edge
> > clones.
> > (ipa_node_duplication_hook): Update to use new lattices.
> > (ipa_free_all_structures_after_ipa_cp): Free alloc pools.
> > (ipa_free_all_structures_after_iinln): Likewise.
> > (ipa_write_node_info): Update to use new lattices.
> > (ipa_read_node_info): Likewise.
> > (ipa_get_jf_pass_through_result): New function.
> > (ipa_get_jf_ancestor_result): Likewise.
> > (ipa_value_from_jfunc): Likewise.
> > (ipa_cst_from_jfunc): Reimplemented using ipa_value_from_jfunc.
> > * ipa-cp.c: Reimplemented.
> > * params.def (PARAM_DEVIRT_TYPE_LIST_SIZE): Removed.
> > (PARAM_IPA_CP_VALUE_LIST_SIZE): New parameter.
> > * Makefile.in (IPA_PROP_H): Added alloc-pool.h to dependencies.
> > 
> > * doc/invoke.texi (devirt-type-list-size): Removed description.
> > (ipa-cp-value-list-size): Added description.
> > 
> > * testsuite/gcc.dg/ipa/ipa-1.c: Updated testcase dump scan.
> > * testsuite/gcc.dg/ipa/ipa-2.c: Likewise.
> > * testsuite/gcc.dg/ipa/ipa-3.c: Likewise and made functions static.
> > * testsuite/gcc.dg/ipa/ipa-4.c: Updated testcase dump scan.
> > * testsuite/gcc.dg/ipa/ipa-5.c: Likewise.
> > * testsuite/gcc.dg/ipa/ipa-7.c: Xfail test.
> > * testsuite/gcc.dg/ipa/ipa-8.c: Updated testcase dump scan.
> > * testsuite/gcc.dg/ipa/ipacost-1.c: Likewise.
> > * testsuite/gcc.dg/ipa/ipacost-2.c: Likewise.
> > * testsuite/gcc.dg/ipa/ipcp-1.c: New test.
> > * testsuite/gcc.dg/ipa/ipcp-2.c: Likewise.
> > * testsuite/gcc.dg/tree-ssa/ipa-cp-1.c: Updated testcase.
> 
> > /* Interprocedural analyses.
> >Copyright (C) 2005, 2007, 2008, 2009, 2010
> 2011
> >Free Software Foundation, Inc.
> > 
> > 
> > /* The following definitions and interfaces are used by
> >interprocedural analyses or parameters.  */
> > 
> > /* ipa-prop.c stuff (ipa-cp, indirect inlining):  */
> 
> I was bit thinking about it and probably we could make ipa-prop
> and ipa-inline-analysis to be stand alone analysis passes, instead of
> something called either from inliner or ipa-cp analysis stage. But
> that could be done incrementally.

As I said in the first introductory mail, the summary generation part
is not really affected by this patch in any serious way.

> 
> > 
> > /* A jump function for a callsite represents the values passed as actual
> >arguments of the callsite. There are three main types of values :
> > 
> >Pass-through - the caller's formal parameter is passed as an actual
> >   argument, possibly one simple operation performed on it.
> >Constant - a constant (is_gimple_ip_invariant)is passed as an actual
> >   argument.
> >Unknown  - neither of the above.
> > 
> >IPA_JF_CONST_MEMBER_PTR stands for C++ member pointers, it is a special
> >constant in this regard.  Other constants are represented with 
> > IPA_JF_CONST.
> 
> While we are at docs, I would bit expand. It seems to me that for someone not 
> familiar
> with the

RFA PR regression/49498

2011-07-08 Thread Jeff Law
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1


As detailed in the PR, improvements to jump threading caused the
relatively simple guard predicates in this testcase to become
significantly more complex.  The predicate complexity is enough to
confuse the predicate-aware pruning of bogus uninitialized variable
warnings.

Note the actual runtime flow control was improved by jump threading,
which was doing exactly what it should.

Based on David's comments, it's unlikely the predicate-aware code in
tree-ssa-uninit.c is going to be able to handle the more complex guards.
 So I'm turning off DOM (jump threading) for this testcase.

OK for trunk?


-BEGIN PGP SIGNATURE-
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org/

iQEcBAEBAgAGBQJOFz2VAAoJEBRtltQi2kC7qMQH/2GMEXQrFZzWZev2Rd7CH20F
x7SsUDtkPW5K5pd1uLJOTsGh7fwr8l173n27GQVR5DN5OCLmoxWDrpsUeaMRd4bg
LcZun7h+NGSrqxna/LExs9PBNR1P9blh1X6/LCqmWuo8hIqJ5HDUDK6674iD4C8p
I71W25FYPgAno9Okm0UiBKOaZjRJdtfiZqMSgM9HreagYbHQcMYlcWsyc9irXM9b
oxkaFzM+Aq5ZxpulpD0NCJ4aGMe6u2+FymrsjbbrPfnB2y7MY1DklxA0L7NO893d
dxZ5N3Fi1adDsUP7Oh/0PNGQkB6HjDlAR6gV0oyUAamswn/Owo6lAYvQdNTMUAk=
=VYjS
-END PGP SIGNATURE-
* gcc.dg/uninit-pred-8_b.c: Disable dominator optimizations.

Index: gcc.dg/uninit-pred-8_b.c
===
*** gcc.dg/uninit-pred-8_b.c(revision 176000)
--- gcc.dg/uninit-pred-8_b.c(working copy)
***
*** 1,6 
  
  /* { dg-do compile } */
! /* { dg-options "-Wuninitialized -O2" } */
  
  int g;
  void bar();
--- 1,6 
  
  /* { dg-do compile } */
! /* { dg-options "-Wuninitialized -fno-tree-dominator-opts -O2" } */
  
  int g;
  void bar();


Re: C++ PATCH for c++/49673 (constexpr init should go in rodata)

2011-07-08 Thread Jason Merrill

On 07/08/2011 10:35 AM, Jakub Jelinek wrote:

That will match only on ELF and perhaps a few other targets, but I'm certain
many targets put read-only data elsewhere.  I'd just guard this
with a few most common target triplets that are known to use .rodata
section.


Done, thanks.  I've also removed the unneeded check for COMPLETE_TYPE_P.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit e24d93b0a1e1df42c4f1197515e7e2fbe211a0cb
Author: jason 
Date:   Fri Jul 8 16:46:28 2011 +

constexpr-rom.C tweak

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@176049 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
index e2edb2e..144be2d 100644
--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C
@@ -1,6 +1,6 @@
 // PR c++/49673: check that test_data goes into .rodata
 // { dg-options -std=c++0x }
-// { dg-final { scan-assembler "rodata" } }
+// { dg-final { scan-assembler "rodata" { target { *-*-linux-gnu || *-*-elf } } } }
 
 struct Data
 {
commit e2f7f86c6d5e734fe393217eaca8d4da9969f343
Author: Jason Merrill 
Date:   Fri Jul 8 10:27:55 2011 -0400

	* typeck.c (cp_apply_type_quals_to_decl): Don't check
	COMPLETE_TYPE_P either.

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index f0d68c3..5febff5 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -8130,12 +8130,10 @@ cp_apply_type_quals_to_decl (int type_quals, tree decl)
   /* We used to check TYPE_NEEDS_CONSTRUCTING here, but now a constexpr
  constructor can produce constant init, so rely on cp_finish_decl to
  clear TREE_READONLY if the variable has non-constant init.  */
-  if (/* If the type isn't complete, we don't know yet if it will need
-	 constructing.  */
-  !COMPLETE_TYPE_P (type)
-  /* If the type has a mutable component, that component might be
-	 modified.  */
-  || TYPE_HAS_MUTABLE_P (type))
+
+  /* If the type has a mutable component, that component might be
+ modified.  */
+  if (TYPE_HAS_MUTABLE_P (type))
 type_quals &= ~TYPE_QUAL_CONST;
 
   c_apply_type_quals_to_decl (type_quals, decl);


C++ PATCH for c++/45603 (ICE with user declaration of __cxa_guard_acquire)

2011-07-08 Thread Jason Merrill

The testcase is kind of bogus, but we ought to be more robust.

Tested x86_64-pc-linux-gnu, applied to trunk.
commit f6ac9e35ced48b4e646f1cb0a8b5cab869ad91f6
Author: Jason Merrill 
Date:   Fri Jul 8 13:05:01 2011 -0400

	PR c++/45603
	* decl.c (expand_static_init): Don't get confused by user
	declaration of __cxa_guard_acquire.

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index e36739b..266d049 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -6646,7 +6646,6 @@ expand_static_init (tree decl, tree init)
   tree if_stmt = NULL_TREE, inner_if_stmt = NULL_TREE;
   tree then_clause = NULL_TREE, inner_then_clause = NULL_TREE;
   tree guard, guard_addr;
-  tree acquire_fn, release_fn, abort_fn;
   tree flag, begin;
 
   /* Emit code to perform this initialization but once.  This code
@@ -6696,29 +6695,31 @@ expand_static_init (tree decl, tree init)
 
   if (flag_threadsafe_statics)
 	{
+	  tree vfntype = NULL_TREE;
+	  tree acquire_name, release_name, abort_name;
+	  tree acquire_fn, release_fn, abort_fn;
 	  guard_addr = build_address (guard);
 
-	  acquire_fn = get_identifier ("__cxa_guard_acquire");
-	  release_fn = get_identifier ("__cxa_guard_release");
-	  abort_fn = get_identifier ("__cxa_guard_abort");
-	  if (!get_global_value_if_present (acquire_fn, &acquire_fn))
-	{
-	  tree vfntype = build_function_type_list (void_type_node,
-		   TREE_TYPE (guard_addr),
-		   NULL_TREE);
-	  acquire_fn = push_library_fn
-		(acquire_fn, build_function_type_list (integer_type_node,
+	  acquire_name = get_identifier ("__cxa_guard_acquire");
+	  release_name = get_identifier ("__cxa_guard_release");
+	  abort_name = get_identifier ("__cxa_guard_abort");
+	  acquire_fn = identifier_global_value (acquire_name);
+	  release_fn = identifier_global_value (release_name);
+	  abort_fn = identifier_global_value (abort_name);
+	  if (!acquire_fn)
+	acquire_fn = push_library_fn
+	  (acquire_name, build_function_type_list (integer_type_node,
 		   TREE_TYPE (guard_addr),
 		   NULL_TREE),
-		 NULL_TREE);
-	  release_fn = push_library_fn (release_fn, vfntype, NULL_TREE);
-	  abort_fn = push_library_fn (abort_fn, vfntype, NULL_TREE);
-	}
-	  else
-	{
-	  release_fn = identifier_global_value (release_fn);
-	  abort_fn = identifier_global_value (abort_fn);
-	}
+	   NULL_TREE);
+	  if (!release_fn || !abort_fn)
+	vfntype = build_function_type_list (void_type_node,
+		TREE_TYPE (guard_addr),
+		NULL_TREE);
+	  if (!release_fn)
+	release_fn = push_library_fn (release_name, vfntype, NULL_TREE);
+	  if (!abort_fn)
+	abort_fn = push_library_fn (abort_name, vfntype, NULL_TREE);
 
 	  inner_if_stmt = begin_if_stmt ();
 	  finish_if_stmt_cond (build_call_n (acquire_fn, 1, guard_addr),
diff --git a/gcc/testsuite/g++.dg/abi/guard3.C b/gcc/testsuite/g++.dg/abi/guard3.C
new file mode 100644
index 000..fd9d00e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/abi/guard3.C
@@ -0,0 +1,14 @@
+// PR c++/45603
+
+extern "C" int __cxa_guard_acquire();
+
+struct A
+{
+  ~A();
+};
+
+A* f()
+{
+  static A a;
+  return &a;
+}


Re: CFT: Move unwinder to toplevel libgcc

2011-07-08 Thread Rainer Orth
Steve,

> I just tried builds on ia64 linux and HP-UX and both builds failed.  I
> am re-trying the HP-UX build with --with-system-libunwind to see if that
> changes things but that should be the default on IA64 HP-UX.
>
> On Linux (debian) the build stopped with:
>
> /test/big-foot1/gcc/nightly/gcc-ia64-debian-linux-gnu-trunk/ia64-debian-linux-gnu/bin/ranlib
>  libgcov.a
> make[3]: *** No rule to make target
> `/test/big-foot1/gcc/nightly/src/trunk/libgcc/unwind-sjlj.c', needed by
> `unwind-sjlj.o'.  Stop.
[...]
> The patch appeared to install correctly into my source tree and I ran 
> autoreconf to regenerate the
> configure files.  It looks like patch didn't handle the unwind files that 
> moved.  I will try doing
> that by hand and see if that fixes things.

the diff was a git-style patch with renames, which AFAIK only git patch
can handle correctly.  I'm using a mercurial mirror of the gcc repo and
mercurial queues for development, and that style makes it far easier to
see what's going on, compared to the usual remove-and-add which makes
patches excessively long.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: CFT: Move unwinder to toplevel libgcc

2011-07-08 Thread Rainer Orth
Steve,

> It looks like the GCC build is trying to compile unwind-ia64.c on IA64
> HP-UX even though it should not use or need this file.  Using
> --with-system-libunwind doesn't seem to help.  I am not sure where this
> should be handled under the new setup.  Previously config.gcc would
> either include or not include t-glibc-libunwind in the Makefile to build
> or not build this file.  This might be coming from t-eh-ia64 rather
> than t-glibc-libunwind.  Both of these include unwind-ia64.c.

I see what's going on: before my patch, ia64*-*-hpux* would use

tmake_file="ia64/t-ia64 ia64/t-hpux"

The former has

LIB2ADDEH = $(srcdir)/config/ia64/unwind-ia64.c $(srcdir)/unwind-sjlj.c \
  $(srcdir)/unwind-c.c

the latter

LIB2ADDEH = $(srcdir)/unwind-c.c

overriding the ia64 default.

Unfortunately, I got the ordering wrong in libgcc:

tmake_file="ia64/t-hpux ia64/t-eh-ia64"

To fix this, ia64/t-eh-ia64 can go completely.  This should restore
things as they were before.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: CFT: Move unwinder to toplevel libgcc

2011-07-08 Thread Rainer Orth
Steve,

> It looks like this is caused by having two '#include "md-unwind-support.h"'
> lines in unwind-ia64.c.  If I remove the first one (mixed in with the other
> includes at the top of the file) and leave the second one then the file
> compiles.  I still get the warning about discarding the cast but I guess that
> is OK.  I have a bootstrap running on IA64 Linux and it looks good so far.
> I haven't had a chance to follow-up on IA64 HP-UX.

I added the first #include to provide the definitions of UNW_IVMS_MODE
and MD_UNW_COMPATIBLE_PERSONALITY_P, not realizing that there was
another one already.

I guess the default definition of the latter can be moved below the
second #include "md-unwind-support.h"?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] New IPA-CP with real function cloning

2011-07-08 Thread Jan Hubicka
> > > /* Structure holding data required to describe a pass-through jump 
> > > function.  */
> > > 
> > > struct GTY(()) ipa_pass_through_data
> > > {
> > >   /* If an operation is to be performed on the original parameter, this 
> > > is the
> > >  second (constant) operand.  */
> > >   tree operand;
> > >   /* Number of the caller's formal parameter being passed.  */
> > >   int formal_id;
> > 
> > > I probably should use this in ipa-inline-analysis where I call it for some 
> > > reason operand_num :)
> 
> So far I have resisted the urge to rename this but it pre-dates my
> involvement with gcc.  I'd like it to be called parm_index but since
> we might want to use it also for global variables and we might need
> something more complex for also handling parts of aggregates, I left
> it for later.

parm_index sounds to me good, too.  formal_id is the name used in paper
so it makes sense, but its meaning is unobvious.
We could rename it later, together with ipa-inline-analysis one
(I think consistency here is more important).

Yep, we will have to see what the jfuncs will look like once they
refer to global vars and parts of aggregates..
> 
> > 
> > > 
> > > struct ipcp_value;
> > 
> > I wonder if the jump functions used by several passes and ipcp
> > internal types both have to go into the same header?
> 
> Well, I originally wanted ipa-prop to provide services to the outside
> world like ipa-inline-analysis.c and to have ipa-cp self-contained.
> But I guess the data separation is more important.  So if we move
> ipa_cst_from_jfunc (and its 3 friends) to ipa-cp and move
> ipcp_lattice.decl and ipcp_lattice.used to a special structure, I
> might move ipcp_value_source, ipcp_value, and ipcp_lattice altogether
> to ipa-cp.

I also think that having ipa-prop as a generic module for propagating &
looking into args is the goal.
Separating datastructures from ipa-cp definitely makes this more obvious.

I've done that to the inliner, too. inline_summary is now all about the
function body size/time estimates and the inliner heuristics do have
their own datastructures in their own space.
> 
> At the moment I'm not sure whether I want to do this as a followup
> patch or incorporate it in the changes.  I think I'll start with the
> latter and revert to the former if it is too invasive to parts which
> have not been touched so far by the change.

Let's see, I am happy with the patch going in with the current organization of
datastructures if we move to privatizing it later.
> > > {
> > >   /* Pointer to an array of structures describing individual formal
> > >  parameters.  */
> > >   struct ipcp_lattice *lattices;
> > 
> > Hmm, how do we get around the need to mark this GTY() here?  I.e. are we sure 
> > that all the known_vals
> > must be referenced from elsewhere at ggc time?
> 
> (Scalar) constants that are results of arithmetic jump functions may
> not be referenced from elsewhere, everything else is referenced from
> the jump functions.  If it is a problem it is already present in the
> current IPA-CP.  ipa_node_params and lattices are not GTYed there
> either.

Hmm, I guess it is not really a problem only because the lattices are used
only in ipa-cp so the values do not really live across a GGC call.

Well, this will be solved by separating out the ipa-cp datastructures, so
it is not a problem.
> 
> > I would also slowly switch those things to VECtors..
> 
> Perhaps, but individual lattices are and always have been accessed
> through ipa_get_lattice which checks bounds and so there's no big
> reason to do that.

Yep, I added the bounds check when I was debugging.  Not a big deal,
it's just that we sort of have an agreement to use our VECtor API where it fits.
> 
> > 
> > >   /* Only for versioned nodes this field would not be NULL,
> > >  it points to the node that IPA cp cloned from.  */
> > >   struct cgraph_node *ipcp_orig_node;
> > Why not use node->clone_of here?
> 
> That would not work if the node was a clone created by some other
> pass.  I need to differentiate between clones I create because they do
> not have lattices but do have the exact values for individual
> parameters in known_vals (which is NULL otherwise).  I should probably
> use a flag though, the code I ended up only checks it for NULL anyway.

Hmm, OK, either flag or keeping the pointer is fine.
> > > 
> > > /* ipa_edge_args stores information related to a callsite and 
> > > particularly its
> > >arguments.  It can be accessed by the IPA_EDGE_REF macro.  */
> > > typedef struct GTY(()) ipa_edge_args
> > 
> > probably edge_summary would be my preferred name.
> 
> Ugh, this is the current name, we may change it later.  In any event
> the name should probably tell that the summary is about parameters.

Hmm, OK, it is not a bad name after all.
> 
> > 
> > > {
> > >   /* Next pointer in a linked list of clones of the same function.  */
> > >   struct cgraph_edge *next_edge_clone;
> > 
> > What is this needed for?
> 
> For get_info_about_necessary_edges and ga

Re: [build] Move libgcov support to toplevel libgcc

2011-07-08 Thread Jan Hubicka
> On Fri, 8 Jul 2011, Rainer Orth wrote:
> 
> > And another easy one: moving libgcov over to libgcc.
> 
> Do you have any specific plans regarding gcov-io.c and gcov-io.h?  Because 
> they are genuinely used on both the host and the target they are a 
> trickier case; I wonder if they should end up in their own toplevel 
> directory like libdecnumber, building a proper library that exports 
> different functions when configured for the host than for the target, that 
> for the target gets included in libgcov and that for the host gets linked 
> into cc1, gcov and gcov-dump rather than having files do the present 
> '#include "gcov-io.c"'.

I would also prefer libgcov to go into its own toplevel directory,
especially because there are plans to add non-stdlib i/o into it, i.e.
for kernel profiling.  That way it would be handy to have libgcov
as a toplevel library with its own configure that allows it to be built
independently of the rest of GCC.

Honza
> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com


New Spanish PO file for 'gcc' (version 4.6.1)

2011-07-08 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the Spanish team of translators.  The file is available at:

http://translationproject.org/latest/gcc/es.po

(This file, 'gcc-4.6.1.es.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

http://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

http://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [patch tree-optimization]: [1 of 3]: Boolify compares & more

2011-07-08 Thread Kai Tietz
2011/7/8 Richard Guenther :
> On Fri, Jul 8, 2011 at 11:28 AM, Kai Tietz  wrote
>> 2011/7/8 Richard Guenther :
>>> On Thu, Jul 7, 2011 at 6:06 PM, Kai Tietz  wrote:
 Hello,

 This patch - first of series - adds to fold and some helper routines 
 support
 for one-bit precision bitwise folding and detection.
 This patch is necessary for - next patch of series - boolification of
 comparisons.

 Bootstrapped and regression tested for all standard-languages (plus
 Ada and Obj-C++) on host x86_64-pc-linux-gnu.

 Ok for apply?
>>>
>>> Factoring out fold_truth_andor to a function should be done separately.
>>> A patch that does just that is pre-approved.
>>
>> Ok, I will send a separate patch for this. But in fact it only makes
>> sense together with the 1-bit precision bitwise support, too.
>
> No, it makes sense anyway to get rid of that goto.  Note _only_ factoring
> out the function, not changing anything in it.

Done.

>>> Otherwise the patch globs too many changes and lacks reasoning.
>>> Why do we want to handle all this in fold when the boolification
>>> happens only after gimplification?
>>
>> We still rely on truth/bitwise folding on fold-const.  Also we need to
>> handle this for passes, which are using fold_binary to optimize and
>> handle boolified operations - like tree-ssa-reassoc, or tree-vect*.
>> This support in fold-const is necessary when we are preserving casts
>> from/to boolean, as otherwise we don't fold bitwise-binary with
>> compares proper anymore.  Additionally we have to take care that we
>> don't enter TRUTH_(AND|OR|XOR) expressions on boolified trees, as
>> otherwise tree-cfg will barf. Also we need to take care that types of
>> comparisons and TRUTH_NOT expressions are boolean one, as otherwise
>> again tree-cfg will detect incompatible types for those expressions.
>
> Sounds like many different things for many individual patches.  Btw,
> I'd rather have the tree passes that rely on fold call a gimple specific
> wrapper where we can add such things (and also use gimple/SSA
> specific optimizations, like less strict typing), like
> gimple_fold_binary (), see also my gimple folding proposal from
> earlier this year. http://gcc.gnu.org/ml/gcc-patches/2011-03/msg01099.html

Well, this is for sure a good thing, but it doesn't solve the issues with
fold-const and 1-bit precision bitwise-operations. We need to handle
them in fold-const as otherwise even worse things are happening there.
Namely, fold-const happily decides to transform bitwise-binaries with
comparisons or truth-valued arguments back into
TRUTH_(AND|OR|XOR)[IF]_EXPRs, which is indeed counterproductive on an
already gimplified tree.
For sure it would be better to avoid for such passes fold-const at all
and have instead a pure-ssa-named folding mechanism, but this is a
different story and not part of this patch.  Focus here is that we are
again able to do proper folding on boolified bitwise operations and to
provide to some passes the knowledge that a 1-bit precision bitwise
operation is an equivalent to a TRUTH_(AND|OR|XOR) and can be handled.

Regards,
Kai


Re: CFT: Move unwinder to toplevel libgcc

2011-07-08 Thread Steve Ellcey
On Fri, 2011-07-08 at 20:16 +0200, Rainer Orth wrote:
> Steve,
> 
> > It looks like this is caused by having two '#include "md-unwind-support.h"'
> > lines in unwind-ia64.c.  If I remove the first one (mixed in with the other
> > includes at the top of the file) and leave the second one then the file
> > compiles.  I still get the warning about discarding the cast but I guess 
> > that
> > is OK.  I have a bootstrap running on IA64 Linux and it looks good so far.
> > I haven't had a chance to follow-up on IA64 HP-UX.
> 
> I added the first #include to provide the definitions of UNW_IVMS_MODE
> and MD_UNW_COMPATIBLE_PERSONALITY_P, not realizing that there was
> another one already.
> 
> I guess the default definition of the latter can be moved below the
> second #include "md-unwind-support.h"?
> 
>   Rainer

I think that will work for VMS.  I just removed the include since I
don't define the macro on HP-UX or Linux.  With that change and with the
removal of ia64/t-eh-ia64 from tmake_file for HP-UX I got a bootstrap on
both IA64 HP-UX and Linux.  I haven't done a full test run yet, I will
see if I can do that over the weekend.

Steve Ellcey
s...@cup.hp.com



Re: [Patch, Fortran] Add stat=/errmsg= support to _gfortran_caf_register

2011-07-08 Thread Janne Blomqvist
On Thu, Jul 7, 2011 at 15:48, Tobias Burnus  wrote:
> I have now committed the patch with only the nits fixed (Rev.175966). But
> given that the coarray support - especially with regards to the library - is
> still in a flux, we can still change everything, including the ABI of the
> library and the file organization. I am sure that not all design decisions
> are optimal.

One minor thing is that one should use gfc_charlen_type_node
(frontend) and gfc_charlen_type (library) for string lengths instead
of int. (Currently gfc_charlen_type is a typedef for int, but if this
is at some point changed to size_t, as has been discussed, then it's
easier if one needs only to change a few places.)


-- 
Janne Blomqvist


[PATCH] Optimize NE/EQ comparisons of narrow integer types in debug info (PR debug/49676)

2011-07-08 Thread Jakub Jelinek
Hi!

E.g. on
   extern void d (int);
   void __attribute__((noinline, noclone))
   self (int i)
   {
 if (i == 200)
   self (i + 1);
 else
   d (i + 2);
   }
this patch saves two bytes in the location description of the call site
value where we have a SImode comparison with 200, by emitting
DW_OP_const4u 0xffffffff DW_OP_and DW_OP_const1u 200 DW_OP_ne
instead of
DW_OP_const1u 32 DW_OP_shl DW_OP_constu (200LL << 32) DW_OP_ne
For EQ/NE it really doesn't matter if we do the comparison with sign
extended or zero extended values, so if doing it zero-extended
is shorter, let's do it that way.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2011-07-08  Jakub Jelinek  

PR debug/49676
* dwarf2out.c (size_of_int_loc_descriptor): New function.
(address_of_int_loc_descriptor): Use it.
(scompare_loc_descriptor): Optimize EQ/NE comparison with
constant.

--- gcc/dwarf2out.c.jj  2011-07-08 15:09:38.0 +0200
+++ gcc/dwarf2out.c 2011-07-08 17:19:38.0 +0200
@@ -10848,44 +10848,53 @@ int_loc_descriptor (HOST_WIDE_INT i)
   return new_loc_descr (op, i, 0);
 }
 
-/* Return loc description representing "address" of integer value.
-   This can appear only as toplevel expression.  */
+/* Return size_of_locs (int_loc_descriptor (i)) without
+   actually allocating it.  */
 
-static dw_loc_descr_ref
-address_of_int_loc_descriptor (int size, HOST_WIDE_INT i)
+static unsigned long
+size_of_int_loc_descriptor (HOST_WIDE_INT i)
 {
-  int litsize;
-  dw_loc_descr_ref loc_result = NULL;
-
-  if (!(dwarf_version >= 4 || !dwarf_strict))
-return NULL;
-
   if (i >= 0)
 {
   if (i <= 31)
-   litsize = 1;
+   return 1;
   else if (i <= 0xff)
-   litsize = 2;
+   return 2;
   else if (i <= 0xffff)
-   litsize = 3;
+   return 3;
   else if (HOST_BITS_PER_WIDE_INT == 32
   || i <= 0xffffffff)
-   litsize = 5;
+   return 5;
   else
-   litsize = 1 + size_of_uleb128 ((unsigned HOST_WIDE_INT) i);
+   return 1 + size_of_uleb128 ((unsigned HOST_WIDE_INT) i);
 }
   else
 {
   if (i >= -0x80)
-   litsize = 2;
+   return 2;
   else if (i >= -0x8000)
-   litsize = 3;
+   return 3;
   else if (HOST_BITS_PER_WIDE_INT == 32
   || i >= -0x80000000)
-   litsize = 5;
+   return 5;
   else
-   litsize = 1 + size_of_sleb128 (i);
+   return 1 + size_of_sleb128 (i);
 }
+}
+
+/* Return loc description representing "address" of integer value.
+   This can appear only as toplevel expression.  */
+
+static dw_loc_descr_ref
+address_of_int_loc_descriptor (int size, HOST_WIDE_INT i)
+{
+  int litsize;
+  dw_loc_descr_ref loc_result = NULL;
+
+  if (!(dwarf_version >= 4 || !dwarf_strict))
+return NULL;
+
+  litsize = size_of_int_loc_descriptor (i);
   /* Determine if DW_OP_stack_value or DW_OP_implicit_value
  is more compact.  For DW_OP_stack_value we need:
  litsize + 1 (DW_OP_stack_value)
@@ -11284,6 +11293,28 @@ scompare_loc_descriptor (enum dwarf_loca
  && (unsigned HOST_WIDE_INT) INTVAL (XEXP (rtl, 1))
 == (INTVAL (XEXP (rtl, 1)) & GET_MODE_MASK (op_mode)
return compare_loc_descriptor (op, op0, op1);
+
+  /* EQ/NE comparison against constant in narrower type than
+DWARF2_ADDR_SIZE can be performed either as
+DW_OP_const1u  DW_OP_shl DW_OP_const* 
+DW_OP_{eq,ne}
+or
+DW_OP_const*u  DW_OP_and DW_OP_const* 
+DW_OP_{eq,ne}.  Pick whatever is shorter.  */
+  if (CONST_INT_P (XEXP (rtl, 1))
+ && GET_MODE_BITSIZE (op_mode) < HOST_BITS_PER_WIDE_INT
+ && (size_of_int_loc_descriptor (shift) + 1
+ + size_of_int_loc_descriptor (INTVAL (XEXP (rtl, 1)) << shift)
+ >= size_of_int_loc_descriptor (GET_MODE_MASK (op_mode)) + 1
++ size_of_int_loc_descriptor (INTVAL (XEXP (rtl, 1))
+  & GET_MODE_MASK (op_mode
+   {
+ add_loc_descr (&op0, int_loc_descriptor (GET_MODE_MASK (op_mode)));
+ add_loc_descr (&op0, new_loc_descr (DW_OP_and, 0, 0));
+ op1 = int_loc_descriptor (INTVAL (XEXP (rtl, 1))
+   & GET_MODE_MASK (op_mode));
+ return compare_loc_descriptor (op, op0, op1);
+   }
 }
   add_loc_descr (&op0, int_loc_descriptor (shift));
   add_loc_descr (&op0, new_loc_descr (DW_OP_shl, 0, 0));

Jakub


RFA: PATCH to add cgraph_add_to_same_comdat_group

2011-07-08 Thread Jason Merrill
As suggested in c++/49353, this patch stops copying linkage flags to 
thunks in function_and_variable_visibility, replacing that with just 
asserts to make sure that the front end is setting things up properly.


To make the front end's job easier, I've added 
cgraph_add_to_same_comdat_group so that the front end doesn't actually 
need to deal with managing the circular list.


Passes C++ regression testing, OK for trunk if all-language testing passes?
commit b05f8f3caaab12bbd61d7bdf05e29bd09e63ef48
Author: Jason Merrill 
Date:   Fri Jul 8 10:37:09 2011 -0400

gcc/
	* cgraph.c (cgraph_add_to_same_comdat_group): New.
	* cgraph.h: Declare it.
	* ipa.c (function_and_variable_visibility): Make sure thunks
	have the right visibility.
gcc/cp/
	* method.c (use_thunk): Use cgraph_add_to_same_comdat_group.
	* optimize.c (maybe_clone_body): Likewise.
	* semantics.c (maybe_add_lambda_conv_op): Likewise.

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 86e7207..09aad60 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1630,6 +1630,31 @@ cgraph_remove_node (struct cgraph_node *node)
   free_nodes = node;
 }
 
+/* Add NEW_ to the same comdat group that OLD is in.  */
+
+void
+cgraph_add_to_same_comdat_group (struct cgraph_node *new_,
+ struct cgraph_node *old)
+{
+  gcc_assert (DECL_ONE_ONLY (old->decl));
+  gcc_assert (!new_->same_comdat_group);
+  gcc_assert (new_ != old);
+
+  DECL_COMDAT_GROUP (new_->decl) = DECL_COMDAT_GROUP (old->decl);
+  new_->same_comdat_group = old;
+  if (!old->same_comdat_group)
+old->same_comdat_group = new_;
+  else
+{
+  struct cgraph_node *n;
+  for (n = old->same_comdat_group;
+	   n->same_comdat_group != old;
+	   n = n->same_comdat_group)
+	;
+  n->same_comdat_group = new_;
+}
+}
+
 /* Remove the node from cgraph.  */
 
 void
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 9133923..5d6ff7c 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -469,6 +469,7 @@ void debug_cgraph_node (struct cgraph_node *);
 void cgraph_insert_node_to_hashtable (struct cgraph_node *node);
 void cgraph_remove_edge (struct cgraph_edge *);
 void cgraph_remove_node (struct cgraph_node *);
+void cgraph_add_to_same_comdat_group (struct cgraph_node *, struct cgraph_node *);
 void cgraph_remove_node_and_inline_clones (struct cgraph_node *);
 void cgraph_release_function_body (struct cgraph_node *);
 void cgraph_node_remove_callees (struct cgraph_node *node);
diff --git a/gcc/cp/method.c b/gcc/cp/method.c
index d41a4dd..3d272a3 100644
--- a/gcc/cp/method.c
+++ b/gcc/cp/method.c
@@ -283,7 +283,7 @@ use_thunk (tree thunk_fndecl, bool emit_p)
   tree virtual_offset;
   HOST_WIDE_INT fixed_offset, virtual_value;
   bool this_adjusting = DECL_THIS_THUNK_P (thunk_fndecl);
-  struct cgraph_node *funcn;
+  struct cgraph_node *funcn, *thunk_node;
 
   /* We should have called finish_thunk to give it a name.  */
   gcc_assert (DECL_NAME (thunk_fndecl));
@@ -344,8 +344,7 @@ use_thunk (tree thunk_fndecl, bool emit_p)
   DECL_VISIBILITY_SPECIFIED (thunk_fndecl)
 = DECL_VISIBILITY_SPECIFIED (function);
   DECL_COMDAT (thunk_fndecl) = DECL_COMDAT (function);
-  if (DECL_ONE_ONLY (function) || DECL_WEAK (function))
-make_decl_one_only (thunk_fndecl, cxx_comdat_group (thunk_fndecl));
+  DECL_WEAK (thunk_fndecl) = DECL_WEAK (function);
 
   if (flag_syntax_only)
 {
@@ -386,9 +385,11 @@ use_thunk (tree thunk_fndecl, bool emit_p)
   TREE_ASM_WRITTEN (thunk_fndecl) = 1;
   funcn = cgraph_get_node (function);
   gcc_checking_assert (funcn);
-  cgraph_add_thunk (funcn, thunk_fndecl, function,
-		this_adjusting, fixed_offset, virtual_value,
-		virtual_offset, alias);
+  thunk_node = cgraph_add_thunk (funcn, thunk_fndecl, function,
+ this_adjusting, fixed_offset, virtual_value,
+ virtual_offset, alias);
+  if (DECL_ONE_ONLY (function))
+cgraph_add_to_same_comdat_group (thunk_node, funcn);
 
   if (!this_adjusting
   || !targetm.asm_out.can_output_mi_thunk (thunk_fndecl, fixed_offset,
diff --git a/gcc/cp/optimize.c b/gcc/cp/optimize.c
index b9e3551..6a06988 100644
--- a/gcc/cp/optimize.c
+++ b/gcc/cp/optimize.c
@@ -309,12 +309,12 @@ maybe_clone_body (tree fn)
 	  && (!DECL_ONE_ONLY (fns[0])
 	  || (HAVE_COMDAT_GROUP
 		  && DECL_WEAK (fns[0])))
-	  && (flag_syntax_only
-	  /* Set linkage flags appropriately before
-		 cgraph_create_function_alias looks at them.  */
-	  || (expand_or_defer_fn_1 (clone)
-		  && cgraph_same_body_alias (cgraph_get_node (fns[0]),
-	 clone, fns[0]
+	  && !flag_syntax_only
+	  /* Set linkage flags appropriately before
+	 cgraph_create_function_alias looks at them.  */
+	  && expand_or_defer_fn_1 (clone)
+	  && cgraph_same_body_alias (cgraph_get_node (fns[0]),
+ clone, fns[0]))
 	{
 	  alias = true;
 	  if (DECL_ONE_ONLY (fns[0]))
@@ -324,13 +324,22 @@ maybe_clone_body (tree fn)
 		 *[CD][12]*.  */
 	  comdat_group = cdtor_comdat_group (fns[1], fns[0]);
 	  DECL_COMDAT_GROUP (fns

  1   2   >