date:20150602

update gthr-tpf.h

2015-06-02 Thread DJ Delorie


This patch updates gthr-tpf.h to the current gthr.h API and TPF API.  Ok?

* gthr-tpf.h (__GTHREADS_CXX0X): Define.
(__gthread_t): Define.
(__gthread_cond_t): Define.
(__gthread_time_t): Define.
(__GTHREAD_HAS_COND): Define.
(__GTHREAD_COND_INIT): Define.

(__gthread_active_p): Check __tpf_pthread_active().
(__gthread_mutex_lock): Remove unneeded conditional.
(__gthread_mutex_trylock): Likewise.
(__gthread_recursive_mutex_lock): Likewise.
(__gthread_recursive_mutex_trylock): Likewise.
(__gthread_recursive_mutex_unlock): Likewise.
(__gthread_recursive_mutex_init_function): Likewise.

(__gthread_join): New.
(__gthread_detach): New.
(__gthread_equal): New.
(__gthread_self): New.
(__gthread_yield): New.
(__gthread_mutex_timedlock): New.
(__gthread_recursive_mutex_timedlock): New.
(__gthread_cond_broadcast): New.
(__gthread_cond_signal): New.
(__gthread_cond_wait): New.
(__gthread_cond_wait_recursive): New.
(__gthread_cond_timedwait): New.
(__gthread_cond_timedwait_recursive): New.
(__gthread_cond_destroy): New.

Index: libgcc/config/s390/gthr-tpf.h
===
--- libgcc/config/s390/gthr-tpf.h   (revision 224011)
+++ libgcc/config/s390/gthr-tpf.h   (working copy)
@@ -35,6 +35,8 @@
Easy, since the interface is just one-to-one mapping.  */
 
 #define __GTHREADS 1
+/* To enable the c++0x thread library.  */
+#define __GTHREADS_CXX0X 1
 
 /* Some implementations of <pthread.h> require this to be defined.  */
 #ifndef _REENTRANT
@@ -44,11 +46,17 @@
 #include <pthread.h>
 #include <unistd.h>
 
+typedef pthread_t __gthread_t;
 typedef pthread_key_t __gthread_key_t;
 typedef pthread_once_t __gthread_once_t;
 typedef pthread_mutex_t __gthread_mutex_t;
 typedef pthread_mutex_t __gthread_recursive_mutex_t;
+typedef pthread_cond_t __gthread_cond_t; 
+typedef struct timespec __gthread_time_t; 
 
+#define __GTHREAD_HAS_COND 1
+#define __GTHREAD_COND_INIT PTHREAD_COND_INITIALIZER
+
 #if defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER)
 #define __GTHREAD_RECURSIVE_MUTEX_INIT PTHREAD_RECURSIVE_MUTEX_INITIALIZER
 #elif defined(PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP)
@@ -80,9 +88,15 @@
 __gthrw(pthread_getspecific)
 __gthrw(pthread_setspecific)
 __gthrw(pthread_create)
+__gthrw(pthread_join)
+__gthrw(pthread_detach)
+__gthrw(pthread_equal)
+__gthrw(pthread_self)
+__gthrw(sched_yield)
 
 __gthrw(pthread_mutex_lock)
 __gthrw(pthread_mutex_trylock)
+__gthrw(pthread_mutex_timedlock) 
 __gthrw(pthread_mutex_unlock)
 __gthrw(pthread_mutexattr_init)
 __gthrw(pthread_mutexattr_settype)
@@ -90,31 +104,81 @@
 __gthrw(pthread_mutex_init)
 __gthrw(pthread_mutex_destroy)
 
+__gthrw(pthread_cond_broadcast)
+__gthrw(pthread_cond_signal)
+__gthrw(pthread_cond_wait)
+__gthrw(pthread_cond_timedwait)
+__gthrw(pthread_cond_destroy)
+
+
 static inline int
 __gthread_active_p (void)
 {
-  return 1;
+  return __tpf_pthread_active ();
 }
 
 static inline int
-__gthread_once (__gthread_once_t *__once, void (*__func) (void))
+__gthread_create (__gthread_t *__threadid, void *(*__func) (void*),
+ void *__args)
 {
+  return __gthrw_(pthread_create) (__threadid, NULL, __func, __args);
+}
+
+static inline int
+__gthread_join (__gthread_t __threadid, void **__value_ptr)
+{
   if (__tpf_pthread_active ())
-return __gthrw_(pthread_once) (__once, __func);
+return __gthrw_(pthread_join) (__threadid, __value_ptr);
   else
 return -1;
 }
 
 static inline int
-__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *))
+__gthread_detach (__gthread_t __threadid)
 {
   if (__tpf_pthread_active ())
-return __gthrw_(pthread_key_create) (__key, __dtor);
+return __gthrw_(pthread_detach) (__threadid);
   else
 return -1;
 }
 
 static inline int
+__gthread_equal (__gthread_t __t1, __gthread_t __t2)
+{
+  if (__tpf_pthread_active ())
+return __gthrw_(pthread_equal) (__t1, __t2);
+  else
+return -1;
+}
+
+static inline __gthread_t
+__gthread_self (void)
+{
+  return __gthrw_(pthread_self) ();
+}
+
+static inline int
+__gthread_yield (void)
+{
+  return __gthrw_(sched_yield) ();
+}
+
+static inline int
+__gthread_once (__gthread_once_t *__once, void (*__func) (void))
+{
+  if (__tpf_pthread_active ())
+return __gthrw_(pthread_once) (__once, __func);
+  else
+return -1;
+}
+
+static inline int
+__gthread_key_create (__gthread_key_t *__key, void (*__dtor) (void *))
+{
+  return __gthrw_(pthread_key_create) (__key, __dtor);
+}
+
+static inline int
 __gthread_key_delete (__gthread_key_t __key)
 {
   if (__tpf_pthread_active ())
@@ -153,22 +217,23 @@
 static inline int
 __gthread_mutex_lock (__gthread_mutex_t *__mutex)
 {
-  if (__tpf_pthread_active ())
-return __gthrw_(pthread_mutex_lock) (__mutex);
-  else
-return 0

Re: C++ PATCH for c++/44282 (ia32 calling convention attributes and mangling)

2015-06-02 Thread Uros Bizjak

Hello!

>PR c++/44282
>gcc/cp/
>* mangle.c (attr_strcmp): New.
>(write_CV_qualifiers_for_type): Also write out attributes that
>affect type identity.
>(write_type): Strip all attributes after writing qualifiers.
>libiberty/
>* cp-demangle.c (cplus_demangle_type): Handle arguments to vendor
>extended qualifier.

+++ b/gcc/testsuite/g++.dg/abi/mangle-regparm.C
@@ -0,0 +1,29 @@
+// { dg-do run { target i?86-*-* } }

This should read:

+// { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } }

The test wasn't actually run on x86_64-linux target. I'll commit the
following patch after regtest:

--cut here--
Index: mangle-regparm.C
===
--- mangle-regparm.C(revision 224011)
+++ mangle-regparm.C(working copy)
@@ -1,4 +1,4 @@
-// { dg-do run { target i?86-*-* } }
+// { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } }
// { dg-final { scan-assembler
"_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_" } }

typedef __SIZE_TYPE__ size_t;
--cut here--

Uros.

Re: [PATCH] Fix misuse of always_inline attribute in 3 hotpatch tests

2015-06-02 Thread Dominik Vogt

On Mon, Jun 01, 2015 at 12:29:31PM +0200, Jakub Jelinek wrote:
> On Mon, Jun 01, 2015 at 11:20:29AM +0100, Dominik Vogt wrote:
> > There are many more
> > test cases that forget the "inline" and filter the warning with
> > "-Wno-attributes".  I'll post an add-on patch later after testing
> > it.
> 
> Well, we need some testcases that actually verify we don't ICE
> when the inline keyword is missing.
> But if you mean tests like e.g. i386/mpx/, or some other s390/
> tests, then indeed, those should be fixed.

Patches attached, split into three separate parts:

0001-*
  Tests for which I'm quite sure that the inline keyword should be
  added.

0002-*
  Specific inline tests that lack the inline keyword, but that may
  be on purpose.  Unfortunately the comments in the tests don't
  make it possible to decide this; the tests need to be checked by
  someone who knows what they are about.

0003-*
  Tests for specific problem reports.  The inline keyword was
  missing in the original code in the PR, fixing that might break
  the regression test.  On the other hand the regression test
  relies on undefined behaviour.  I'm not sure what should be done
  about these test cases.

gcc/testsuite/ChangeLog
---

2015-06-02  Dominik Vogt  

* gcc/testsuite/g++.dg/ipa/devirt-c-7.C:  Add "inline" to functions with
attribute((always_inline)).  Remove -Wno-attributes from compiler options.
* gcc/testsuite/g++.dg/tree-prof/morefunc.C: Likewise.
* gcc/testsuite/g++.dg/tree-prof/reorder.C: Likewise.
* gcc/testsuite/gcc.dg/20051201-1.c: Likewise.
* gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c: Likewise.
* gcc/testsuite/gcc.dg/uninit-pred-5_a.c: Likewise.
* gcc/testsuite/gcc.dg/uninit-pred-5_b.c: Likewise.
* gcc/testsuite/gcc.target/i386/chkp-always_inline.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-lbv.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-nov.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-ubv.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-lbv.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-nov.c: Likewise.
* gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-ubv.c: Likewise.
* gcc/testsuite/gcc.target/s390/20090223-1.c: Likewise.

2015-06-02  Dominik Vogt  

* gcc/testsuite/gcc.dg/inline-22.c:  Add "inline" to functions with
attribute((always_inline)).  Remove -Wno-attributes from compiler options.
* gcc/testsuite/gcc.dg/inline-36.c: Likewise.
* gcc/testsuite/gcc.dg/inline-37.c: Likewise.
* gcc/testsuite/gcc.dg/inline-38.c: Likewise.
* gcc/testsuite/gcc.dg/inline-39.c: Likewise.

2015-06-02  Dominik Vogt  

* gcc/testsuite/g++.dg/torture/pr51436.C: Add "inline" to functions with
attribute((always_inline)).
* gcc/testsuite/gcc.c-torture/execute/pr33992.c: Likewise.
* gcc/testsuite/gcc.dg/tm/pr52141.c: Likewise.
* gcc/testsuite/gcc.dg/torture/pr39204.c: Likewise.
* gcc/testsuite/gcc.dg/debug/pr41264-1.c: Likewise.  Remove
-Wno-attributes from compiler options.
* gcc/testsuite/gcc.dg/tree-ssa/pr40087.c: Likewise.

Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany
>From b0d52bdc8ea046ab65ec47dda1d1e14e5119cee8 Mon Sep 17 00:00:00 2001
From: Dominik Vogt 
Date: Tue, 2 Jun 2015 08:12:44 +0100
Subject: [PATCH 1/3] Add "inline" to functions with the always_inline
 attribute. I

---
 gcc/testsuite/g++.dg/ipa/devirt-c-7.C | 4 ++--
 gcc/testsuite/g++.dg/tree-prof/morefunc.C | 6 +++---
 gcc/testsuite/g++.dg/tree-prof/reorder.C  | 6 +++---
 gcc/testsuite/gcc.dg/20051201-1.c | 6 +++---
 gcc/testsuite/gcc.dg/torture/pta-structcopy-1.c   | 4 ++--
 gcc/testsuite/gcc.dg/uninit-pred-5_a.c| 4 ++--
 gcc/testsuite/gcc.dg/uninit-pred-5_b.c| 4 ++--
 gcc/testsuite/gcc.target/i386/chkp-always_inline.c| 4 ++--
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-lbv.c | 4 +---
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-nov.c | 4 +---
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-1-ubv.c | 4 +---
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-lbv.c | 4 +---
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-nov.c | 4 +---
 gcc/testsuite/gcc.target/i386/mpx/va-arg-pack-2-ubv.c | 4 +---
 gcc/testsuite/gcc.target/s390/20090223-1.c| 6 +++---
 15 files changed, 28 insertions(+), 40 deletions(-)

diff --git a/gcc/testsuite/g++.dg/ipa/devirt-c-7.C b/gcc/testsuite/g++.dg/ipa/devirt-c-7.C
index 2e76cbe..06fbb66 100644
--- a/gcc/testsuite/g++.dg/ipa/devirt-c-7.C
+++ b/gcc/testsuite/g++.dg/ipa/devirt-c-7.C
@@ -2,7 +2,7 @@
object within another one when looking for dynamic type change .  */
 /* { dg-do run } */
 /* { dg-require-effective-target nonpic } */
-/* { dg-options "-O3 -Wno-attributes"  } */
+/* { dg-options "-O3"

[PATCH] Optimize SLP from scalars, workaround PR65961

2015-06-02 Thread Richard Biener


The following patch optimizes the case where we decide to build up
all operands from a SLP node from scalars to just build up the result
from scalars.  That's usually less expensive and in the PR65961
works around a case that we don't handle correctly (yet).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2015-06-01  Richard Biener  

PR tree-optimization/65961
* tree-vect-slp.c (vect_get_and_check_slp_defs): Remove bogus
check and clarify dump message.
(vect_build_slp_tree): If all children are built up from scalars
build up the parent from scalars instead.
* tree-vect-stmts.c (vect_is_simple_use): Cleanup.

* gcc.dg/torture/pr65961.c: New testcase.

Index: gcc/tree-vect-slp.c
===
*** gcc/tree-vect-slp.c (revision 223974)
--- gcc/tree-vect-slp.c (working copy)
*** again:
*** 301,313 
oprnd_info = (*oprnds_info)[i];
  
if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
!  &def, &dt)
! || (!def_stmt && dt != vect_constant_def))
{
  if (dump_enabled_p ())
{
  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!  "Build SLP failed: can't find def for ");
  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
--- 301,312 
oprnd_info = (*oprnds_info)[i];
  
if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
!  &def, &dt))
{
  if (dump_enabled_p ())
{
  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
!  "Build SLP failed: can't analyze def for ");
  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
}
*** vect_build_slp_tree (loop_vec_info loop_
*** 1092,1097 
--- 1091,1125 
   vectorization_factor, matches,
   npermutes, &this_tree_size, max_tree_size))
{
+ /* If we have all children of child built up from scalars then just
+throw that away and build it up this node from scalars.  */
+ if (!SLP_TREE_CHILDREN (child).is_empty ())
+   {
+ unsigned int j;
+ slp_tree grandchild;
+ 
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+   if (grandchild != NULL)
+ break;
+ if (!grandchild)
+   {
+ /* Roll back.  */
+ *max_nunits = old_max_nunits;
+ loads->truncate (old_nloads);
+ FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
+ vect_free_slp_tree (grandchild);
+ SLP_TREE_CHILDREN (child).truncate (0);
+ 
+ dump_printf_loc (MSG_NOTE, vect_location,
+  "Building parent vector operands from "
+  "scalars instead\n");
+ oprnd_info->def_stmts = vNULL;
+ vect_free_slp_tree (child);
+ SLP_TREE_CHILDREN (*node).quick_push (NULL);
+ continue;
+   }
+   }
+ 
  oprnd_info->def_stmts = vNULL;
  SLP_TREE_CHILDREN (*node).quick_push (child);
  continue;
Index: gcc/tree-vect-stmts.c
===
*** gcc/tree-vect-stmts.c   (revision 223974)
--- gcc/tree-vect-stmts.c   (working copy)
*** vect_is_simple_use (tree operand, gimple
*** 7878,7892 
  bb_vec_info bb_vinfo, gimple *def_stmt,
tree *def, enum vect_def_type *dt)
  {
-   basic_block bb;
-   stmt_vec_info stmt_vinfo;
-   struct loop *loop = NULL;
- 
-   if (loop_vinfo)
- loop = LOOP_VINFO_LOOP (loop_vinfo);
- 
*def_stmt = NULL;
*def = NULL_TREE;
  
if (dump_enabled_p ())
  {
--- 7878,7886 
  bb_vec_info bb_vinfo, gimple *def_stmt,
tree *def, enum vect_def_type *dt)
  {
*def_stmt = NULL;
*def = NULL_TREE;
+   *dt = vect_unknown_def_type;
  
if (dump_enabled_p ())
  {
*** vect_is_simple_use (tree operand, gimple
*** 7909,7921 
return true;
  }
  
-   if (TREE_CODE (operand) == PAREN_EXPR)
- {
-   if (dump_enabled_p ())
- dump_printf_loc (MSG_NOTE, vect_location, "non-associatable copy.\n");
-   operand = TREE_OPERAND (operand, 0);
- }
- 
if (TREE_CODE (operand) != SSA_NAME)
  {
if (dump_enabled_p ())
--- 7903,7908 
*** vect_is_simple_use (tree operand,

[commit] [patch] PR other/65366: Fix gdbhooks.py for GDB with Python3

2015-06-02 Thread Jan Kratochvil

On Mon, 01 Jun 2015 22:59:03 +0200, Jason Merrill wrote:
> OK, thanks.

Checked in: r224012


Jan

Re: [patch 10/10] debug-early merge: compiler proper

2015-06-02 Thread Richard Biener

On Mon, Jun 1, 2015 at 7:42 PM, Aldy Hernandez  wrote:
> On 06/01/2015 01:00 PM, Richard Biener wrote:
>>
>> On June 1, 2015 5:42:57 PM GMT+02:00, Aldy Hernandez 
>> wrote:
>>>
>>> On 06/01/2015 04:04 AM, Richard Biener wrote:

 On Mon, Jun 1, 2015 at 10:03 AM, Richard Biener
>
>
>>> We still have the problem that function locals in dwarf2out are seen in
>>>
>>> decls_for_scope by iterating through BLOCK_VARS, and temporaries do not
>>>
>>> live in BLOCK_VARS.
>>>
>>> How did they get picked up and annotated in your approach?
>>
>>
>> The size type ones are in BLOCK_VARS IIRC (or I have to check the last
>> posted patch for other related hunks).
>
>
> Hmmm, it doesn't seem so in my testcase:
>
> $ cat a.c
> unsigned int i=555;
>
> int main()
> {
>   unsigned int array[i];
>   __asm__ __volatile__ ("" : : "m" (array));
> }
>
> (gdb) print stmt
> $108 = 
> (gdb) call debug_generic_stmt(stmt)
> BLOCK #0
>   SUPERCONTEXT: main
>   VARS: array
>
> The temporary has DECL_IGNORED_P appropriately.
>
> It does show up in DECL_STRUCT_FUNCTION()->local_decls, but so do a few
> other temporaries and SSA variables which we're not interested in.

Ok, so I have the following in my LTO debug patch (ignore the first part of the
2nd hunk - just look at the variably_modified_type_p case), which means you
are correct.  The ??? comment in process_vla_type needs fixing of course,
either there or in walk_type_fields (to catch all cases that have gimplified
sizes, see callers of gimplify_one_sizepos/gimplify_type_sizes).

I suppose we can change things this way as a followup (as it needs some work)

@@ -21036,6 +21263,31 @@ gen_block_die (tree stmt, dw_die_ref context_die)
 decls_for_scope (stmt, context_die);
 }

+static tree
+process_vla_type (tree *tp, int *walk_subtrees, void *ctx)
+{
+  /* ???  walk_type_fields doesn't walk TYPE_SIZE and friends and
+ while it walks TYPE_DOMAIN for arrays it doesn't walk
+ TYPE_MIN/MAX_VALUE.  Just special-case the ARRAY_TYPE domain
+ type case here for now.  */
+  if (TREE_CODE (*tp) == INTEGER_TYPE)
+{
+  if (TREE_CODE (TYPE_MIN_VALUE (*tp)) == VAR_DECL
+ && DECL_ARTIFICIAL (TYPE_MIN_VALUE (*tp))
+ && !DECL_IGNORED_P (TYPE_MIN_VALUE (*tp)))
+   gen_decl_die (TYPE_MIN_VALUE (*tp), NULL_TREE, (dw_die_ref) ctx);
+  if (TREE_CODE (TYPE_MAX_VALUE (*tp)) == VAR_DECL
+ && DECL_ARTIFICIAL (TYPE_MAX_VALUE (*tp))
+ && !DECL_IGNORED_P (TYPE_MAX_VALUE (*tp)))
+   gen_decl_die (TYPE_MAX_VALUE (*tp), NULL_TREE, (dw_die_ref) ctx);
+}
+
+  if (!TYPE_P (*tp))
+*walk_subtrees = 0;
+
+  return NULL_TREE;
+}
+
 /* Process variable DECL (or variable with origin ORIGIN) within
block STMT and add it to CONTEXT_DIE.  */
 static void
@@ -21061,7 +21313,44 @@ process_scope_var (tree stmt, tree decl, tree
origin, dw_die_ref context_die)
 stmt, context_die);
 }
   else
-gen_decl_die (decl, origin, context_die);
+{
+  if (decl && DECL_P (decl))
+   die = lookup_decl_die (decl);
+
+  if (in_lto_p
+ && die && die->die_parent != context_die)
+   {
+ /* ???  For non-LTO operation we do not want to get here via
+dwarf2out_abstract_function / set_decl_origin_self which
+ends up modifying the tree rep in some odd way instead
+of just playing with the DIEs.  */
+ /* We associate vars with their DECL_CONTEXT first which misses
+their BLOCK association.  Move them.  */
+ gcc_assert (die->die_parent != NULL);
+ /* ???  Moving is expensive.  Better fix DECL_CONTEXT?  */
+ dw_die_ref prev = die->die_parent->die_child;
+ while (prev->die_sib != die)
+   prev = prev->die_sib;
+ remove_child_with_prev (die, prev);
+ add_child_die (context_die, die);
+   }
+
+  if (in_lto_p
+ && TREE_CODE (decl) == VAR_DECL
+ && variably_modified_type_p (TREE_TYPE (decl), cfun->decl))
+   {
+ /* We need to add location attributes to decls refered to
+from the decls type but we don't have DIEs for the type
+itself materialized.  The decls are also not part of the
+functions BLOCK tree (because they are artificial).  */
+ walk_tree (&TREE_TYPE (decl), process_vla_type, NULL, NULL);
+   }
+
+  /* ???  The following gets stray type DIEs created even for decls
+that were created early.  */
+
+  gen_decl_die (decl, origin, context_die);
+}
 }

 /* Generate all of the decls declared within a given scope and (recursively)


> Aldy

Re: [C/C++ PATCH] Implement -Wshift-overflow (PR c++/55095)

2015-06-02 Thread Richard Biener

On Mon, Jun 1, 2015 at 10:06 PM, Richard Sandiford
 wrote:
> Marek Polacek  writes:
>> +  /* Left-hand operand must be signed.  */
>> +  if (TYPE_UNSIGNED (type0))
>> +return false;
>> +
>> +  /* Compute the result in infinite precision math (sort of).  */
>> +  widest_int w = wi::lshift (wi::to_widest (op0), wi::to_widest (op1));
>> +  unsigned int min_prec = wi::min_precision (w, SIGNED);
>> +  /* Watch out for shifting a negative value.  */
>> +  tree r = wide_int_to_tree (tree_int_cst_sgn (op0) >= 0
>> +  ? unsigned_type_for (type0)
>> +  : type0, w);
>> +  bool overflowed = wi::cmps (w, wi::to_widest (r));
>> +  if (overflowed && c_inhibit_evaluation_warnings == 0)
>> +warning_at (loc, OPT_Wshift_overflow,
>> + "result of %qE requires %u bits to represent, "
>> + "but %qT only has %u bits",
>> + build2_loc (loc, LSHIFT_EXPR, type0, op0, op1),
>> + min_prec, type0, TYPE_PRECISION (type0));
>> +
>> +  return overflowed;
>
> Yeah, this "sort of" is a bit worrying :-)  Especially as the number
> of bits in a widest_int depends on the number of bits in the target's
> widest integer mode.  E.g. for i386 it's forced to 128, but for ARM
> it's 512 (IIRC).
>
> Could you do the check based on the wi::min_precision of the unshifted
> value?  I.e. see whether adding the shift amount to that gives a value
> greater than type's precision.

You could always use a FIXED_WIDE_INT like VRP does for its overflow
detection stuff.

Richard.

> Thanks,
> Richard

Re: [PATCH][ARM][stage-1] Initialise cost to COSTS_N_INSNS (1) and increment in arm rtx costs

2015-06-02 Thread Kyrill Tkachov


Ping^4.

Thanks,
Kyrill

On 21/05/15 18:00, Kyrill Tkachov wrote:

Ping^3.

Thanks,
Kyrill

On 12/05/15 10:09, Kyrill Tkachov wrote:

Ping^2.

Thanks,
Kyrill

On 30/04/15 13:00, Kyrill Tkachov wrote:

Ping.
https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01130.html

Thanks,
Kyrill

On 21/04/15 10:11, Kyrill Tkachov wrote:

Hi all,

This is the first of a series to clean up and simplify the arm rtx costs 
function.
This patch initialises the cost to COSTS_N_INSNS (1) at the top and increments 
it when appropriate
in the rest of the function. This makes it more similar to the aarch64 rtx 
costs function and saves
us the trouble of having to remember to initialise the cost to COSTS_N_INSNS 
(1) in each case of the
switch statement.

Bootstrapped and tested arm-none-linux-gnueabihf.
Compiled some large programs with no codegen difference, except some DIV 
synthesis algorithms were changed,
presumably due to the cost of SDIV/UDIV, which is now being correctly 
calculated (before it was missing the
baseline COSTS_N_INSNS (1)).

Ok for trunk?

Thanks,
Kyrill

2015-04-21  Kyrylo Tkachov  

 * config/arm/arm.c (arm_new_rtx_costs): Initialise cost to
 COSTS_N_INSNS (1) and increment it appropriately throughout the
 function.

Re: [patch] consolidate some includes into coretypes.h

2015-06-02 Thread Richard Biener

On Mon, Jun 1, 2015 at 11:02 PM, Andrew MacLeod  wrote:
> I've begun looking at cleaning up the include files. Before removing
> unnecessary includes, I'd like to get a few other cleanups out of the way to
> simplify the dependency web. This is the first.
>
> There are some interrelated numerical definition headers (double-int.h,
> fixed-value.h, real.h and wide-int.h) .  Virtually every gcc source file
> ends up including them indirectly one or more times.  They also end up
> including signop.h and machmode.h as prerequisites as well.
>
> first,
>
> #include "machmode.h"
> #include "signop.h"
> #include "wide-int.h"
> #include "double-int.h"
>
> any source file  which includes tree.h, gimple.h *or* rtl.h will require all
> of these to compile.  That is basically the entire compiler.
>
> then there are:
>
> #include "real.h"
> #include "fixed-value.h" /* require real.h to compile */
>
> rtl.h has a hard dependency on these 2 files to compile, and although tree.h
> still parses and compiles when they are not included, it does provide some
> macros which access tree fields which return a FIXED_VALUE.   Any file which
> includes tree.h could therefore require real.h and fixed-value.h if they use
> the results of those macros.
>
> That said, I tried flattening these includes throughout the compiler to see
> exactly which other source files really need real.h and fixed-value.h.  I
> changed it such that those 2 files were included by rtl.h, realmpfr.h, and
> dfp.h which have hard requirements.  I found about 37 of the remaining
> source files required real.h and about 16 required fixed-value.h
>
> Personally given those numbers and since tree.h exposes potential uses of
> FIXED_VALUE, the simple and cleanest thing to do is just add all 6 of these
> includes to the basic pre-requisites for source files.
>
> Currently, all source files start with
> #include "config.h"
> #include "system.h"
> #include "coretypes.h"
>
> The first include can be different for generator (bconfig.h) and target
> (tconfig.h) files,  so with a small tweak to coretypes.h to include these 6
> header files when config.h has been included (by checking if GCC_CONFIG_H is
> defined),  everything pretty much magically works.  I think it makes sense
> to put them there since they are core types and is already included
> everywhere it's needed. If that is not satisfactory, I could create a new
> include file which comes after coretypes when appropriate...
>
> The only exception is the cases where rtl.h is included by some generator
> file. These generator files are used late enough in the build  that
> insn-modes.h exists and can safely include all these files.  I added a
> condition to rtl.h to include these files when GENERATOR_FILE is defined
> since they wouldn't have been included by coretypes.h normally.
>
> With that change I can remove *all* other #includes of these 6 files, and
> simplify the initial part of the include web quite nicely.  I also used the
> opportunity to remove coretypes.h from a couple of includes that no longer
> need to include it.
>
> there are 2 patches. The first is short and has the interesting changes, the
> second is purely automated and removes all the extraneous #includes of these
> files which are now all encapsulated in coretypes.h.
>
> Bootstraps from scratch on x86_64-unknown-linux-gnu with no new test
> regressions.  I also built it on all the config-list.mk targets with no
> additional compilation errors.
>
> OK for trunk?

Generally the idea is sound (amend coretypes.h), but I don't like the
GCC_CONFIG_H guard, why does !GENERATOR_FILE not work?

Furthermore I don't like the special-casing in rtl.h, instead have
coretypes.h contain sth like

#ifdef GENERATOR_FILE
... rtl.h special-case
#else
... GCC_CONFIG_H stuff
#endif

Thanks,
Richard.

> Andrew
>
> -  I do have a functioning patch which moves real.h and fixed-value.h to the
> required source files, I just don't like it as much, so I propose this one
> first.
>

Re: [patch] Fix ICE on function [not] returning variable size

2015-06-02 Thread Richard Biener

On Mon, Jun 1, 2015 at 11:08 PM, Eric Botcazou  wrote:
>> Ok.  (I wonder if there are any cases where the return value is allocated by
>> the callee?)
>
> Thanks.
>
> Do you mean in GCC or in programming languages in general or...?  In GNAT, we
> have something like that: when a function returns an unconstrained type whose
> size depends on some discriminants of the type, the caller doesn't know the
> size of the return value in advance, so the callee allocates the return value
> on the "secondary stack" and effectively returns a pointer to it.  Of course
> you need a specific machinery to manage this secondary stack.  And, needless
> to say, this is quite inefficient, so we attempt to reduce its usage:
>   https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02632.html

Yes, in general in GCC.  In this case we could still remove the lhs (not sure if
it is worth the trouble or even easy to detect on GIMPLE)?

Richard.

> --
> Eric Botcazou

Re: [BUILDROBOT] arc-elf: match_code "REG" matches nothing

2015-06-02 Thread Richard Sandiford

Jan-Benedict Glaw  writes:
> On Fri, 2015-05-22 16:42:44 +0100, Richard Sandiford
>  wrote:
>> This patch adjusts the fix for PR target/65689 along the lines suggested
>> in https://gcc.gnu.org/ml/gcc-patches/2015-04/msg01559.html.  The idea
>> is to reuse the existing gensupport.c routine to work out the codes
>> accepted by constraints.
>> 
>> I'd originally done this with an eye to using compute_test_codes for
>> the problem that Andreas found on s390.  I don't think it's going to
>> be useful for that after all, but it seems worth having for its own sake.
>> 
>> Bootstrapped & regression-tested on x86_64-linux-gnu.  OK to install?
>
> I'm also getting fallout for arc-elf, see eg. build
> http://toolchain.lug-owl.de/buildbot/show_build_details.php?id=442948

Sorry, think this message must have been caught by a filter so it didn't
reach my inbox.

Tested on mmix and arc-elf and applied as obvious.

Thanks,
Richard


gcc/
* config/arc/constraints.md: Use lower-case names in match_code.
* config/mmix/constraints.md: Likewise.

Index: gcc/config/arc/constraints.md
===
--- gcc/config/arc/constraints.md   2015-06-01 21:07:51.418630361 +0100
+++ gcc/config/arc/constraints.md   2015-06-01 21:07:51.571631855 +0100
@@ -335,7 +335,7 @@ (define_constraint "Rcq"
Cryptic q - for short insn generation while not affecting register 
allocation
Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
@code{r12}-@code{r15}"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test "TARGET_Rcq
&& !arc_ccfsm_cond_exec_p ()
&& IN_RANGE (REGNO (op) ^ 4, 4, 11)")))
@@ -347,7 +347,7 @@ (define_constraint "Rcq"
 (define_constraint "Rcw"
   "@internal
Cryptic w - for use in early alternatives with matching constraint"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test
"TARGET_Rcw
 && REGNO (op) < FIRST_PSEUDO_REGISTER
@@ -357,7 +357,7 @@ (define_constraint "Rcw"
 (define_constraint "Rcr"
   "@internal
Cryptic r - for use in early alternatives with matching constraint"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test
"TARGET_Rcw
 && REGNO (op) < FIRST_PSEUDO_REGISTER
@@ -367,13 +367,13 @@ (define_constraint "Rcr"
 (define_constraint "Rcb"
   "@internal
Stack Pointer register @code{r28} - do not reload into its class"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test "REGNO (op) == 28")))
 
 (define_constraint "Rck"
   "@internal
blink (usful for push_s / pop_s)"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test "REGNO (op) == 31")))
 
 (define_constraint "Rs5"
@@ -381,7 +381,7 @@ (define_constraint "Rs5"
sibcall register - only allow one of the five available 16 bit isnsn.
Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
@code{r12}"
-  (and (match_code "REG")
+  (and (match_code "reg")
(match_test "!arc_ccfsm_cond_exec_p ()")
(ior (match_test "(unsigned) REGNO (op) <= 3")
(match_test "REGNO (op) == 12"
@@ -389,7 +389,7 @@ (define_constraint "Rs5"
 (define_constraint "Rcc"
   "@internal
   Condition Codes"
-  (and (match_code "REG") (match_test "cc_register (op, VOIDmode)")))
+  (and (match_code "reg") (match_test "cc_register (op, VOIDmode)")))
 
 
 (define_constraint "Q"
Index: gcc/config/mmix/constraints.md
===
--- gcc/config/mmix/constraints.md  2015-06-01 21:07:51.418630361 +0100
+++ gcc/config/mmix/constraints.md  2015-06-01 21:07:51.572631864 +0100
@@ -89,8 +89,8 @@ (define_constraint "R"
   (and (not (match_code "const_int,const_double"))
(match_test "mmix_constant_address_p (op)")
(ior (match_test "!TARGET_BASE_ADDRESSES")
-   (match_code "LABEL_REF")
-   (and (match_code "SYMBOL_REF")
+   (match_code "label_ref")
+   (and (match_code "symbol_ref")
 (match_test "SYMBOL_REF_FLAG (op)")
 
 ;; FIXME: L (or S) is redundant.

Re: [PATCH 15/16] gcc: Use libgas and libld within the driver

2015-06-02 Thread Richard Biener

On Mon, Jun 1, 2015 at 11:04 PM, David Malcolm  wrote:
> This patch adds the ability for gcc to be configured with:
>   --with-embedded-as
>   --with-embedded-ld
> If so, invocations of "as" and "ld" are detected in the gcc driver, and
> special-cased by invoking these in-process as shared libraries.  This is
> intended for use by libgccjit, when the driver itself is in-process
> within libgccjit, eliminating fork/exec and dynamic-library resolution.
>
> Doing so dramatically speeds up jit.dg/test-benchmark.c.
>
> The patch generalizes the named items support within timevar.c, so that
> as well as having a bucket of named "jit client items" we also have
> buckets for "as" and for "ld" so that they can account for time spent
> within them.
>
> One remaining hack here, appending CFLAGS-gcc.o with a hardcoded include
> path, but I didn't want that to hold up posting what I've got so far.

Hum, so why not go further and embed as into cc1/cc1plus, etc.?  That is,
make the as invocation parts of the driver accessible to the compiler
in some way.

This way we can eventually add a more efficient way of funneling the compiler
assembler output to libas (well, I suppose you at least use -pipe...).

Richard.

> gcc/ChangeLog:
> * configure.ac: Add --with-embedded-as and --with-embedded-ld.
> * gcc.c: Include libgas.h and libld.h.
> (class ctimershim): New.
> (ctimershim::impl_push): New.
> (ctimershim::impl_pop): New.
> (run_embedded_as): New.
> (run_embedded_ld): New.
> (enum known_command): New.
> (get_known_command): New.
> (tv_id_for_known_command): New.
> (maybe_run_embedded_command): New.
> (execute): Invoke get_known_command and
> maybe_run_embedded_command, potentially avoiding the need to call
> into pex.
> * timevar.c (timer::named_items::print): Add "name" param rather
> than hardcoding "Client items".
> (timer::timer): Initialize "m_has_named_items"; replace
> "m_jit_client_items" with "m_named_items" array.
> (timer::~timer): Likewise.
> (timer::push_client_item): Rename to...
> (timer::push_named_item): ...this and add "dict" param,
> generalizing to support an array of dicts of named items.
> (timer::pop_client_item): Rename to...
> (timer::pop_named_item): ...this, generalizing to support
> an array of dicts of named items.
> (timer::print): Print JIT client items first (if any), then
> GCC timevar items, then embedded as items (if any), then embedded
> ld items (if any).
> * timevar.def (TV_DRIVER_EMBEDDED_AS): New.
> (TV_DRIVER_EMBEDDED_LD): New.
> * timevar.h (timer::item_dict): New enum.
> (timer::push_client_item): Rename to...
> (timer::push_named_item): ...this, adding "dict" param.
> (timer::pop_client_item): Rename to...
> (timer::pop_named_item):  ...this, adding "dict" param.
> (timer::get_item_dict): New.
> (timer::m_jit_client_items): Drop this field in favor of...
> (timer::m_named_items): ...this array.
> (timer::m_has_named_items): New.
>
> gcc/jit/ChangeLog:
> * Make-lang.in (LIBGCCJIT_FILENAME): Add EXTRA_GCC_LIBS to link.
> * libgccjit.c (gcc_jit_timer_push): Replace call to
> timer->push_client_item with timer->push_named_item.
> (gcc_jit_timer_pop): Likewise for pop.
> * notes.txt: Indicate that as/ld could be embedded.
> ---
>  gcc/Makefile.in  |   3 +
>  gcc/configure.ac |  25 ++
>  gcc/gcc.c| 214 
> ---
>  gcc/jit/Make-lang.in |   2 +-
>  gcc/jit/libgccjit.c  |   5 +-
>  gcc/jit/notes.txt|   4 +-
>  gcc/timevar.c|  56 ++
>  gcc/timevar.def  |   2 +
>  gcc/timevar.h|  33 +++-
>  9 files changed, 308 insertions(+), 36 deletions(-)
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 2388975..9061933 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1993,6 +1993,9 @@ DRIVER_DEFINES = \
>
>  CFLAGS-gcc.o += $(DRIVER_DEFINES)
>
> +# FIXME
> +CFLAGS-gcc.o += 
> -I/home/david/coding/gcc-python/binutils-gdb-libraries/install/include
> +
>  specs.h : s-specs ; @true
>  s-specs : Makefile
> lsf="$(lang_specs_files)"; for f in $$lsf; do \
> diff --git a/gcc/configure.ac b/gcc/configure.ac
> index 810725c..6f50908 100644
> --- a/gcc/configure.ac
> +++ b/gcc/configure.ac
> @@ -1114,6 +1114,31 @@ LIBS=
>  AC_SEARCH_LIBS(kstat_open, kstat)
>  EXTRA_GCC_LIBS="$LIBS"
>  LIBS="$save_LIBS"
> +
> +# Support embedding libgas in the driver
> +
> +AC_ARG_WITH([embedded-as],
> + [AS_HELP_STRING([--with-embedded-as],
> +   [use libgas to embed the assembler in-process])],
> + [AC_CHECK_LIB([gas], [gas_main],
> +   [EXTRA_GCC_LIBS+=" -lgas $LDFLAGS";
> +AC_DEFINE(HAVE_LIBGAS, 1,
> +   [Define if libgas is in

Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng  wrote:
> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
>  wrote:
>> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng  wrote:
>>> Hi,
>>> My first part patch improving how we handle overflow in scev is posted at
>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>>> second part patch.
>>>
>>> This patch does below improvements:
>>>   1) Computes and records control iv for each loop's exit edge.  This
>>> provides a way to compute overflow information in loop niter and use it in
>>> different customers.  I think it's useful, especially with option
>>> -funsafe-loop-optimizations.
>>>   2) Improve chrec_convert by adding new interface
>>> loop_exits_before_overflow.  It checks if a converted IV overflows w.r.t. its
>>> type and loop using overflow information of loop's control iv.  This
>>> basically propagates no-overflow information from control iv to ivs
>>> converted from control iv.  Moreover, we can further improve the logic by
>>> using possible VRP information in the future.
>>
>> But 2) you already posted (and I have approved it but you didn't commit 
>> yet?).
>>
>> Can you commit that approved patch and only send the parts I didn't approve
>> yet?
>>
>> Thanks,
>> Richard.
>>
>>> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
>>> now.  Cases reported in PR48052 can be vectorized too.
>>> Opinions?
>>>
>>> Thanks,
>>> bin
>>>
>>>
>>> 2015-05-26  Bin Cheng  
>>>
>>> * cfgloop.h (struct control_iv): New.
>>> (struct loop): New field control_ivs.
>>> * tree-ssa-loop-niter.c : Include "stor-layout.h".
>>> (number_of_iterations_lt): Set no_overflow information.
>>> (number_of_iterations_exit): Init control iv in niter struct.
>>> (record_control_iv): New.
>>> (estimate_numbers_of_iterations_loop): Call record_control_iv.
>>> (loop_exits_before_overflow): New.  Interface factored out of
>>> scev_probably_wraps_p.
>>> (scev_probably_wraps_p): Factor loop niter related code into
>>> loop_exits_before_overflow.
>>> (free_numbers_of_iterations_estimates_loop): Free control ivs.
>>> * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>>>
>>> gcc/testsuite/ChangeLog
>>> 2015-05-26  Bin Cheng  
>>>
>>> PR tree-optimization/48052
>>> * gcc.dg/tree-ssa/scev-8.c: New.
>>> * gcc.dg/tree-ssa/scev-9.c: New.
>>> * gcc.dg/tree-ssa/scev-10.c: New.
>>> * gcc.dg/vect/pr48052.c: New.
>>>
>
> Hi Richard,
> I think you sent the review message for this patch to another
> thread.  Sorry for being misleading.  So I copied and answered your
> review comments in this thread so that we can continue here.
>
>>> +   /* Done proving if this is a no-overflow control IV.  */
>>> +   if (operand_equal_p (base, civ->base, 0))
>>> + return true;
>>
>> so all control IVs are no-overflow?
>
> This patch only records known no-overflow control ivs in loop
> structure, so it depends on loop niter analyzer.  For now, this patch
> (and the existing code) sets no-overflow flag only for two cases.  One
> is the step-1 case, the other one is in assert_no_overflow_lt.
> As a matter of fact, we may want to set no_overflow flag for all cases
> with -funsafe-loop-optimizations in the future.  In that case, we will
> assume all control IVs are no-overflow.
>
>>
>>> +base <= UPPER_BOUND (type) - step  ;;step > 0
>>> +base >= LOWER_BOUND (type) - step  ;;step < 0
>>> +
>>> +  by using loop's initial condition.  */
>>> +   stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
>>> +   if (operand_equal_p (stepped, civ->base, 0))
>>> + {
>>> +   if (tree_int_cst_sign_bit (step))
>>> + {
>>> +   code = LT_EXPR;
>>> +   extreme = lower_bound_in_type (type, type);
>>> + }
>>> +   else
>>> + {
>>> +   code = GT_EXPR;
>>> +   extreme = upper_bound_in_type (type, type);
>>> + }
>>> +   extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
>>> +   e = fold_build2 (code, boolean_type_node, base, extreme);
>>
>> looks like you are actually computing base + step <= UPPER_BOUND (type)
>> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
>> are constants why not compute it the way the comment specifies it?  
>> Comparison
>> codes also don't match the comment and we try to prove the condition is 
>> false.
> I tried to prove the condition is satisfied by proving the reverse
> condition ("base > UPPER_BOUND (type) - step") is false here.  In the
> updated patch, I revised comments to reflect that logic.  Is it ok?
>
>>
>> This also reminds me of eventually pushing forward my idea of strengthening
>> simplify_using_initial_
>> conditions by using the VRP machinery (I have a small
>> prototype patch for that).
> Interes

Re: [C/C++ PATCH] Implement -Wshift-overflow (PR c++/55095)

2015-06-02 Thread Richard Sandiford

Richard Biener  writes:
> On Mon, Jun 1, 2015 at 10:06 PM, Richard Sandiford
>  wrote:
>> Marek Polacek  writes:
>>> +  /* Left-hand operand must be signed.  */
>>> +  if (TYPE_UNSIGNED (type0))
>>> +return false;
>>> +
>>> +  /* Compute the result in infinite precision math (sort of).  */
>>> +  widest_int w = wi::lshift (wi::to_widest (op0), wi::to_widest (op1));
>>> +  unsigned int min_prec = wi::min_precision (w, SIGNED);
>>> +  /* Watch out for shifting a negative value.  */
>>> +  tree r = wide_int_to_tree (tree_int_cst_sgn (op0) >= 0
>>> +  ? unsigned_type_for (type0)
>>> +  : type0, w);
>>> +  bool overflowed = wi::cmps (w, wi::to_widest (r));
>>> +  if (overflowed && c_inhibit_evaluation_warnings == 0)
>>> +warning_at (loc, OPT_Wshift_overflow,
>>> + "result of %qE requires %u bits to represent, "
>>> + "but %qT only has %u bits",
>>> + build2_loc (loc, LSHIFT_EXPR, type0, op0, op1),
>>> + min_prec, type0, TYPE_PRECISION (type0));
>>> +
>>> +  return overflowed;
>>
>> Yeah, this "sort of" is a bit worrying :-)  Especially as the number
>> of bits in a widest_int depends on the number of bits in the target's
>> widest integer mode.  E.g. for i386 it's forced to 128, but for ARM
>> it's 512 (IIRC).
>>
>> Could you do the check based on the wi::min_precision of the unshifted
>> value?  I.e. see whether adding the shift amount to that gives a value
>> greater than type's precision.
>
> You could always use a FIXED_WIDE_INT like VRP does for its overflow
> detection stuff.

That would work too, but why impose an arbitrary limit?  Unless I'm
missing something, the code above should be equivalent to:

  unsigned int min_prec = (wi::min_precision (op0, SIGNED)
   + TREE_INT_CST_LOW (op1));
  bool overflowed = min_prec > TYPE_PRECISION (type0);
  if (overflowed && c_inhibit_evaluation_warnings == 0)
warning_at (loc, OPT_Wshift_overflow,
 "result of %qE requires %u bits to represent, "
 "but %qT only has %u bits",
 build2_loc (loc, LSHIFT_EXPR, type0, op0, op1),
 min_prec, type0, TYPE_PRECISION (type0));

which seems simpler than anything involving wider precision.

Thanks,
Richard

[patch] Small tweak to gimplifier

2015-06-02 Thread Eric Botcazou

Hi,

there is a trick in the gimplifier to alleviate some annoying effects of the 
gimplification on the debug info (for -O0 when var-tracking is not enabled but 
it's done unconditionally):

  /* Try to alleviate the effects of the gimplification creating artificial
 temporaries (see for example is_gimple_reg_rhs) on the debug info.  */
  if (!gimplify_ctxp->into_ssa
  && TREE_CODE (*from_p) == VAR_DECL
  && DECL_IGNORED_P (*from_p)
  && DECL_P (*to_p)
  && !DECL_IGNORED_P (*to_p))
{
  if (!DECL_NAME (*from_p) && DECL_NAME (*to_p))
DECL_NAME (*from_p)
  = create_tmp_var_name (IDENTIFIER_POINTER (DECL_NAME (*to_p)));
  DECL_HAS_DEBUG_EXPR_P (*from_p) = 1;
  SET_DECL_DEBUG_EXPR (*from_p, *to_p);
   }

We have a large Ada testcase for which this creates a GC hazard in LTO mode 
because this creates a DECL_DEBUG_EXPR link between a parent and a nested 
function, which badly interacts with a DECL_VALUE_EXPR link created during 
unnesting (it's the known GC issue with circular references through hash 
tables in GC memory).

Therefore the attached patch restricts the trick to local variables only.
That's transparent, modulo a benign tweak to gcc.dg/vect/vec-scal-opt.c 
because of the DECL_NAME change.

Tested on x86_64-suse-linux, OK for the mainline?


2015-06-02  Eric Botcazou  

* gimplify.c (gimplify_modify_expr): Do not create a DECL_DEBUG_EXPR if
the target doesn't belong to the current function.


2015-06-02  Eric Botcazou  

* gcc.dg/vect/vec-scal-opt.c: Adjust regexp.


-- 
Eric Botcazou

Index: testsuite/gcc.dg/vect/vec-scal-opt.c
===
--- testsuite/gcc.dg/vect/vec-scal-opt.c	(revision 224011)
+++ testsuite/gcc.dg/vect/vec-scal-opt.c	(working copy)
@@ -19,4 +19,4 @@ int main (int argc, char *argv[]) {
return vidx(short, r1, 0);
 }
 
-/* { dg-final { scan-tree-dump-times ">> k.\[0-9_\]*" 1 "veclower21" } } */
+/* { dg-final { scan-tree-dump-times ">> _\[0-9\]*" 1 "veclower21" } } */
Index: gimplify.c
===
--- gimplify.c	(revision 224011)
+++ gimplify.c	(working copy)
@@ -4707,12 +4707,14 @@ gimplify_modify_expr (tree *expr_p, gimp
 return gimplify_modify_expr_complex_part (expr_p, pre_p, want_value);
 
   /* Try to alleviate the effects of the gimplification creating artificial
- temporaries (see for example is_gimple_reg_rhs) on the debug info.  */
+ temporaries (see for example is_gimple_reg_rhs) on the debug info, but
+ make sure not to create DECL_DEBUG_EXPR links across functions.  */
   if (!gimplify_ctxp->into_ssa
   && TREE_CODE (*from_p) == VAR_DECL
   && DECL_IGNORED_P (*from_p)
   && DECL_P (*to_p)
-  && !DECL_IGNORED_P (*to_p))
+  && !DECL_IGNORED_P (*to_p)
+  && decl_function_context (*to_p) == current_function_decl)
 {
   if (!DECL_NAME (*from_p) && DECL_NAME (*to_p))
 	DECL_NAME (*from_p)

Re: [PATCH][AARCH64]Use shl for vec_shr_ rtx pattern.

2015-06-02 Thread Renlin Li


Is it Okay for me to backport it to gcc-5?

Regards,
Renlin Li

On 30/04/15 16:21, Marcus Shawcroft wrote:

On 30 April 2015 at 12:55, Renlin Li  wrote:


2015-04-30  Renlin Li  

 * config/aarch64/aarch64-simd.md (vec_shr): Defined as an unspec.
 * config/aarch64/iterators.md (unspec): Add UNSPEC_VEC_SHR.

gcc/testsuite/ChangeLog:

2015-04-30  Renlin Li  

 * gcc.target/aarch64/vect-reduc-or_1.c: New.

+  __builtin_printf("Failed %d\n", sum);
+  abort();

Space before (
Otherwise OK /Marcus

Re: [patch] Fix ICE on function [not] returning variable size

2015-06-02 Thread Eric Botcazou

> Yes, in general in GCC.  In this case we could still remove the lhs (not
> sure if it is worth the trouble or even easy to detect on GIMPLE)?

At least for the case I described in Ada, that's already done since the 
function effectively returns a pointer type.

-- 
Eric Botcazou

Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II

2015-06-02 Thread Bin.Cheng

On Tue, Jun 2, 2015 at 4:40 PM, Richard Biener
 wrote:
> On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng  wrote:
>> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
>>  wrote:
>>> On Tue, May 26, 2015 at 1:04 PM, Bin Cheng  wrote:
>>>> Hi,
>>>> My first part patch improving how we handle overflow in scev is posted at
>>>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
>>>> second part patch.
>>>>
>>>> This patch does below improvements:
>>>>   1) Computes and records control iv for each loop's exit edge.  This
>>>> provides a way to compute overflow information in loop niter and use it in
>>>> different customers.  I think it's useful, especially with option
>>>> -funsafe-loop-optimizations.
>>>>   2) Improve chrec_convert by adding new interface
>>>> loop_exits_before_overflow.  It checks if a converted IV overflows w.r.t. its
>>>> type and loop using overflow information of loop's control iv.  This
>>>> basically propagates no-overflow information from control iv to ivs
>>>> converted from control iv.  Moreover, we can further improve the logic by
>>>> using possible VRP information in the future.
>>>
>>> But 2) you already posted (and I have approved it but you didn't commit 
>>> yet?).
>>>
>>> Can you commit that approved patch and only send the parts I didn't approve
>>> yet?
>>>
>>> Thanks,
>>> Richard.
>>>
 With this patch, cases like scev-9.c and scev-10.c in patch can be handled
 now.  Cases reported in PR48052 can be vectorized too.
 Opinions?

 Thanks,
 bin


 2015-05-26  Bin Cheng  

 * cfgloop.h (struct control_iv): New.
 (struct loop): New field control_ivs.
 * tree-ssa-loop-niter.c : Include "stor-layout.h".
 (number_of_iterations_lt): Set no_overflow information.
 (number_of_iterations_exit): Init control iv in niter struct.
 (record_control_iv): New.
 (estimate_numbers_of_iterations_loop): Call record_control_iv.
 (loop_exits_before_overflow): New.  Interface factored out of
 scev_probably_wraps_p.
 (scev_probably_wraps_p): Factor loop niter related code into
 loop_exits_before_overflow.
 (free_numbers_of_iterations_estimates_loop): Free control ivs.
 * tree-ssa-loop-niter.h (free_loop_control_ivs): New.

 gcc/testsuite/ChangeLog
 2015-05-26  Bin Cheng  

 PR tree-optimization/48052
 * gcc.dg/tree-ssa/scev-8.c: New.
 * gcc.dg/tree-ssa/scev-9.c: New.
 * gcc.dg/tree-ssa/scev-10.c: New.
 * gcc.dg/vect/pr48052.c: New.

>>
>> Hi Richard,
>> I think you replied the review message of this patch to another
>> thread.  Sorry for being mis-leading.  S I copied and answered your
>> review comments in this thread thus we can continue here.
>>
 +   /* Done proving if this is a no-overflow control IV.  */
 +   if (operand_equal_p (base, civ->base, 0))
 + return true;
>>>
>>> so all control IVs are no-overflow?
>>
>> This patch only records known no-overflow control ivs in loop
>> structure, so it depends on loop niter analyzer.  For now, this patch
>> (and the existing code) sets no-overflow flag only for two cases.  One
>> is the step-1 case, the other one is in assert_no_overflow_lt.
>> As a matter of fact, we may want to set no_overflow flag for all cases
>> with -funsafe-loop-optimizations in the future.  In that case, we will
>> assume all control IVs are no-overflow.
>>
>>>
 +base <= UPPER_BOUND (type) - step  ;;step > 0
 +base >= LOWER_BOUND (type) - step  ;;step < 0
 +
 +  by using loop's initial condition.  */
 +   stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
 +   if (operand_equal_p (stepped, civ->base, 0))
 + {
 +   if (tree_int_cst_sign_bit (step))
 + {
 +   code = LT_EXPR;
 +   extreme = lower_bound_in_type (type, type);
 + }
 +   else
 + {
 +   code = GT_EXPR;
 +   extreme = upper_bound_in_type (type, type);
 + }
 +   extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
 +   e = fold_build2 (code, boolean_type_node, base, extreme);
>>>
>>> looks like you are actually computing base + step <= UPPER_BOUND (type)
>>> so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
>>> are constants why not compute it the way the comment specifies it?  
>>> Comparison
>>> codes also don't match the comment and we try to prove the condition is 
>>> false.
>> I tried to prove the condition are satisfied by proving the reverse
>> condition ("base > UPPER_BOUND (type) - step") is false here.  In the
>> updated patch, I revised comments to reflect that logic.  Is it ok?
>>
>>>
>>> This also reminds me of eve

Re: [patch] Small tweak to gimplifier

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 11:08 AM, Eric Botcazou  wrote:
> Hi,
>
> there is a trick in the gimplifier to alleviate some annoying effects of the
> gimplification on the debug info (for -O0 when var-tracking is not enabled but
> it's done unconditionally):
>
>   /* Try to alleviate the effects of the gimplification creating artificial
>  temporaries (see for example is_gimple_reg_rhs) on the debug info.  */
>   if (!gimplify_ctxp->into_ssa
>   && TREE_CODE (*from_p) == VAR_DECL
>   && DECL_IGNORED_P (*from_p)
>   && DECL_P (*to_p)
>   && !DECL_IGNORED_P (*to_p))
> {
>   if (!DECL_NAME (*from_p) && DECL_NAME (*to_p))
> DECL_NAME (*from_p)
>   = create_tmp_var_name (IDENTIFIER_POINTER (DECL_NAME (*to_p)));
>   DECL_HAS_DEBUG_EXPR_P (*from_p) = 1;
>   SET_DECL_DEBUG_EXPR (*from_p, *to_p);
>}
>
> We have a large Ada testcase for which this creates a GC hazard in LTO mode
> because this creates a DECL_DEBUG_EXPR link between a parent and a nested
> function, which badly interacts with a DECL_VALUE_EXPR link created during
> unnesting (it's the known GC issue with circular references through hash
> tables in GC memory).
>
> Therefore the attached patch restricts the trick to local variables only.
> That's transparent, modulo a benign tweak to gcc.dg/vect/vec-scal-opt.c
> because of the DECL_NAME change.
>
> Tested on x86_64-suse-linux, OK for the mainline?

Ok.

Thanks,
Richard.

>
> 2015-06-02  Eric Botcazou  
>
> * gimplify.c (gimplify_modify_expr): Do not create a DECL_DEBUG_EXPR 
> if
> the target doesn't belong to the current function.
>
>
> 2015-06-02  Eric Botcazou  
>
> * gcc.dg/vect/vec-scal-opt.c: Adjust regexp.
>
>
> --
> Eric Botcazou

Re: [PATCH GCC]Improve how we handle overflow in scev by using overflow information computed for control iv in loop niter, part II

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 11:30 AM, Bin.Cheng  wrote:
> On Tue, Jun 2, 2015 at 4:40 PM, Richard Biener
>  wrote:
>> On Tue, Jun 2, 2015 at 4:55 AM, Bin.Cheng  wrote:
>>> On Mon, Jun 1, 2015 at 6:45 PM, Richard Biener
>>>  wrote:
 On Tue, May 26, 2015 at 1:04 PM, Bin Cheng  wrote:
> Hi,
> My first part patch improving how we handle overflow in scev is posted at
> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01795.html .  Here comes the
> second part patch.
>
> This patch does below improvements:
>   1) Computes and records control iv for each loop's exit edge.  This
> provides a way to compute overflow information in loop niter and use it in
> different customers.  It think it's useful, especially with option
> -funsafe-loop-optimizers.
>   2) Improve chrec_convert by adding new interface
> loop_exits_before_overflow.  It checks if a converted IV overflows wrto 
> its
> type and loop using overflow information of loop's control iv.  This
> basically propagates no-overflow information from control iv to ivs
> converted from control iv.  Moreover, we can further improve the logic by
> using possible VRP information in the future.

 But 2) you already posted (and I have approved it but you didn't commit 
 yet?).

 Can you commit that approved patch and only send the parts I didn't approve
 yet?

 Thanks,
 Richard.

> With this patch, cases like scev-9.c and scev-10.c in patch can be handled
> now.  Cases reported in PR48052 can be vectorized too.
> Opinions?
>
> Thanks,
> bin
>
>
> 2015-05-26  Bin Cheng  
>
> * cfgloop.h (struct control_iv): New.
> (struct loop): New field control_ivs.
> * tree-ssa-loop-niter.c : Include "stor-layout.h".
> (number_of_iterations_lt): Set no_overflow information.
> (number_of_iterations_exit): Init control iv in niter struct.
> (record_control_iv): New.
> (estimate_numbers_of_iterations_loop): Call record_control_iv.
> (loop_exits_before_overflow): New.  Interface factored out of
> scev_probably_wraps_p.
> (scev_probably_wraps_p): Factor loop niter related code into
> loop_exits_before_overflow.
> (free_numbers_of_iterations_estimates_loop): Free control ivs.
> * tree-ssa-loop-niter.h (free_loop_control_ivs): New.
>
> gcc/testsuite/ChangeLog
> 2015-05-26  Bin Cheng  
>
> PR tree-optimization/48052
> * gcc.dg/tree-ssa/scev-8.c: New.
> * gcc.dg/tree-ssa/scev-9.c: New.
> * gcc.dg/tree-ssa/scev-10.c: New.
> * gcc.dg/vect/pr48052.c: New.
>
>>>
>>> Hi Richard,
>>> I think you replied the review message of this patch to another
>>> thread.  Sorry for being mis-leading.  S I copied and answered your
>>> review comments in this thread thus we can continue here.
>>>
> +   /* Done proving if this is a no-overflow control IV.  */
> +   if (operand_equal_p (base, civ->base, 0))
> + return true;

 so all control IVs are no-overflow?
>>>
>>> This patch only records known no-overflow control ivs in loop
>>> structure, so it depends on loop niter analyzer.  For now, this patch
>>> (and the existing code) sets no-overflow flag only for two cases.  One
>>> is the step-1 case, the other one is in assert_no_overflow_lt.
>>> As a matter of fact, we may want to set no_overflow flag for all cases
>>> with -funsafe-loop-optimizations in the future.  In that case, we will
>>> assume all control IVs are no-overflow.
>>>

> +base <= UPPER_BOUND (type) - step  ;;step > 0
> +base >= LOWER_BOUND (type) - step  ;;step < 0
> +
> +  by using loop's initial condition.  */
> +   stepped = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, step);
> +   if (operand_equal_p (stepped, civ->base, 0))
> + {
> +   if (tree_int_cst_sign_bit (step))
> + {
> +   code = LT_EXPR;
> +   extreme = lower_bound_in_type (type, type);
> + }
> +   else
> + {
> +   code = GT_EXPR;
> +   extreme = upper_bound_in_type (type, type);
> + }
> +   extreme = fold_build2 (MINUS_EXPR, type, extreme, step);
> +   e = fold_build2 (code, boolean_type_node, base, extreme);

 looks like you are actually computing base + step <= UPPER_BOUND (type)
 so maybe adjust the comment.  But as both step and UPPER_BOUND  (type)
 are constants why not compute it the way the comment specifies it?  
 Comparison
 codes also don't match the comment and we try to prove the condition is 
 false.
>>> I tried to prove the condition are satisfied by proving the reverse
>>> condi

Re: [PATCH, AARCH64] make stdarg functions work with +nofp

2015-06-02 Thread James Greenhalgh

On Sat, May 23, 2015 at 12:24:00AM +0100, Jim Wilson wrote:
> The compiler currently ICEs when compiling a stdarg function with
> +nofp, as reported in PR 66258.
> 
> The aarch64.md file disables FP instructions using TARGET_FLOAT, which
> supports both -mgeneral-regs-only and +nofp.  But there is code in
> aarch64.c that checks TARGET_GENERAL_REGS_ONLY.  This results in FP
> instructions when using +nofp.  The aarch64.c code needs to use
> TARGET_FLOAT instead like the md file already does.
> 
> I can't meaningfully test this with a bootstrap, since the patch has
> no effect unless I bootstrap with +nofp, and that will fail as gcc
> contains floating point code.
> 
> The testsuite already has multiple stdarg tests, so there is no need
> for another one.
> 
> I tested this by verifying I get the same results for some simple
> testcases with and without the patch, with and without using
> -mgeneral-regs-only and -mcpu=cortex-a53+nofp.

This patch doesn't quite look right to me. The cases you change seem
like they should be (TARGET_FLOAT || TARGET_SIMD), rather than just
TARGET_FLOAT. In an armv8-a+nofp environment, you still have access to the
SIMD registers and instructions (reading between the lines on the bug
report, this is almost certainly not what is intended in Grub!).

Digging a bit deeper into the ICE in PR66258, it seems to me that
the problematic pattern is "*movti_aarch64":

  (define_insn "*movti_aarch64"
[(set (match_operand:TI 0
   "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
  (match_operand:TI 1
   "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
"(register_operand (operands[0], TImode)
  || aarch64_reg_or_zero (operands[1], TImode))"
"@
 #
 #
 #
 orr\\t%0.16b, %1.16b, %1.16b
 ldp\\t%0, %H0, %1
 stp\\t%1, %H1, %0
 stp\\txzr, xzr, %0
 ldr\\t%q0, %1
 str\\t%q1, %0"
[(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
 load2,store2,store2,f_loadd,f_stored")
 (set_attr "length" "8,8,8,4,4,4,4,4,4")
 (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
 (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
  )

Note that the split alternatives are going to unconditionally create
and emit insns which require TARGET_FLOAT, but the fp attribute is
not set on those alternatives. Many of the TI mode split patterns
could be expressed as a umov from vector registers to general purpose
registers for a TARGET_SIMD target.

Have you investigated this approach at all?

Thanks,
James

[gomp4, committed] Add checks for num_gangs(32) in goacc kernels tests

2015-06-02 Thread Tom de Vries


Hi,

this patch adds an extra check in the goacc kernels testcases.

Committed to gomp-4_0-branch.

Thanks,
- Tom
Add checks for num_gangs(32) in goacc kernels tests

2015-05-28  Tom de Vries  

	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Add check
	for num_gangs (32).
	* c-c++-common/goacc/kernels-loop-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Same.
	* c-c++-common/goacc/kernels-loop-data-update.c: Same.
	* c-c++-common/goacc/kernels-loop-data.c: Same.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Same.
	* c-c++-common/goacc/kernels-loop.c: Same.
	* c-c++-common/goacc/kernels-one-counter-var.c: Same.
	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Same.

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
index 06cdb29..77b013a 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-vars-function-scope.c
@@ -51,5 +51,7 @@ main (void)
 /* Check that the loop has been split off into a function.  */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.0" 1 "optimized" } } */
 
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 1 "parloops_oacc_kernels" } } */
+
 /* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
index ab69fe9..ef2314d 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c
@@ -58,5 +58,7 @@ main (void)
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
 
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } } */
+
 /* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
index fc6da6e..8f2c905 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c
@@ -67,5 +67,7 @@ main (void)
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
 
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } } */
+
 /* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
index 945359f..2b6a8fd 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c
@@ -65,5 +65,7 @@ main (void)
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
 
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } } */
+
 /* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
index 2d6e5e3..4d7b1ba 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c
@@ -62,5 +62,7 @@ main (void)
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.1" 1 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "(?n);; Function .*main._omp_fn.2" 1 "optimized" } } */
 
+/* { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } } */
+
 /* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */
 /* { dg-final { cleanup-tree-dump "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
index c7aaf0f..3fe9b18 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c
@@ -62,5 +62,7 @@ main (void)
 /* {

Re: [PATCH][AARCH64]Use shl for vec_shr_ rtx pattern.

2015-06-02 Thread Marcus Shawcroft

On 2 June 2015 at 10:30, Renlin Li  wrote:
> Is it Okay for me to backport it to gcc-5?

OK provided the patch applies cleanly and there are no regressions. /Marcus

>
> Regards,
> Renlin Li
>
>
> On 30/04/15 16:21, Marcus Shawcroft wrote:
>>
>> On 30 April 2015 at 12:55, Renlin Li  wrote:
>>
>>> 2015-04-30  Renlin Li  
>>>
>>>  * config/aarch64/aarch64-simd.md (vec_shr): Defined as an unspec.
>>>  * config/aarch64/iterators.md (unspec): Add UNSPEC_VEC_SHR.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2015-04-30  Renlin Li  
>>>
>>>  * gcc.target/aarch64/vect-reduc-or_1.c: New.
>>
>> +  __builtin_printf("Failed %d\n", sum);
>> +  abort();
>>
>> Space before (
>> Otherwise OK /Marcus
>>
>

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-06-02 Thread Andreas Schwab

In file included from ../../gcc/stmt.c:78:0:
../../gcc/alloc-pool.h: In function 'void expand_sjlj_dispatch_table(rtx, vec)':
../../gcc/alloc-pool.h:303:4: error: 'case_node_pool.pool_allocator:\
:m_block_size' may be used uninitialized in this function [-Werror=maybe-uninit\
ialized]
block = XNEWVEC (char, m_block_size);
^
../../gcc/stmt.c:1339:33: note: 'case_node_pool.pool_allocator::m_bl\
ock_size' was declared here
   pool_allocator case_node_pool ("struct sjlj_case pool",
 ^

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

Re: [gomp4] Worker-single predication

2015-06-02 Thread Thomas Schwinge

Hi Bernd!

On Mon, 1 Jun 2015 17:58:51 +0200, Bernd Schmidt  
wrote:
> This extends the previous vector-single support to also handle 
> worker-level predication.

Thanks!

> --- gcc/omp-low.c (revision 223974)
> +++ gcc/omp-low.c (working copy)

> +/* Allocate storage for OpenACC worker threads in CTX to broadcast
> +   condition results.  CLAUSES are the clauses of the parallel construct.  */
> +
> +static void
> +oacc_alloc_broadcast_storage (omp_context *ctx, tree clauses)
> +{
> +  tree vull_type_node = build_qualified_type (long_long_unsigned_type_node,
> +TYPE_QUAL_VOLATILE);
> +  tree uptr_node = build_pointer_type (vull_type_node);
> +
> +  tree clause = find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS);
> +  tree host_count = integer_one_node;
> +  if (clause)
> +host_count = OMP_CLAUSE_NUM_WORKERS_EXPR (clause);
> +
> +  ctx->worker_sync_elt
> += alloc_var_ganglocal (NULL_TREE, long_long_unsigned_type_node,
> +ctx, TYPE_SIZE_UNIT (long_long_unsigned_type_node));
> +}

> @@ -12503,7 +12673,10 @@ lower_omp_target (gimple_stmt_iterator *
>orlist = NULL;
>  
>if (is_gimple_omp_oacc (stmt))
> -oacc_init_count_vars (ctx, clauses);
> +{
> +  oacc_init_count_vars (ctx, clauses);
> +  oacc_alloc_broadcast_storage (ctx, clauses);
> +}
>  
>if (has_reduction)
>  {

This introduced a few warnings/errors that result in bootstrap failures.  The
fix below is not yet committed -- you probably meant to do something with
host_count?

commit f0a9e05f8b16436767e4f899580b8f3e753d228f
Author: Thomas Schwinge 
Date:   Tue Jun 2 12:07:35 2015 +0200

Resolve bootstrap failures

... introduced in r223989.

[...]/source-gcc/gcc/omp-low.c: In function 'void 
oacc_alloc_broadcast_storage(omp_context*, tree)':
[...]/source-gcc/gcc/omp-low.c:9412:8: error: unused variable 
'uptr_node' [-Werror=unused-variable]
   tree uptr_node = build_pointer_type (vull_type_node);
^
[...]/source-gcc/gcc/omp-low.c:9415:8: error: variable 'host_count' set 
but not used [-Werror=unused-but-set-variable]
   tree host_count = integer_one_node;
^
[...]/source-gcc/gcc/omp-low.c: In function 'void 
predicate_bb(basic_block, omp_region*, int)':
[...]/source-gcc/gcc/omp-low.c:10462:16: error: unused variable 
'adjust_bb_ptr' [-Werror=unused-variable]
   basic_block *adjust_bb_ptr = NULL;
^
---
 gcc/omp-low.c |   14 ++
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git gcc/omp-low.c gcc/omp-low.c
index 01e5d4b..ace9e24 100644
--- gcc/omp-low.c
+++ gcc/omp-low.c
@@ -9405,17 +9405,8 @@ expand_omp_atomic (struct omp_region *region)
condition results.  CLAUSES are the clauses of the parallel construct.  */
 
 static void
-oacc_alloc_broadcast_storage (omp_context *ctx, tree clauses)
+oacc_alloc_broadcast_storage (omp_context *ctx)
 {
-  tree vull_type_node = build_qualified_type (long_long_unsigned_type_node,
-  TYPE_QUAL_VOLATILE);
-  tree uptr_node = build_pointer_type (vull_type_node);
-
-  tree clause = find_omp_clause (clauses, OMP_CLAUSE_NUM_WORKERS);
-  tree host_count = integer_one_node;
-  if (clause)
-host_count = OMP_CLAUSE_NUM_WORKERS_EXPR (clause);
-
   ctx->worker_sync_elt
 = alloc_var_ganglocal (NULL_TREE, long_long_unsigned_type_node,
   ctx, TYPE_SIZE_UNIT (long_long_unsigned_type_node));
@@ -10459,7 +10450,6 @@ predicate_bb (basic_block bb, struct omp_region 
*parent, int mask)
 return;
 
   basic_block skip_dest_bb = NULL;
-  basic_block *adjust_bb_ptr = NULL;
 
   if (gimple_code (stmt) == GIMPLE_OMP_ENTRY_END)
 return;
@@ -12675,7 +12665,7 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)
   if (is_gimple_omp_oacc (stmt))
 {
   oacc_init_count_vars (ctx, clauses);
-  oacc_alloc_broadcast_storage (ctx, clauses);
+  oacc_alloc_broadcast_storage (ctx);
 }
 
   if (has_reduction)


Grüße,
 Thomas


signature.asc
Description: PGP signature

[PATCH]: Neaten df_scan_start_dump dump

2015-06-02 Thread Uros Bizjak

Hello!

2015-06-02  Uros Bizjak  

* df-scan.c (df_scan_start_dump): Add space between regno and regname.

Tested on x86_64-linux-gnu, committed to mainline SVN.

Uros.

Index: df-scan.c
===
--- df-scan.c   (revision 224011)
+++ df-scan.c   (working copy)
@@ -365,7 +365,7 @@ df_scan_start_dump (FILE *file ATTRIBUTE_UNUSED)
  fprintf (file, ";;  regs ever live \t");
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
if (df_regs_ever_live_p (i))
-  fprintf (file, " %d[%s]", i, reg_names[i]);
+  fprintf (file, " %d [%s]", i, reg_names[i]);
  fprintf (file, "\n;;  ref usage \t");

  for (i = 0; i < (int)df->regs_inited; i++)

Re: [PATCH, AARCH64] make stdarg functions work with +nofp

2015-06-02 Thread Kyrill Tkachov


Hi James, Jim,

On 02/06/15 10:42, James Greenhalgh wrote:

On Sat, May 23, 2015 at 12:24:00AM +0100, Jim Wilson wrote:

The compiler currently ICEs when compiling a stdarg function with
+nofp, as reported in PR 66258.

The aarch64.md file disables FP instructions using TARGET_FLOAT, which
supports both -mgeneral-regs-only and +nofp.  But there is code in
aarch64.c that checks TARGET_GENERAL_REGS_ONLY.  This results in FP
instructions when using +nofp.  The aarch64.c code needs to use
TARGET_FLOAT instead like the md file already does.

I can't meaningfully test this with a bootstrap, since the patch has
no effect unless I bootstrap with +nofp, and that will fail as gcc
contains floating point code.

The testsuite already has multiple stdarg tests, so there is no need
for another one.

I tested this by verifying I get the same results for some simple
testcases with and without the patch, with and without using
-mgeneral-regs-only and -mcpu=cortex-a53+nofp.

This patch doesn't quite look right to me. The cases you change seem
like they should be (TARGET_FLOAT || TARGET_SIMD), rather than just
TARGET_FLOAT. In an armv8-a+nofp environment, you still have access to the
SIMD registers and instructions (reading between the lines on the bug
report, this is almost certainly not what is intended in Grub!).


I don't think that's quite right. TARGET_SIMD *always* implies TARGET_FP as it 
is a superset of that functionality.

For the precise relations of them look in aarch64-option-extensions.def.
Turning off fp with +nofp (or -mgeneral-regs-only) always turns off simd, while
turning off simd with +nosimd doesn't turn off fp.

Cheers,

Kyrill



Digging a bit deeper in to the ICE in PR66258, it seems to me that
the problematic pattern is "*movti_aarch64":

   (define_insn "*movti_aarch64"
 [(set (match_operand:TI 0
   "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
  (match_operand:TI 1
   "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
 "(register_operand (operands[0], TImode)
   || aarch64_reg_or_zero (operands[1], TImode))"
 "@
  #
  #
  #
  orr\\t%0.16b, %1.16b, %1.16b
  ldp\\t%0, %H0, %1
  stp\\t%1, %H1, %0
  stp\\txzr, xzr, %0
  ldr\\t%q0, %1
  str\\t%q1, %0"
 [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
 load2,store2,store2,f_loadd,f_stored")
  (set_attr "length" "8,8,8,4,4,4,4,4,4")
  (set_attr "simd" "*,*,*,yes,*,*,*,*,*")
  (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
   )

Note that the split alternatives are going to unconditionally create
and emit insns which require TARGET_FLOAT, but the fp attribute is
not set on those alternatives. Many of the TI mode split patterns
could be expressed as a umov from vector registers to general purpose
registers for a TARGET_SIMD target.

Have you investigated this approach at all?

Thanks,
James

Re: [PATCH, AARCH64] make stdarg functions work with +nofp

2015-06-02 Thread James Greenhalgh

On Tue, Jun 02, 2015 at 11:38:29AM +0100, Kyrill Tkachov wrote:
> Hi James, Jim,
> 
> On 02/06/15 10:42, James Greenhalgh wrote:
> > On Sat, May 23, 2015 at 12:24:00AM +0100, Jim Wilson wrote:
> >> The compiler currently ICEs when compiling a stdarg function with
> >> +nofp, as reported in PR 66258.
> >>
> >> The aarch64.md file disables FP instructions using TARGET_FLOAT, which
> >> supports both -mgeneral-regs-only and +nofp.  But there is code in
> >> aarch64.c that checks TARGET_GENERAL_REGS_ONLY.  This results in FP
> >> instructions when using +nofp.  The aarch64.c code needs to use
> >> TARGET_FLOAT instead like the md file already does.
> >>
> >> I can't meaningfully test this with a bootstrap, since the patch has
> >> no effect unless I bootstrap with +nofp, and that will fail as gcc
> >> contains floating point code.
> >>
> >> The testsuite already has multiple stdarg tests, so there is no need
> >> for another one.
> >>
> >> I tested this by verifying I get the same results for some simple
> >> testcases with and without the patch, with and without using
> >> -mgeneral-regs-only and -mcpu=cortex-a53+nofp.
> > This patch doesn't quite look right to me. The cases you change seem
> > like they should be (TARGET_FLOAT || TARGET_SIMD), rather than just
> > TARGET_FLOAT. In an armv8-a+nofp environment, you still have access to the
> > SIMD registers and instructions (reading between the lines on the bug
> > report, this is almost certainly not what is intended in Grub!).
> 
> I don't think that's quite right. TARGET_SIMD *always* implies TARGET_FP as
> it is a superset of that functionality.
> 
> For the precise relations of them look in aarch64-option-extensions.def.
> Turning off fp with +nofp (or -mgeneral-regs-only) always turns off simd
> while turning off simd with +nosimd doesn't turn off fp.

Right, understood. I had incorrectly thought we had kept them as fully
distinct options to disable parts of the ARMv8-A instruction set.

In which case, Jim, your patch is OK. Sorry for my initial confusion.

I think I saw a patch kicking around internally to improve the
documentation in this area, Alan - was that yours?

Thanks,
James

Re: C++ PATCH for c++/44282 (ia32 calling convention attributes and mangling)

2015-06-02 Thread Uros Bizjak

On Tue, Jun 2, 2015 at 9:26 AM, Uros Bizjak  wrote:
> Hello!
>
>>PR c++/44282
>>gcc/cp/
>>* mangle.c (attr_strcmp): New.
>>(write_CV_qualifiers_for_type): Also write out attributes that
>>affect type identity.
>>(write_type): Strip all attributes after writing qualifiers.
>>libiberty/
>>* cp-demangle.c (cplus_demangle_type): Handle arguments to vendor
>>extended qualifier.
>
> +++ b/gcc/testsuite/g++.dg/abi/mangle-regparm.C
> @@ -0,0 +1,29 @@
> +// { dg-do run { target i?86-*-* } }
>
> This should read:
>
> +// { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } }
>
> The test wasn't actually run on x86_64-linux target. I'll commit the
> following patch after regtest:

Unfortunately, something is wrong with the testcase itself:

FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
produce executable
FAIL: g++.dg/abi/mangle-regparm.C  -std=c++11 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++11 compilation failed to
produce executable
FAIL: g++.dg/abi/mangle-regparm.C  -std=c++14 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++14 compilation failed to
produce executable

with the following error:

FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
Excess errors:
/usr/bin/ld: /tmp/ccU8LttY.o: bad reloc symbol index (0x5b550 >= 0x12)
for offset 0x6c in section `.text'
/tmp/ccU8LttY.o: could not read symbols: Bad value

WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
produce executable
g++.dg/abi/mangle-regparm.C  -std=c++98 : output file does not exist
UNRESOLVED: g++.dg/abi/mangle-regparm.C  -std=c++98  scan-assembler
_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_

Uros.

Re: [gomp4] Worker-single predication

2015-06-02 Thread Thomas Schwinge

Hi Bernd!

On Mon, 1 Jun 2015 17:58:51 +0200, Bernd Schmidt  
wrote:
> This extends the previous vector-single support to also handle 
> worker-level predication. [...]

This causes the following regressions; would you please have a look?

[-PASS:-]{+FAIL: g++.dg/goacc/template.C  -std=c++11 (internal compiler 
error)+}
{+FAIL:+} g++.dg/goacc/template.C  -std=c++11 (test for excess errors)
[-PASS:-]{+FAIL: g++.dg/goacc/template.C  -std=c++14 (internal compiler 
error)+}
{+FAIL:+} g++.dg/goacc/template.C  -std=c++14 (test for excess errors)
[-PASS:-]{+FAIL: g++.dg/goacc/template.C  -std=c++98 (internal compiler 
error)+}
{+FAIL:+} g++.dg/goacc/template.C  -std=c++98 (test for excess errors)

spawn [...]/build-gcc/gcc/testsuite/g++3/../../xg++ 
-B[...]/build-gcc/gcc/testsuite/g++3/../../ 
[...]/source-gcc/gcc/testsuite/g++.dg/goacc/template.C 
-fno-diagnostics-show-caret -fdiagnostics-color=never -nostdinc++ 
-I[...]/build-gcc/x86_64-unknown-linux-gnu/libstdc++-v3/include/x86_64-unknown-linux-gnu
 -I[...]/build-gcc/x86_64-unknown-linux-gnu/libstdc++-v3/include 
-I[...]/source-gcc/libstdc++-v3/libsupc++ 
-I[...]/source-gcc/libstdc++-v3/include/backward 
-I[...]/source-gcc/libstdc++-v3/testsuite/util -fmessage-length=0 -std=c++98 
-fopenacc -S -o template.s
[...]/source-gcc/gcc/testsuite/g++.dg/goacc/template.C: In function 'T 
oacc_parallel_copy(T) [with T = int]':
[...]/source-gcc/gcc/testsuite/g++.dg/goacc/template.C:90:10: internal 
compiler error: in as_a, at is-a.h:192
0xbb8a60 as_a
[...]/source-gcc/gcc/is-a.h:192
0xbb8a60 expand_omp_atomic
[...]/source-gcc/gcc/omp-low.c:9349
0xbb8a60 expand_omp
[...]/source-gcc/gcc/omp-low.c:10068
0xbb6d98 expand_omp
[...]/source-gcc/gcc/omp-low.c:10029
0xbbddd7 execute_expand_omp
[...]/source-gcc/gcc/omp-low.c:10659

PASS: gfortran.dg/goacc/parallel-tree.f95   -O   scan-tree-dump-times 
original "
PASS: gfortran.dg/goacc/parallel-tree.f95   -O   scan-tree-dump-times 
original "private\\(v\\)" 1
PASS: gfortran.dg/goacc/parallel-tree.f95   -O   scan-tree-dump-times 
original "reduction\\(max:q\\)" 1
PASS: gfortran.dg/goacc/parallel-tree.f95   -O   scan-tree-dump-times 
original "vector_length" 1
[-PASS:-]{+FAIL: gfortran.dg/goacc/parallel-tree.f95   -O  (internal 
compiler error)+}
{+FAIL:+} gfortran.dg/goacc/parallel-tree.f95   -O  (test for excess errors)

spawn [...]/build-gcc/gcc/testsuite/gfortran4/../../gfortran 
-B[...]/build-gcc/gcc/testsuite/gfortran4/../../ 
-B[...]/build-gcc/x86_64-unknown-linux-gnu/./libgfortran/ 
[...]/source-gcc/gcc/testsuite/gfortran.dg/goacc/parallel-tree.f95 
-fno-diagnostics-show-caret -fdiagnostics-color=never -O -fopenacc 
-fdump-tree-original -S -o parallel-tree.s
[...]/source-gcc/gcc/testsuite/gfortran.dg/goacc/parallel-tree.f95:14:0: 
internal compiler error: in as_a, at is-a.h:192
0xa57220 as_a
[...]/source-gcc/gcc/is-a.h:192
0xa57220 expand_omp_atomic
[...]/source-gcc/gcc/omp-low.c:9349
0xa57220 expand_omp
[...]/source-gcc/gcc/omp-low.c:10068
0xa8 expand_omp
[...]/source-gcc/gcc/omp-low.c:10029
0xa5c597 execute_expand_omp
[...]/source-gcc/gcc/omp-low.c:10659


Grüße,
 Thomas


signature.asc
Description: PGP signature

Re: [PATCH 15/16] gcc: Use libgas and libld within the driver

2015-06-02 Thread Trevor Saunders

On Tue, Jun 02, 2015 at 10:31:53AM +0200, Richard Biener wrote:
> On Mon, Jun 1, 2015 at 11:04 PM, David Malcolm  wrote:
> > This patch adds the ability for gcc to be configured with:
> >   --with-embedded-as
> >   --with-embedded-ld
> > If so, invocations of "as" and "ld" are detected in the gcc driver, and
> > special-cased by invoking these in-process as shared libraries.  This is
> > intended for use by libgccjit, when the driver itself is in-process
> > within libgccjit, eliminating fork/exec and dynamic-library resolution.
> >
> > Doing so dramatically speeds up jit.dg/test-benchmark.c.
> >
> > The patch generalizes the named items support within timevar.c, so that
> > as well as having bucket of named "jit client items" we also have
> > buckets for "as" and for "ld" so that they can account for time spent
> > within them.
> >
> > One remaining hack here, appending CFLAGS-gcc.o with a hardcoded include
> > path, but I didn't want that to hold up posting what I've got so far.
> 
> Hum, so why not go further and embed as into cc1/cc1plus, etc.?  That is,
> make the as invocation parts of the driver accessible to the compiler
> in some way.

It certainly seems like something worth looking into, but I
wouldn't want to hold binutils changes up on that.

> This way we can eventually add a more efficient way of funneling the compiler
> assembler output to libas (well, I suppose you at least use -pipe...).

yeah, and eventually maybe dump a whole bunch of text formatting code.  I
wonder how much faster just doing this makes things though.

Trev

> 
> Richard.
> 
> > gcc/ChangeLog:
> > * configure.ac: Add --with-embedded-as and --with-embedded-ld.
> > * gcc.c: Include libgas.h and libld.h.
> > (class ctimershim): New.
> > (ctimershim::impl_push): New.
> > (ctimershim::impl_pop): New.
> > (run_embedded_as): New.
> > (run_embedded_ld): New.
> > (enum known_command): New.
> > (get_known_command): New.
> > (tv_id_for_known_command): New.
> > (maybe_run_embedded_command): New.
> > (execute): Invoke get_known_command and
> > maybe_run_embedded_command, potentially avoiding the need to call
> > into pex.
> > * timevar.c (timer::named_items::print): Add "name" param rather
> > than hardcoding "Client items".
> > (timer::timer): Initialize "m_has_named_items"; replace
> > "m_jit_client_items" with "m_named_items" array.
> > (timer::~timer): Likewise.
> > (timer::push_client_item): Rename to...
> > (timer::push_named_item): ...this and add "dict" param,
> > generalizing to support an array of dicts of named items.
> > (timer::pop_client_item): Rename to...
> > (timer::pop_named_item): ...this, generalizing to support
> > an array of dicts of named items.
> > (timer::print): Print JIT client items first (if any), then
> > GCC timevar items, then embedded as items (if any), then embedded
> > ld items (if any).
> > * timevar.def (TV_DRIVER_EMBEDDED_AS): New.
> > (TV_DRIVER_EMBEDDED_LD): New.
> > * timevar.h (timer::item_dict): New enum.
> > (timer::push_client_item): Rename to...
> > (timer::push_named_item): ...this, adding "dict" param.
> > (timer::pop_client_item): Rename to...
> > (timer::pop_named_item):  ...this, adding "dict" param.
> > (timer::get_item_dict): New.
> > (timer::m_jit_client_items): Drop this field in favor of...
> > (timer::m_named_items): ...this array.
> > (timer::m_has_named_items): New.
> >
> > gcc/jit/ChangeLog:
> > * Make-lang.in (LIBGCCJIT_FILENAME): Add EXTRA_GCC_LIBS to link.
> > * libgccjit.c (gcc_jit_timer_push): Replace call to
> > timer->push_client_item with timer->push_named_item.
> > (gcc_jit_timer_pop): Likewise for pop.
> > * notes.txt: Indicate that as/ld could be embedded.
> > ---
> >  gcc/Makefile.in  |   3 +
> >  gcc/configure.ac |  25 ++
> >  gcc/gcc.c| 214 
> > ---
> >  gcc/jit/Make-lang.in |   2 +-
> >  gcc/jit/libgccjit.c  |   5 +-
> >  gcc/jit/notes.txt|   4 +-
> >  gcc/timevar.c|  56 ++
> >  gcc/timevar.def  |   2 +
> >  gcc/timevar.h|  33 +++-
> >  9 files changed, 308 insertions(+), 36 deletions(-)
> >
> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> > index 2388975..9061933 100644
> > --- a/gcc/Makefile.in
> > +++ b/gcc/Makefile.in
> > @@ -1993,6 +1993,9 @@ DRIVER_DEFINES = \
> >
> >  CFLAGS-gcc.o += $(DRIVER_DEFINES)
> >
> > +# FIXME
> > +CFLAGS-gcc.o += 
> > -I/home/david/coding/gcc-python/binutils-gdb-libraries/install/include
> > +
> >  specs.h : s-specs ; @true
> >  s-specs : Makefile
> > lsf="$(lang_specs_files)"; for f in $$lsf; do \
> > diff --git a/gcc/configure.ac b/gcc/configure.ac
>

[patch] Document resolved DRs in libstdc++ manual

2015-06-02 Thread Jonathan Wakely


This replaces our local copies of the LWG issues lists with the latest
revisions (from http://www.open-std.org/jtc1/sc22/wg21/docs/lwg93.zip)
and updates
https://gcc.gnu.org/onlinedocs/libstdc++/manual/bugs.html#manual.intro.status.bugs.iso
to document some of the DRs we've implemented in the last few years.

Committed to trunk.

commit 2861a8c80a4b06c95296328d5d6ae10147565bb1
Author: Jonathan Wakely 
Date:   Fri May 29 16:09:05 2015 +0100

	* doc/html/ext/lwg-active.html: Update to R93.
	* doc/html/ext/lwg-closed.html: Likewise.
	* doc/html/ext/lwg-defects.html: Likewise.
	* doc/html/manual/*: Regenerate.
	* doc/xml/manual/intro.xml: Document status of several DRs.

diff --git a/libstdc++-v3/doc/xml/manual/intro.xml b/libstdc++-v3/doc/xml/manual/intro.xml
index f0c66a2..e6039eb 100644
--- a/libstdc++-v3/doc/xml/manual/intro.xml
+++ b/libstdc++-v3/doc/xml/manual/intro.xml
@@ -644,6 +644,12 @@ requirements of the license of GCC.
 Implement the resolution, basically cast less.
 
 
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#445">445:
+	iterator_traits::reference unspecified for some iterator categories
+
+Change istreambuf_iterator::reference in C++11 mode.
+
+
 http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#453">453:
 	basic_stringbuf::seekoff need not always fail for an empty stream
 
@@ -663,6 +669,12 @@ requirements of the license of GCC.
 	at(const key_type&) to std::map.
 
 
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#467">467:
+	char_traits::lt(), compare(), and memcmp()
+
+Change lt.
+
+
 http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#508">508:
 	Bad parameters for ranlux64_base_01
 
@@ -820,6 +832,182 @@ requirements of the license of GCC.
 Correctly decay types in signature of std::async.
 
 
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2049">2049:
+	is_destructible underspecified
+
+Handle non-object types.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2056">2056:
+	future_errc enums start with value 0 (invalid value for broken_promise)
+
+Reorder enumerators.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2059">2059:
+	C++0x ambiguity problem with map::erase
+
+Add additional overloads.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2067">2067:
+	packaged_task should have deleted copy c'tor with const parameter
+
+Fix signatures.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2101">2101:
+	Some transformation types can produce impossible types
+
+Use the referenceable type concept.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2106">2106:
+	move_iterator wrapping iterators returning prvalues
+
+Change the reference type.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2118">2118:
+	unique_ptr for array does not support cv qualification conversion of actual argument
+
+Adjust constraints to allow safe conversions.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2132">2132:
+	std::function ambiguity
+
+Constrain the constructor to only accept callable types.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2141">2141:
+	common_type trait produces reference types
+
+Use decay for the result type.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2144">2144:
+	Missing noexcept specification in type_index
+
+Add noexcept
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2145">2145:
+	error_category default constructor
+
+Declare a public constexpr constructor.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2162">2162:
+	allocator_traits::max_size missing noexcept
+
+Add noexcept.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2187">2187:
+	vector is missing emplace and emplace_back member functions
+
+Add emplace and emplace_back member functions.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2196">2196:
+	Specification of is_*[copy/move]_[constructible/assignable] unclear for non-referencable types
+
+Use the referenceable type concept.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2313">2313:
+	tuple_size should always derive from integral_constant
+
+Update definitions of the partial specializations for const and volatile types.
+
+
+http://www.w3.org/1999/xlink"; xlink:href="../ext/lwg-defects.html#2329">2329:
+   regex_match()/regex_search() with match_results should forbid temporary strings
+
+

[PATCH 1/2] Memory statistics enhancement.

2015-06-02 Thread mliska

Hi.

The following two patches improve the memory statistics infrastructure.  The
first one ports the pool allocator to the new infrastructure, and the second
one aligns the columns of the reports properly.

Both bootstrap on x86_64-linux-pc and survive regression tests.

Ready for trunk?
Thank you,
Martin

Port pool-allocator memory stats to a new infrastructure.

gcc/ChangeLog:

2015-06-02  Martin Liska  

* alloc-pool.c (allocate_pool_descriptor): Remove.
(struct pool_output_info): Likewise.
(print_alloc_pool_statistics): Likewise.
(dump_alloc_pool_statistics): Likewise.
* alloc-pool.h (struct pool_usage): New struct.
(pool_allocator::initialize): Change usage of memory statistics
to a new interface.
(pool_allocator::release): Likewise.
(pool_allocator::allocate): Likewise.
(pool_allocator::remove): Likewise.
* mem-stats-traits.h (enum mem_alloc_origin): Add new enum value
for a pool allocator.
* mem-stats.h (struct mem_location): Add new ctor.
(struct mem_usage): Add counter for number of
instances.
(mem_alloc_description::register_descriptor): New overload of
the function.
---
 gcc/alloc-pool.c   |  60 +
 gcc/alloc-pool.h   | 102 +++--
 gcc/mem-stats-traits.h |   3 +-
 gcc/mem-stats.h|  69 ++---
 4 files changed, 132 insertions(+), 102 deletions(-)

diff --git a/gcc/alloc-pool.c b/gcc/alloc-pool.c
index e9fdc86..601c2b7 100644
--- a/gcc/alloc-pool.c
+++ b/gcc/alloc-pool.c
@@ -26,70 +26,14 @@ along with GCC; see the file COPYING3.  If not see
 #include "hash-map.h"
 
 ALLOC_POOL_ID_TYPE last_id;
-
-/* Hashtable mapping alloc_pool names to descriptors.  */
-hash_map *alloc_pool_hash;
-
-struct alloc_pool_descriptor *
-allocate_pool_descriptor (const char *name)
-{
-  if (!alloc_pool_hash)
-alloc_pool_hash = new hash_map (10,
-false,
-false);
-
-  return &alloc_pool_hash->get_or_insert (name);
-}
-
-/* Output per-alloc_pool statistics.  */
-
-/* Used to accumulate statistics about alloc_pool sizes.  */
-struct pool_output_info
-{
-  unsigned long total_created;
-  unsigned long total_allocated;
-};
-
-/* Called via hash_map.traverse.  Output alloc_pool descriptor pointed out by
-   SLOT and update statistics.  */
-bool
-print_alloc_pool_statistics (const char *const &name,
-const alloc_pool_descriptor &d,
-struct pool_output_info *i)
-{
-  if (d.allocated)
-{
-  fprintf (stderr,
-  "%-22s %6d %10lu %10lu(%10lu) %10lu(%10lu) %10lu(%10lu)\n",
-  name, d.elt_size, d.created, d.allocated,
-  d.allocated / d.elt_size, d.peak, d.peak / d.elt_size,
-  d.current, d.current / d.elt_size);
-  i->total_allocated += d.allocated;
-  i->total_created += d.created;
-}
-  return 1;
-}
+mem_alloc_description pool_allocator_usage;
 
 /* Output per-alloc_pool memory usage statistics.  */
 void
 dump_alloc_pool_statistics (void)
 {
-  struct pool_output_info info;
-
   if (! GATHER_STATISTICS)
 return;
 
-  if (!alloc_pool_hash)
-return;
-
-  fprintf (stderr, "\nAlloc-pool Kind Elt size  Pools  Allocated 
(elts)Peak (elts)Leak (elts)\n");
-  fprintf (stderr, 
"--\n");
-  info.total_created = 0;
-  info.total_allocated = 0;
-  alloc_pool_hash->traverse  (&info);
-  fprintf (stderr, 
"--\n");
-  fprintf (stderr, "%-22s   %7lu %10lu\n",
-  "Total", info.total_created, info.total_allocated);
-  fprintf (stderr, 
"--\n");
+  pool_allocator_usage.dump (ALLOC_POOL);
 }
diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index 96a1342..a1727ce 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h
@@ -26,6 +26,71 @@ extern void dump_alloc_pool_statistics (void);
 
 typedef unsigned long ALLOC_POOL_ID_TYPE;
 
+/* Pool allocator memory usage.  */
+struct pool_usage: public mem_usage
+{
+  /* Default contructor.  */
+  pool_usage (): m_element_size (0), m_pool_name ("") {}
+  /* Constructor.  */
+  pool_usage (size_t allocated, size_t times, size_t peak,
+ size_t instances, size_t element_size,
+ const char *pool_name)
+: mem_usage (allocated, times, peak, instances),
+  m_element_size (element_size),
+  m_pool_name (pool_name) {}
+
+  /* Sum the usage with SECOND usage.  */
+  pool_usage operator+ (const pool_usage &second)
+  {
+return p

[PATCH 2/2] Fix memory report layout at various places.

2015-06-02 Thread mliska

gcc/ChangeLog:

2015-06-02  Martin Liska  

* alloc-pool.h (struct pool_usage): Correct space padding.
* ggc-page.c (ggc_print_statistics): Align columns in a report.
* mem-stats.h (struct mem_usage): Add argument to print_dash_line.
* tree.c (dump_tree_statistics): Align columns in a report.
---
 gcc/alloc-pool.h |  2 +-
 gcc/ggc-page.c   | 46 +-
 gcc/mem-stats.h  |  4 ++--
 gcc/tree.c   | 19 ---
 4 files changed, 40 insertions(+), 31 deletions(-)

diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index a1727ce..6f1bbed 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h
@@ -78,7 +78,7 @@ struct pool_usage: public mem_usage
   inline void dump_footer ()
   {
 print_dash_line ();
-fprintf (stderr, "%s%75li%10li\n", "Total", (long)m_instances,
+fprintf (stderr, "%s%82li%10li\n", "Total", (long)m_instances,
 (long)m_allocated);
 print_dash_line ();
   }
diff --git a/gcc/ggc-page.c b/gcc/ggc-page.c
index 158156a..7fceeba 100644
--- a/gcc/ggc-page.c
+++ b/gcc/ggc-page.c
@@ -2268,7 +2268,7 @@ ggc_print_statistics (void)
  allocation.  */
   fprintf (stderr,
"Memory still allocated at the end of the compilation process\n");
-  fprintf (stderr, "%-5s %10s  %10s  %10s\n",
+  fprintf (stderr, "%-8s %10s  %10s  %10s\n",
   "Size", "Allocated", "Used", "Overhead");
   for (i = 0; i < NUM_ORDERS; ++i)
 {
@@ -2295,47 +2295,51 @@ ggc_print_statistics (void)
  overhead += (sizeof (page_entry) - sizeof (long)
   + BITMAP_SIZE (OBJECTS_IN_PAGE (p) + 1));
}
-  fprintf (stderr, "%-5lu %10lu%c %10lu%c %10lu%c\n",
+  fprintf (stderr, "%-8lu %10lu%c %10lu%c %10lu%c\n",
   (unsigned long) OBJECT_SIZE (i),
   SCALE (allocated), STAT_LABEL (allocated),
   SCALE (in_use), STAT_LABEL (in_use),
   SCALE (overhead), STAT_LABEL (overhead));
   total_overhead += overhead;
 }
-  fprintf (stderr, "%-5s %10lu%c %10lu%c %10lu%c\n", "Total",
+  fprintf (stderr, "%-8s %10lu%c %10lu%c %10lu%c\n", "Total",
   SCALE (G.bytes_mapped), STAT_LABEL (G.bytes_mapped),
   SCALE (G.allocated), STAT_LABEL (G.allocated),
   SCALE (total_overhead), STAT_LABEL (total_overhead));
 
   if (GATHER_STATISTICS)
 {
-  fprintf (stderr, "\nTotal allocations and overheads during the 
compilation process\n");
+  fprintf (stderr, "\nTotal allocations and overheads during "
+  "the compilation process\n");
 
-  fprintf (stderr, "Total Overhead:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_overhead);
-  fprintf (stderr, "Total Allocated:   %10" 
HOST_LONG_LONG_FORMAT "d\n",
+  fprintf (stderr, "Total Overhead:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_overhead);
+  fprintf (stderr, "Total Allocated: %10"
+  HOST_LONG_LONG_FORMAT "d\n",
   G.stats.total_allocated);
 
-  fprintf (stderr, "Total Overhead  under  32B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_overhead_under32);
-  fprintf (stderr, "Total Allocated under  32B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_allocated_under32);
-  fprintf (stderr, "Total Overhead  under  64B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_overhead_under64);
-  fprintf (stderr, "Total Allocated under  64B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_allocated_under64);
-  fprintf (stderr, "Total Overhead  under 128B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_overhead_under128);
-  fprintf (stderr, "Total Allocated under 128B:%10" 
HOST_LONG_LONG_FORMAT "d\n",
-  G.stats.total_allocated_under128);
+  fprintf (stderr, "Total Overhead  under  32B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_overhead_under32);
+  fprintf (stderr, "Total Allocated under  32B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_allocated_under32);
+  fprintf (stderr, "Total Overhead  under  64B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_overhead_under64);
+  fprintf (stderr, "Total Allocated under  64B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_allocated_under64);
+  fprintf (stderr, "Total Overhead  under 128B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_overhead_under128);
+  fprintf (stderr, "Total Allocated under 128B:  %10"
+  HOST_LONG_LONG_FORMAT "d\n", G.stats.total_allocated_under128);
 
   for (i = 0; i < NUM_ORDERS; i++)
if (G.stats.total_allocated_per_order[i])

[patch] Update ABI versioning info in libstdc++ manual

2015-06-02 Thread Jonathan Wakely


Committed to trunk, will also commit to the gcc-5-branch too.
commit 0d264889b71aa29214f5414b707f8ba5735abe20
Author: Jonathan Wakely 
Date:   Tue Jun 2 12:17:09 2015 +0100

	* doc/xml/manual/abi.xml: Document versioning for 5.1.0 release.
	* doc/html/manual/*: Regenerate.

diff --git a/libstdc++-v3/doc/xml/manual/abi.xml b/libstdc++-v3/doc/xml/manual/abi.xml
index 86c591d..a2ed57b 100644
--- a/libstdc++-v3/doc/xml/manual/abi.xml
+++ b/libstdc++-v3/doc/xml/manual/abi.xml
@@ -264,6 +264,7 @@ compatible.
 GCC 4.8.0: libstdc++.so.6.0.18
 GCC 4.8.3: libstdc++.so.6.0.19
 GCC 4.9.0: libstdc++.so.6.0.20
+GCC 5.1.0: libstdc++.so.6.0.21
 
 
   Note 1: Error should be libstdc++.so.3.0.3.
@@ -329,6 +330,7 @@ compatible.
 GCC 4.8.0: GLIBCXX_3.4.18, CXXABI_1.3.7
 GCC 4.8.3: GLIBCXX_3.4.19, CXXABI_1.3.7
 GCC 4.9.0: GLIBCXX_3.4.20, CXXABI_1.3.8
+GCC 5.1.0: GLIBCXX_3.4.21, CXXABI_1.3.9
 
 
 
@@ -539,6 +541,7 @@ compatible.
 GCC 3.3.3: include/c++/3.3.3
 GCC 3.4.x: include/c++/3.4.x
 GCC 4.x.y: include/c++/4.x.y
+GCC 5.x.0: include/c++/5.x.0

Re: C++ PATCH for c++/44282 (ia32 calling convention attributes and mangling)

2015-06-02 Thread Uros Bizjak

On Tue, Jun 2, 2015 at 1:03 PM, Uros Bizjak  wrote:
> On Tue, Jun 2, 2015 at 9:26 AM, Uros Bizjak  wrote:
>> Hello!
>>
>>>PR c++/44282
>>>gcc/cp/
>>>* mangle.c (attr_strcmp): New.
>>>(write_CV_qualifiers_for_type): Also write out attributes that
>>>affect type identity.
>>>(write_type): Strip all attributes after writing qualifiers.
>>>libiberty/
>>>* cp-demangle.c (cplus_demangle_type): Handle arguments to vendor
>>>extended qualifier.
>>
>> +++ b/gcc/testsuite/g++.dg/abi/mangle-regparm.C
>> @@ -0,0 +1,29 @@
>> +// { dg-do run { target i?86-*-* } }
>>
>> This should read:
>>
>> +// { dg-do run { target { { i?86-*-* x86_64-*-* } && ia32 } } }
>>
>> The test wasn't actually run on x86_64-linux target. I'll commit the
>> following patch after regtest:
>
> Unfortunately, something is wrong with the testcase itself:
>
> FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
> WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
> produce executable
> FAIL: g++.dg/abi/mangle-regparm.C  -std=c++11 (test for excess errors)
> WARNING: g++.dg/abi/mangle-regparm.C  -std=c++11 compilation failed to
> produce executable
> FAIL: g++.dg/abi/mangle-regparm.C  -std=c++14 (test for excess errors)
> WARNING: g++.dg/abi/mangle-regparm.C  -std=c++14 compilation failed to
> produce executable
>
> with the following error:
>
> FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
> Excess errors:
> /usr/bin/ld: /tmp/ccU8LttY.o: bad reloc symbol index (0x5b550 >= 0x12)
> for offset 0x6c in section `.text'
> /tmp/ccU8LttY.o: could not read symbols: Bad value
>
> WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
> produce executable
> g++.dg/abi/mangle-regparm.C  -std=c++98 : output file does not exist
> UNRESOLVED: g++.dg/abi/mangle-regparm.C  -std=c++98  scan-assembler
> _Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_

FYI, everything links and runs OK if .set is removed from the
following part of the asm:

.LFE4:
   .size   _Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_,
.-_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_
   .weak   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_
   .set
_Z18IndirectExternCallIPFviiEiEvT_T0_S3_,_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_
   .section
.text._Z18IndirectExternCallIPFviiEiEvT_T0_S3_,"axG",@progbits,_Z18IndirectExternCallIPFviiEiEvT_T0_S3_,comdat
   .weak   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_
   .type   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_, @function
_Z18IndirectExternCallIPFviiEiEvT_T0_S3_:

Uros.

[PR libgomp/65742, PR middle-end/66332] XFAIL acc_on_device compile-time evaluation (was: acc_on_device for device_type_host_nonshm)

2015-06-02 Thread Thomas Schwinge

Hi!

On Thu, 7 May 2015 19:32:26 +0100, Julian Brown  wrote:
> On Fri, 17 Apr 2015 15:16:19 +0200
> Jakub Jelinek  wrote:
> 
> > On Tue, Apr 14, 2015 at 05:43:26PM +0200, Thomas Schwinge wrote:
> > > Really, acc_on_device is implemented as a compiler builtin (which
> > > is just disabled for a few libgomp test cases, in order to test the
> > > acc_on_device library function in libgomp), and I never understood
> > > why the "fallback" implementation in libgomp (cited above) should
> > > be doing anything different from the GCC builtin.  Is the "problem"
> > > actually, that some
> > 
> > The question is if the builtin expansion isn't wrong, at least as
> > long as the host_nonshm device is meant to be supported.  The
> > #ifdef ACCEL_COMPILER
> > case is easier, at least as long as ACCEL_COMPILER compiled code is
> > not meant to be able to offload to other devices (or host again), but
> > the non-ACCEL_COMPILER case means the code is either on the host, or
> > host_nonshm, or e.g. with Intel MIC you could have some shared
> > > library be compiled by the host compiler, but then actually linked
> > into the MIC offloaded path.  In all those cases, I think it is just
> > the library that can determine the return value.
> > 
> > E.g. OpenMP omp_is_initial_device function is also only implemented
> > in the library, perhaps at some point I could expand it for #ifdef
> > ACCEL_COMPILER as builtin, but not for the host code, at least not
> > due to the host-nonshm plugin.
> 
> Here's a new version of the patch that doesn't use the open-coded
> expansion for acc_on_device for the host compiler at all. This means
> that the host and the host_nonshm plugin should DTRT without any
> special compiler options (which have thus been removed from the libgomp
> tests that set them or refer to them).
> 
> So now, for the host, acc_on_device returns:
> 
> acc_on_device (acc_device_none): true
> acc_on_device (acc_device_host): true
> otherwise: false
> 
> When the host_nonshm plugin is active, acc_on_device returns:
> 
> acc_on_device (acc_device_host_nonshm): true (except when "host
> fallback" is in effect, i.e. because of a false "if" clause).
> acc_on_device (acc_device_not_host): likewise.
> otherwise: false
> 
> In particular, the host_nonshm plugin doesn't consider itself to be
> running code "on the host".

> PR libgomp/65742
> 
> gcc/
> * builtins.c (expand_builtin_acc_on_device): Don't use open-coded
> sequence for !ACCEL_COMPILER.

As reported in , this caused the following
regression (C testing):

PASS: c-c++-common/goacc/acc_on_device-2.c (test for excess errors)
[-PASS:-]{+FAIL:+} c-c++-common/goacc/acc_on_device-2.c scan-rtl-dump-times 
expand "\\(call [^\\n]* acc_on_device" 0

Committed to trunk in r224028:

commit 1c2d9da9cee04516151b3894edb107e3cdf2c8b9
Author: tschwinge 
Date:   Tue Jun 2 11:48:56 2015 +

[PR libgomp/65742, PR middle-end/66332] XFAIL acc_on_device compile-time 
evaluation

The OpenACC 2.0a specification mandates differently, but we currently do 
get a
library call in the host code.

PR libgomp/65742
PR middle-end/66332

gcc/testsuite/
* c-c++-common/goacc/acc_on_device-2.c: XFAIL for C, too.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@224028 
138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/testsuite/ChangeLog|  6 ++
 gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c | 10 +-
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git gcc/testsuite/ChangeLog gcc/testsuite/ChangeLog
index d91cf7c..3f51b10 100644
--- gcc/testsuite/ChangeLog
+++ gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2015-06-02  Thomas Schwinge  
+
+   PR libgomp/65742
+   PR middle-end/66332
+   * c-c++-common/goacc/acc_on_device-2.c: XFAIL for C, too.
+
 2015-06-02  Uros Bizjak  
 
* g++.dg/abi/mangle-regparm.C (dg-do): Fix x86_32 target selector.
diff --git gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c 
gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
index 8db0a66..6e3d292 100644
--- gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
+++ gcc/testsuite/c-c++-common/goacc/acc_on_device-2.c
@@ -20,9 +20,17 @@ f (void)
 }
 
 /* With -fopenacc, we're expecting the builtin to be expanded, so no calls.
+
TODO: in C++, even under extern "C", the use of enum for acc_device_t
perturbs expansion as a builtin, which expects an int parameter.  It's fine
when changing acc_device_t to plain int, but that's not what we're doing in
.
-   { dg-final { scan-rtl-dump-times "\\\(call \[^\\n\]* acc_on_device" 0 
"expand" { xfail c++ } } } */
+
+   TODO: given that we can't expand acc_on_device in
+   gcc/builtins.c:expand_builtin_acc_on_device for in the !ACCEL_COMPILER case
+   (because at that point we don't know whether we're acc_device_host or
+   acc_device_host_nonshm), we'll (erroneously) get a library call in the host
+   code.
+
+

[PATCH] Remove stray cleanup-tree-dump

2015-06-02 Thread Richard Biener


Committed.  Seems to cause half of the vectorizer tests to be dropped
and test-summary breaking for me.

Richard.

2015-06-02  Richard Biener  

* gcc.dg/vect/vect-outer-simd-1.c: Remove stray cleanup-tree-dump.

Index: gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c
===
--- gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c   (revision 224013)
+++ gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c   (working copy)
@@ -72,4 +72,3 @@ int main()
   return 0;
 } 
 /* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
-/* { dg-final { cleanup-tree-dump "vect" } } */

Re: [PATCH 15/16] gcc: Use libgas and libld within the driver

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 1:06 PM, Trevor Saunders  wrote:
> On Tue, Jun 02, 2015 at 10:31:53AM +0200, Richard Biener wrote:
>> On Mon, Jun 1, 2015 at 11:04 PM, David Malcolm  wrote:
>> > This patch adds the ability for gcc to be configured with:
>> >   --with-embedded-as
>> >   --with-embedded-ld
>> > If so, invocations of "as" and "ld" are detected in the gcc driver, and
>> > special-cased by invoking these in-process as shared libraries.  This is
>> > intended for use by libgccjit, when the driver itself is in-process
>> > within libgccjit, eliminating fork/exec and dynamic-library resolution.
>> >
>> > Doing so dramatically speeds up jit.dg/test-benchmark.c.
>> >
>> > The patch generalizes the named items support within timevar.c, so that
>> > as well as having bucket of named "jit client items" we also have
>> > buckets for "as" and for "ld" so that they can account for time spent
>> > within them.
>> >
>> > One remaining hack here, appending CFLAGS-gcc.o with a hardcoded include
>> > path, but I didn't want that to hold up posting what I've got so far.
>>
>> Hum, so why not go further and embed as into cc1/cc1plus, etc.?  That is,
>> make the as invocation parts of the driver accessible to the compiler
>> in some way.
>
> It certainly seems like something worth looking into, but I certainly
> wouldn't want to hold binutils changes up on that.
>
>> This way we can eventually add a more efficient way of funneling the compiler
>> assembler output to libas (well, I suppose you at least use -pipe...).
>
> yeah, and eventually maybe dump a whole bunch of text formating code.  I
> wonder how much faster just doing this makes things though.

No idea.  Eventually just hooking up to libopcodes and feeding simple stmts
as binary blobs could work, keeping the textual interchange for the rest...

Richard.

> Trev
>
>>
>> Richard.
>>
>> > gcc/ChangeLog:
>> > * configure.ac: Add --with-embedded-as and --with-embedded-ld.
>> > * gcc.c: Include libgas.h and libld.h.
>> > (class ctimershim): New.
>> > (ctimershim::impl_push): New.
>> > (ctimershim::impl_pop): New.
>> > (run_embedded_as): New.
>> > (run_embedded_ld): New.
>> > (enum known_command): New.
>> > (get_known_command): New.
>> > (tv_id_for_known_command): New.
>> > (maybe_run_embedded_command): New.
>> > (execute): Invoke get_known_command and
>> > maybe_run_embedded_command, potentially avoiding the need to call
>> > into pex.
>> > * timevar.c (timer::named_items::print): Add "name" param rather
>> > than hardcoding "Client items".
>> > (timer::timer): Initialize "m_has_named_items"; replace
>> > "m_jit_client_items" with "m_named_items" array.
>> > (timer::~timer): Likewise.
>> > (timer::push_client_item): Rename to...
>> > (timer::push_named_item): ...this and add "dict" param,
>> > generalizing to support an array of dicts of named items.
>> > (timer::pop_client_item): Rename to...
>> > (timer::pop_named_item): ...this, generalizing to support
>> > an array of dicts of named items.
>> > (timer::print): Print JIT client items first (if any), then
>> > GCC timevar items, then embedded as items (if any), then embedded
>> > ld items (if any).
>> > * timevar.def (TV_DRIVER_EMBEDDED_AS): New.
>> > (TV_DRIVER_EMBEDDED_LD): New.
>> > * timevar.h (timer::item_dict): New enum.
>> > (timer::push_client_item): Rename to...
>> > (timer::push_named_item): ...this, adding "dict" param.
>> > (timer::pop_client_item): Rename to...
>> > (timer::pop_named_item):  ...this, adding "dict" param.
>> > (timer::get_item_dict): New.
>> > (timer::m_jit_client_items): Drop this field in favor of...
>> > (timer::m_named_items): ...this array.
>> > (timer::m_has_named_items): New.
>> >
>> > gcc/jit/ChangeLog:
>> > * Make-lang.in (LIBGCCJIT_FILENAME): Add EXTRA_GCC_LIBS to link.
>> > * libgccjit.c (gcc_jit_timer_push): Replace call to
>> > timer->push_client_item with timer->push_named_item.
>> > (gcc_jit_timer_pop): Likewise for pop.
>> > * notes.txt: Indicate that as/ld could be embedded.
>> > ---
>> >  gcc/Makefile.in  |   3 +
>> >  gcc/configure.ac |  25 ++
>> >  gcc/gcc.c| 214 
>> > ---
>> >  gcc/jit/Make-lang.in |   2 +-
>> >  gcc/jit/libgccjit.c  |   5 +-
>> >  gcc/jit/notes.txt|   4 +-
>> >  gcc/timevar.c|  56 ++
>> >  gcc/timevar.def  |   2 +
>> >  gcc/timevar.h|  33 +++-
>> >  9 files changed, 308 insertions(+), 36 deletions(-)
>> >
>> > diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> > index 2388975..9061933 100644
>> > --- a/gcc/Makefile.in
>> > +++ b/gcc/Makefile.in
>> > @@ -1993,6 +1993,9 @@ DRIVER_DEFINES = \
>> >
>>

Re: [PATCH] Fix PR65549, avoid force_decl_die in late compilation

2015-06-02 Thread Jason Merrill


OK.

Jason

Re: [patch] consolidate some includes into coretypes.h

2015-06-02 Thread Andrew MacLeod


On 06/02/2015 04:26 AM, Richard Biener wrote:

On Mon, Jun 1, 2015 at 11:02 PM, Andrew MacLeod  wrote:


Bootstraps from scratch on x86_64-unknown-linux-gnu with no new test
regressions.  I also built it on all the config-list.mk targets with no
additional compilation errors.

OK for trunk?

Generally the idea is sound (amend coretypes.h), but I don't like the
GCC_CONFIG_H guard, why does !GENERATOR_FILE not work?
Target files also use coretypes.h. In particular, libgcc includes it and 
does not have GENERATOR_FILE set.  Rather than checking for GCC_CONFIG_H 
we could check


#if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET)

I think that should work OK.

Furthermore I don't like the special-casing in rtl.h, instead have
coretypes.h contain sth like

#ifdef GENERATOR_FILE
... rtl.h special-case
#else
... GCC_CONFIG_H stuff
#endif

Thanks,
Richard.


This one is harder. I don't like the special case either, but you can't
really figure it out in coretypes.h.  The problem comes from some
generator files which compile rtl.c and a couple of other files, and
thus have GENERATOR_FILE set... These run after the initial set of
generators so insn-modes.h and friends have been created, and these
includes are now required.   The presence of rtl.h seems to be the
litmus test and if it occurs in the include chain after coretypes.h,
then we'll need these files.


I suppose you could just include those files in rtl.h directly without 
the guard...  it is probably the cleanest solution. Otherwise we'd 
either have to add a new identifying macro to a dozen generator files, 
or include these headers there, or some other such thing.


The following tweak to the 2 files addresses both issues.  How does that seem?

 Andrew

Index: coretypes.h
===
--- coretypes.h	(revision 223875)
+++ coretypes.h	(working copy)
@@ -299,4 +299,14 @@
 typedef unsigned char uchar;
 #endif
 
+/* Most host source files will require the following headers.  */
+#if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET)
+#include "machmode.h"
+#include "signop.h"
+#include "wide-int.h" 
+#include "double-int.h"
+#include "real.h"
+#include "fixed-value.h"
+#endif
+
 #endif /* coretypes.h */
Index: rtl.h
===
--- rtl.h	(revision 223875)
+++ rtl.h	(working copy)
@@ -20,15 +20,21 @@
 #ifndef GCC_RTL_H
 #define GCC_RTL_H
 
+/* coretypes.h normally includes these header files, but does not for generator
+   files.  This file is included by some late running generator files which 
+   also requires them, so always include them here.  */
+#include "machmode.h" 
+#include "signop.h"
+#include "wide-int.h"
+#include "double-int.h"
+#include "real.h"
+#include "fixed-value.h"
+
 #include "statistics.h"
-#include "machmode.h"
 #include "input.h"
-#include "real.h"
 #include "vec.h"
-#include "fixed-value.h"
 #include "alias.h"
 #include "hashtab.h"
-#include "wide-int.h"
 #include "flags.h"
 #include "is-a.h"

Re: [wwwdocs] Buildstat update for 4.9

2015-06-02 Thread Gerald Pfeifer

On Mon, 1 Jun 2015, Tom G. Christensen wrote:
> Testresults for 4.9.2:
>   s390-ibm-linux-gnu (new)

Applied as well, thanks, Tom!

Gerald

Re: [wwwdocs] Buildstat update for 4.8

2015-06-02 Thread Gerald Pfeifer

On Mon, 1 Jun 2015, Tom G. Christensen wrote:
> Latest results for 4.8.x

Applied, thanks Tom!

Gerald

Re: [PATCH, 6/8] Add pass_copy_prop in pass_oacc_kernels

2015-06-02 Thread Tom de Vries

On 22-04-15 09:42, Richard Biener wrote:

This patch adds pass_loop_ccp to pass group pass_oacc_kernels.
> > >
> > >We need this pass to simplify the loop body, and allow pass_parloops to 
detect
> > >that loop iterations are independent.
> > >

> >
> >As suggested here (https://gcc.gnu.org/ml/gcc-patches/2014-11/msg02993.html  
)
> >I've replaced the pass_ccp with pass_copyprop, which performs trivial 
constant
> >propagation in addition to copy propagation.
> >
> >Bootstrapped and reg-tested as before.
> >
> >OK for trunk?

I've recently wondered why we do copy propagation after LIM and I don't
remember.  Can you remind me?  Can you add testcases that fail before
this kind of patches and pass afterwards?

For attached test-case, we manage to parallelize with pass_copy_prop (but then 
run into an ICE):

...
PASS: c-c++-common/goacc/kernels-loop-reduction.c scan-tree-dump-not 
parloops_oacc_kernels "FAILED:"
PASS: c-c++-common/goacc/kernels-loop-reduction.c scan-tree-dump-times 
parloops_oacc_kernels "SUCCESS: may be parallelized" 1

FAIL: c-c++-common/goacc/kernels-loop-reduction.c (internal compiler error)
FAIL: c-c++-common/goacc/kernels-loop-reduction.c (test for excess errors)
...

Without pass_copy_prop we don't manage to parallelize:
...
FAIL: c-c++-common/goacc/kernels-loop-reduction.c scan-tree-dump-not 
parloops_oacc_kernels "FAILED:"
FAIL: c-c++-common/goacc/kernels-loop-reduction.c scan-tree-dump-times 
parloops_oacc_kernels "SUCCESS: may be parallelized" 1

PASS: c-c++-common/goacc/kernels-loop-reduction.c (test for excess errors)
...

In more detail, before pass_copy_prop, we have:
...
  :
  # D__lsm.14_3 = PHI 
  ...
  sum.3_39 = D__lsm.14_3;
  sum.4_40 = _37 + sum.3_39;
  D__lsm.14_9 = sum.4_40;
  ...
  if (ii_43 <= 524287)
goto ;
  else
goto ;

  :
  goto ;
...

And after pass_copy_prop, we have:
...
  :
  # D__lsm.14_3 = PHI 
  ...
  sum.4_40 = D__lsm.14_3 + _37;
  ...
  if (ii_43 <= 524287)
goto ;
  else
goto ;

  :
  goto ;
...

The testcase is not committed yet, because reductions are not handled yet (which 
explains the ICE).

Thanks,
- Tom
/* { dg-additional-options "-O2" } */
/* { dg-additional-options "-ftree-parallelize-loops=32" } */
/* { dg-additional-options "-fdump-tree-parloops_oacc_kernels-all" } */

#include <stdlib.h>

#define N (1024 * 512)
#define COUNTERTYPE unsigned int

/* Sum the N elements of A twice: once inside an OpenACC kernels region
   (accumulating into SUM) and once in a plain loop (accumulating into SUM2).
   Abort if the two totals differ; return 0 otherwise.  */
int
main (void)
{
  unsigned int *__restrict a;
  unsigned int sum = 0;
  unsigned int sum2 = 0;

  /* NOTE(review): the malloc result is used without a NULL check.  */
  a = (unsigned int *)malloc (N * sizeof (unsigned int));

  /* Fill the array with a[i] = 2 * i.  */
  for (COUNTERTYPE i = 0; i < N; i++)
a[i] = i * 2;

  /* The loop under test: a sum reduction over A inside the kernels region.
     SUM is listed in a copy clause and A[0:N] in a copyin clause.  */
#pragma acc kernels copy (sum) copyin (a[0:N])
  {
for (COUNTERTYPE ii = 0; ii < N; ii++)
  sum += a[ii];
  }

  /* Reference result, computed outside the kernels region.  */
  for (COUNTERTYPE i = 0; i < N; i++)
sum2 += a[i];

  /* Both totals use unsigned int arithmetic, so they must match exactly.  */
  if (sum != sum2)
  abort ();

  free (a);

  return 0;
}

/* Check that only one loop is analyzed, and that it can be parallelized.  */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops_oacc_kernels" } } */
/* { dg-final { scan-tree-dump-not "FAILED:" "parloops_oacc_kernels" } } */

/* { dg-final { cleanup-tree-dump "parloops_oacc_kernels" } } */

Re: C++ PATCH for c++/44282 (ia32 calling convention attributes and mangling)

2015-06-02 Thread Jason Merrill


On 06/02/2015 07:34 AM, Uros Bizjak wrote:

Unfortunately, something is wrong with the testcase itself:

FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
produce executable
FAIL: g++.dg/abi/mangle-regparm.C  -std=c++11 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++11 compilation failed to
produce executable
FAIL: g++.dg/abi/mangle-regparm.C  -std=c++14 (test for excess errors)
WARNING: g++.dg/abi/mangle-regparm.C  -std=c++14 compilation failed to
produce executable

with the following error:

FAIL: g++.dg/abi/mangle-regparm.C  -std=c++98 (test for excess errors)
Excess errors:
/usr/bin/ld: /tmp/ccU8LttY.o: bad reloc symbol index (0x5b550 >= 0x12)
for offset 0x6c in section `.text'
/tmp/ccU8LttY.o: could not read symbols: Bad value

WARNING: g++.dg/abi/mangle-regparm.C  -std=c++98 compilation failed to
produce executable
g++.dg/abi/mangle-regparm.C  -std=c++98 : output file does not exist
UNRESOLVED: g++.dg/abi/mangle-regparm.C  -std=c++98  scan-assembler
_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_


FYI, everything links and runs OK if .set is removed from the
following part of the asm:

.LFE4:
.size   
_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_,
.-_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_
.weak   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_
.set
_Z18IndirectExternCallIPFviiEiEvT_T0_S3_,_Z18IndirectExternCallIPU7stdcallU7regparmILi3EEFviiEiEvT_T0_S3_
.section
.text._Z18IndirectExternCallIPFviiEiEvT_T0_S3_,"axG",@progbits,_Z18IndirectExternCallIPFviiEiEvT_T0_S3_,comdat
.weak   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_
.type   _Z18IndirectExternCallIPFviiEiEvT_T0_S3_, @function
_Z18IndirectExternCallIPFviiEiEvT_T0_S3_:


Ugh, I thought I had dealt with that issue.  Looking...

Jason

[PINGv9][PATCH] ASan on unaligned accesses

2015-06-02 Thread Marat Zakirov


On 05/26/2015 05:03 PM, Marat Zakirov wrote:



On 05/20/2015 10:01 AM, Marat Zakirov wrote:



On 05/12/2015 02:16 PM, Marat Zakirov wrote:

On 04/07/2015 03:22 PM, Jakub Jelinek wrote:
How are the automatic misaligned variables different from say heap 
allocated ones, or global vars etc.? 
No difference you are right Jakub. Shadow memory initialization for 
heap values and globals of course also should be changed but it is a 
task for libsanitizer not ASan for which I am sending patch. Fix for 
libsanitizer to support unaligned heaps and globals will be 
committed by a separate patch.
Well, a RTL solution I've tried at http://gcc.gnu.org/PR22141, but 
it gave
mixed results, so either it needs more cost tuning when it is 
desirable and
when it is not, or perhaps better do that still on GIMPLE instead, 
together

with trying to optimize bitfield accesses and other cases of adjacent
location accesses.  But if we handle that on GIMPLE, it won't 
really affect

what asan RTL emitting code produces.

Jakub


I fixed the issue with 'movq' that you mentioned in a previous mail.

--Marat







gcc/ChangeLog:

2015-02-25  Marat Zakirov  

	* asan.c (asan_emit_stack_protection): Support for misalign accesses. 
	(asan_expand_check_ifn): Likewise. 
	* params.def: New option asan-catch-misaligned.
	* params.h: New param ASAN_CATCH_MISALIGNED.
	* doc/invoke.texi: New asan param description.

gcc/testsuite/ChangeLog:

2015-02-25  Marat Zakirov  

	* c-c++-common/asan/misalign-catch.c: New test.


diff --git a/gcc/asan.c b/gcc/asan.c
index 9e4a629..f9d052f 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1050,7 +1050,6 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
   rtx_code_label *lab;
   rtx_insn *insns;
   char buf[30];
-  unsigned char shadow_bytes[4];
   HOST_WIDE_INT base_offset = offsets[length - 1];
   HOST_WIDE_INT base_align_bias = 0, offset, prev_offset;
   HOST_WIDE_INT asan_frame_size = offsets[0] - base_offset;
@@ -1059,6 +1058,8 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
   unsigned char cur_shadow_byte = ASAN_STACK_MAGIC_LEFT;
   tree str_cst, decl, id;
   int use_after_return_class = -1;
+  bool misalign = (flag_sanitize & SANITIZE_KERNEL_ADDRESS)
+		  || ASAN_CATCH_MISALIGNED;
 
   if (shadow_ptr_types[0] == NULL_TREE)
 asan_init_shadow_ptr_types ();
@@ -1193,11 +1194,37 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
   if (STRICT_ALIGNMENT)
 set_mem_align (shadow_mem, (GET_MODE_ALIGNMENT (SImode)));
   prev_offset = base_offset;
+
+  vec shadow_mems;
+  vec shadow_bytes;
+
+  shadow_mems.create (0);
+  shadow_bytes.create (0);
+
   for (l = length; l; l -= 2)
 {
   if (l == 2)
 	cur_shadow_byte = ASAN_STACK_MAGIC_RIGHT;
   offset = offsets[l - 1];
+  if (l != length && misalign)
+	{
+	  HOST_WIDE_INT aoff
+	= base_offset + ((offset - base_offset)
+			 & ~(ASAN_RED_ZONE_SIZE - HOST_WIDE_INT_1))
+	  - ASAN_RED_ZONE_SIZE;
+	  if (aoff > prev_offset)
+	{
+	  shadow_mem = adjust_address (shadow_mem, VOIDmode,
+	   (aoff - prev_offset)
+	   >> ASAN_SHADOW_SHIFT);
+	  prev_offset = aoff;
+	  shadow_bytes.safe_push (0);
+	  shadow_bytes.safe_push (0);
+	  shadow_bytes.safe_push (0);
+	  shadow_bytes.safe_push (0);
+	  shadow_mems.safe_push (shadow_mem);
+	}
+	}
   if ((offset - base_offset) & (ASAN_RED_ZONE_SIZE - 1))
 	{
 	  int i;
@@ -1212,13 +1239,13 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
 	if (aoff < offset)
 	  {
 		if (aoff < offset - (1 << ASAN_SHADOW_SHIFT) + 1)
-		  shadow_bytes[i] = 0;
+		  shadow_bytes.safe_push (0);
 		else
-		  shadow_bytes[i] = offset - aoff;
+		  shadow_bytes.safe_push (offset - aoff);
 	  }
 	else
-	  shadow_bytes[i] = ASAN_STACK_MAGIC_PARTIAL;
-	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
+	  shadow_bytes.safe_push (ASAN_STACK_MAGIC_PARTIAL);
+	  shadow_mems.safe_push (shadow_mem);
 	  offset = aoff;
 	}
   while (offset <= offsets[l - 2] - ASAN_RED_ZONE_SIZE)
@@ -1227,12 +1254,21 @@ asan_emit_stack_protection (rtx base, rtx pbase, unsigned int alignb,
    (offset - prev_offset)
    >> ASAN_SHADOW_SHIFT);
 	  prev_offset = offset;
-	  memset (shadow_bytes, cur_shadow_byte, 4);
-	  emit_move_insn (shadow_mem, asan_shadow_cst (shadow_bytes));
+	  shadow_bytes.safe_push (cur_shadow_byte);
+	  shadow_bytes.safe_push (cur_shadow_byte);
+	  shadow_bytes.safe_push (cur_shadow_byte);
+	  shadow_bytes.safe_push (cur_shadow_byte);
+	  shadow_mems.safe_push (shadow_mem);
 	  offset += ASAN_RED_ZONE_SIZE;
 	}
   cur_shadow_byte = ASAN_STACK_MAGIC_MIDDLE;
 }
+  for (unsigned i = 0; misalign && i < shadow_bytes.length () - 1; i++)
+if (shadow_bytes[i] == 0 && shadow_bytes[i + 1] > 0)
+  shadow_bytes[i] = 8 + (shadow_bytes[i + 1] > 7 ? 0 : shadow_bytes[i + 1]);
+  for (unsigned i = 0; i < shadow_mems.length (); i+

Re: [wwwdocs] Buildstat update for 5.1

2015-06-02 Thread Gerald Pfeifer

On Mon, 1 Jun 2015, Tom G. Christensen wrote:
> Latest results for 5.1.x

Thank you, Tom.  This is online as well.

Gerald

Re: [patch] consolidate some includes into coretypes.h

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 2:34 PM, Andrew MacLeod  wrote:
> On 06/02/2015 04:26 AM, Richard Biener wrote:
>>
>> On Mon, Jun 1, 2015 at 11:02 PM, Andrew MacLeod 
>> wrote:
>>>
>>>
>>> Bootstraps from scratch on x86_64-unknown-linux-gnu with no new test
>>> regressions.  I also built it on all the config-list.mk targets with no
>>> additional compilation errors.
>>>
>>> OK for trunk?
>>
>> Generally the idea is sound (amend coretypes.h), but I don't like the
>> GCC_CONFIG_H guard, why does !GENERATOR_FILE not work?
>
> Target files also use coretypes.h. In particular, libgcc includes it and
> does not have GENERATOR_FILE set.  Rather than checking for GCC_CONFIG_H we
> could check
>
> #if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET)
>
> I think that should work OK.
>>
>> Furthermore I don't like the special-casing in rtl.h, instead have
>> coretypes.h contain sth like
>>
>> #ifdef GENERATOR_FILE
>> ... rtl.h special-case
>> #else
>> ... GCC_CONFIG_H stuff
>> #endif
>>
>> Thanks,
>> Richard.
>
>
> This one is harder. I don't like the special case either, but you can't
> really figure it out in coretypes.h.  The problem comes from some generator
> files which compile rtl.c and a couple of other files, and thus have
> GENERATOR_FILE set... These run after the initial set of generators so
> insn-modes.h and friends have been created, and these includes are now
> required.   The presence of rtl.h seems to be the litmus test and if it
> occurs in the include chain after coretypes.h, then we'll need these files.
>
> I suppose you could just include those files in rtl.h directly without the
> guard...  it is probably the cleanest solution. Otherwise we'd either have
> to add a new identifying macro to a dozen generator files, or include these
> headers there, or some other such thing.

Well, then include the requirements in the generator files instead?  It looks
backwards to add to the includes in rtl.h.

Richard.

> The following tweak to the 2 files address both issues.  how does that seem?
>
>  Andrew
>

Re: [PATCH, 4/8] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2015-06-02 Thread Tom de Vries


On 22-04-15 09:40, Richard Biener wrote:

On Tue, 21 Apr 2015, Thomas Schwinge wrote:


Hi!

On Tue, 25 Nov 2014 12:29:28 +0100, Tom de Vries  wrote:

On 15-11-14 18:21, Tom de Vries wrote:

On 15-11-14 13:14, Tom de Vries wrote:

I'm submitting a patch series with initial support for the oacc kernels
directive.

The patch series uses pass_parallelize_loops to implement parallelization of
loops in the oacc kernels region.

The patch series consists of these 8 patches:
...
  1  Expand oacc kernels after pass_build_ealias
  2  Add pass_oacc_kernels
  3  Add pass_ch_oacc_kernels to pass_oacc_kernels
  4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
  5  Add pass_loop_im to pass_oacc_kernels
  6  Add pass_ccp to pass_oacc_kernels
  7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
  8  Do simple omp lowering for no address taken var
...


This patch adds pass_tree_loop_init and pass_tree_loop_done to
pass_oacc_kernels.

Pass_parallelize_loops is run between these passes in the pass group
pass_tree_loop, since it requires loop information.  We do the same for
pass_oacc_kernels.



Updated for moving pass_oacc_kernels down past pass_fre in the pass list.

Bootstrapped and reg-tested as before.

OK for trunk?


Both passes should be basically no-ops.  Why not call
loop_optimizer_init/finalize from expand_omp_ssa instead?



The current pass list is:
...
  NEXT_PASS (pass_build_ealias);
  NEXT_PASS (pass_fre);
  /* Pass group that runs when there are oacc kernels in the
 function.  */
  NEXT_PASS (pass_oacc_kernels);
  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
  NEXT_PASS (pass_ch_oacc_kernels);
  NEXT_PASS (pass_fre);
  NEXT_PASS (pass_tree_loop_init);
  NEXT_PASS (pass_lim);
  NEXT_PASS (pass_copy_prop);
  NEXT_PASS (pass_scev_cprop);
  NEXT_PASS (pass_parallelize_loops_oacc_kernels);
  NEXT_PASS (pass_expand_omp_ssa);
  NEXT_PASS (pass_tree_loop_done);
  POP_INSERT_PASSES ()
  NEXT_PASS (pass_merge_phi);
  NEXT_PASS (pass_dse);
...

Do you want to call loop_optimizer_init from pass_lim and 
loop_optimizer_finalize from pass_expand_omp_ssa, or are things ok as they are?


Thanks,
- Tom


Committed to gomp-4_0-branch in r82:

commit cb95b4a1efcdb96c58cda986d53b20c3537c1ab7
Author: tschwinge 
Date:   Tue Apr 21 19:51:33 2015 +

 Add pass_tree_loop_{init,done} to pass_oacc_kernels

gcc/
* passes.def: Run pass_tree_loop_init and pass_tree_loop_done in pass
group pass_oacc_kernels.
* tree-ssa-loop.c (pass_tree_loop_init::clone)
(pass_tree_loop_done::clone): New function.

 git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@82 
138bc75d-0d04-0410-961f-82ee72b054a4
---
  gcc/ChangeLog.gomp  |5 +
  gcc/passes.def  |2 ++
  gcc/tree-ssa-loop.c |2 ++
  3 files changed, 9 insertions(+)

diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
index d00c5e0..1fb060f 100644
--- gcc/ChangeLog.gomp
+++ gcc/ChangeLog.gomp
@@ -1,5 +1,10 @@
  2015-04-21  Tom de Vries  

+   * passes.def: Run pass_tree_loop_init and pass_tree_loop_done in pass
+   group pass_oacc_kernels.
+   * tree-ssa-loop.c (pass_tree_loop_init::clone)
+   (pass_tree_loop_done::clone): New function.
+
* omp-low.c (loop_in_oacc_kernels_region_p): New function.
* omp-low.h (loop_in_oacc_kernels_region_p): Declare.
* passes.def: Add pass_ch_oacc_kernels to pass group pass_oacc_kernels.
diff --git gcc/passes.def gcc/passes.def
index 5cdbc87..83ae04e 100644
--- gcc/passes.def
+++ gcc/passes.def
@@ -91,7 +91,9 @@ along with GCC; see the file COPYING3.  If not see
  NEXT_PASS (pass_oacc_kernels);
  PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
  NEXT_PASS (pass_ch_oacc_kernels);
+ NEXT_PASS (pass_tree_loop_init);
  NEXT_PASS (pass_expand_omp_ssa);
+ NEXT_PASS (pass_tree_loop_done);
  POP_INSERT_PASSES ()
  NEXT_PASS (pass_merge_phi);
  NEXT_PASS (pass_cd_dce);
diff --git gcc/tree-ssa-loop.c gcc/tree-ssa-loop.c
index a041858..2a96a39 100644
--- gcc/tree-ssa-loop.c
+++ gcc/tree-ssa-loop.c
@@ -272,6 +272,7 @@ public:

/* opt_pass methods: */
virtual unsigned int execute (function *);
+  opt_pass * clone () { return new pass_tree_loop_init (m_ctxt); }

  }; // class pass_tree_loop_init

@@ -566,6 +567,7 @@ public:

/* opt_pass methods: */
virtual unsigned int execute (function *) { return tree_ssa_loop_done (); }
+  opt_pass * clone () { return new pass_tree_loop_done (m_ctxt); }

  }; // class pass_tree_loop_done



Grüße,
  Thomas

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-06-02 Thread Martin Liška

On 06/02/2015 11:48 AM, Andreas Schwab wrote:
> In file included from ../../gcc/stmt.c:78:0:
> ../../gcc/alloc-pool.h: In function 'void expand_sjlj_dispatch_table(rtx, 
> vec ree_node*>)':
> ../../gcc/alloc-pool.h:303:4: error: 
> 'case_node_pool.pool_allocator:\
> :m_block_size' may be used uninitialized in this function 
> [-Werror=maybe-uninit\
> ialized]
> block = XNEWVEC (char, m_block_size);
> ^
> ../../gcc/stmt.c:1339:33: note: 
> 'case_node_pool.pool_allocator::m_bl\
> ock_size' was declared here
>pool_allocator case_node_pool ("struct sjlj_case pool",
>  ^
> 
> Andreas.
> 

Hi.

This patch fixes the issue; it has been tested on x86_64-unknown-linux-pc and
bootstraps successfully.

Ready for trunk?
Thanks,
Martin
>From 57355c1e271accc3e35dd5df9d5393ee783d765b Mon Sep 17 00:00:00 2001
From: mliska 
Date: Tue, 2 Jun 2015 13:26:05 +0200
Subject: [PATCH] Pool allocator fallout: fix uninitialized class members.

gcc/ChangeLog:

2015-06-02  Martin Liska  

	* alloc-pool.h (pool_allocator::pool_allocator): Set implicit
	values to avoid -Wmaybe-uninitialized errors.
---
 gcc/alloc-pool.h | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index 96a1342..ddb2a91 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h
@@ -159,11 +159,11 @@ template 
 inline
 pool_allocator::pool_allocator (const char *name, size_t num,
    size_t extra_size, bool ignore_type_size):
-  m_name (name), m_elts_per_block (num), m_returned_free_list (NULL),
+  m_name (name), m_id (0), m_elts_per_block (num), m_returned_free_list (NULL),
   m_virgin_free_list (NULL), m_virgin_elts_remaining (0), m_elts_allocated (0),
   m_elts_free (0), m_blocks_allocated (0), m_block_list (NULL),
-  m_ignore_type_size (ignore_type_size), m_extra_size (extra_size),
-  m_initialized (false) {}
+  m_block_size (0), m_ignore_type_size (ignore_type_size),
+  m_extra_size (extra_size), m_initialized (false) {}
 
 /* Initialize a pool allocator.  */
 
@@ -215,7 +215,6 @@ pool_allocator::initialize ()
 
   m_id = last_id;
 #endif
-
 }
 
 /* Free all memory allocated for the given memory pool.  */
-- 
2.1.4

Re: [PATCH][Testsuite] Disable tests with dg-require-fork for simulated targets

2015-06-02 Thread Alan Lawrence


Christophe Lyon wrote:

On 18 May 2015 at 20:25, Mike Stump  wrote:

On May 18, 2015, at 8:01 AM, Alan Lawrence  wrote:

Simulators such as qemu report the presence of fork (it's in glibc) but 
generally do not support synchronization primitives between threads, so any 
tests using fork are unreliable.

Hum, I have a simulator (binutils/sim) that has fork.  All those tests pass for 
me. They seem to be reliable for me.  I didn’t do anything special as I recall. 
 ?


Thanks for having a look at this problem.
I thought about this a while ago, and was wondering whether the guard
shouldn't be "are we using qemu?". Indeed as Mike, other simulators
might support fork and threads quite well.


Indeed they might. Is there a reasonable way to detect qemu? Or some acceptable 
mechanism to look for it, or tell the testsuite we are running under qemu?


--Alan

Re: [PATCH 1/2] Memory statistics enhancement.

2015-06-02 Thread Jeff Law


On 06/01/2015 10:16 AM, mliska wrote:

Hi.

The following 2 patches improve the memory statistics infrastructure. The first
one ports the pool allocator to the new infrastructure, and the second one
aligns the report columns properly.

Both can bootstrap on x86_64-linux-pc and survive regression tests.

Ready for trunk?
Thank you,
Martin

Port pool-allocator memory stats to a new infrastructure.

gcc/ChangeLog:

2015-06-02  Martin Liska  

* alloc-pool.c (allocate_pool_descriptor): Remove.
(struct pool_output_info): Likewise.
(print_alloc_pool_statistics): Likewise.
(dump_alloc_pool_statistics): Likewise.
* alloc-pool.h (struct pool_usage): New struct.
(pool_allocator::initialize): Change usage of memory statistics
to a new interface.
(pool_allocator::release): Likewise.
(pool_allocator::allocate): Likewise.
(pool_allocator::remove): Likewise.
* mem-stats-traits.h (enum mem_alloc_origin): Add new enum value
for a pool allocator.
* mem-stats.h (struct mem_location): Add new ctor.
(struct mem_usage): Add counter for number of
instances.
(mem_alloc_description::register_descriptor): New overload of
the function.

 -


diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index 96a1342..a1727ce 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h



+  /* Dump usage coupled to LOC location, where TOTAL is sum of all rows.  */
+  inline void dump (mem_location *loc, mem_usage &total) const
+  {
+char s[4096];
+sprintf (s, "%s:%i (%s)", loc->get_trimmed_filename (),
+loc->m_line, loc->m_function);
Static sized buffer used in a sprintf where the strings are potentially 
user controlled.   Not good, even in dumping code, still not good.



+
+s[48] = '\0';
?!?  Presumably you're just truncating the output line here for the 
subsequent fprintf call.  Consider using a const with a symbolic name 
rather than the magic "48".  I say "consider" because there's magic 
constants all over the place in the dumping code. So it may not be worth 
the effort.  Your call.


 +

+  /* Dump header with NAME.  */
+  static inline void dump_header (const char *name)
+  {
+fprintf (stderr, "%-32s%-48s %6s%11s%16s%17s%12s\n", "Pool name", name,
+"Pools", "Leak", "Peak", "Times", "Elt size");
+print_dash_line ();
+  }
+
+  /* Dump footer.  */
+  inline void dump_footer ()
+  {
+print_dash_line ();
+fprintf (stderr, "%s%75li%10li\n", "Total", (long)m_instances,
+(long)m_allocated);
+print_dash_line ();
+  }
Note the header is static inline, footer is just inline.  Please try to 
make them consistent.


 @@ -235,10 +301,10 @@ pool_allocator::release ()

free (block);
  }

-  if (GATHER_STATISTICS && false)
+  if (GATHER_STATISTICS)
  {
-  alloc_pool_descriptor *desc = allocate_pool_descriptor (m_name);
-  desc->current -= (m_elts_allocated - m_elts_free) * m_elt_size;
+  pool_allocator_usage.release_instance_overhead (this,
+   (m_elts_allocated - m_elts_free) * m_elt_size);

Looks like line wrapping needs to be fixed.


Clearly the biggest issue is that static sized buffer used to hold the 
results of sprintf...  Once that and the smaller issues are fixed, this 
is OK.


jeff

Re: [PATCH, 4/8] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2015-06-02 Thread Richard Biener

On Tue, 2 Jun 2015, Tom de Vries wrote:

> On 22-04-15 09:40, Richard Biener wrote:
> > On Tue, 21 Apr 2015, Thomas Schwinge wrote:
> > 
> > > Hi!
> > > 
> > > On Tue, 25 Nov 2014 12:29:28 +0100, Tom de Vries 
> > > wrote:
> > > > On 15-11-14 18:21, Tom de Vries wrote:
> > > > > On 15-11-14 13:14, Tom de Vries wrote:
> > > > > > I'm submitting a patch series with initial support for the oacc
> > > > > > kernels
> > > > > > directive.
> > > > > > 
> > > > > > The patch series uses pass_parallelize_loops to implement
> > > > > > parallelization of
> > > > > > loops in the oacc kernels region.
> > > > > > 
> > > > > > The patch series consists of these 8 patches:
> > > > > > ...
> > > > > >   1  Expand oacc kernels after pass_build_ealias
> > > > > >   2  Add pass_oacc_kernels
> > > > > >   3  Add pass_ch_oacc_kernels to pass_oacc_kernels
> > > > > >   4  Add pass_tree_loop_{init,done} to pass_oacc_kernels
> > > > > >   5  Add pass_loop_im to pass_oacc_kernels
> > > > > >   6  Add pass_ccp to pass_oacc_kernels
> > > > > >   7  Add pass_parloops_oacc_kernels to pass_oacc_kernels
> > > > > >   8  Do simple omp lowering for no address taken var
> > > > > > ...
> > > > > 
> > > > > This patch adds pass_tree_loop_init and pass_tree_loop_init_done to
> > > > > pass_oacc_kernels.
> > > > > 
> > > > > Pass_parallelize_loops is run between these passes in the pass group
> > > > > pass_tree_loop, since it requires loop information.  We do the same
> > > > > for
> > > > > pass_oacc_kernels.
> > > > > 
> > > > 
> > > > Updated for moving pass_oacc_kernels down past pass_fre in the pass
> > > > list.
> > > > 
> > > > Bootstrapped and reg-tested as before.
> > > > 
> > > > OK for trunk?
> > 
> > Both passes should be basically no-ops.  Why not call
> > loop_optimizer_init/finalize from expand_omp_ssa instead?
> > 
> 
> The current pass list is:
> ...
>   NEXT_PASS (pass_build_ealias);
>   NEXT_PASS (pass_fre);
>   /* Pass group that runs when there are oacc kernels in the
>  function.  */
>   NEXT_PASS (pass_oacc_kernels);
>   PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels)
>   NEXT_PASS (pass_ch_oacc_kernels);
>   NEXT_PASS (pass_fre);
>   NEXT_PASS (pass_tree_loop_init);
>   NEXT_PASS (pass_lim);
>   NEXT_PASS (pass_copy_prop);
>   NEXT_PASS (pass_scev_cprop);
>   NEXT_PASS (pass_parallelize_loops_oacc_kernels);
>   NEXT_PASS (pass_expand_omp_ssa);
>   NEXT_PASS (pass_tree_loop_done);
>   POP_INSERT_PASSES ()
>   NEXT_PASS (pass_merge_phi);
>   NEXT_PASS (pass_dse);
> ...
> 
> Do you want to call loop_optimizer_init from pass_lim and
> loop_optimizer_finalize from pass_expand_omp_ssa, or are things ok as they
> are?

No, Jakub probably means to call loop_optimizer_init/finalize in 
each of the passes.  Note that keeping loops initialized keeps
you in loop-closed SSA form and also preserves some more loop
properties during cfg-cleanup.  So I think things are ok as they
are.

As far as I understand at least SCEV-cprop and parloops need
loop-closed SSA form to work (LIM doesn't need anything fancy,
apart from disambiguated latches).

Btw, I wonder why you don't organize the oacc-kernel passes in
a new simple-IPA group after pass_local_optimization_passes.

Richard.

> Thanks,
> - Tom
> 
> > > Committed to gomp-4_0-branch in r82:
> > > 
> > > commit cb95b4a1efcdb96c58cda986d53b20c3537c1ab7
> > > Author: tschwinge 
> > > Date:   Tue Apr 21 19:51:33 2015 +
> > > 
> > >  Add pass_tree_loop_{init,done} to pass_oacc_kernels
> > > 
> > >   gcc/
> > >   * passes.def: Run pass_tree_loop_init and pass_tree_loop_done
> > > in pass
> > >   group pass_oacc_kernels.
> > >   * tree-ssa-loop.c (pass_tree_loop_init::clone)
> > >   (pass_tree_loop_done::clone): New function.
> > > 
> > >  git-svn-id:
> > > svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@82
> > > 138bc75d-0d04-0410-961f-82ee72b054a4
> > > ---
> > >   gcc/ChangeLog.gomp  |5 +
> > >   gcc/passes.def  |2 ++
> > >   gcc/tree-ssa-loop.c |2 ++
> > >   3 files changed, 9 insertions(+)
> > > 
> > > diff --git gcc/ChangeLog.gomp gcc/ChangeLog.gomp
> > > index d00c5e0..1fb060f 100644
> > > --- gcc/ChangeLog.gomp
> > > +++ gcc/ChangeLog.gomp
> > > @@ -1,5 +1,10 @@
> > >   2015-04-21  Tom de Vries  
> > > 
> > > + * passes.def: Run pass_tree_loop_init and pass_tree_loop_done in pass
> > > + group pass_oacc_kernels.
> > > + * tree-ssa-loop.c (pass_tree_loop_init::clone)
> > > + (pass_tree_loop_done::clone): New function.
> > > +
> > >   * omp-low.c (loop_in_oacc_kernels_region_p): New function.
> > >   * omp-low.h (loop_in_oacc_kernels_region_p): Declare.
> > >   * passes.def: Add pass_ch_oacc_kernels to pass group
> > > pass_oacc_kernels.
> > > diff --git gcc/passes.de

Re: [PATCH 2/2] Fix memory report layout at various places.

2015-06-02 Thread Jeff Law


On 06/01/2015 10:36 AM, mliska wrote:

gcc/ChangeLog:

2015-06-02  Martin Liska  

* alloc-pool.h (struct pool_usage): Correct space padding.
* ggc-page.c (ggc_print_statistics): Align columns in a report.
* mem-stats.h (struct mem_usage): Add argument to print_dash_line.
* tree.c (dump_tree_statistics): Align columns in a report.
OK.  I'm inclined to say that subsequent patches that do similar things 
should be considered pre-approved.


Jeff

Re: [PATCH 01/35] Introduce new type-based pool allocator.

2015-06-02 Thread Richard Biener

On Tue, Jun 2, 2015 at 3:52 PM, Martin Liška  wrote:
> On 06/02/2015 11:48 AM, Andreas Schwab wrote:
>> In file included from ../../gcc/stmt.c:78:0:
>> ../../gcc/alloc-pool.h: In function 'void expand_sjlj_dispatch_table(rtx, 
>> vec> ree_node*>)':
>> ../../gcc/alloc-pool.h:303:4: error: 
>> 'case_node_pool.pool_allocator:\
>> :m_block_size' may be used uninitialized in this function 
>> [-Werror=maybe-uninit\
>> ialized]
>> block = XNEWVEC (char, m_block_size);
>> ^
>> ../../gcc/stmt.c:1339:33: note: 
>> 'case_node_pool.pool_allocator::m_bl\
>> ock_size' was declared here
>>pool_allocator case_node_pool ("struct sjlj_case pool",
>>  ^
>>
>> Andreas.
>>
>
> Hi.
>
> This patch for the issue which has been tested on x86_64-unknown-linux-pc and
> can bootstrap.
>
> Ready for trunk?

Ok.

Richard.

> Thanks,
> Martin

Re: C++ PATCH to handling of exception specs in system headers

2015-06-02 Thread Rainer Orth

Jason Merrill  writes:

> On 06/01/2015 11:34 AM, Rainer Orth wrote:
>> Isn't this about global vs. std namespace?
>
> Probably.  Does making the declaration in the testcase extern "C" help?

It does indeed, and the test still passes on Linux.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University

Re: [patch] consolidate some includes into coretypes.h

2015-06-02 Thread Andrew MacLeod


On 06/02/2015 09:30 AM, Richard Biener wrote:

On Tue, Jun 2, 2015 at 2:34 PM, Andrew MacLeod  wrote:

On 06/02/2015 04:26 AM, Richard Biener wrote:

On Mon, Jun 1, 2015 at 11:02 PM, Andrew MacLeod 
wrote:


Bootstraps from scratch on x86_64-unknown-linux-gnu with no new test
regressions.  I also built it on all the config-list.mk targets with no
additional compilation errors.

OK for trunk?

Generally the idea is sound (amend coretypes.h), but I don't like the
GCC_CONFIG_H guard, why does !GENERATOR_FILE not work?

Target files also use coretypes.h. In particular, libgcc includes it and
does not have GENERATOR_FILE set.  Rather than checking for GCC_CONFIG_H we
could check

#if !defined (GENERATOR_FILE) && !defined (USED_FOR_TARGET)

I think that should work OK.

Furthermore I don't like the special-casing in rtl.h, instead have
coretypes.h contain sth like

#ifdef GENERATOR_FILE
... rtl.h special-case
#else
... GCC_CONFIG_H stuff
#endif

Thanks,
Richard.


This one is harder. I don't like the special case either, but you can't
really figure it out in coretypes.h.  The problem comes from some generator
files which compile rtl.c and a couple of other files, and thus have
GENERATOR_FILE set... These run after the initial set of generators so
insn-modes.h and friends have been created, and these includes are now
required.   The presence of rtl.h seems to be the litmus test and if it
occurs in the include chain after coretypes.h, then we'll need these files.

I suppose you could just include those files in rtl.h directly without the
guard...  it is probably the cleanest solution. Otherwise we'd either have
to add a new identifying macro to a dozen generator files, or include these
headers there, or some other such thing.

Well, then include the requirements in the generator files instead?  It looks
backwards to add to the includes in rtl.h.

Richard.
Except that it is rtl.h that actually has the compilation requirement.  
I could put those includes in each of the generator files which require 
it, but the list is non-trivial:
Each of these files can be compiled with bconfig.h instead of config.h, 
and they each include rtl.h which requires these headers:

genattr.c
genattr-common.c
genattrtab.c
genautomata.c
gencodes.c
genconditions.c
genconfig.c
genemit.c
genextract.c
genflags.c
genmddump.c
genopinit.c
genoutput.c
genpeep.c
genpreds.c
genrecog.c
gensupport.c
print-rtl.c
read-rtl.c
rtl.c


so there are 20 files which require these headers, and there are 11 
others which do not require rtl.h nor the headers (and will fail to compile 
if they are included)

gencheck.c
genconstants.c
genenums.c
genmatch.c
genmddeps.c
genmodes.c
ggc-none.c
hash-table.c
inchash.c
read-md.c
vec.c


I suppose one could add something like:
#define EARLY_GENERATOR_FILE
in each of the 11 and check for that macro in coretypes.h instead of 
GENERATOR_FILE.  This appears to work fine:


Andrew


diff -cpN /numerical/coretypes.h /CMP/coretypes.h
*** /numerical/coretypes.h	2015-06-01 18:00:36.284376498 -0400
--- /CMP/coretypes.h	2015-06-02 10:15:12.427363555 -0400
*** typedef unsigned char uchar;
*** 300,306 
  #endif
  
  /* Most host source files will require the following headers.  */
! #ifdef GCC_CONFIG_H
  #include "machmode.h"
  #include "signop.h"
  #include "wide-int.h" 
--- 300,306 
  #endif
  
  /* Most host source files will require the following headers.  */
! #if !defined (EARLY_GENERATOR_FILE) && !defined (USED_FOR_TARGET)
  #include "machmode.h"
  #include "signop.h"
  #include "wide-int.h" 
diff -cpN /numerical/gencheck.c /CMP/gencheck.c
*** /numerical/gencheck.c	2015-06-01 18:00:36.290376342 -0400
--- /CMP/gencheck.c	2015-06-02 10:15:12.434363374 -0400
*** along with GCC; see the file COPYING3.
*** 20,25 
--- 20,26 
  /* We don't have insn-modes.h, but we include tm.h.  */
  #define BITS_PER_UNIT 8
  
+ #define EARLY_GENERATOR_FILE
  #include "bconfig.h"
  #include "system.h"
  #include "coretypes.h"
diff -cpN /numerical/genconstants.c /CMP/genconstants.c
*** /numerical/genconstants.c	2015-06-01 18:00:36.290376342 -0400
--- /CMP/genconstants.c	2015-06-02 10:15:12.435363348 -0400
*** along with GCC; see the file COPYING3.
*** 24,29 
--- 24,30 
 look at insn patterns, only (define_constants), and we want to
 minimize dependencies.  */
  
+ #define EARLY_GENERATOR_FILE
  #include "bconfig.h"
  #include "system.h"
  #include "coretypes.h"
diff -cpN /numerical/genenums.c /CMP/genenums.c
*** /numerical/genenums.c	2015-06-01 18:00:36.290376342 -0400
--- /CMP/genenums.c	2015-06-02 10:15:12.435363348 -0400
*** You should have received a copy of the G
*** 17,22 
--- 17,23 
  along with GCC; see the file COPYING3.  If not see
  .  */
  
+ #define EARLY_GENERATOR_FILE
  #include "bconfig.h"
  #include "system.h"
  #include "coretypes.h"
diff -cpN /numerical/genmatch.c /CMP/genmatch.c
*** /numerical/genmatch.c	2015-06-01

[PATCH] Update check after force_const_mem call in the plus_constant function to see if the value returned is not a NULL_RTX.

2015-06-02 Thread Andrew Bennett

Hi,

In the plus_constant function in explow.c, the code that updates a constant
pool value does not deal with the case where the value returned from
force_const_mem is a NULL_RTX.  This occurs for the MIPS target because its
cannot_force_const_mem target function does not allow constants (so that the
move expanders can deal with them later on).  This causes the force_const_mem
function to return a NULL_RTX, and GCC then hits a segmentation fault when
calling the memory_address_p function.

The fix is to add a check that the tem variable is not a NULL_RTX before
the memory_address_p function call.  I have tested the fix on the 
mips-mti-linux-gnu
target for both mips32r2 o32 and mips64r2 n64 and there have been no 
regressions.

The patch and ChangeLog are below.

Ok to commit?


Many thanks,



Andrew



* explow.c (plus_constant): Update check after force_const_mem call to 
see if the
value returned is not a NULL_RTX.



diff --git a/gcc/explow.c b/gcc/explow.c
index d1a2bf8..8745aea 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -132,7 +132,9 @@ plus_constant (machine_mode mode, rtx x, HOST_WIDE_INT c,
{
  tem = plus_constant (mode, get_pool_constant (XEXP (x, 0)), c);
  tem = force_const_mem (GET_MODE (x), tem);
- if (memory_address_p (GET_MODE (tem), XEXP (tem, 0)))
+ /* Targets may disallow some constants in the constant pool, thus
+force_const_mem may return NULL_RTX.  */
+ if (tem && memory_address_p (GET_MODE (tem), XEXP (tem, 0)))
return tem;
}
   break;

Re: [ping**2] Handle MULTILIB_REUSE in auto-generated SYSROOT_SUFFIX_SPEC macro

2015-06-02 Thread Joseph Myers

On Tue, 19 May 2015, Sandra Loosemore wrote:

> Re-pinging a patch from last year that never got reviewed:
> 
> https://gcc.gnu.org/ml/gcc-patches/2014-06/msg00511.html

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [RFC / CFT] PR c++/66192 - Remove TARGET_RELAXED_ORDERING and use load acquires.

2015-06-02 Thread David Edelsohn

On Fri, May 29, 2015 at 9:18 AM, Ramana Radhakrishnan
 wrote:

> - Turns build_atomic_load into build_atomic_load_byte in order
>   to do an atomic load of 1 byte instead of a full word atomic load.
> - Restructures get_guard_cond to decide whether to use an atomic load
>   or a standard load depending on whether code generated will be in
>   a multi-threaded context or not.
> - Adjusts all callers of get_guard_cond accordingly.
>
> One of the bits of fallout that I've observed in my testing and that I'm not
> sure about what to do is that on *bare-metal* arm-none-eabi targets we still
> put out calls to __sync_synchronize on architecture versions that do not
> have a barrier instruction which will result in a link error.
>
> While it is tempting to take the easy approach of not putting out the call,
> I suspect in practice a number of users of the bare-metal tools use these
> for their own RTOS's and other micro-OS's. Thus generating barriers at
> higher architecture levels and not generating barriers at lower architecture
> levels appears to be a bit dangerous especially on architectures where there
> is backwards compatibility (i.e. -mcpu=arm7tdmi on standard user code is
> still expected to generate code that works on a core that conforms to a
> later architecture revision).
>
> I am considering leaving this in the ARM backend to force people to think
> what they want to do about thread safety with statics and C++ on bare-metal
> systems. If they really do not want thread safety they can well add
> -fno-threadsafe-statics or provide an appropriate implementation for
> __sync_synchronize on their platforms.
>
> Any thoughts / comments ?
>
> regards
> Ramana
>
> gcc/cp/ChangeLog:
>
> 2015-05-29  Ramana Radhakrishnan  
>
> PR c++/66192
> * cp-tree.h (get_guard_cond): Adjust declaration
> * decl.c (expand_static_init): Use atomic load acquire
>  and adjust call to get_guard_cond.
> * decl2.c (build_atomic_load_byte): New function.
> (get_guard_cond): Handle thread_safety.
> (one_static_initialization_or_destruction): Adjust call to
> get_guard_cond.
>
> gcc/ChangeLog:
>
> 2015-05-29  Ramana Radhakrishnan  
>
>
> PR c++/66192
> * config/alpha/alpha.c (TARGET_RELAXED_ORDERING): Likewise.
> * config/ia64/ia64.c (TARGET_RELAXED_ORDERING): Likewise.
> * config/rs6000/rs6000.c (TARGET_RELAXED_ORDERING): Likewise.
> * config/sparc/linux.h (SPARC_RELAXED_ORDERING): Likewise.
> * config/sparc/linux64.h (SPARC_RELAXED_ORDERING): Likewise.
> * config/sparc/sparc.c (TARGET_RELAXED_ORDERING): Likewise.
> * config/sparc/sparc.h (SPARC_RELAXED_ORDERING): Likewise.
> * doc/tm.texi: Regenerate.
> * doc/tm.texi.in (TARGET_RELAXED_ORDERING): Delete.
> * target.def (TARGET_RELAXED_ORDERING): Delete.

Bootstrap on PPC64 BE and LE Linux successful.  The generated code
looks like what we expect and much better than the current code
generation.

Thanks, David

Re: [C patch] PR49551

2015-06-02 Thread Joseph Myers

On Sun, 31 May 2015, Prathamesh Kulkarni wrote:

> However DECL_COMMON (olddecl) is still incorrectly set to 1 after the
> call to memcpy(), while DECL_COMMON (olddecl) should be 0 for the
> above test-case. This patch tries to correctly adjust DECL_COMMON
> (newdecl) before it is copied into olddecl. Does it look reasonable ?
> 
> Bootstrapped and tested on x86_64-unknown-linux-gnu, cross tested on
> arm-linux-gnueabihf using qemu.

This patch is OK.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [C/C++ PATCH] Implement -Wshift-overflow (PR c++/55095)

2015-06-02 Thread Marek Polacek

On Tue, Jun 02, 2015 at 09:53:14AM +0100, Richard Sandiford wrote:
>   unsigned int min_prec = (wi::min_precision (op0, SIGNED)
>  + TREE_INT_CST_LOW (op1));
>   bool overflowed = min_prec > TYPE_PRECISION (type0);
>   if (overflowed && c_inhibit_evaluation_warnings == 0)
> warning_at (loc, OPT_Wshift_overflow,
>  "result of %qE requires %u bits to represent, "
>  "but %qT only has %u bits",
>  build2_loc (loc, LSHIFT_EXPR, type0, op0, op1),
>  min_prec, type0, TYPE_PRECISION (type0));
> 
> which seems simpler than anything involving wider precision.

Clearly your version is much better, so I'll use it - thanks!
I'm testing a new version of the patch, will post it soon.

Marek

Re: [PATCH 1/2] Memory statistics enhancement.

2015-06-02 Thread Martin Liška

On 06/02/2015 03:58 PM, Jeff Law wrote:
> On 06/01/2015 10:16 AM, mliska wrote:
>> Hi.
>>
>> Following 2 patches improve memory statistics infrastructure. First one
>> ports pool allocator to the new infrastructure. And the second one makes
>> column alignment properly.
>>
>> Both can bootstrap on x86_64-linux-pc and survive regression tests.
>>
>> Ready for trunk?
>> Thank you,
>> Martin
>>
>> Port pool-allocator memory stats to a new infrastructure.
>>
>> gcc/ChangeLog:
>>
>> 2015-06-02  Martin Liska  
>>
>> * alloc-pool.c (allocate_pool_descriptor): Remove.
>> (struct pool_output_info): Likewise.
>> (print_alloc_pool_statistics): Likewise.
>> (dump_alloc_pool_statistics): Likewise.
>> * alloc-pool.h (struct pool_usage): New struct.
>> (pool_allocator::initialize): Change usage of memory statistics
>> to a new interface.
>> (pool_allocator::release): Likewise.
>> (pool_allocator::allocate): Likewise.
>> (pool_allocator::remove): Likewise.
>> * mem-stats-traits.h (enum mem_alloc_origin): Add new enum value
>> for a pool allocator.
>> * mem-stats.h (struct mem_location): Add new ctor.
>> (struct mem_usage): Add counter for number of
>> instances.
>> (mem_alloc_description::register_descriptor): New overload of
>> the function.
>  -
> 
>> diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
>> index 96a1342..a1727ce 100644
>> --- a/gcc/alloc-pool.h
>> +++ b/gcc/alloc-pool.h
> 
>> +  /* Dump usage coupled to LOC location, where TOTAL is sum of all rows.  */
>> +  inline void dump (mem_location *loc, mem_usage &total) const
>> +  {
>> +char s[4096];
>> +sprintf (s, "%s:%i (%s)", loc->get_trimmed_filename (),
>> + loc->m_line, loc->m_function);
> Static sized buffer used in a sprintf where the strings are potentially user 
> controlled.   Not good, even in dumping code, still not good.
> 
>> +
>> +s[48] = '\0';
> ?!?  Presumably you're just truncating the output line here for the 
> subsequent fprintf call.  Consider using a const with a symbolic name rather 
> than the magic "48".  I say "consider" because there's magic constants all 
> over the place in the dumping code. So it may not be worth the effort.  Your 
> call.
> 
>  +
>> +  /* Dump header with NAME.  */
>> +  static inline void dump_header (const char *name)
>> +  {
>> +fprintf (stderr, "%-32s%-48s %6s%11s%16s%17s%12s\n", "Pool name", name,
>> + "Pools", "Leak", "Peak", "Times", "Elt size");
>> +print_dash_line ();
>> +  }
>> +
>> +  /* Dump footer.  */
>> +  inline void dump_footer ()
>> +  {
>> +print_dash_line ();
>> +fprintf (stderr, "%s%75li%10li\n", "Total", (long)m_instances,
>> + (long)m_allocated);
>> +print_dash_line ();
>> +  }
> Note the header is static inline, footer is just inline.  Please try to make 
> them consistent.
> 
>  @@ -235,10 +301,10 @@ pool_allocator::release ()
>> free (block);
>>   }
>>
>> -  if (GATHER_STATISTICS && false)
>> +  if (GATHER_STATISTICS)
>>   {
>> -  alloc_pool_descriptor *desc = allocate_pool_descriptor (m_name);
>> -  desc->current -= (m_elts_allocated - m_elts_free) * m_elt_size;
>> +  pool_allocator_usage.release_instance_overhead (this,
>> +(m_elts_allocated - m_elts_free) * m_elt_size);
> Looks like line wrapping needs to be fixed.
> 
> 
> Clearly the biggest issue is that static sized buffer used to hold the 
> results of sprintf...  Once that and the smaller issues are fixed, this is OK.
> 
> jeff
> 

Hi Jeff.

This patch improves the previous patch in the suggested way. I verified with
valgrind that the allocated string buffer is used correctly.

Ready for trunk?
Thanks,
Martin
>From 3373344b681f343ebe038e4c75abc8fe4c39ebea Mon Sep 17 00:00:00 2001
From: mliska 
Date: Mon, 1 Jun 2015 18:16:52 +0200
Subject: [PATCH 1/2] Port pool-allocator memory stats to a new infrastructure.

gcc/ChangeLog:

2015-06-02  Martin Liska  

	* alloc-pool.c (allocate_pool_descriptor): Remove.
	(struct pool_output_info): Likewise.
	(print_alloc_pool_statistics): Likewise.
	(dump_alloc_pool_statistics): Likewise.
	* alloc-pool.h (struct pool_usage): New struct.
	(pool_allocator::initialize): Change usage of memory statistics
	to a new interface.
	(pool_allocator::release): Likewise.
	(pool_allocator::allocate): Likewise.
	(pool_allocator::remove): Likewise.
	* mem-stats-traits.h (enum mem_alloc_origin): Add new enum value
	for a pool allocator.
	* mem-stats.h (struct mem_location): Add new ctor.
	(struct mem_usage): Add counter for number of
	instances.
	(mem_alloc_description::register_descriptor): New overload of
	the function.
	* mem-stats.h (mem_location::to_string): New function.
	* bitmap.h (struct bitmap_usage): Use this new function.
	* ggc-common.c (struct ggc_usage): Likewise.
---
 gcc/alloc-pool.c   |  60 +
 gcc/alloc-pool.h   | 100 ++---
 gcc/bitmap.h   |  11 +++---
 gcc/ggc-common.

Re: [PATCH] Remove stray cleanup-tree-dump

2015-06-02 Thread Bernhard Reutner-Fischer

On June 2, 2015 2:08:47 PM GMT+02:00, Richard Biener  wrote:
>
>Committed.  Seems to cause half of the vectorizer tests to be dropped
>and test-summary breaking for me.

Right. There should be no cleanup-tree-dump left on trunk.

Thanks,

>
>Richard.
>
>2015-06-02  Richard Biener  
>
>   * gcc.dg/vect/vect-outer-simd-1.c: Remove stray cleanup-tree-dump.
>
>Index: gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c
>===
>--- gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c  (revision 224013)
>+++ gcc/testsuite/gcc.dg/vect/vect-outer-simd-1.c  (working copy)
>@@ -72,4 +72,3 @@ int main()
>   return 0;
> } 
> /* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */
>-/* { dg-final { cleanup-tree-dump "vect" } } */

Re: [PATCH, 4/8] Add pass_tree_loop_{init,done} to pass_oacc_kernels

2015-06-02 Thread Tom de Vries


On 02-06-15 15:58, Richard Biener wrote:

Btw, I wonder why you don't organize the oacc-kernel passes in
a new simple-IPA group after pass_local_optimization_passes.


I've placed the pass group as early as possible (meaning after ealias) and put 
passes in front only when that served a purpose for parallelization (pass_fre). 
The idea there was to minimize the amount of passes that have to be modified to 
deal (conservatively) with a kernels region.


So AFAICT, there's nothing against placing the pass group after 
pass_local_optimization_passes, other than that it's more work in more passes to 
keep the region intact.


What would be the benefit of doing so?

Thanks,
- Tom

Re: [PATCH testsuite]Refine scanning string in pr65447.c to support small address offset target

2015-06-02 Thread Bernhard Reutner-Fischer

On June 2, 2015 5:56:13 AM GMT+02:00, Bin Cheng  wrote:
>Hi,
>On some arm processors, the offset supported in addressing modes is
>very
>small.  As a result, the dozens of address induction variables will be
>grouped into several groups, rather than only one as on armv7/8.  This
>patch
>refines scanning string to avoid test failure on such processors.
>
>It's an obvious change, and test acts as expected.  So is it OK?  I
>will
>commit it in next 24 hours if there is no objection.
>
>Thanks,
>bin
>
>2015-06-02  Bin Cheng  
>
>   PR tree-optimization/65447
>   * gcc.dg/tree-ssa/pr65447.c: Increase searching number.

There should be no cleanup-tree-dump left on trunk.
Please refresh your patch before pushing when somebody OKs it.

Thanks,

Re: [PATCH] Fix eipa_sra AAPCS issue (PR target/65956)

2015-06-02 Thread Richard Earnshaw

On 01/06/15 13:07, Jakub Jelinek wrote:
> On Thu, May 07, 2015 at 12:16:32PM +0100, Alan Lawrence wrote:
>> So for my two cents, or perhaps three:
> 
> Any progress on this PR?
> A P1 bug that affects several packages stalled for a month isn't a very good
> thing... (not to mention broken profiledbootstrap on ARM due to the same
> issue).
> I've checked and llvm on ARM ignores the alignment on the scalar
> arguments...
> 
>   Jakub
> 

We're working on some updates to the ABI documents.  If we're going to
break ABI compatibility, even in some corner cases, it would make sense
to only do this once.

We need to think about more than just LLVM and GCC, so it's not as
simple as just copying what LLVM does.

Note that there's almost certainly a similar problem for AArch64, though
it is probably less common for it to manifest itself -- probably only
when structs contain 128-bit aligned objects.

R.

RFA: RL78:

2015-06-02 Thread Nick Clifton

Hi DJ,

  This patch contains two small improvements for the RL78 compiler:

  1. A peephole to match:

movwax, !
movwbc, ax
callbc

 with:

movwax, !
callax

  2. A change to avoid pushing the frame pointer register in an
 interrupt handler if the handler never uses or corrupts the
 frame pointer.

  Tested with no regressions using an rx-elf toolchain.

  OK to apply ?

Cheers
  Nick

gcc/ChangeLog
2015-06-02  Nick Clifton  

* config/rl78/rl78-real.md: Add peepholes to avoid a register
copy when calling a function.
* config/rl78/rl78.c (need_to_save): Do not push the frame
pointer in an interrupt handler prologue if it is never used.

Index: gcc/config/rl78/rl78-real.md
===
--- gcc/config/rl78/rl78-real.md(revision 224011)
+++ gcc/config/rl78/rl78-real.md(working copy)
@@ -342,6 +342,25 @@
   [(set (attr "update_Z") (const_string "clobber"))]
   )
 
+;; Peephole to match:
+;;
+;; (set (reg1) (reg2))
+;; (call (mem (reg1)))
+;;
+;;  and replace it with:
+;;
+;; (call (mem (reg2)))
+
+(define_peephole2
+  [(set (match_operand:HI 0 "register_operand") (match_operand:HI 1 
"register_operand"))
+   (call (mem:HI (match_dup 0))(const_int 0))
+  ]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))
+   && REGNO (operands[1]) < 8"
+  [(call (mem:HI (match_dup 1))(const_int 0))
+  ]
+)
+
 (define_insn "*call_value_real"
   [(set (match_operand 0 "register_operand" "=v,v")
(call (match_operand:HI 1 "memory_operand" "Wab,Wca")
@@ -353,6 +372,25 @@
   [(set (attr "update_Z") (const_string "clobber"))]
   )
 
+;; Peephole to match:
+;;
+;; (set (reg1) (reg2))
+;; (set (reg3) (call (mem (reg1))))
+;;
+;;  and replace it with:
+;;
+;; (set (reg3) (call (mem (reg2))))
+
+(define_peephole2
+  [(set (match_operand:HI 0 "register_operand") (match_operand:HI 1 
"register_operand"))
+   (set (match_operand:HI 2 "register_operand") (call (mem:HI (match_dup 
0))(const_int 0)))
+  ]
+  "peep2_regno_dead_p (2, REGNO (operands[0]))
+   && REGNO (operands[1]) < 8"
+  [(set (match_dup 2) (call (mem:HI (match_dup 1))(const_int 0)))
+  ]
+)
+
 (define_insn "*cbranchqi4_real_signed"
   [(set (pc) (if_then_else
  (match_operator 0 "rl78_cmp_operator_signed"
Index: gcc/config/rl78/rl78.c
===
--- gcc/config/rl78/rl78.c  (revision 224011)
+++ gcc/config/rl78/rl78.c  (working copy)
@@ -678,10 +678,12 @@
 
   /* If the handler is a non-leaf function then it may call
 non-interrupt aware routines which will happily clobber
-any call_used registers, so we have to preserve them.  */
-  if (!crtl->is_leaf && call_used_regs[regno])
+any call_used registers, so we have to preserve them.
+ We do not have to worry about the frame pointer register
+though, as that is handled below.  */
+  if (!crtl->is_leaf && call_used_regs[regno] && regno < 22)
return true;
-
+  
   /* Otherwise we only have to save a register, call_used
 or not, if it is used by this handler.  */
   return df_regs_ever_live_p (regno);

Re: [PATCH] Fix eipa_sra AAPCS issue (PR target/65956)

2015-06-02 Thread Alan Lawrence


Richard Earnshaw wrote:

On 01/06/15 13:07, Jakub Jelinek wrote:

On Thu, May 07, 2015 at 12:16:32PM +0100, Alan Lawrence wrote:

So for my two cents, or perhaps three:

Any progress on this PR?
A P1 bug that affects several packages stalled for a month isn't a very good
thing... (not to mention broken profiledbootstrap on ARM due to the same
issue).
I've checked and llvm on ARM ignores the alignment on the scalar
arguments...

Jakub



We're working on some updates to the ABI documents.  If we're going to
break ABI compatibility, even in some corner cases, it would make sense
to only do this once.


One question is whether to treat structs differently from scalars in the ABI 
specification. Structs raise lots of corner cases! I notice the following 
oddity, and wonder if anyone can shed any light on this:


typedef __attribute__((aligned(8))) struct { int x; int y; } foo;
typedef struct __attribute__((aligned(8))) { int x; int y; } bar;
typedef struct { int x; int y; } __attribute__((aligned(8))) baz;
typedef struct { int x; int y; } qux __attribute__((aligned(8)));

create typedefs (foo, bar, baz, qux) all with alignment 8, as expected. However, 
the TYPE_MAIN_VARIANT (an anonymous struct type) has alignment 4 for foo and qux 
(so the attribute has been applied only to the typedef), but alignment 8 for bar 
and baz (i.e. the attribute has been applied to the underlying struct).


--Alan

Re: [Patch, fortran, PR44672, v6] [F08] ALLOCATE with SOURCE and no array-spec

2015-06-02 Thread Mikael Morin

Hello Andre,

comments below (out of order, sorry).

Le 29/05/2015 13:46, Andre Vehreschild a écrit :
> Hi Mikael,
> 
> comments inline below:
> 
> On Thu, 28 May 2015 20:06:57 +0200
> Mikael Morin  wrote:
> 
>> Le 28/05/2015 17:29, Andre Vehreschild a écrit :
>>> *** resolve_allocate_expr (gfc_expr *e, gfc_
>>> *** 7103,7112 
>>> --- 7103,7123 
>>> if (!ref2 || ref2->type != REF_ARRAY || ref2->u.ar.type == AR_FULL
>>> || (dimension && ref2->u.ar.dimen == 0))
>>>   {
>>> +   /* F08:C633.  */
>>> +   if (code->expr3)
>>> +   {
>>> + if (!gfc_notify_std (GFC_STD_F2008, "Array specification
>>> required "
>>> +  "in ALLOCATE statement at %L", &e->where))
>>> +   goto failure;
>>> + *array_alloc_wo_spec = true;
>>> +   }
>>> +   else
>>> +   {
>>>   gfc_error ("Array specification required in ALLOCATE statement "
>>>  "at %L", &e->where);
>>>   goto failure;
>>> }
>>> + }
>>>   
>>> /* Make sure that the array section reference makes sense in the
>>>context of an ALLOCATE specification.  */
>> I think we can be a little bit more user friendly with the gfc_notify_std
>> error message.
>> Something like:
>> ALLOCATE without array spec at %L
>> ALLOCATE with array bounds determined from SOURCE or MOLD at %L
> 
> I didn't want to mess with the error messages to prevent issues for
> translations. So how is the policy on this? 
> 
I'm not aware of any policy regarding translations.
With a message like:
fortran 2008: array specification required ...
I don't see how the user can understand that the array specification is
_not_ required with fortran 2008, regardless of translations.
I'm rather in favour of not having a misleading diagnostic, even if
correctly translated.



>>> *** gfc_array_init_size (tree descriptor, in
>>> *** 5076,5085 
>>>   
>>> /* Set upper bound.  */
>>> gfc_init_se (&se, NULL);
>>> gcc_assert (ubound);
>>> gfc_conv_expr_type (&se, ubound, gfc_array_index_type);
>>> gfc_add_block_to_block (pblock, &se.pre);
>>> ! 
>>> gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
>>>   gfc_rank_cst[n], se.expr);
>>> conv_ubound = se.expr;
>>> --- 5087,5111 
>>>   
>>> /* Set upper bound.  */
>>> gfc_init_se (&se, NULL);
>>> +   if (expr3_desc != NULL_TREE)
>>> +   {
>>> + /* Set the upper bound to be (desc.ubound - desc.lbound)+ 1.  */
>>> + tmp = fold_build2_loc (input_location, MINUS_EXPR,
>>> +gfc_array_index_type,
>>> +gfc_conv_descriptor_ubound_get (
>>> +  expr3_desc, gfc_rank_cst[n]),
>>> +gfc_conv_descriptor_lbound_get (
>>> +  expr3_desc, gfc_rank_cst[n]));
>>> + se.expr = fold_build2_loc (input_location, PLUS_EXPR,
>>> +gfc_array_index_type, tmp,
>>> +gfc_index_one_node);
>>> +   }
>>> +   else
>>> +   {
>>>   gcc_assert (ubound);
>>>   gfc_conv_expr_type (&se, ubound, gfc_array_index_type);
>>>   gfc_add_block_to_block (pblock, &se.pre);
>>> !   }
>>> gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
>>>   gfc_rank_cst[n], se.expr);
>>> conv_ubound = se.expr;
>> Your one-based-ness problem was here, wasn't it?
> 
> Correct.
> 
>> I would rather copy directly lbound and ubound from expr3_desc to
>> descriptor.
> 
> It was that way in the previous version of the patch, which does *not* work
> any longer. When gfc_trans_allocate () is responsible for creating a temporary
> variable for the source=-expression, then it does so using zero-based
> expressions.
> 
>> If the source has non-one-based bounds, the above would produce wrong
>> bounds.
> 
> Counterexample? Note, the expr3_desc is guaranteed to be an artificial 
> variable
> created by conv_expr_descriptor, aka zero-based.
> 
here is a counterexample.

  integer, dimension(:), allocatable :: a, b

  allocate (a(0:3))
  allocate (b, source = a)
  print *, lbound(a, 1), ubound(a, 1)
  print *, lbound(b, 1), ubound(b, 1)
end

output:
0   3
1   4


I think that if you set se.expr for the upper bound with
gfc_conv_descriptor_ubound_get(...) instead of what you do above, and
se.expr for the lower bound with gfc_conv_descriptor_lbound_get(...)
instead of gfc_index_one_node in the hunk before, it should work.



> 
> 
>>> *** gfc_trans_allocate (gfc_code * code)
>>> *** 5229,5235 
>>> }
>>>   else
>>> tmp = se.expr;
>>> ! if (!code->expr3->mold)
>>> expr3 = tmp;
>>>   else
>>> expr3_tmp = tmp;
>>> --- 5240,5248 
>>> }
>>>   else
>>> tmp = se.expr;
>

Re: [RFA] Factor conversion out of COND_EXPR using match.pd pattern

2015-06-02 Thread Jeff Law


On 05/30/2015 02:33 AM, Bernhard Reutner-Fischer wrote:

On May 30, 2015 6:22:59 AM GMT+02:00, Jeff Law  wrote:

+/* { dg-final { cleanup-tree-dump "original" } } */

Please drop this cleanup dg-final, trunk now does this automatically.

Yea, figured that'd need to be fixed up after your cleanups.

Thanks,
jeff

Re: [PATCH][AArch64][PR 66136] rewrite geniterators.sh in awk

2015-06-02 Thread Szabolcs Nagy

On 01/06/15 13:55, Marcus Shawcroft wrote:
> On 18 May 2015 at 15:57, Szabolcs Nagy  wrote:
>> Rewrote the generator script in awk, to avoid dealing with
>> sed portability issues.
>>
>> gcc/Changelog:
>>
>> 2015-05-18  Szabolcs Nagy  
>>
>> PR target/66136
>> * config/aarch64/geniterators.sh: Rewrite in awk.
> 
> OK provided you have checked the generated output is identical before
> and after this patch.  Ask for an account on sourceware here
> https://sourceware.org/cgi-bin/pdw/ps_form.cgi (you want write-after-approval
> access for gcc).
> 
> /Marcus
> 

committed in r224031.

and added myself to write after approval.
Index: ChangeLog
===
--- ChangeLog	(revision 224031)
+++ ChangeLog	(working copy)
@@ -1,3 +1,7 @@
+2015-06-02  Szabolcs Nagy  
+
+	* MAINTAINERS (Write After Approval): Add myself.
+
 2015-05-28  Mike Frysinger  
 
 	* configure.ac (--vtable-verify): Use AS_HELP_STRING for help.
Index: MAINTAINERS
===
--- MAINTAINERS	(revision 224031)
+++ MAINTAINERS	(working copy)
@@ -500,6 +500,7 @@
 Brooks Moses	
 Dirk Mueller	
 Phil Muldoon	
+Szabolcs Nagy	
 Quentin Neill	
 Adam Nemet	
 Thomas Neumann

Re: [PATCH 1/2] Memory statistics enhancement.

2015-06-02 Thread Jeff Law


On 06/02/2015 09:05 AM, Martin Liška wrote:

On 06/02/2015 03:58 PM, Jeff Law wrote:

On 06/01/2015 10:16 AM, mliska wrote:

Hi.

Following 2 patches improve memory statistics infrastructure. First one
ports pool allocator to the new infrastructure. And the second one makes
column alignment properly.

Both can bootstrap on x86_64-linux-pc and survive regression tests.

Ready for trunk?
Thank you,
Martin

Port pool-allocator memory stats to a new infrastructure.

gcc/ChangeLog:

2015-06-02  Martin Liska  

 * alloc-pool.c (allocate_pool_descriptor): Remove.
 (struct pool_output_info): Likewise.
 (print_alloc_pool_statistics): Likewise.
 (dump_alloc_pool_statistics): Likewise.
 * alloc-pool.h (struct pool_usage): New struct.
 (pool_allocator::initialize): Change usage of memory statistics
 to a new interface.
 (pool_allocator::release): Likewise.
 (pool_allocator::allocate): Likewise.
 (pool_allocator::remove): Likewise.
 * mem-stats-traits.h (enum mem_alloc_origin): Add new enum value
 for a pool allocator.
 * mem-stats.h (struct mem_location): Add new ctor.
 (struct mem_usage): Add counter for number of
 instances.
 (mem_alloc_description::register_descriptor): New overload of
 the function.

  -


diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index 96a1342..a1727ce 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h



+  /* Dump usage coupled to LOC location, where TOTAL is sum of all rows.  */
+  inline void dump (mem_location *loc, mem_usage &total) const
+  {
+char s[4096];
+sprintf (s, "%s:%i (%s)", loc->get_trimmed_filename (),
+ loc->m_line, loc->m_function);

Static sized buffer used in a sprintf where the strings are potentially user 
controlled.   Not good, even in dumping code, still not good.


+
+s[48] = '\0';

?!?  Presumably you're just truncating the output line here for the subsequent fprintf call.  
Consider using a const with a symbolic name rather than the magic "48".  I say 
"consider" because there's magic constants all over the place in the dumping code. So it 
may not be worth the effort.  Your call.

  +

+  /* Dump header with NAME.  */
+  static inline void dump_header (const char *name)
+  {
+fprintf (stderr, "%-32s%-48s %6s%11s%16s%17s%12s\n", "Pool name", name,
+ "Pools", "Leak", "Peak", "Times", "Elt size");
+print_dash_line ();
+  }
+
+  /* Dump footer.  */
+  inline void dump_footer ()
+  {
+print_dash_line ();
+fprintf (stderr, "%s%75li%10li\n", "Total", (long)m_instances,
+ (long)m_allocated);
+print_dash_line ();
+  }

Note the header is static inline, footer is just inline.  Please try to make 
them consistent.
It doesn't look like you did anything with this.  Is there a reason that 
the dump_header and dump_footer have different linkage?  Also the 
linkage/return type for dump_header should be on its own line.


With that fixed, this is OK for the trunk.

jeff

Re: Teach gimple_canonical_types_compatible_p about incomplete types

2015-06-02 Thread Jan Hubicka

> On Sat, 30 May 2015, Jan Hubicka wrote:
> 
> > Joseph, does the attached testcase make sense for you? Is it defined? It is 
> > my
> > first attempt to really interpret C standard to detail.
> 
> I suppose it's defined if unsigned int is the type chosen as compatible 
> with that enum.  The test should be skipped for short_enums targets 
> (arm-eabi bare metal) (you can't simply use -fno-short-enums as then that 
> will fail the link-time compatibility checking).

Thanks. I did not notice we have targets that use -fshort-enums by default.
I suppose we want:
/* { dg-xfail-if "" { arm-eabi-* } { "*" } { "" } } */

Honza
> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com

Re: [RFA] Factor conversion out of COND_EXPR using match.pd pattern

2015-06-02 Thread Jeff Law


On 06/01/2015 05:07 AM, Richard Biener wrote:

+(simplify
+  (cond @0 (convert @1) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   && COMPARISON_CLASS_P (@0)
+   && int_fits_type_p (@2, TREE_TYPE (@1))
+   && ((operand_equal_p (TREE_OPERAND (@0, 0), @2, 0)
+   && operand_equal_p (TREE_OPERAND (@0, 1), @1, 0))
+  || (operand_equal_p (TREE_OPERAND (@0, 0), @1, 0)
+  && operand_equal_p (TREE_OPERAND (@0, 1), @2, 0
+(with { tree itype = TREE_TYPE (@1); tree otype = TREE_TYPE (@2); }
+  (convert:otype (cond:itype @0 @1 (convert:itype @2))
+
+(simplify
+  (cond @0 INTEGER_CST@1 (convert @2))
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@2))
+   && COMPARISON_CLASS_P (@0)
+   && int_fits_type_p (@1, TREE_TYPE (@2))
+   && ((operand_equal_p (TREE_OPERAND (@0, 0), @2, 0)
+   && operand_equal_p (TREE_OPERAND (@0, 1), @1, 0))
+  || (operand_equal_p (TREE_OPERAND (@0, 0), @1, 0)
+  && operand_equal_p (TREE_OPERAND (@0, 1), @2, 0
+(with { tree itype = TREE_TYPE (@2); tree otype = TREE_TYPE (@1); }
+  (convert:otype (cond:itype @0 (convert:itype @1) @2)


Now this is a case where :c support on cond would make sense...
Yea.  The trick would be describing which operands can commute since 
COND_EXPR has 3 operands.  I guess we could just define it in the 
obvious way for COND_EXPR and special case it wherever we need.





 in

theory the preprocessor would also be your friend here.  Or we could
enhance the syntax to support multiple match patterns for the same
result, thus

  (simplify
(cond @0 (convert @1) INTEGER_CST@2)
(cond @0 INTEGER_CST@2 (convert @1))
(if ...

though that would require some extra markup to see where the list of matches
ends.  user-defined predicates can be used for this already though

(match (widening_cond @0 @1 @2)
   (cond @0 (convert @1) INTEGER_CST@2))
(match (widening_cond @0 @1 @2)
   (cond @0 INTEGER_CST@2 (convert @1)))
(simplify
   (widening_cond @0 @1 @2)
   (if (...
If you'd prefer this syntax, I'm happy to switch to it and simplify 
later if we gain :c support on the COND_EXPR.




but the comments from Marc still hold in that you shouldn't rely
on @0 being GENERIC - you want a GIMPLE testcase as well for this,
something like

_Bool cond = 64 < mode_size[mode];
return cond ? 64 : mode_size[mode];

Yea, I'll poke at that to generate some gimple tests.


Not sure if I'll get another iteration out before heading on PTO or not.

Thanks for the feedback,


Jeff

Re: [RFA] Factor conversion out of COND_EXPR using match.pd pattern

2015-06-02 Thread Jeff Law


On 06/01/2015 04:55 AM, Richard Biener wrote:

On Sat, May 30, 2015 at 11:11 AM, Marc Glisse  wrote:

(only commenting on the technique, not on the transformation itself)


+(simplify
+  (cond @0 (convert @1) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (TREE_TYPE (@1))
+   && COMPARISON_CLASS_P (@0)



If you add COMPARISON_CLASS_P to define_predicates, you could write:
(cond COMPARISON_CLASS_P@0 (convert @1) INTEGER_CST@2)


But that would fail to match on GIMPLE, so I don't like either variant,
as Jeff's relies on the awkward fact that on GIMPLE cond expr conditions
are GENERIC and yours wouldn't work.

That said - for this kind of pattern, testcases that exercise the patterns
on GIMPLE would be very much appreciated.
OK.  I'll see if I can build a testcase to exercise this in gimple.  If 
that's not possible would you prefer the pattern be restricted to 
generic just to be safe?





or maybe use a for loop on comparisons, which would give names to
TREE_OPERAND (@0, *). This should even handle the operand_equal_p
alternative:

(cond (cmp:c@0 @1 @2) (convert @1) INTEGER_CST@2)


Yes, that would be my preference.

OK.  easily done.




+   && int_fits_type_p (@2, TREE_TYPE (@1))
+   && ((operand_equal_p (TREE_OPERAND (@0, 0), @2, 0)
+   && operand_equal_p (TREE_OPERAND (@0, 1), @1, 0))
+  || (operand_equal_p (TREE_OPERAND (@0, 0), @1, 0)
+  && operand_equal_p (TREE_OPERAND (@0, 1), @2, 0
+(with { tree itype = TREE_TYPE (@1); tree otype = TREE_TYPE (@2); }
+  (convert:otype (cond:itype @0 @1 (convert:itype @2))



This should be enough, no need to specify the outer type
(convert (cond:itype @0 @1 (convert:itype @2))


Yes.


I believe we should not have to write cond:itype here, cond should be made
to use the type of its second argument instead of the first one, by default
(expr::gen_transform already has a few special cases).


Indeed.  Patch welcome (I'd have expected it already works...)
One of them is needed, but I can't recall which :-)  I'll pull them to 
generate the failure I'd run into and simplify appropriately and explain 
whichever is remaining.


jeff

[gomp4.1] Support for OpenMP 4.1 privatization of non-static data members in methods

2015-06-02 Thread Jakub Jelinek

Hi!

The OpenMP 4.1 standard is going to allow privatization of non-static data
members separately from the containing class in methods.  The behavior of
access to those members in the OpenMP regions where they are privatized
through this-> is unspecified, so I chose to privatize only the accesses
not through this->.

Jason on IRC said that he is considering deferring expansion of the
non-static data member accesses into this->field etc. form till
genericization, then of course this patch will need some changes to do the
replacement of such accesses during genericization instead of parsing
and instantiation.

I've only noticed now that reduction clause isn't covered in the tests, will
add that tomorrow.

2015-06-02  Jakub Jelinek  

* gimplify.c (omp_check_private): Handle
omp_member_access_dummy_var vars.
(gimplify_scan_omp_clauses): Set DECL_NAME on
omp_member_access_dummy_var vars.
* omp-low.c (omp_member_access_dummy_var, unshare_and_remap_1,
unshare_and_remap): New functions.
(use_pointer_for_field): omp_member_access_dummy_var vars
don't need to be made addressable.
(build_outer_var_ref): Handle omp_member_access_dummy_var vars.
Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE references.
(lower_send_clauses): Likewise.
(scan_sharing_clauses): Handle OMP_CLAUSE_SHARED_FIRSTPRIVATE
references.
(lower_send_shared_vars): Handle omp_member_access_dummy_var vars.
(create_task_copyfn): Fix up handling of
OMP_CLAUSE_SHARED_FIRSTPRIVATE decls.
(lower_omp_regimplify_p): Use IS_TYPE_OR_DECL_P macro.
(struct lower_omp_regimplify_operands_data): New type.
(lower_omp_regimplify_operands_p, lower_omp_regimplify_operands): New
functions.
(lower_omp_1): Use lower_omp_regimplify_operands instead of
gimple_regimplify_operands.
* omp-low.h (omp_member_access_dummy_var): New prototype.
gcc/cp/
* parser.c (cp_parser_lambda_body): Call
save_omp_privatization_clauses and restore_omp_privatization_clauses
around lambda body parsing.
(cp_parser_oacc_all_clauses, cp_parser_omp_all_clauses,
cp_parser_omp_for_loop, cp_omp_split_clauses, cp_parser_oacc_cache,
cp_parser_cilk_for): Adjust finish_omp_clauses callers.
(cp_parser_pragma): Call push_omp_privatization_clauses and
pop_omp_privatization_clauses around OpenMP pragma parsing.
* cp-gimplify.c (cxx_omp_disregard_value_expr): New function.
* pt.c (apply_late_template_attributes): Adjust finish_omp_clauses
and tsubst_omp_clauses callers.
(tsubst_omp_clauses): Add ALLOW_FIELDS argument, adjust possible
non-static data member arguments and pass ALLOW_FIELDS down to
finish_omp_clauses.
(tsubst_omp_for_iterator): Adjust finish_omp_clauses caller.
(tsubst_expr): Adjust tsubst_omp_clauses caller, call
push_omp_privatization_clauses and pop_omp_privatization_clauses
around instantiation of the constructs.
* cp-tree.h (DECL_OMP_PRIVATIZED_MEMBER): Define.
(finish_omp_clauses): Add ALLOW_FIELDS argument to prototype.
(push_omp_privatization_clauses, pop_omp_privatization_clauses,
save_omp_privatization_clauses, restore_omp_privatization_clauses,
cxx_omp_disregard_value_expr): New prototypes.
* cp-objcp-common.h (LANG_HOOKS_OMP_DISREGARD_VALUE_EXPR): Redefine.
* semantics.c (omp_private_member_map, omp_private_member_vec): New
variables.
(finish_non_static_data_member): Return dummy decl for privatized
non-static data members.
(finish_omp_clauses): Add ALLOW_FIELDS argument.  Handle
non-static data member privatization in {,first,last,copy}private,
reduction and linear clauses.  Diagnose linear with predetermined
decls.
(push_omp_privatization_clauses, pop_omp_privatization_clauses,
save_omp_privatization_clauses, restore_omp_privatization_clauses):
New functions.
(finish_omp_for): Adjust finish_omp_clauses caller.
gcc/testsuite/
* g++.dg/gomp/clause-1.C (T::test): Remove dg-error
on privatization of non-static data members.
* g++.dg/gomp/member-1.C: New test.
* g++.dg/gomp/member-2.C: New test.
libgomp/
* testsuite/libgomp.c++/taskloop-5.C: New test.
* testsuite/libgomp.c++/member-1.C: New test.
* testsuite/libgomp.c++/member-2.C: New test.

--- gcc/gimplify.c.jj   2015-05-21 11:12:09.0 +0200
+++ gcc/gimplify.c  2015-05-29 16:01:15.183190752 +0200
@@ -6061,19 +6061,38 @@ omp_check_private (struct gimplify_omp_c
 {
   ctx = ctx->outer_context;
   if (ctx == NULL)
-   return !(is_global_var (decl)
-/* References might be private, but might be shared too,
-   when checking for copyprivate, assume they might be
-

[PATCH] PR fortran/66380 -- Remove assert() to allow error condition

2015-06-02 Thread Steve Kargl

The attached patch removes an assert() and returns NULL
during the simplification of a bad RESHAPE call.  This
allows gfortran to correctly issue an error message.
Regression tested on trunk.  OK to commit?

2015-05-27  Steven G. Kargl  

* simplify.c (gfc_simplify_reshape): Convert assert into returning
NULL, which triggers an error condition.

2015-05-27  Steven G. Kargl  

* gfortran.dg/reshape_7.f90: New test.

-- 
Steve
Index: fortran/simplify.c
===
--- fortran/simplify.c	(revision 223986)
+++ fortran/simplify.c	(working copy)
@@ -5188,8 +5188,11 @@ gfc_simplify_reshape (gfc_expr *source, 
 	e = gfc_constructor_lookup_expr (source->value.constructor, j);
   else
 	{
-	  gcc_assert (npad > 0);
-
+	  if (npad <= 0)
+	{
+	  mpz_clear (index);
+	  return NULL;
+	}
 	  j = j - nsource;
 	  j = j % npad;
 	  e = gfc_constructor_lookup_expr (pad->value.constructor, j);
Index: testsuite/gfortran.dg/reshape_7.f90
===
--- testsuite/gfortran.dg/reshape_7.f90	(revision 0)
+++ testsuite/gfortran.dg/reshape_7.f90	(working copy)
@@ -0,0 +1,17 @@
+! { dg-do compile }
+! PR fortran/66380
+!
+subroutine p0
+   integer, parameter :: sh(2) = [2, 3]
+   integer, parameter :: &
+   & a(2,2) = reshape([1, 2, 3, 4], sh)   ! { dg-error "Different shape" }
+   if (a(1,1) /= 0) call abort
+end subroutine p0
+
+
+subroutine p1
+   integer, parameter :: sh(2) = [2, 1]
+   integer, parameter :: &
+   &  a(2,2) = reshape([1, 2, 3, 4], sh)  ! { dg-error "Different shape" }
+   if (a(1,1) /= 0) call abort
+end subroutine p1

Re: Teach gimple_canonical_types_compatible_p about incomplete types

2015-06-02 Thread Joseph Myers

On Tue, 2 Jun 2015, Jan Hubicka wrote:

> > On Sat, 30 May 2015, Jan Hubicka wrote:
> > 
> > > Joseph, does the attached testcase make sense for you? Is it defined? It 
> > > is my
> > > first attempt to really interpret C standard to detail.
> > 
> > I suppose it's defined if unsigned int is the type chosen as compatible 
> > with that enum.  The test should be skipped for short_enums targets 
> > (arm-eabi bare metal) (you can't simply use -fno-short-enums as then that 
> > will fail the link-time compatibility checking).
> 
> thanks. I did not notice we have -fshort-enum by default targets. I suppose 
> we want:
> /* { dg-xfail-if "" { arm-eabi-* } { "*" } { "" } } */

Well, not that (which matches "eabi" against the vendor part of the 
triplet), but skip for the short_enums effective-target keyword.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread David Edelsohn

On Fri, May 22, 2015 at 5:24 PM, Michael Meissner
 wrote:
> This patch is the first in a series of patches that will eventually add 
> support
> for IEEE 128-bit floating point support to the PowerPC GCC compiler.  At the
> current time, we do not plan to change the default for long double.  I added a
> new type keyword (__float128) to get access to IEEE 128-bit floating point, 
> and
> another (__ibm128) to get access to IBM extended double type.
>
> Until all of the GCC and LIBGCC patches have been committed, you will not be
> able to use IEEE 128-bit floating point, and -mfloat128-software will not be
> turned on by default.
>
> This patch adds the new modes (KFmode and IFmode) and the switches
> (-mfloat128-{none,software}).
>
> Due to the fact that TFmode in the PowerPC compiler either represents IEEE
> 128-bit floating point or the IBM extended double (double-double) format.  For
> most PowerPC users, the default is to use IBM extended double for long double.
> Because TFmode can be either floating point format, I added two new modes:
>
> KFmode  -- IEEE 128-bit floating point
> IFmode  -- IBM extended double floating point
>
> If the default for TFmode is ibm extended double, the port will eventually use
> KFmode for IEEE 128-bit floating point.  Likewise if the default for TFmode is
> IEEE 128-bit floating point, the port will use TFmode for IEEE 128-bit 
> floating
> point, and IFmode for IBM extended double.
>
> I have bootstrapped these patches on a power7 and compared them to the 
> unpatched
> compiler.  There were no changes when running make check.  Are these patches 
> ok
> to install in the trunk?

Mike,

What is the purpose of the TARGET_LONG_DOUBLE_128 change in
rs6000_hard_regno_mode_ok()?

+  /* If we don't allow 128-bit binary floating point, disallow the 128-bit
+ types from going in any registers.  Similarly if __float128 is not
+ supported, don't allow __float128/__ibm128 types.  */
+  if (!TARGET_LONG_DOUBLE_128
+  && (mode == TFmode || mode == KFmode || mode == IFmode))
+return false;

Why is this necessary now?

Thanks, David

Re: Teach gimple_canonical_types_compatible_p about incomplete types

2015-06-02 Thread Jan Hubicka

> > thanks. I did not notice we have -fshort-enum by default targets. I suppose 
> > we want:
> > /* { dg-xfail-if "" { arm-eabi-* } { "*" } { "" } } */
> 
> Well, not that (which matches "eabi" against the vendor part of the 
> triplet), but skip for the short_enums effective-target keyword.
Ok. Did not know about short_enums (my dejagnu-fu is still very limited :( )

/* { dg-skip-if "require -fno-short-enums to work" {target short_enums} } */

Alternatively I suppose I can add enum value set to INT_MAX to force enum to be 
large.

Honza
> 
> -- 
> Joseph S. Myers
> jos...@codesourcery.com

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread Joseph Myers

Is the use of FRACTIONAL_FLOAT_MODE to avoid iterations over 
floating-point modes including these modes when they shouldn't, as 
discussed previously?

If so, how do you deal (in subsequent patches?) with iterations that 
*should* include these modes?  In particular, where libgcc uses 
__LIBGCC__* macros predefined with -fbuilding-libgcc in an 
iteration in c-cppbuiltin.c, how do you handle getting the relevant 
information in libgcc to build applicable libgcc functions for these 
modes?  (I'm presuming that you do want complex arithmetic to work for 
both 128-bit types, for example, although you won't want them to be used 
for intermediate conversions in libgcc operations on other types.)

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [gomp4] Worker-single predication

2015-06-02 Thread Cesar Philippidis

On 06/01/2015 08:58 AM, Bernd Schmidt wrote:
> This extends the previous vector-single support to also handle
> worker-level predication. We can't use the shfl insn because workers
> will live across multiple warps, so we use a location in memory to
> broadcast the branch target.
> This also fixes the oversight where basic blocks inside a parallel
> region but outside all loops weren't being predicated.
> 
> A special case is added for worker-single vector-partitioned; we add a
> jump over the entire loop that is taken by the inactive workers and add
> no predication inside this loop.
> 
> Committed on gomp-4_0-branch.

Thanks. This fixed the problems that I was seeing with variables outside
of acc loops.

I see that calls are being predicated at the moment. Those will need
special handling once we tackle acc routines.

Cesar

[Ping] Re: [C++ Patch[ PR 66130

2015-06-02 Thread Paolo Carlini


Hi,

gently pinging the below. Should be largely uncontroversial...

On 05/18/2015 06:29 PM, Paolo Carlini wrote:

Hi,

Manuel did most of the work for this rather simple issue filed by Tom: 
essentially, invalid_nonstatic_memfn_p gets a location_t parameter 
which is used to pass the location of the place where the use of the 
nonstatic member function is indeed invalid. Besides that, while 
working on the bug we noticed that we must be careful with exprs which 
aren't DECLs.


https://gcc.gnu.org/ml/gcc-patches/2015-05/msg01587.html

Thanks,
Paolo.

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-06-02 Thread Sriraman Tallam

On Mon, Jun 1, 2015 at 1:33 PM, Ramana Radhakrishnan
 wrote:
> On Mon, Jun 1, 2015 at 7:55 PM, Sriraman Tallam  wrote:
>> On Mon, Jun 1, 2015 at 11:41 AM, Ramana Radhakrishnan
>>  wrote:
>>> On Mon, Jun 1, 2015 at 7:01 PM, Sriraman Tallam  wrote:
 On Mon, Jun 1, 2015 at 1:24 AM, Ramana Radhakrishnan
  wrote:
>
>>> Why isn't it just an indirect call in the cases that would require a GOT
>>> slot and a direct call otherwise ? I'm trying to work out what's so
>>> different on each target that mandates this to be in the target backend.
>>> Also it would be better to push the tests into gcc.dg if you can and
>>> check
>>> for the absence of a relocation so that folks at least see these as 
>>> being
>>> UNSUPPORTED on their target.
>>
>>
>
>
> To be even more explicit, shouldn't this be handled similar to the way in
> which -fno-plt is handled in a target agnostic manner ? After all, if you
> can handle this for the command line, doing the same for a function which
> has been decorated with attribute((noplt)) should be simple.

 -fno-plt does not work for non-PIC code, having non-PIC code not use
 PLT was my primary motivation.  In fact, if you go back in this thread,
 I suggested to HJ if I should piggyback on -fno-plt.  I tried using
 the -fno-plt implementation to do this by removing the flag_pic check
 in calls.c, but that does not still work for non-PIC code.
>
> If you want __attribute__ ((noplt)) to work for non-PIC code, we
> should look to code it in the same place surely by making all
> __attribute__((noplt)) calls, indirect calls irrespective of whether
> it's fpic or not.
>
>
>>>
>>> You're missing my point, unless I'm missing something basic here - I
>>> should have been even more explicit and said -fPIC was a given in all
>>> this discussion.
>>>
>>> calls.c:229 has
>>>
>>> else if (flag_pic && !flag_plt && fndecl_or_type
>>>&& TREE_CODE (fndecl_or_type) == FUNCTION_DECL
>>>&& !targetm.binds_local_p (fndecl_or_type))
>>>
>>> why can't we merge the check in here for the attribute noplt ?
>>
>> We can, and please see this thread; that is the exact patch I proposed:
>> https://gcc.gnu.org/ml/gcc-patches/2015-05/msg02682.html
>>
>> However, there was one caveat.  I want this working without -fPIC too.
>> non-PIC code also generates PLT calls and I want them eliminated.
>>
>>>
>>> If a new attribute is added to the "GNU language" in this case, why
>>> isn't this being treated in the same way as the command line option
>>> has been treated ? All this means is that we add an attribute and a
>>> command line option to common code and then not implement it in a
>>> proper target agnostic fashion.
>>
>> You are right.  This is the way I wanted it too but I also wanted the
>> attribute to work without PIC. PLT calls are generated without -fPIC
>> and -fPIE too and I wanted a solution for that.  On looking at the
>> code in more detail,
>>
>> * -fno-plt is made to work with -fPIC, is there a reason to not make
>> it work for non-PIC code?  I can remove the flag_pic check from
>> calls.c
>
> I don't think that's right, you probably have to allow that along with
> (flag_pic || (decl && attribute_no_plt (decl)) - however it seems odd
> to me that the language extension allows this but the flag doesn't.
>
>> * Then, I add the generic attribute "noplt" and everything is fine.
>>
>> There is just one caveat with the above approach, for x86_64
>> (*call_insn) will not generate indirect-calls for *non-PIC* code
>> because constant_call_address_operand in predicates.md will evaluate
>> to false.  This can be fixed appropriately in ix86_output_call_insn in
>> i386.c.
>
> Yes, targets need to massage that into place but that's essentially
> the mechanics of retaining indirect calls in each backend. -fno-plt
> doesn't work for ARM / AArch64 with optimizers currently (and I
> suspect on most other targets) because our predicates are too liberal,
> fixed by treating "noplt" or -fno-plt as the equivalent of
> -mlong-calls.
>
>>
>>
>> Is this alright?  Sorry for the confusion, but the primary reason why
>> I did not do it the way you suggested is because we wanted "noplt"
>> attribute to work for non-PIC code also.
>
> If that is the case, then this is a slightly more complicated
> condition in the same place. We then always have indirect calls for
> functions that are marked noplt and just have target generate this
> appropriately.

I have now modified this patch.

This patch does two things:

1) Adds new generic function attribute "no_plt" that is similar in
functionality  to -fno-plt except that it applies only to calls to
functions that are marked  with this attribute.
2) For x86_64, it makes -fno-plt(and the attribute) also work for
non-PIC code by  directly generating an indirect call via a GOT entry.

For PIC code, no_plt merely shadows the implementation of -fno-plt, no
surprises here.

* c-f

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread Michael Meissner

On Tue, Jun 02, 2015 at 01:43:08PM -0400, David Edelsohn wrote:
> Mike,
> 
> What is the purpose of the TARGET_LONG_DOUBLE_128 change in
> rs6000_hard_regno_mode_ok()?
> 
> +  /* If we don't allow 128-bit binary floating point, disallow the 128-bit
> + types from going in any registers.  Similarly if __float128 is not
> + supported, don't allow __float128/__ibm128 types.  */
> +  if (!TARGET_LONG_DOUBLE_128
> +  && (mode == TFmode || mode == KFmode || mode == IFmode))
> +return false;
> 
> Why is this necessary now?

I was trying to avoid problems if there was no move/convert patterns for
KFmode/IFmode.  I made it when I transitioned from SPECIAL_FLOAT_MODE to
FRACTIONAL_FLOAT_MODE.  Given there are 2 fractional float modes now, if the
compiler was automatically trying to find a larger type than DFmode, it would
first try IFmode, then KFmode, and finally TFmode (under the old
SPECIAL_FLOAT_MODE, it wouldn't look at those types in the normal course of
things).

With the current set of patches, there is no move or convert options for
IFmode/KFmode, but also the emulator functions are not properly defined.

I can remove the lines and do the build again, if you would prefer.  I don't
think it is strictly necessary.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread Michael Meissner

On Tue, Jun 02, 2015 at 05:55:10PM +, Joseph Myers wrote:
> Is the use of FRACTIONAL_FLOAT_MODE to avoid iterations over 
> floating-point modes including these modes when they shouldn't, as 
> discussed previously?
> 
> If so, how do you deal (in subsequent patches?) with iterations that 
> *should* include these modes?  In particular, where libgcc uses 
> __LIBGCC__* macros predefined with -fbuilding-libgcc in an 
> iteration in c-cppbuiltin.c, how do you handle getting the relevant 
> information in libgcc to build applicable libgcc functions for these 
> modes?  (I'm presuming that you do want complex arithmetic to work for 
> both 128-bit types, for example, although you won't want them to be used 
> for intermediate conversions in libgcc operations on other types.)

I have a catch-22 situation.  We can't really do the glibc stuff until we have
the compiler.  Right now, I use a makefile on libgcc/config/rs6000 that copies
the various TF files and modifies it for KF files.

After we get the basic support in, we can then start tackling glibc.  It may be
when we get to doing the work in glibc itself, we will need to make further
modifications.  However, in order for the glibc people to start, I need the
basic support in the compiler in the tree.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

Heads-up: testsuite: removed cleanup-ipa-dump, cleanup-rtl-dump,cleanup-tree-dump, cleanup-dump, cleanup-saved-temps

2015-06-02 Thread Bernhard Reutner-Fischer

[just a try to keep fallout low]

On June 2, 2015 5:38:06 PM GMT+02:00, Bernhard Reutner-Fischer 
 wrote:
>On June 2, 2015 2:08:47 PM GMT+02:00, Richard Biener
> wrote:
>>
>>Committed.  Seems to cause half of the vectorizer tests to be dropped
>>and test-summary breaking for me.

As a gentle reminder to reviewers and all:

>Right. There should be no cleanup-tree-dump left on trunk.

On Friday I committed:
* lib/gcc-dg.exp (cleanup-ipa-dump, cleanup-rtl-dump,
cleanup-tree-dump, cleanup-dump, cleanup-saved-temps): Remove.
Adjust all callers.

Please refer to sourcebuild.texi for (currently) remaining manual cleanup tcl 
procs.

Thanks and cheers,

Re: [PATCH] PR fortran/66380 -- Remove assert() to allow error condition

2015-06-02 Thread FX

> 2015-05-27  Steven G. Kargl  
> 
>   * simplify.c (gfc_simplify_reshape): Convert assert into returning
>   NULL, which triggers an error condition.

OK to commit.

Re: [PR65768] Check rtx_cost when propagating constant

2015-06-02 Thread Jeff Law


On 05/31/2015 08:20 PM, Kugan wrote:



On 30/05/15 14:54, Jeff Law wrote:

On 05/29/2015 12:32 AM, Kugan wrote:


  PR target/65768
  * cprop.c (try_replace_reg): Check cost of constants before
propagating.

I should have also noted, fresh bootstrap & regression test is needed
too.


Thanks Jeff for the comments. I did a fresh bootstrap and regression
testing on x86_64-linux-gnu with no new regression. I will wait for
your ACK.

Can you address the 3 issues in my prior message?  I'll include them
here for clarity:

--

The "const_p" variable is poorly named, though I can kind of see how you
settled on it.  Maybe "check_rtx_costs" or something along those lines
would be better.

The comment for the second hunk would probably be better as:

/* If TO is a constant, check the cost of the set after propagation
to the cost of the set before the propagation.  If the cost is
higher, then do not replace FROM with TO.  */


You should try to produce a testcase where this change shows a code
generation improvement.  Given we're checking target costs, that test
will naturally be target specific.  But please do try.

So with the two nits fixed and a testcase, I think this can go forward.
--



Thanks Jeff and apologies for missing your previous email. I have now
fixed the comments as you suggested and changed the PR target/65768
testcase such that it tests this case.

I will commit it if there are no objections to this.

No objections.  Thanks for your patience on this!

jeff

RE: [Patch MIPS] Enable TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS hook

2015-06-02 Thread Matthew Fortune

Robert Suchanek  writes:
> diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
> index c3755f5..976f844 100644
> --- a/gcc/config/mips/mips.c
> +++ b/gcc/config/mips/mips.c
> @@ -19415,6 +19415,21 @@ mips_lra_p (void)
>  {
>return mips_lra_flag;
>  }
> +
> +/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.  */
> +
> +static reg_class_t
> +mips_ira_change_pseudo_allocno_class (int regno, reg_class_t
> allocno_class)
> +{
> +  /* LRA will generate unnecessary reloads because the LRA's cost pass
> finds
> + cheaper to move data to/from memory into FP regs rather than GP
> regs.
> + By narrowing the class for allocnos to GR_REGS for integral modes
> early,
> + we refrain from using FP regs until they are absolutely necessary.
> */

I'm not sure this comment is accurately describing the issue (or I have
misunderstood something). I thought this change is to counter LRA's
tendency to use an FPR as a spill instead of memory?

i.e.
/* LRA will allocate an FPR for an integer mode pseudo instead of spilling
   to memory if an FPR is present in the allocno class.  It is rare that
   we actually need to place an integer mode value in an FPR so where
   possible limit the allocation to GR_REGS.  This will slightly pessimize
   code that involves integer to/from float conversions as these will have
   to reload into FPRs in LRA. Such reloads are sometimes eliminated and
   sometimes only partially eliminated.  We choose to take this penalty
   in order to eliminate usage of FPRs in code that does not use floating
   point data.

   This change has a similar effect to increasing the cost of FPR->GPR
   register moves for integer modes so that they are higher than the cost
   of memory but changing the allocno class is more reliable.

   This is also similar to forbidding integer mode values in FPRs entirely
   but this would lead to an inconsistency in the integer to/from float
   instructions that say integer mode values must be placed in FPRs.  */

I'm keen to get the description of this right so please feel free to change
it further if it isn't clear (or correct).

I don't know if this change will lead to classic reload being unusable for
MIPS. I'm not worried about that but I think it is probably wise to
remove classic reload support for MIPS now; we are dependent on LRA for
several features already.

Do you have any details on when we are left with suboptimal code for
int->float conversions? I'd like to keep a record of them in this thread
or in the comment so we know what is left to fix.

> +  if (INTEGRAL_MODE_P (PSEUDO_REGNO_MODE (regno)) && allocno_class ==
> ALL_REGS)
> +return GR_REGS;
> +  return allocno_class;
> +}
> +

Trim the extra trailing newline.

OK to commit if you are happy with the comment.

Thanks,
Matthew

> 
> 
>  /* Initialize the GCC target structure.  */
>  #undef TARGET_ASM_ALIGNED_HI_OP
> @@ -19671,6 +19686,8 @@ mips_lra_p (void)
>  #define TARGET_SPILL_CLASS mips_spill_class
>  #undef TARGET_LRA_P
>  #define TARGET_LRA_P mips_lra_p
> +#undef TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
> +#define TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
> mips_ira_change_pseudo_allocno_class
> 
>  struct gcc_target targetm = TARGET_INITIALIZER;
>

Re: [patch 10/10] debug-early merge: compiler proper

2015-06-02 Thread Aldy Hernandez


On 06/02/2015 04:11 AM, Richard Biener wrote:


I suppose we can change things this way as a followup (as it needs some work)


Ok.

I cleaned things up using variably_modified_type_p() as you did, instead 
of the INTEGER_CST nonsense.  Attached are the latest dwarf2out.c 
changes against mainline.


Jason are you ok with the attached wrt VLAs?  If so, I'm putting this 
aside while I work on DECL_ABSTRACT and friends, and hopefully wrap 
things up.


Tested on x86-64 Linux, gcc and gdb testsuites.

Thanks guys.

Aldy
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 15c545e..62b06c4 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1268,16 +1268,6 @@ struct GTY((for_user)) dwarf_file_data {
   int emitted_number;
 };
 
-typedef struct GTY(()) deferred_locations_struct
-{
-  tree variable;
-  dw_die_ref die;
-} deferred_locations;
-
-
-static GTY(()) vec *deferred_locations_list;
-
-
 /* Describe an entry into the .debug_addr section.  */
 
 enum ate_kind {
@@ -2448,6 +2438,7 @@ build_cfa_aligned_loc (dw_cfa_location *cfa,
 
 static void dwarf2out_init (const char *);
 static void dwarf2out_finish (const char *);
+static void dwarf2out_early_finish (void);
 static void dwarf2out_assembly_start (void);
 static void dwarf2out_define (unsigned int, const char *);
 static void dwarf2out_undef (unsigned int, const char *);
@@ -2457,7 +2448,8 @@ static void dwarf2out_function_decl (tree);
 static void dwarf2out_begin_block (unsigned, unsigned);
 static void dwarf2out_end_block (unsigned, unsigned);
 static bool dwarf2out_ignore_block (const_tree);
-static void dwarf2out_global_decl (tree);
+static void dwarf2out_early_global_decl (tree);
+static void dwarf2out_late_global_decl (tree);
 static void dwarf2out_type_decl (tree, int);
 static void dwarf2out_imported_module_or_decl (tree, tree, tree, bool);
 static void dwarf2out_imported_module_or_decl_1 (tree, tree, tree,
@@ -2474,6 +2466,7 @@ const struct gcc_debug_hooks dwarf2_debug_hooks =
 {
   dwarf2out_init,
   dwarf2out_finish,
+  dwarf2out_early_finish,
   dwarf2out_assembly_start,
   dwarf2out_define,
   dwarf2out_undef,
@@ -2495,7 +2488,8 @@ const struct gcc_debug_hooks dwarf2_debug_hooks =
   dwarf2out_begin_function,
   dwarf2out_end_function,	/* end_function */
   dwarf2out_function_decl,	/* function_decl */
-  dwarf2out_global_decl,
+  dwarf2out_early_global_decl,
+  dwarf2out_late_global_decl,
   dwarf2out_type_decl,		/* type_decl */
   dwarf2out_imported_module_or_decl,
   debug_nothing_tree,		/* deferred_inline_function */
@@ -2636,10 +2630,20 @@ typedef struct GTY((chain_circular ("%h.die_sib"), for_user)) die_struct {
   /* Die is used and must not be pruned as unused.  */
   BOOL_BITFIELD die_perennial_p : 1;
   BOOL_BITFIELD comdat_type_p : 1; /* DIE has a type signature */
+  /* Die was generated early via dwarf2out_early_global_decl.  */
+  BOOL_BITFIELD dumped_early : 1;
   /* Lots of spare bits.  */
 }
 die_node;
 
+/* Set to TRUE while dwarf2out_early_global_decl is running.  */
+static bool early_dwarf;
+struct set_early_dwarf {
+  bool saved;
+  set_early_dwarf () : saved(early_dwarf) { early_dwarf = true; }
+  ~set_early_dwarf () { early_dwarf = saved; }
+};
+
 /* Evaluate 'expr' while 'c' is set to each child of DIE in order.  */
 #define FOR_EACH_CHILD(die, c, expr) do {	\
   c = die->die_child;\
@@ -2690,9 +2694,13 @@ typedef struct GTY(()) comdat_type_struct
 }
 comdat_type_node;
 
-/* The limbo die list structure.  */
+/* A list of DIEs for which we can't determine ancestry (parent_die
+   field) just yet.  Later in dwarf2out_finish we will fill in the
+   missing bits.  */
 typedef struct GTY(()) limbo_die_struct {
   dw_die_ref die;
+  /* The tree for which this DIE was created for.  We use this to
+ determine ancestry later.  */
   tree created_for;
   struct limbo_die_struct *next;
 }
@@ -2939,7 +2947,7 @@ static GTY((length ("abbrev_die_table_allocated")))
 /* Number of elements currently allocated for abbrev_die_table.  */
 static GTY(()) unsigned abbrev_die_table_allocated;
 
-/* Number of elements in type_die_table currently in use.  */
+/* Number of elements in abbrev_die_table currently in use.  */
 static GTY(()) unsigned abbrev_die_table_in_use;
 
 /* Size (in elements) of increments by which we may expand the
@@ -3021,9 +3029,6 @@ static GTY(()) struct dwarf_file_data * last_emitted_file;
 /* Number of internal labels generated by gen_internal_sym().  */
 static GTY(()) int label_num;
 
-/* Cached result of previous call to lookup_filename.  */
-static GTY(()) struct dwarf_file_data * file_table_last_lookup;
-
 static GTY(()) vec *tmpl_value_parm_die_table;
 
 /* Instances of generic types for which we need to generate debug
@@ -3108,7 +3113,7 @@ static inline dw_die_ref get_AT_ref (dw_die_ref, enum dwarf_attribute);
 static bool is_cxx (void);
 static bool is_fortran (void);
 static bool is_ada (void);
-static void remove_AT (dw_die_ref, enum dwarf_attribute);
+static bool remove_AT (dw_die_ref, enum dw

Re: [C/C++ PATCH] Implement -Wshift-overflow (PR c++/55095) (take 2)

2015-06-02 Thread Marek Polacek

On Fri, May 29, 2015 at 08:49:58PM +, Joseph Myers wrote:
> On Mon, 25 May 2015, Marek Polacek wrote:
> 
> > +/* Warn if signed left shift overflows.  Note that we don't warn
> > +   about left-shifting 1 into the sign bit; cf.
> > +   
> > 
> > +   for C++ and 
> > +   for C.  LOC is a location of the shift; OP0 and OP1 are the operands.
> > +   Return true if an overflow is detected, false otherwise.  */
> 
> But for C that was declared not a defect.  See 
> .  So 
> for C99 and later we *should* consider this an overflow (for the purposes 
> of pedwarns-if-pedantic in contexts where an integer constant expression 
> is required; maybe -Wshift-overflow=2 for other warnings?).  If then a 
> future C standard changes things here (in the list of issues to be 
> considered for a future C standard in Standing Document 3, 
> ), then, as a 
> non-defect change, it should be considered a non-overflow in GCC only for 
> future C standard versions as well as C90.
> 
> (Although treating something as not an integer constant expression does 
> have consequences beyond pedwarns-if-pedantic - a zero derived from that 
> expression is not a null pointer constant and that can affect the types of 
> conditional expressions - I don't expect any significant breakage of real 
> code from that.)

Well, ok then.  This new version incorporates Richard S.'s suggestion,
and warns even for 1 << 31 in C99/C11 (also in C90 when -Wshift-overflow
is explicitly specified).  For C++, it warns about 1 << 31 by default
only in C++11 mode, in C++14 never, otherwise only if -Wshift-overflow
is specified.

But the fallout seems to be nonnegligible.  So I think the default should
be -Wshift-overflow=1 that doesn't warn about 1 << 31, but still rejects
e.g. enum { A = 1 << 31 };.  And -Wshift-overflow=2 would warn even about
1 << 31.  (Perhaps this is exactly what you had in mind, but I'm not sure.)

In C90, shift overflow in contexts where an integer constant expression
is required does not cause the program to be rejected.

Bootstrapped/regtested on x86_64-linux.

2015-06-02  Marek Polacek  
Richard Sandiford  

PR c++/55095
* c-common.c (c_fully_fold_internal): Warn about left shift overflows.
Use EXPR_LOC_OR_LOC.
(maybe_warn_shift_overflow): New function.
* c-common.h (maybe_warn_shift_overflow): Declare.
* c-opts.c (c_common_post_options): Set warn_shift_overflow.
* c.opt (Wshift-overflow): New option.

* c-typeck.c (digest_init): Pass OPT_Wpedantic to pedwarn_init.
(build_binary_op): Warn about left shift overflows.

* typeck.c (cp_build_binary_op): Warn about left shift overflows.

* doc/invoke.texi: Document -Wshift-overflow.

* c-c++-common/Wshift-overflow-1.c: New test.
* c-c++-common/Wshift-overflow-2.c: New test.
* c-c++-common/Wshift-overflow-3.c: New test.
* c-c++-common/Wshift-overflow-4.c: New test.
* g++.dg/cpp1y/left-shift-1.C: New test.
* gcc.dg/c90-left-shift-2.c: New test.
* gcc.dg/c90-left-shift-3.c: New test.
* gcc.dg/c99-left-shift-2.c: New test.
* gcc.dg/c99-left-shift-3.c: New test.
* gcc.c-torture/execute/pr40386.c: Use -Wno-shift-overflow.
* gcc.dg/fixuns-trunc-1.c: Likewise.
* gcc.dg/multiple-overflow-warn-3.c: Likewise.
* gcc.dg/pr40501.c: Likewise.
* gcc.dg/tree-ssa/vrp67.c: Likewise.
* gcc.dg/vect/pr33373.c: Likewise.
* gcc.dg/vect/vect-shift-2-big-array.c: Likewise.
* gcc.dg/vect/vect-shift-2.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-1.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-2.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-256-1.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-256-2.c: Likewise.
* gcc.target/i386/avx2-vpmaskloadd-2.c: Likewise.
* gcc.target/i386/avx2-vpmaskloadd256-2.c: Likewise.
* gcc.target/i386/avx2-vpmaskstored-2.c: Likewise.
* gcc.target/i386/avx2-vpmaskstored256-2.c: Likewise.

diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
index 36c984c..7ff6343 100644
--- gcc/c-family/c-common.c
+++ gcc/c-family/c-common.c
@@ -1371,7 +1371,7 @@ c_fully_fold_internal (tree expr, bool in_init, bool 
*maybe_const_operands,
   if (TREE_OVERFLOW_P (ret)
  && !TREE_OVERFLOW_P (op0)
  && !TREE_OVERFLOW_P (op1))
-   overflow_warning (EXPR_LOCATION (expr), ret);
+   overflow_warning (EXPR_LOC_OR_LOC (expr, input_location), ret);
   if (code == LSHIFT_EXPR
  && TREE_CODE (orig_op0) != INTEGER_CST
  && TREE_CODE (TREE_TYPE (orig_op0)) == INTEGER_TYPE
@@ -1401,6 +1401,18 @@ c_fully_fold_internal (tree

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread David Edelsohn

On Tue, Jun 2, 2015 at 2:27 PM, Michael Meissner
 wrote:
> On Tue, Jun 02, 2015 at 01:43:08PM -0400, David Edelsohn wrote:
>> Mike,
>>
>> What is the purpose of the TARGET_LONG_DOUBLE_128 change in
>> rs6000_hard_regno_mode_ok()?
>>
>> +  /* If we don't allow 128-bit binary floating point, disallow the 128-bit
>> + types from going in any registers.  Similarly if __float128 is not
>> + supported, don't allow __float128/__ibm128 types.  */
>> +  if (!TARGET_LONG_DOUBLE_128
>> +  && (mode == TFmode || mode == KFmode || mode == IFmode))
>> +return false;
>>
>> Why is this necessary now?
>
> I was trying to avoid problems if there was no move/convert patterns for
> KFmode/IFmode.  I made it when I transitioned from SPECIAL_FLOAT_MODE to
> FRACTIONAL_FLOAT_MODE.  Given there are 2 fractional float modes now, if the
> compiler was automatically trying to find a larger type than DFmode, it would
> first try IFmode, then KFmode, and finally TFmode (under the old
> SPECIAL_FLOAT_MODE, it wouldn't look at those types in the normal course of
> things).
>
> With the current set of patches, there is no move or convert options for
> IFmode/KFmode, but also the emulator functions are not properly defined.

The first patch is okay.  This clearly is going to require a lot of
interations with libgcc and GLIBC.

Thanks, David

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-06-02 Thread Bernhard Reutner-Fischer

On June 2, 2015 8:15:42 PM GMT+02:00, Sriraman Tallam  
wrote:
[]

>I have now modified this patch.
>
>This patch does two things:
>
>1) Adds new generic function attribute "no_plt" that is similar in
>functionality  to -fno-plt except that it applies only to calls to
>functions that are marked  with this attribute.
>2) For x86_64, it makes -fno-plt(and the attribute) also work for
>non-PIC code by  directly generating an indirect call via a GOT entry.
>
>For PIC code, no_plt merely shadows the implementation of -fno-plt, no
>surprises here.
>
>* c-family/c-common.c (no_plt): New attribute.
>(handle_no_plt_attribute): New handler.
>* calls.c (prepare_call_address): Check for no_plt
>attribute.
>* config/i386/i386.c (ix86_function_ok_for_sibcall): Check
>for no_plt attribute.
>(ix86_expand_call):  Ditto.
>(nopic_no_plt_attribute): New function.
>(ix86_output_call_insn): Output indirect call for non-pic
>no plt calls.
>* doc/extend.texi (no_plt): Document new attribute.
>* testsuite/gcc.target/i386/noplt-1.c: New test.
>* testsuite/gcc.target/i386/noplt-2.c: New test.
>* testsuite/gcc.target/i386/noplt-3.c: New test.
>* testsuite/gcc.target/i386/noplt-4.c: New test.
>
>
>Please review.

--- config/i386/i386.c  (revision 223720)
+++ config/i386/i386.c  (working copy)
@@ -5479,6 +5479,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
   && !TARGET_64BIT
   && flag_pic
   && flag_plt
+  && (TREE_CODE (decl) != FUNCTION_DECL
+ || !lookup_attribute ("no_plt", DECL_ATTRIBUTES (decl)))
   && decl && !targetm.binds_local_p (decl))
 return false;

Wrong order or && decl is redundant. Stopped reading here.

Thanks,

Re: [RFC][PATCH][X86_64] Eliminate PLT stubs for specified external functions via -fno-plt=

2015-06-02 Thread Sriraman Tallam

On Tue, Jun 2, 2015 at 12:32 PM, Bernhard Reutner-Fischer
 wrote:
> On June 2, 2015 8:15:42 PM GMT+02:00, Sriraman Tallam  
> wrote:
> []
>
>>I have now modified this patch.
>>
>>This patch does two things:
>>
>>1) Adds new generic function attribute "no_plt" that is similar in
>>functionality  to -fno-plt except that it applies only to calls to
>>functions that are marked  with this attribute.
>>2) For x86_64, it makes -fno-plt(and the attribute) also work for
>>non-PIC code by  directly generating an indirect call via a GOT entry.
>>
>>For PIC code, no_plt merely shadows the implementation of -fno-plt, no
>>surprises here.
>>
>>* c-family/c-common.c (no_plt): New attribute.
>>(handle_no_plt_attribute): New handler.
>>* calls.c (prepare_call_address): Check for no_plt
>>attribute.
>>* config/i386/i386.c (ix86_function_ok_for_sibcall): Check
>>for no_plt attribute.
>>(ix86_expand_call):  Ditto.
>>(nopic_no_plt_attribute): New function.
>>(ix86_output_call_insn): Output indirect call for non-pic
>>no plt calls.
>>* doc/extend.texi (no_plt): Document new attribute.
>>* testsuite/gcc.target/i386/noplt-1.c: New test.
>>* testsuite/gcc.target/i386/noplt-2.c: New test.
>>* testsuite/gcc.target/i386/noplt-3.c: New test.
>>* testsuite/gcc.target/i386/noplt-4.c: New test.
>>
>>
>>Please review.
>
> --- config/i386/i386.c  (revision 223720)
> +++ config/i386/i386.c  (working copy)
> @@ -5479,6 +5479,8 @@ ix86_function_ok_for_sibcall (tree decl, tree exp)
>&& !TARGET_64BIT
>&& flag_pic
>&& flag_plt
> +  && (TREE_CODE (decl) != FUNCTION_DECL
> + || !lookup_attribute ("no_plt", DECL_ATTRIBUTES (decl)))
>&& decl && !targetm.binds_local_p (decl))
>  return false;
>
> Wrong order or && decl is redundant. Stopped reading here.

Fixed and new patch attached.

Thanks
Sri

>
> Thanks,
>
* c-family/c-common.c (no_plt): New attribute.
(handle_no_plt_attribute): New handler.
* calls.c (prepare_call_address): Check for no_plt
attribute.
* config/i386/i386.c (ix86_function_ok_for_sibcall): Check
for no_plt attribute.
(ix86_expand_call):  Ditto.
(nopic_no_plt_attribute): New function.
(ix86_output_call_insn): Output indirect call for non-pic
no plt calls.
* doc/extend.texi (no_plt): Document new attribute.
* testsuite/gcc.target/i386/noplt-1.c: New test.
* testsuite/gcc.target/i386/noplt-2.c: New test.
* testsuite/gcc.target/i386/noplt-3.c: New test.
* testsuite/gcc.target/i386/noplt-4.c: New test.

This patch does two things:

* Adds new generic function attribute "no_plt" that is similar in functionality
  to -fno-plt except that it applies only to calls to functions that are marked
  with this attribute.
* For x86_64, it makes -fno-plt(and the attribute) also work for non-PIC code by
  directly generating an indirect call via a GOT entry.

Index: c-family/c-common.c
===
--- c-family/c-common.c (revision 223720)
+++ c-family/c-common.c (working copy)
@@ -357,6 +357,7 @@ static tree handle_mode_attribute (tree *, tree, t
 static tree handle_section_attribute (tree *, tree, tree, int, bool *);
 static tree handle_aligned_attribute (tree *, tree, tree, int, bool *);
 static tree handle_weak_attribute (tree *, tree, tree, int, bool *) ;
+static tree handle_no_plt_attribute (tree *, tree, tree, int, bool *) ;
 static tree handle_alias_ifunc_attribute (bool, tree *, tree, tree, bool *);
 static tree handle_ifunc_attribute (tree *, tree, tree, int, bool *);
 static tree handle_alias_attribute (tree *, tree, tree, int, bool *);
@@ -706,6 +707,8 @@ const struct attribute_spec c_common_attribute_tab
  handle_aligned_attribute, false },
   { "weak",   0, 0, true,  false, false,
  handle_weak_attribute, false },
+  { "no_plt",   0, 0, true,  false, false,
+ handle_no_plt_attribute, false },
   { "ifunc",  1, 1, true,  false, false,
  handle_ifunc_attribute, false },
   { "alias",  1, 1, true,  false, false,
@@ -8185,6 +8188,25 @@ handle_weak_attribute (tree *node, tree name,
   return NULL_TREE;
 }
 
+/* Handle a "no_plt" attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+handle_no_plt_attribute (tree *node, tree name,
+  tree ARG_UNUSED (args),
+  int ARG_UNUSED (flags),
+  bool * ARG_UNUSED (no_add_attrs))
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+{
+  warning (OPT_Wattributes,
+  "%qE attribute is only applicable on functions", name);
+  *no_add_attrs = true;
+  return NULL_TREE;
+}
+  return NULL_TREE;
+}
+
 /* Handle an "alias" or "ifunc" attribute; arguments as in
struct attribute_spec.handler, except that

Re: [PATCH] Update check after force_const_mem call in the plus_constant function to see if the value returned is not a NULL_RTX.

2015-06-02 Thread Jeff Law


On 06/02/2015 08:19 AM, Andrew Bennett wrote:

Hi,

In the plus_constant function in explow.c the code to update a constant pool 
value
does not deal with the case where the value returned from force_const_mem is a
NULL_RTX.  This occurs for the MIPS target because its
cannot_force_const_mem target function does not allow constants (so that the
move expanders can deal with them later on), this then causes the 
force_const_mem
function to return a NULL_RTX and then causes GCC to segmentation fault when 
calling
the memory_address_p function.

The fix is to add a check that the tem variable is not a NULL_RTX before
the memory_address_p function call.  I have tested the fix on the 
mips-mti-linux-gnu
target for both mips32r2 o32 and mips64r2 n64 and there have been no 
regressions.

The patch and ChangeLog are below.

Ok to commit?


Many thanks,



Andrew



* explow.c (plus_constant): Update check after force_const_mem call to 
see if the
value returned is not a NULL_RTX.

OK.  Please install.

Thanks,
Jeff

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread Joseph Myers

On Tue, 2 Jun 2015, Michael Meissner wrote:

> On Tue, Jun 02, 2015 at 05:55:10PM +, Joseph Myers wrote:
> > Is the use of FRACTIONAL_FLOAT_MODE to avoid iterations over 
> > floating-point modes including these modes when they shouldn't, as 
> > discussed previously?
> > 
> > If so, how do you deal (in subsequent patches?) with iterations that 
> > *should* include these modes?  In particular, where libgcc uses 
> > __LIBGCC__* macros predefined with -fbuilding-libgcc in an 
> > iteration in c-cppbuiltin.c, how do you handle getting the relevant 
> > information in libgcc to build applicable libgcc functions for these 
> > modes?  (I'm presuming that you do want complex arithmetic to work for 
> > both 128-bit types, for example, although you won't want them to be used 
> > for intermediate conversions in libgcc operations on other types.)
> 
> I have a catch-22 situation.  We can't really do the glibc stuff until we have
> the compiler.  Right now, I use a makefile on libgcc/config/rs6000 that copies
> the various TF files and modifies it for KF files.

The functions I'm mainly thinking of are the libgcc2.c ones rather than 
the soft-fp ones (powi?f2 mul?c3 div?c3).

> After we get the basic support in, we can then start tackling glibc.  It may 
> be
> when we get to doing the work in glibc itself, we will need to make further
> modifications.  However, in order for the glibc people to start, I need the
> basic support in the compiler in the tree.

It's not obvious what glibc support should look like in the absence of a 
change to the default for long double; that would require discussion on 
libc-alpha at an early stage to establish a consensus on the design.

libquadmath support should be easy (given working compiler / libgcc 
support).  But if you want more than libquadmath support, there are 
several possible forms for support in glibc proper depending on e.g. 
whether you want to support a -m option to change long double, or using 
the functions via the __float128 type name and separate names for the 
functions, or both.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread David Edelsohn

On Tue, Jun 2, 2015 at 4:14 PM, Joseph Myers  wrote:
> On Tue, 2 Jun 2015, Michael Meissner wrote:
>
>> On Tue, Jun 02, 2015 at 05:55:10PM +, Joseph Myers wrote:
>> > Is the use of FRACTIONAL_FLOAT_MODE to avoid iterations over
>> > floating-point modes including these modes when they shouldn't, as
>> > discussed previously?
>> >
>> > If so, how do you deal (in subsequent patches?) with iterations that
>> > *should* include these modes?  In particular, where libgcc uses
>> > __LIBGCC__* macros predefined with -fbuilding-libgcc in an
>> > iteration in c-cppbuiltin.c, how do you handle getting the relevant
>> > information in libgcc to build applicable libgcc functions for these
>> > modes?  (I'm presuming that you do want complex arithmetic to work for
>> > both 128-bit types, for example, although you won't want them to be used
>> > for intermediate conversions in libgcc operations on other types.)
>>
>> I have a catch-22 situation.  We can't really do the glibc stuff until we 
>> have
>> the compiler.  Right now, I use a makefile on libgcc/config/rs6000 that 
>> copies
>> the various TF files and modifies it for KF files.
>
> The functions I'm mainly thinking of are the libgcc2.c ones rather than
> the soft-fp ones (powi?f2 mul?c3 div?c3).
>
>> After we get the basic support in, we can then start tackling glibc.  It may 
>> be
>> when we get to doing the work in glibc itself, we will need to make further
>> modifications.  However, in order for the glibc people to start, I need the
>> basic support in the compiler in the tree.
>
> It's not obvious what glibc support should look like in the absence of a
> change to the default for long double; that would require discussion on
> libc-alpha at an early stage to establish a consensus on the design.
>
> libquadmath support should be easy (given working compiler / libgcc
> support).  But if you want more than libquadmath support, there are
> several possible forms for support in glibc proper depending on e.g.
> whether you want to support a -m option to change long double, or using
> the functions via the __float128 type name and separate names for the
> functions, or both.

Sounds like a fun night at a Czech pub during GNU Cauldron.

- David

Re: [PATCH], Add IEEE 128-bit floating point to PowerPC, patch #1

2015-06-02 Thread Michael Meissner

On Tue, Jun 02, 2015 at 08:14:12PM +, Joseph Myers wrote:
> On Tue, 2 Jun 2015, Michael Meissner wrote:
> 
> > On Tue, Jun 02, 2015 at 05:55:10PM +, Joseph Myers wrote:
> > > Is the use of FRACTIONAL_FLOAT_MODE to avoid iterations over 
> > > floating-point modes including these modes when they shouldn't, as 
> > > discussed previously?
> > > 
> > > If so, how do you deal (in subsequent patches?) with iterations that 
> > > *should* include these modes?  In particular, where libgcc uses 
> > > __LIBGCC__* macros predefined with -fbuilding-libgcc in an 
> > > > iteration in c-cppbuiltin.c, how do you handle getting the relevant 
> > > information in libgcc to build applicable libgcc functions for these 
> > > modes?  (I'm presuming that you do want complex arithmetic to work for 
> > > both 128-bit types, for example, although you won't want them to be used 
> > > for intermediate conversions in libgcc operations on other types.)
> > 
> > I have a catch-22 situation.  We can't really do the glibc stuff until we 
> > have
> > the compiler.  Right now, I use a makefile on libgcc/config/rs6000 that 
> > copies
> > the various TF files and modifies it for KF files.
> 
> The functions I'm mainly thinking of are the libgcc2.c ones rather than 
> the soft-fp ones (powi?f2 mul?c3 div?c3).

Ok, I will look into those.

> > After we get the basic support in, we can then start tackling glibc.  It 
> > may be
> > when we get to doing the work in glibc itself, we will need to make further
> > modifications.  However, in order for the glibc people to start, I need the
> > basic support in the compiler in the tree.
> 
> It's not obvious what glibc support should look like in the absence of a 
> change to the default for long double; that would require discussion on 
> libc-alpha at an early stage to establish a consensus on the design.
> 
> libquadmath support should be easy (given working compiler / libgcc 
> support).  But if you want more than libquadmath support, there are 
> several possible forms for support in glibc proper depending on e.g. 
> whether you want to support a -m option to change long double, or using 
> the functions via the __float128 type name and separate names for the 
> functions, or both.

There already is an option to change long double, but it currently does not
work (and in fact is disabled in 64-bit environments).

I see there are many roadblocks to changing the type of long double
(i.e. making sure printf %Lg works correctly, etc.).  None of the distros want
another multilib (where long double is IEEE 128-bit).  For the scope of GCC
6.0, my assumption is long double will remain IBM extended double.

My assumption of the steps is:

1) Get the basic compiler support in.

2) Add the basic soft-float, either with my current hack or preferably doing
   it right in glibc (I suspect we may want to temporarily do it via the hack,
   and when the glibc support is in, remove the hack).

3) Deal with all of the complexities of libgcc2 and glibc for the additional
   type.

4) Add float128 versions of the basic math libraries.  For this it will
   probably be simpler if we can force long double to be IEEE 128-bit so you
   don't have to change as much code, but you want to suppress whatever check
   there will be to prevent user code from linking against the wrong library.

5) Add support in other libraries as needed (IBM's MASS library, the new vector
   library, libquadmath, etc.).

Note, by the time of GCC 7.0, the C17/C++-17 standards may be final, and there
they have new names for IEEE 128-bit, etc.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797

1 2 >

1 - 100 of 121 matches

Mail list logo