[PATCH] Simplify/speedup LIM

2017-08-21 Thread Richard Biener

This avoids redundant ref finding.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-08-21  Richard Biener  

* tree-ssa-loop-im.c (struct lim_aux_data): Add ref index member.
(mem_ref_in_stmt): Remove.
(determine_max_movement): Use ref index to get at the reference.
(invariantness_dom_walker::before_dom_children): Deal with
lim data already initialized.
(gather_mem_refs_stmt): Initialize lim data and record ref index.

Index: gcc/tree-ssa-loop-im.c
===
--- gcc/tree-ssa-loop-im.c  (revision 251179)
+++ gcc/tree-ssa-loop-im.c  (working copy)
@@ -86,7 +86,9 @@ struct lim_aux_data
   unsigned cost;   /* Cost of the computation performed by the
   statement.  */
 
-  vec depends;   /* Vector of statements that must be 
also
+  unsigned ref;/* The simple_mem_ref in this stmt or 
0.  */
+
+  vec depends;   /* Vector of statements that must be also
   hoisted out of the loop when this statement
   is hoisted; i.e. those that define the
   operands of the statement and are inside of
@@ -586,27 +588,6 @@ simple_mem_ref_in_stmt (gimple *stmt, bo
 return NULL;
 }
 
-/* Returns the memory reference contained in STMT.  */
-
-static im_mem_ref *
-mem_ref_in_stmt (gimple *stmt)
-{
-  bool store;
-  tree *mem = simple_mem_ref_in_stmt (stmt, &store);
-  hashval_t hash;
-  im_mem_ref *ref;
-
-  if (!mem)
-return NULL;
-  gcc_assert (!store);
-
-  hash = iterative_hash_expr (*mem, 0);
-  ref = memory_accesses.refs->find_with_hash (*mem, hash);
-
-  gcc_assert (ref != NULL);
-  return ref;
-}
-
 /* From a controlling predicate in DOM determine the arguments from
the PHI node PHI that are chosen if the predicate evaluates to
true and false and store them to *TRUE_ARG_P and *FALSE_ARG_P if
@@ -747,23 +728,18 @@ determine_max_movement (gimple *stmt, bo
 
   if (gimple_vuse (stmt))
 {
-  im_mem_ref *ref = mem_ref_in_stmt (stmt);
-
-  if (ref)
+  im_mem_ref *ref
+   = lim_data ? memory_accesses.refs_list[lim_data->ref] : NULL;
+  if (ref
+ && MEM_ANALYZABLE (ref))
{
- lim_data->max_loop
- = outermost_indep_loop (lim_data->max_loop, loop, ref);
+ lim_data->max_loop = outermost_indep_loop (lim_data->max_loop,
+loop, ref);
  if (!lim_data->max_loop)
return false;
}
-  else
-   {
- if ((val = gimple_vuse (stmt)) != NULL_TREE)
-   {
- if (!add_dependency (val, lim_data, loop, false))
-   return false;
-   }
-   }
+  else if (! add_dependency (gimple_vuse (stmt), lim_data, loop, false))
+   return false;
 }
 
   lim_data->cost += stmt_cost (stmt);
@@ -1000,7 +976,9 @@ invariantness_dom_walker::before_dom_chi
if (pos == MOVE_IMPOSSIBLE)
  continue;
 
-   lim_data = init_lim_data (stmt);
+   lim_data = get_lim_data (stmt);
+   if (! lim_data)
+ lim_data = init_lim_data (stmt);
lim_data->always_executed_in = outermost;
 
if (!determine_max_movement (stmt, false))
@@ -1037,7 +1015,9 @@ invariantness_dom_walker::before_dom_chi
 store-motion work.  */
  else if (stmt_makes_single_store (stmt))
{
- struct lim_aux_data *lim_data = init_lim_data (stmt);
+ struct lim_aux_data *lim_data = get_lim_data (stmt);
+ if (! lim_data)
+   lim_data = init_lim_data (stmt);
  lim_data->always_executed_in = outermost;
}
  continue;
@@ -1073,7 +1053,9 @@ invariantness_dom_walker::before_dom_chi
stmt = rewrite_bittest (&bsi);
}
 
-  lim_data = init_lim_data (stmt);
+  lim_data = get_lim_data (stmt);
+  if (! lim_data)
+   lim_data = init_lim_data (stmt);
   lim_data->always_executed_in = outermost;
 
   if (maybe_never && pos == MOVE_PRESERVE_EXECUTION)
@@ -1498,6 +1480,7 @@ gather_mem_refs_stmt (struct loop *loop,
   bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], 
ref->id);
   mark_ref_stored (ref, loop);
 }
+  init_lim_data (stmt)->ref = ref->id;
   return;
 }
 


[PATCH] Fix PR81884

2017-08-21 Thread Richard Biener

This PR shows an issue in the must-alias oracle (stmt_kills_ref_p) which
when determining must-alias by finding a common base in two reference
trees doesn't handle trailing arrays correctly.  The fix is to do
array-at-struct-end detection on-the-fly by recording the innermost
ARRAY_REF we drop when searching for the common base and not allow
that to be an array_at_struct_end_p.  Similar to trunk 
array_at_struct_end_p this doesn't allow sub-arrays to be flexible,
for (*)[2'][3''] only ' is allowed to expand below the declared bound
(the artificial case of ' having just one element and thus effectively
not being an array and '' being a flexible array is not supported by
GCC).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Poking more holes appreciated ;)

Richard.

2017-08-21  Richard Biener  

PR middle-end/81884
* tree-ssa-alias.c (stmt_kills_ref_p): Handle array accesses
at struct end conservatively when comparing common bases.

* g++.dg/torture/pr81884.C: New testcase.

Index: gcc/tree-ssa-alias.c
===
--- gcc/tree-ssa-alias.c(revision 251103)
+++ gcc/tree-ssa-alias.c(working copy)
@@ -2415,6 +2415,7 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *
   if (ref->ref)
{
  tree base = ref->ref;
+ tree innermost_dropped_array_ref = NULL_TREE;
  if (handled_component_p (base))
{
  tree saved_lhs0 = NULL_TREE;
@@ -2434,6 +2435,11 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *
  TREE_OPERAND (base, 0) = saved_base0;
  if (res)
break;
+ /* Remember if we drop an array-ref that we need to
+double-check not being at struct end.  */ 
+ if (TREE_CODE (base) == ARRAY_REF
+ || TREE_CODE (base) == ARRAY_RANGE_REF)
+   innermost_dropped_array_ref = base;
  /* Otherwise drop handled components of the access.  */
  base = saved_base0;
}
@@ -2442,15 +2448,22 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *
TREE_OPERAND (lhs, 0) = saved_lhs0;
}
  /* Finally check if the lhs has the same address and size as the
-base candidate of the access.  */
- if (lhs == base
- || (((TYPE_SIZE (TREE_TYPE (lhs))
-   == TYPE_SIZE (TREE_TYPE (base)))
-  || (TYPE_SIZE (TREE_TYPE (lhs))
-  && TYPE_SIZE (TREE_TYPE (base))
-  && operand_equal_p (TYPE_SIZE (TREE_TYPE (lhs)),
-  TYPE_SIZE (TREE_TYPE (base)), 0)))
- && operand_equal_p (lhs, base, OEP_ADDRESS_OF)))
+base candidate of the access.  Watch out if we have dropped
+an array-ref that was at struct end, this means ref->ref may
+be outside of the TYPE_SIZE of its base.  */
+ if ((! innermost_dropped_array_ref
+  || ! array_at_struct_end_p (innermost_dropped_array_ref, false))
+ && (lhs == base
+ || (((TYPE_SIZE (TREE_TYPE (lhs))
+   == TYPE_SIZE (TREE_TYPE (base)))
+  || (TYPE_SIZE (TREE_TYPE (lhs))
+  && TYPE_SIZE (TREE_TYPE (base))
+  && operand_equal_p (TYPE_SIZE (TREE_TYPE (lhs)),
+  TYPE_SIZE (TREE_TYPE (base)),
+  0)))
+ && operand_equal_p (lhs, base,
+ OEP_ADDRESS_OF
+ | OEP_MATCH_SIDE_EFFECTS
return true;
}
 
Index: gcc/testsuite/g++.dg/torture/pr81884.C
===
--- gcc/testsuite/g++.dg/torture/pr81884.C  (nonexistent)
+++ gcc/testsuite/g++.dg/torture/pr81884.C  (working copy)
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+
+typedef unsigned long uint64_t;
+
+struct value_t {
+uint64_t _count;
+value_t(uint64_t c) : _count(c) {}
+};
+
+struct X {
+value_t eventTime;
+uint64_t arr[0];
+};
+
+X* x;
+
+__attribute__((noclone, noinline))
+void initialize()
+{
+  x->arr[0] = 11;
+  x->arr[1] = 12;
+  x->eventTime = value_t(10);
+  x->arr[2] = 13;
+  x->arr[3] = 14;
+}
+
+int main()
+{
+  char buffer[sizeof(X) + sizeof(uint64_t)*4];
+  x = (X*)buffer;
+  x->eventTime = value_t(999);
+  x->arr[0] = 1;
+  x->arr[1] = 2;
+  x->arr[2] = 3;
+  x->arr[3] = 4;
+  initialize();
+  if (x->arr[0] != 11 || x->arr[1] != 12 || x->arr[2] != 13 || x->arr[3] != 14)
+__builtin_abort ();
+}


Re: [PATCH] Fix PR81488

2017-08-21 Thread Richard Biener
On Fri, Aug 18, 2017 at 8:36 PM, Bill Schmidt
 wrote:
> Hi,
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81488 reports a problem with 
> SLSR where
> too many memory resources are required to complete SLSR processing of 
> conditional
> candidates.  The code in question contains large trees of PHI dependencies 
> that must
> be examined in order to find all candidates for replacement.  Not only are the
> dependency chains deep, but many PHIs contain duplicate source operands 
> arriving by
> different paths, and SLSR isn't currently smart enough to avoid tracing them 
> more
> than once.  This leads to exponential behavior and a bad ending.
>
> Even removing the exponential behavior is not sufficient to fix the problem.  
> The
> dependencies are just too complex.  So it is also necessary to put a limit on 
> how
> much time we want to spend examining PHI nodes before giving up.  I've 
> arbitrarily
> chosen 16 as the maximum number of PHI nodes to visit for each candidate, 
> which
> seems likely to be sufficient in most cases.
>
> A side benefit of removing the exponential behavior is better accuracy in 
> making
> cost-model decisions.  With tracing through the same PHI dependencies more 
> than
> once, the insertion (+) and replacement (-) costs are overcounted.  This 
> should
> now be improved.
>
> The original bug went latent on the trunk after it was reported, but I was 
> able
> to reproduce with an older revision and verify that the following patch fixes
> the problem.  I've also bootstrapped and tested it on powerpc64le-linux-gnu 
> with
> no regressions.  Is this ok for trunk?

Ok.

Thanks,
Richard.

> Thanks,
> Bill
>
>
> 2017-08-18  Bill Schmidt  
>
> PR tree-optimization/81488
> * gimple-ssa-strength-reduction (struct slsr_cand_d): Add visited
> and cached_basis fields.
> (MAX_SPREAD): New constant.
> (alloc_cand_and_find_basis): Initialize new fields.
> (clear_visited): New function.
> (create_phi_basis_1): Rename from create_phi_basis, set visited
> and cached_basis fields.
> (create_phi_basis): New wrapper function.
> (phi_add_costs_1): Rename from phi_add_costs, add spread
> parameter, set visited field, short-circuit when limits reached.
> (phi_add_costs): New wrapper function.
> (record_phi_increments_1): Rename from record_phi_increments, set
> visited field.
> (record_phi_increments): New wrapper function.
> (phi_incr_cost_1): Rename from phi_incr_cost, set visited field.
> (phi_incr_cost): New wrapper function.
> (all_phi_incrs_profitable_1): Rename from
> all_phi_incrs_profitable, set visited field.
> (all_phi_incrs_profitable): New wrapper function.
>
>
> Index: gcc/gimple-ssa-strength-reduction.c
> ===
> --- gcc/gimple-ssa-strength-reduction.c (revision 251159)
> +++ gcc/gimple-ssa-strength-reduction.c (working copy)
> @@ -281,6 +281,14 @@ struct slsr_cand_d
>/* Savings that can be expected from eliminating dead code if this
>   candidate is replaced.  */
>int dead_savings;
> +
> +  /* For PHI candidates, use a visited flag to keep from processing the
> + same PHI twice from multiple paths.  */
> +  int visited;
> +
> +  /* We sometimes have to cache a phi basis with a phi candidate to
> + avoid processing it twice.  Valid only if visited==1.  */
> +  tree cached_basis;
>  };
>
>  typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
> @@ -369,7 +377,11 @@ enum count_phis_status
>DONT_COUNT_PHIS = 0,
>COUNT_PHIS = 1
>  };
> -
> +
> +/* Constrain how many PHI nodes we will visit for a conditional
> +   candidate (depth and breadth).  */
> +const int MAX_SPREAD = 16;
> +
>  /* Pointer map embodying a mapping from statements to candidates.  */
>  static hash_map *stmt_cand_map;
>
> @@ -671,6 +683,8 @@ alloc_cand_and_find_basis (enum cand_kind kind, gi
>c->sibling = 0;
>c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
>c->dead_savings = savings;
> +  c->visited = 0;
> +  c->cached_basis = NULL_TREE;
>
>cand_vec.safe_push (c);
>
> @@ -2317,19 +2331,33 @@ create_add_on_incoming_edge (slsr_cand_t c, tree b
>return lhs;
>  }
>
> -/* Given a candidate C with BASIS_NAME being the LHS of C's basis which
> -   is hidden by the phi node FROM_PHI, create a new phi node in the same
> -   block as FROM_PHI.  The new phi is suitable for use as a basis by C,
> -   with its phi arguments representing conditional adjustments to the
> -   hidden basis along conditional incoming paths.  Those adjustments are
> -   made by creating add statements (and sometimes recursively creating
> -   phis) along those incoming paths.  LOC is the location to attach to
> -   the introduced statements.  KNOWN_STRIDE is true iff C's stride is a
> -   constant.  */
> +/* Clear the visited field for a tree of PHI candidates.  */
>
> +static vo

Re: [PATCH] Fix fallout from VRP strict-overflow changes

2017-08-21 Thread Richard Biener
On Sat, 19 Aug 2017, Andreas Schwab wrote:

> On Aug 17 2017, Richard Biener  wrote:
> 
> > > I was notified I broke proper handling of undefined overflow in
> > multiplicative ops handling.  The following resurrects previous
> > behavior (and adds a testcase).
> >
> > Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.
> 
> This breaks gfortran.dg/alloc_comp_auto_array_2.f90 on aarch64 with
> -mabi=ilp32 (only for -O3):
> 
> FAIL: gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -g  (test for excess 
> errors)
> Excess errors:
> /opt/gcc/gcc-20170818/gcc/testsuite/gfortran.dg/alloc_comp_auto_array_2.f90:33:0:
>  Warning: '__builtin_memcpy' specified size between 2147483648 and 4294967295 
> exceeds maximum object size 2147483647 [-Wstringop-overflow=]

I believe this is an issue that went latent when I broke VRP earlier.

I have opened PR81908, will amend with some initial analysis.

Richard.


Re: [PATCH] Don't override user alignment with the same value

2017-08-21 Thread Richard Biener
On Sat, Aug 19, 2017 at 10:18 PM, H.J. Lu  wrote:
> Don't override alignment specified by user with the same value to
> preserve TYPE_USER_ALIGN.  This fixes PR 53037 tests on Sparc.
>
> Does it look right?

Doesn't match do_type_align so it introduces inconsistencies.  The documentation
for TYPE_USER_ALIGN doesn't specify when both cases conflict:

/* 1 if the alignment for this type was requested by "aligned" attribute,
   0 if it is the default for this type.  */

Note that for example the vectorizer looks at DECL_USER_ALIGN  (for
non-field-decls)
to decide whether it can increase alignment.

Richard.

>
> H.J.
> --
> * stor-layout.c (finalize_type_size): Don't override alignment
> specified by user with the same value.
> ---
>  gcc/stor-layout.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
> index 3028d55773a..6dd605810ac 100644
> --- a/gcc/stor-layout.c
> +++ b/gcc/stor-layout.c
> @@ -1784,7 +1784,7 @@ finalize_type_size (tree type)
>
>/* Don't override a larger alignment requirement coming from a user
>  alignment of one of the fields.  */
> -  if (mode_align >= TYPE_ALIGN (type))
> +  if (mode_align > TYPE_ALIGN (type))
> {
>   SET_TYPE_ALIGN (type, mode_align);
>   TYPE_USER_ALIGN (type) = 0;
> --
> 2.13.5
>


[committed] Fix bogus CONST_WIDE_INT hash

2017-08-21 Thread Richard Sandiford
The CONST_WIDE_INT case in const_rtx_hash_1 started the hash
with the precision of the mode, but the mode is always VOIDmode.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Applied as obvious.

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* varasm.c (const_rtx_hash_1): Don't hash in the mode of a
CONST_WIDE_INT.

Index: gcc/varasm.c
===
--- gcc/varasm.c2017-08-10 14:36:08.449457108 +0100
+++ gcc/varasm.c2017-08-21 10:43:04.352530400 +0100
@@ -3639,7 +3639,7 @@ const_rtx_hash_1 (const_rtx x)
   break;
 
 case CONST_WIDE_INT:
-  hwi = GET_MODE_PRECISION (mode);
+  hwi = 0;
   {
for (i = 0; i < CONST_WIDE_INT_NUNITS (x); i++)
  hwi ^= CONST_WIDE_INT_ELT (x, i);


[committed] Pass rtx and index to read-md.c iterator routines

2017-08-21 Thread Richard Sandiford
The read-md.c iterator callbacks previously used a void * to record the
position at which the iterator value should be installed.  This doesn't
scale easily to the SUBREG_BYTE representation used by a later patch,
so this patch replaces the void * with both an rtx and an operand
number.  The operand number is ignored for modes and codes.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  Applied.

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* read-md.h (md_reader::record_potential_iterator_use): Replace
pointer argument with an rtx and an index.
* read-rtl.c (iterator_group::apply_iterator): Likewise.
(apply_mode_iterator): Likewise.
(apply_code_iterator): Likewise.
(apply_int_iterator): Likewise.
(apply_subst_iterator): Likewise.
(record_iterator_use): Likewise.
(record_attribute_use): Likewise.
(md_reader::record_potential_iterator_use): Likewise.  Update calls
to record_iterator_use and apply_iterator.
(iterator_use): Replace ptr with x and index.
(attribute_use): Likewise.
(apply_attribute_uses): Update calls to apply_iterator.
(apply_iterators): Likewise.  Update initialization of iterator_use.
(rtx_reader::read_rtx_code): Update calls to record_iterator_use
and record_potential_iterator_use.
(rtx_reader::read_rtx_operand): Likewise.

Index: gcc/read-md.h
===
--- gcc/read-md.h   2017-02-23 19:54:20.0 +
+++ gcc/read-md.h   2017-08-21 10:47:28.924056316 +0100
@@ -162,7 +162,8 @@ struct enum_type {
   rtx copy_rtx_for_iterators (rtx original);
   void read_conditions ();
   void record_potential_iterator_use (struct iterator_group *group,
- void *ptr, const char *name);
+ rtx x, unsigned int index,
+ const char *name);
   struct mapping *read_mapping (struct iterator_group *group, htab_t table);
 
   const char *get_top_level_filename () const { return m_toplevel_fname; }
Index: gcc/read-rtl.c
===
--- gcc/read-rtl.c  2017-02-23 19:54:15.0 +
+++ gcc/read-rtl.c  2017-08-21 10:47:28.924056316 +0100
@@ -76,8 +76,10 @@ struct iterator_group {
  return its integer value.  */
   int (*find_builtin) (const char *);
 
-  /* Make the given pointer use the given iterator value.  */
-  void (*apply_iterator) (void *, int);
+  /* Make the given rtx use the iterator value given by the third argument.
+ If the iterator applies to operands, the second argument gives the
+ operand index, otherwise it is ignored.  */
+  void (*apply_iterator) (rtx, unsigned int, int);
 };
 
 /* Records one use of an iterator.  */
@@ -85,8 +87,11 @@ struct iterator_use {
   /* The iterator itself.  */
   struct mapping *iterator;
 
-  /* The location of the use, as passed to the apply_iterator callback.  */
-  void *ptr;
+  /* The location of the use, as passed to the apply_iterator callback.
+ The index is the number of the operand that used the iterator
+ if applicable, otherwise it is ignored.  */
+  rtx x;
+  unsigned int index;
 };
 
 /* Records one use of an attribute (the "<[iterator:]attribute>" syntax)
@@ -98,8 +103,11 @@ struct attribute_use {
   /* The name of the attribute, possibly with an "iterator:" prefix.  */
   const char *value;
 
-  /* The location of the use, as passed to GROUP's apply_iterator callback.  */
-  void *ptr;
+  /* The location of the use, as passed to GROUP's apply_iterator callback.
+ The index is the number of the operand that used the iterator
+ if applicable, otherwise it is ignored.  */
+  rtx x;
+  unsigned int index;
 };
 
 /* This struct is used to link subst_attr named ATTR_NAME with
@@ -150,9 +158,9 @@ find_mode (const char *name)
 }
 
 static void
-apply_mode_iterator (void *loc, int mode)
+apply_mode_iterator (rtx x, unsigned int, int mode)
 {
-  PUT_MODE ((rtx) loc, (machine_mode) mode);
+  PUT_MODE (x, (machine_mode) mode);
 }
 
 /* In compact dumps, the code of insns is prefixed with "c", giving "cinsn",
@@ -193,9 +201,9 @@ find_code (const char *name)
 }
 
 static void
-apply_code_iterator (void *loc, int code)
+apply_code_iterator (rtx x, unsigned int, int code)
 {
-  PUT_CODE ((rtx) loc, (enum rtx_code) code);
+  PUT_CODE (x, (enum rtx_code) code);
 }
 
 /* Implementations of the iterator_group callbacks for ints.  */
@@ -212,9 +220,9 @@ find_int (const char *name)
 }
 
 static void
-apply_int_iterator (void *loc, int value)
+apply_int_iterator (rtx x, unsigned int index, int value)
 {
-  *(int *)loc = value;
+  XINT (x, index) = value;
 }
 
 #ifdef GENERATOR_FILE
@@ -222,14 +230,13 @@ apply_int_iterator (void *loc, int value
 /* This routine adds attribute or does nothing depending on VA

Move vector_type_mode to tree.c

2017-08-21 Thread Richard Sandiford
...so that it's possible to use TYPE_MODE in tree.h.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-08-21  Richard Sandiford  

gcc/
* stor-layout.h (vector_type_mode): Move to...
* tree.h (vector_type_mode): ...here.
* stor-layout.c (vector_type_mode): Move to...
* tree.c (vector_type_mode): ...here.  Include rtl.h and regs.h.

Index: gcc/stor-layout.h
===
--- gcc/stor-layout.h   2017-02-23 19:54:15.0 +
+++ gcc/stor-layout.h   2017-08-21 10:52:43.715019857 +0100
@@ -114,7 +114,4 @@ extern void relayout_decl (tree);
belongs to a function parameter.  */
 extern tree variable_size (tree);
 
-/* Vector types need to check target flags to determine type.  */
-extern machine_mode vector_type_mode (const_tree);
-
 #endif  // GCC_STOR_LAYOUT_H
Index: gcc/tree.h
===
--- gcc/tree.h  2017-08-21 10:42:04.341552187 +0100
+++ gcc/tree.h  2017-08-21 10:52:43.717019857 +0100
@@ -1852,7 +1852,8 @@ #define TYPE_MODE(NODE) \
 #define SET_TYPE_MODE(NODE, MODE) \
   (TYPE_CHECK (NODE)->type_common.mode = (MODE))
 
-extern machine_mode element_mode (const_tree t);
+extern machine_mode element_mode (const_tree);
+extern machine_mode vector_type_mode (const_tree);
 
 /* The "canonical" type for this type node, which is used by frontends to
compare the type for equality with another type.  If two types are
Index: gcc/stor-layout.c
===
--- gcc/stor-layout.c   2017-08-21 10:42:05.814707501 +0100
+++ gcc/stor-layout.c   2017-08-21 10:52:43.715019857 +0100
@@ -2494,45 +2494,6 @@ min_align_of_type (tree type)
 }
   return align / BITS_PER_UNIT;
 }
-
-/* Vector types need to re-check the target flags each time we report
-   the machine mode.  We need to do this because attribute target can
-   change the result of vector_mode_supported_p and have_regs_of_mode
-   on a per-function basis.  Thus the TYPE_MODE of a VECTOR_TYPE can
-   change on a per-function basis.  */
-/* ??? Possibly a better solution is to run through all the types
-   referenced by a function and re-compute the TYPE_MODE once, rather
-   than make the TYPE_MODE macro call a function.  */
-
-machine_mode
-vector_type_mode (const_tree t)
-{
-  machine_mode mode;
-
-  gcc_assert (TREE_CODE (t) == VECTOR_TYPE);
-
-  mode = t->type_common.mode;
-  if (VECTOR_MODE_P (mode)
-  && (!targetm.vector_mode_supported_p (mode)
- || !have_regs_of_mode[mode]))
-{
-  machine_mode innermode = TREE_TYPE (t)->type_common.mode;
-
-  /* For integers, try mapping it to a same-sized scalar mode.  */
-  if (GET_MODE_CLASS (innermode) == MODE_INT)
-   {
- mode = mode_for_size (TYPE_VECTOR_SUBPARTS (t)
-   * GET_MODE_BITSIZE (innermode), MODE_INT, 0);
-
- if (mode != VOIDmode && have_regs_of_mode[mode])
-   return mode;
-   }
-
-  return BLKmode;
-}
-
-  return mode;
-}
 
 /* Create and return a type for signed integers of PRECISION bits.  */
 
Index: gcc/tree.c
===
--- gcc/tree.c  2017-08-21 10:42:04.340629158 +0100
+++ gcc/tree.c  2017-08-21 10:52:43.717019857 +0100
@@ -64,6 +64,8 @@ Software Foundation; either version 3, o
 #include "selftest.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "rtl.h"
+#include "regs.h"
 
 /* Tree code classes.  */
 
@@ -12598,9 +12600,47 @@ element_mode (const_tree t)
 t = TREE_TYPE (t);
   return TYPE_MODE (t);
 }
- 
 
-/* Veirfy that basic properties of T match TV and thus T can be a variant of
+/* Vector types need to re-check the target flags each time we report
+   the machine mode.  We need to do this because attribute target can
+   change the result of vector_mode_supported_p and have_regs_of_mode
+   on a per-function basis.  Thus the TYPE_MODE of a VECTOR_TYPE can
+   change on a per-function basis.  */
+/* ??? Possibly a better solution is to run through all the types
+   referenced by a function and re-compute the TYPE_MODE once, rather
+   than make the TYPE_MODE macro call a function.  */
+
+machine_mode
+vector_type_mode (const_tree t)
+{
+  machine_mode mode;
+
+  gcc_assert (TREE_CODE (t) == VECTOR_TYPE);
+
+  mode = t->type_common.mode;
+  if (VECTOR_MODE_P (mode)
+  && (!targetm.vector_mode_supported_p (mode)
+ || !have_regs_of_mode[mode]))
+{
+  machine_mode innermode = TREE_TYPE (t)->type_common.mode;
+
+  /* For integers, try mapping it to a same-sized scalar mode.  */
+  if (GET_MODE_CLASS (innermode) == MODE_INT)
+   {
+ mode = mode_for_size (TYPE_VECTOR_SUBPARTS (t)
+   * GET_MODE_BITSIZE (innermode), MODE_INT, 0);
+
+ if (mode != VOIDmode && have_regs_of_mode[mode])
+   return mode;
+   }
+
+

Re: Add a full_integral_type_p helper function

2017-08-21 Thread Richard Sandiford
Richard Biener  writes:
> On Fri, Aug 18, 2017 at 1:04 PM, Richard Sandiford
>  wrote:
>> Richard Biener  writes:
>>> On Fri, Aug 18, 2017 at 10:10 AM, Richard Sandiford
>>>  wrote:
 There are several places that test whether:

 TYPE_PRECISION (t) == GET_MODE_PRECISION (TYPE_MODE (t))

 for some integer type T.  With SVE variable-length modes, this would
 need to become:

 TYPE_PRECISION (t) == GET_MODE_PRECISION (SCALAR_TYPE_MODE (t))

 (or SCALAR_INT_TYPE_MODE, it doesn't matter which in this case).
 But rather than add the "SCALAR_" everywhere, it seemed neater to
 introduce a new helper function that tests whether T is an integral
 type that has the same number of bits as its underlying mode.  This
 patch does that, calling it full_integral_type_p.

 It isn't possible to use TYPE_MODE in tree.h because vector_type_mode
 is defined in stor-layout.h, so for now the function accesses the mode
 field directly.  After the 77-patch machine_mode series (thanks again
 Jeff for the reviews) it would use SCALAR_TYPE_MODE instead.

 Of the changes that didn't previously have an INTEGRAL_TYPE_P check:

 - for fold_single_bit_test_into_sign_test it is obvious from the
   integer_foop tests that this is restricted to integral types.

 - vect_recog_vector_vector_shift_pattern is inherently restricted
   to integral types.

 - the register_edge_assert_for_2 hunk is dominated by:

   TREE_CODE (val) == INTEGER_CST

 - the ubsan_instrument_shift hunk is preceded by an early exit:

   if (!INTEGRAL_TYPE_P (type0))
 return NULL_TREE;

 - the second and third match.pd hunks are from:

 /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
 (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
if the new mask might be further optimized.  */

 I'm a bit confused about:

 /* Try to fold (type) X op CST -> (type) (X op ((type-x) CST))
when profitable.
For bitwise binary operations apply operand conversions to the
binary operation result instead of to the operands.  This allows
to combine successive conversions and bitwise binary operations.
We combine the above two cases by using a conditional convert.  */
 (for bitop (bit_and bit_ior bit_xor)
  (simplify
   (bitop (convert @0) (convert? @1))
   (if (((TREE_CODE (@1) == INTEGER_CST
  && INTEGRAL_TYPE_P (TREE_TYPE (@0))
  && int_fits_type_p (@1, TREE_TYPE (@0)))
 || types_match (@0, @1))
/* ???  This transform conflicts with fold-const.c doing
   Convert (T)(x & c) into (T)x & (T)c, if c is an integer
   constants (if x has signed type, the sign bit cannot be set
   in c).  This folds extension into the BIT_AND_EXPR.
   Restrict it to GIMPLE to avoid endless recursions.  */
&& (bitop != BIT_AND_EXPR || GIMPLE)
&& (/* That's a good idea if the conversion widens the operand, thus
 after hoisting the conversion the operation will be narrower.  */
TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type)
/* It's also a good idea if the conversion is to a non-integer
   mode.  */
|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
/* Or if the precision of TO is not the same as the precision
   of its mode.  */
 || TYPE_PRECISION (type) != GET_MODE_PRECISION (TYPE_MODE (type
(convert (bitop @0 (convert @1))

 though.  The "INTEGRAL_TYPE_P (TREE_TYPE (@0))" suggests that we can't
 rely on @0 and @1 being integral (although conversions from float would
 use FLOAT_EXPR), but then what is:
>>>
>>> bit_and is valid on POINTER_TYPE and vector integer types
>>>

/* It's also a good idea if the conversion is to a non-integer
   mode.  */
|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT

 letting through?  MODE_PARTIAL_INT maybe, but that's a sort of integer
 mode too.  MODE_COMPLEX_INT or MODE_VECTOR_INT?  I thought for those
 it would be better to apply the scalar rules to the element type.
>>>
>>> I suppose extra caution ;)  I think I have seen BLKmode for not naturally
>>> aligned integer types at least on strict-align targets?  The code is
>>> a copy from original code in tree-ssa-forwprop.c.
>>>
 Either way, having allowed all non-INT modes, using full_integral_type_p
 for the remaining condition seems correct.

 If the feeling is that this isn't a useful abstraction, I can just update
 each site individually to cope with variable-sized modes.
>>>
>>> I think "full_integral_type_p" is a name from which I cannot infer
>>> its meaning.  Maybe type_has_mode_pre

Re: Heads-Up: early LTO debug to land, breaking Mach-O / [X]COFF

2017-08-21 Thread Richard Biener
On Fri, 12 May 2017, Richard Biener wrote:

> 
> This is a heads-up that I am in the process of implementing the last
> of Jason's review comments on the dwarf2out parts of early LTO debug
> support.  I hope to post final patches early next week after thoroughly
> re-testing everything.
> 
> Note that Mach-O and [X]COFF support in the simple-object machinery
> is still missing for the early LTO debug feature so I am going to
> break LTOing with DWARF debuginfo on Darwin and Windows (CCing
> maintainers).  Mach-O support has been worked on a bit by Iain
> and myself but the simple-object piece is still missing.
> A workaround is to use stabs on these targets with LTO.
> 
> DWARF part: https://gcc.gnu.org/ml/gcc-patches/2016-11/msg01023.html
> simple-object part: 
> https://gcc.gnu.org/ml/gcc-patches/2016-10/msg01733.html
> 
> both still apply with some fuzz.

The patches have been applied now (r251220).  You'll see -flto -g
testcases on the above archs fail (I'm still curious how exactly).
There's going to be a "workaround" I plan to apply no earlier than
stage3, it will effectively force -g0 on those targets when you
use DWARF.

Seeing the FAILs might be enough of a hint to look into the (small)
task of implementing simple_object_copy_lto_debug_sections in the
simple-object code for your object format.

Richard.


Re: RFC: [PATCH] Add warn_if_not_aligned attribute

2017-08-21 Thread Szabolcs Nagy
On 17/08/17 15:56, H.J. Lu wrote:
> On Thu, Aug 17, 2017 at 6:52 AM, Joseph Myers  wrote:
>> On Sat, 8 Jul 2017, H.J. Lu wrote:
>>
>>> +@item -Wpacked-not-aligned @r{(C, C++, Objective-C and Objective-C++ only)}
>>> +@opindex Wpacked-not-aligned
>>> +@opindex Wno-packed-not-aligned
>>> +Warn if a structure field with explicitly specified alignment in a
>>> +packed struct or union is misaligned.  For example, a warning will
>>> +be issued on @code{struct S}, like, @code{warning: alignment 1 of
>>> +'struct S' is less than 8}, in this code:
>>
>> Use @samp for warnings quoted in the manual, as previously discussed.
>>
>> OK with that change, in the absence of C++ maintainer objections within 48
>> hours.
>>
> 
> Here is the updated patch.  I moved c++ changes to merge_decls, where
> alignment is merged,  and check_bitfield_type_and_width, where bit-fields
> are checked.
> 
> Tested on x86-64 and i686.
> 

i assume packed semantics is same on arm so these
should warn on arm too ?

on arm i see:

FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 8)
FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 16)
FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 32)
FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 8)
FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 16)
FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 32)

FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 6)
FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 16)
FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 29)
FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 6)
FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 16)
FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 29)
FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 6)
FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 16)
FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 29)
FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 6)
FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 16)
FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 29)
FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 6)
FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 16)
FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 29)
FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 6)
FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 16)
FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 29)



Re: Move vector_type_mode to tree.c

2017-08-21 Thread Richard Biener
On Mon, Aug 21, 2017 at 11:54 AM, Richard Sandiford
 wrote:
> ...so that it's possible to use TYPE_MODE in tree.h.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Ok.

Richard.

> Richard
>
>
> 2017-08-21  Richard Sandiford  
>
> gcc/
> * stor-layout.h (vector_type_mode): Move to...
> * tree.h (vector_type_mode): ...here.
> * stor-layout.c (vector_type_mode): Move to...
> * tree.c (vector_type_mode): ...here.  Include rtl.h and regs.h.
>
> Index: gcc/stor-layout.h
> ===
> --- gcc/stor-layout.h   2017-02-23 19:54:15.0 +
> +++ gcc/stor-layout.h   2017-08-21 10:52:43.715019857 +0100
> @@ -114,7 +114,4 @@ extern void relayout_decl (tree);
> belongs to a function parameter.  */
>  extern tree variable_size (tree);
>
> -/* Vector types need to check target flags to determine type.  */
> -extern machine_mode vector_type_mode (const_tree);
> -
>  #endif  // GCC_STOR_LAYOUT_H
> Index: gcc/tree.h
> ===
> --- gcc/tree.h  2017-08-21 10:42:04.341552187 +0100
> +++ gcc/tree.h  2017-08-21 10:52:43.717019857 +0100
> @@ -1852,7 +1852,8 @@ #define TYPE_MODE(NODE) \
>  #define SET_TYPE_MODE(NODE, MODE) \
>(TYPE_CHECK (NODE)->type_common.mode = (MODE))
>
> -extern machine_mode element_mode (const_tree t);
> +extern machine_mode element_mode (const_tree);
> +extern machine_mode vector_type_mode (const_tree);
>
>  /* The "canonical" type for this type node, which is used by frontends to
> compare the type for equality with another type.  If two types are
> Index: gcc/stor-layout.c
> ===
> --- gcc/stor-layout.c   2017-08-21 10:42:05.814707501 +0100
> +++ gcc/stor-layout.c   2017-08-21 10:52:43.715019857 +0100
> @@ -2494,45 +2494,6 @@ min_align_of_type (tree type)
>  }
>return align / BITS_PER_UNIT;
>  }
> -
> -/* Vector types need to re-check the target flags each time we report
> -   the machine mode.  We need to do this because attribute target can
> -   change the result of vector_mode_supported_p and have_regs_of_mode
> -   on a per-function basis.  Thus the TYPE_MODE of a VECTOR_TYPE can
> -   change on a per-function basis.  */
> -/* ??? Possibly a better solution is to run through all the types
> -   referenced by a function and re-compute the TYPE_MODE once, rather
> -   than make the TYPE_MODE macro call a function.  */
> -
> -machine_mode
> -vector_type_mode (const_tree t)
> -{
> -  machine_mode mode;
> -
> -  gcc_assert (TREE_CODE (t) == VECTOR_TYPE);
> -
> -  mode = t->type_common.mode;
> -  if (VECTOR_MODE_P (mode)
> -  && (!targetm.vector_mode_supported_p (mode)
> - || !have_regs_of_mode[mode]))
> -{
> -  machine_mode innermode = TREE_TYPE (t)->type_common.mode;
> -
> -  /* For integers, try mapping it to a same-sized scalar mode.  */
> -  if (GET_MODE_CLASS (innermode) == MODE_INT)
> -   {
> - mode = mode_for_size (TYPE_VECTOR_SUBPARTS (t)
> -   * GET_MODE_BITSIZE (innermode), MODE_INT, 0);
> -
> - if (mode != VOIDmode && have_regs_of_mode[mode])
> -   return mode;
> -   }
> -
> -  return BLKmode;
> -}
> -
> -  return mode;
> -}
>
>  /* Create and return a type for signed integers of PRECISION bits.  */
>
> Index: gcc/tree.c
> ===
> --- gcc/tree.c  2017-08-21 10:42:04.340629158 +0100
> +++ gcc/tree.c  2017-08-21 10:52:43.717019857 +0100
> @@ -64,6 +64,8 @@ Software Foundation; either version 3, o
>  #include "selftest.h"
>  #include "stringpool.h"
>  #include "attribs.h"
> +#include "rtl.h"
> +#include "regs.h"
>
>  /* Tree code classes.  */
>
> @@ -12598,9 +12600,47 @@ element_mode (const_tree t)
>  t = TREE_TYPE (t);
>return TYPE_MODE (t);
>  }
> -
>
> -/* Veirfy that basic properties of T match TV and thus T can be a variant of
> +/* Vector types need to re-check the target flags each time we report
> +   the machine mode.  We need to do this because attribute target can
> +   change the result of vector_mode_supported_p and have_regs_of_mode
> +   on a per-function basis.  Thus the TYPE_MODE of a VECTOR_TYPE can
> +   change on a per-function basis.  */
> +/* ??? Possibly a better solution is to run through all the types
> +   referenced by a function and re-compute the TYPE_MODE once, rather
> +   than make the TYPE_MODE macro call a function.  */
> +
> +machine_mode
> +vector_type_mode (const_tree t)
> +{
> +  machine_mode mode;
> +
> +  gcc_assert (TREE_CODE (t) == VECTOR_TYPE);
> +
> +  mode = t->type_common.mode;
> +  if (VECTOR_MODE_P (mode)
> +  && (!targetm.vector_mode_supported_p (mode)
> + || !have_regs_of_mode[mode]))
> +{
> +  machine_mode innermode = TREE_TYPE (t)->type_common.mode;
> +
> +  /* For integers, try mapping it to a same-si

Remove the frame size argument from function_prologue/epilogue

2017-08-21 Thread Richard Sandiford
Later patches will add support for frame sizes that are a run-time
invariant but not a compile-time constant.  This then raised the
question of whether the frame size argument to the function_prologue/
epilogue hooks should be updated accordingly.

It turned out that only two targets actually used this argument, and
even they got other frame-related information from the cached machine
structure.  This patch therefore removes the argument and makes the two
targets use get_frame_size () instead.

Tested on aarch64-linux-gnu and x86_64-linux-gnu, and by building
one target per CPU and checking that there were no differences in
assembly for the testsuite.  OK to install?

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* target.def (function_prologue): Remove frame size argument.
(function_epilogue): Likewise.
* doc/tm.texi: Regenerate.
* final.c (final_start_function): Update call to function_prologue.
(final_end_function): Update call to function_epilogue.
(default_function_pro_epilogue): Remove frame size argument.
* output.h (default_function_pro_epilogue): Likewise.
* config/arm/arm.c (arm_output_function_epilogue): Likewise.
(arm_output_function_prologue): Likewise.
* config/frv/frv.c (frv_function_prologue): Likewise.
(frv_function_epilogue): Likewise.
* config/i386/i386.c (ix86_output_function_epilogue): Likewise.
* config/ia64/ia64.c (ia64_output_function_prologue): Likewise.
(ia64_output_function_epilogue): Likewise.
* config/m32r/m32r.c (m32r_output_function_prologue): Likewise.
(m32r_output_function_epilogue): Likewise.
* config/microblaze/microblaze.c (microblaze_function_prologue)
(microblaze_function_epilogue): Likewise.
* config/mips/mips.c (mips_output_function_prologue): Likewise.
(mips_output_function_epilogue): Likewise.
* config/mmix/mmix.c (mmix_target_asm_function_prologue): Likewise.
(mmix_target_asm_function_epilogue): Likewise.
* config/msp430/msp430.c (msp430_start_function): Likewise.
* config/nds32/nds32.c (nds32_asm_function_prologue): Likewise.
(nds32_asm_function_epilogue): Likewise.
* config/nios2/nios2.c (nios2_asm_function_prologue): Likewise.
* config/pa/pa.c (pa_output_function_prologue): Likewise.
(pa_output_function_epilogue): Likewise.
* config/powerpcspe/powerpcspe.c (rs6000_output_function_prologue)
(rs6000_output_function_epilogue): Likewise.
* config/rl78/rl78.c (rl78_start_function): Likewise.
* config/rs6000/rs6000.c (rs6000_output_function_prologue): Likewise.
(rs6000_output_function_epilogue): Likewise.
* config/rx/rx.c (rx_output_function_prologue): Likewise.
* config/sh/sh.c (sh_output_function_epilogue): Likewise.
* config/sparc/sparc.c (sparc_asm_function_prologue): Likewise.
(sparc_asm_function_epilogue): Likewise.

Index: gcc/target.def
===
--- gcc/target.def  2017-07-27 10:37:54.747032158 +0100
+++ gcc/target.def  2017-08-21 11:54:34.198323780 +0100
@@ -306,8 +306,8 @@ DEFHOOK
 function.  The prologue is responsible for setting up the stack frame,\n\
 initializing the frame pointer register, saving registers that must be\n\
 saved, and allocating @var{size} additional bytes of storage for the\n\
-local variables.  @var{size} is an integer.  @var{file} is a stdio\n\
-stream to which the assembler code should be output.\n\
+local variables.  @var{file} is a stdio stream to which the assembler\n\
+code should be output.\n\
 \n\
 The label for the beginning of the function need not be output by this\n\
 macro.  That has already been done when the macro is run.\n\
@@ -344,7 +344,7 @@ for a machine if doing so is more conven
 compatibility reasons.  Except in cases where required by standard\n\
 or by a debugger, there is no reason why the stack layout used by GCC\n\
 need agree with that used by other compilers for a machine.",
- void, (FILE *file, HOST_WIDE_INT size),
+ void, (FILE *file),
  default_function_pro_epilogue)
 
 /* Output the assembler code for end of prologue.  */
@@ -374,7 +374,7 @@ DEFHOOK
 function.  The epilogue is responsible for restoring the saved\n\
 registers and stack pointer to their values when the function was\n\
 called, and returning control to the caller.  This macro takes the\n\
-same arguments as the macro @code{TARGET_ASM_FUNCTION_PROLOGUE}, and the\n\
+same argument as the macro @code{TARGET_ASM_FUNCTION_PROLOGUE}, and the\n\
 registers to restore are determined from @code{regs_ever_live} and\n\
 @code{CALL_USED_REGISTERS} in the same way.\n\
 \n\
@@ -414,7 +414,7 @@ functions pop their own arguments.  @cod
 needs to know what was decided.  The number of bytes of the current\n\
 function's arguments that this f

Simplify pad_below implementation

2017-08-21 Thread Richard Sandiford
This patch simplifies the alignment calculations in pad_below.
The first arm of the "if" was:

- taking GET_MODE_BITSIZE (always equal to GET_MODE_SIZE * BITS_PER_UNIT),
- rounding up to the next multiple of PARM_BOUNDARY
- converting bits to bytes and
- subtracting the GET_MODE_SIZE

so was in effect calculating the number of bytes needed to round
GET_MODE_SIZE up to (PARM_BOUNDARY / BITS_PER_UNIT).  That can be
done more directly as -size & (align - 1), which is easier to
convert to variable-sized modes.

Tested on aarch64-linux-gnu and x86_64-linux-gnu, and by building
one target per CPU and checking that there were no differences in
assembly for the testsuite.  OK to install?

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* function.c (pad_below): Simplify padding calculation.

Index: gcc/function.c
===
--- gcc/function.c  2017-08-21 10:42:34.185530464 +0100
+++ gcc/function.c  2017-08-21 11:55:41.018148268 +0100
@@ -4322,21 +4322,16 @@ pad_to_arg_alignment (struct args_size *
 static void
 pad_below (struct args_size *offset_ptr, machine_mode passed_mode, tree 
sizetree)
 {
+  unsigned int align = PARM_BOUNDARY / BITS_PER_UNIT;
   if (passed_mode != BLKmode)
-{
-  if (GET_MODE_BITSIZE (passed_mode) % PARM_BOUNDARY)
-   offset_ptr->constant
- += (((GET_MODE_BITSIZE (passed_mode) + PARM_BOUNDARY - 1)
-  / PARM_BOUNDARY * PARM_BOUNDARY / BITS_PER_UNIT)
- - GET_MODE_SIZE (passed_mode));
-}
+offset_ptr->constant += -GET_MODE_SIZE (passed_mode) & (align - 1);
   else
 {
   if (TREE_CODE (sizetree) != INTEGER_CST
- || (TREE_INT_CST_LOW (sizetree) * BITS_PER_UNIT) % PARM_BOUNDARY)
+ || (TREE_INT_CST_LOW (sizetree) & (align - 1)) != 0)
{
  /* Round the size up to multiple of PARM_BOUNDARY bits.  */
- tree s2 = round_up (sizetree, PARM_BOUNDARY / BITS_PER_UNIT);
+ tree s2 = round_up (sizetree, align);
  /* Add it in.  */
  ADD_PARM_SIZE (*offset_ptr, s2);
  SUB_PARM_SIZE (*offset_ptr, sizetree);


[PATCH 1/2] Don't warn function alignment if warn_if_not_aligned_p is true

2017-08-21 Thread H.J. Lu
When warn_if_not_aligned_p is true, a warning will be issued on the
function declaration later.  There is no need to warn about function
alignment here when warn_if_not_aligned_p is true.

OK for trunk?

H.J.
--
* c-attribs.c (common_handle_aligned_attribute): Don't warn
function alignment if warn_if_not_aligned_p is true.
---
 gcc/c-family/c-attribs.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 5f79468407f..78969532543 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -1754,9 +1754,12 @@ common_handle_aligned_attribute (tree *node, tree args, 
int flags,
   This formally comes from the c++11 specification but we are
   doing it for the GNU attribute syntax as well.  */
 *no_add_attrs = true;
-  else if (TREE_CODE (decl) == FUNCTION_DECL
+  else if (!warn_if_not_aligned_p
+  && TREE_CODE (decl) == FUNCTION_DECL
   && DECL_ALIGN (decl) > (1U << i) * BITS_PER_UNIT)
 {
+  /* Don't warn function alignment here if warn_if_not_aligned_p is
+true.  It will be warned later.  */
   if (DECL_USER_ALIGN (decl))
error ("alignment for %q+D was previously specified as %d "
   "and may not be decreased", decl,
-- 
2.13.5



[PATCH 2/2] Update expected alignment in pr53037-1.C/pr53037-1.c

2017-08-21 Thread H.J. Lu
Since the alignment of int is 2 bytes on m68k, expect an alignment of
either 2 or 4 in the warning.

OK for trunk?

H.J.
---
* g++.dg/pr53037-1.C: Expect either 2 or 4 alignments in
warning.
* gcc.dg/pr53037-1.c: Likewise.
---
 gcc/testsuite/g++.dg/pr53037-1.C | 6 +++---
 gcc/testsuite/gcc.dg/pr53037-1.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/g++.dg/pr53037-1.C b/gcc/testsuite/g++.dg/pr53037-1.C
index a3d8f99b54e..3ebaf241670 100644
--- a/gcc/testsuite/g++.dg/pr53037-1.C
+++ b/gcc/testsuite/g++.dg/pr53037-1.C
@@ -5,7 +5,7 @@
 typedef unsigned long long __u64
   __attribute__((aligned(4),warn_if_not_aligned(8)));
 
-struct foo1 /* { dg-warning "alignment 4 of 'foo1' is less than 8" } */
+struct foo1 /* { dg-warning "alignment \[24\] of 'foo1' is less than 8" } */
 {
   int i1;
   int i2;
@@ -35,7 +35,7 @@ struct foo4
   __u64 x;
 } __attribute__((aligned(8)));
 
-struct foo5 /* { dg-warning "alignment 4 of 'foo5' is less than 16" } */
+struct foo5 /* { dg-warning "alignment \[24\] of 'foo5' is less than 16" } */
 {
   int i1;
   int x __attribute__((warn_if_not_aligned(16))); /* { dg-warning "'foo5::x' 
offset 4 in 'foo5' isn't aligned to 16" } */
@@ -68,7 +68,7 @@ union bar2
   __u64 x;
 } __attribute__((aligned(8)));
 
-union bar3 /* { dg-warning "alignment 4 of 'bar3' is less than 16" } */
+union bar3 /* { dg-warning "alignment \[1-4\] of 'bar3' is less than 16" } */
 {
   int i1;
   int x __attribute__((warn_if_not_aligned(16))); 
diff --git a/gcc/testsuite/gcc.dg/pr53037-1.c b/gcc/testsuite/gcc.dg/pr53037-1.c
index 93af0a50cd4..1e4e0cbb204 100644
--- a/gcc/testsuite/gcc.dg/pr53037-1.c
+++ b/gcc/testsuite/gcc.dg/pr53037-1.c
@@ -39,7 +39,7 @@ struct foo5
 {
   int i1;
   int x __attribute__((warn_if_not_aligned(16))); /* { dg-warning "'x' offset 
4 in 'struct foo5' isn't aligned to 16" } */
-}; /* { dg-warning "alignment 4 of 'struct foo5' is less than 16" } */
+}; /* { dg-warning "alignment \[24\] of 'struct foo5' is less than 16" } */
 
 struct foo6
 {
@@ -72,7 +72,7 @@ union bar3
 {
   int i1;
   int x __attribute__((warn_if_not_aligned(16))); 
-}; /* { dg-warning "alignment 4 of 'union bar3' is less than 16" } */
+}; /* { dg-warning "alignment \[24\] of 'union bar3' is less than 16" } */
 
 union bar4
 {
-- 
2.13.5



[patch,avr] Fix PR81910: ICE for bad attribute "address".

2017-08-21 Thread Georg-Johann Lay

The "address" attribute may only be applied to variables (VAR_DECLs),
yet the compiler went on with the attribute analysis for
non-variables, resulting in an ICE.

This patch also adds OPT_Wattributes as warning filter.

Ok to apply?

Johann

gcc/
PR target/81910
* config/avr/avr.c (avr_handle_addr_attribute): Early return if
not VAR_P. Filter attribute warnings with OPT_Wattributes.
(avr_attribute_table) : Initialize
.decl_required with true.
Index: config/avr/avr.c
===
--- config/avr/avr.c	(revision 251142)
+++ config/avr/avr.c	(working copy)
@@ -9790,10 +9790,12 @@ avr_handle_addr_attribute (tree *node, t
   bool io_p = (strncmp (IDENTIFIER_POINTER (name), "io", 2) == 0);
   location_t loc = DECL_SOURCE_LOCATION (*node);
 
-  if (TREE_CODE (*node) != VAR_DECL)
+  if (!VAR_P (*node))
 {
-  warning_at (loc, 0, "%qE attribute only applies to variables", name);
+  warning_at (loc, OPT_Wattributes, "%qE attribute only applies to "
+		  "variables", name);
   *no_add = true;
+  return NULL_TREE;
 }
 
   if (args != NULL_TREE)
@@ -9803,8 +9805,8 @@ avr_handle_addr_attribute (tree *node, t
   tree arg = TREE_VALUE (args);
   if (TREE_CODE (arg) != INTEGER_CST)
 	{
-	  warning (0, "%qE attribute allows only an integer constant argument",
-		   name);
+	  warning_at (loc, OPT_Wattributes, "%qE attribute allows only an "
+		  "integer constant argument", name);
 	  *no_add = true;
 	}
   else if (io_p
@@ -9813,19 +9815,20 @@ avr_handle_addr_attribute (tree *node, t
 			? low_io_address_operand : io_address_operand)
 			 (GEN_INT (TREE_INT_CST_LOW (arg)), QImode)))
 	{
-	  warning_at (loc, 0, "%qE attribute address out of range", name);
+	  warning_at (loc, OPT_Wattributes, "%qE attribute address "
+		  "out of range", name);
 	  *no_add = true;
 	}
   else
 	{
 	  tree attribs = DECL_ATTRIBUTES (*node);
-	  const char *names[] = { "io", "io_low", "address", NULL } ;
+	  const char *names[] = { "io", "io_low", "address", NULL };
 	  for (const char **p = names; *p; p++)
 	{
 	  tree other = lookup_attribute (*p, attribs);
 	  if (other && TREE_VALUE (other))
 		{
-		  warning_at (loc, 0,
+		  warning_at (loc, OPT_Wattributes,
 			  "both %s and %qE attribute provide address",
 			  *p, name);
 		  *no_add = true;
@@ -9836,7 +9839,8 @@ avr_handle_addr_attribute (tree *node, t
 }
 
   if (*no_add == false && io_p && !TREE_THIS_VOLATILE (*node))
-warning_at (loc, 0, "%qE attribute on non-volatile variable", name);
+warning_at (loc, OPT_Wattributes, "%qE attribute on non-volatile variable",
+		name);
 
   return NULL_TREE;
 }
@@ -9886,11 +9890,11 @@ avr_attribute_table[] =
 false },
   { "OS_main",   0, 0, false, true,  true,   avr_handle_fntype_attribute,
 false },
-  { "io",0, 1, false, false, false,  avr_handle_addr_attribute,
+  { "io",0, 1, true, false, false,  avr_handle_addr_attribute,
 false },
-  { "io_low",0, 1, false, false, false,  avr_handle_addr_attribute,
+  { "io_low",0, 1, true, false, false,  avr_handle_addr_attribute,
 false },
-  { "address",   1, 1, false, false, false,  avr_handle_addr_attribute,
+  { "address",   1, 1, true, false, false,  avr_handle_addr_attribute,
 false },
   { "absdata",   0, 0, true, false, false,  avr_handle_absdata_attribute,
 false },


Re: [patch, fortran] Bug 81296 - derived type I/o problem

2017-08-21 Thread Paul Richard Thomas
Hi Jerry,

That looks good to me - OK for trunk and for backporting.

Thanks for the patch.

Paul

PS Did you have time to think about that rather more difficult bug,
involving a mix of DT descriptors and intrinsic descriptors for the
declared type?

On 21 August 2017 at 03:23, Jerry DeLisle  wrote:
> Hi all,
>
> The attached patch adds a check for the format label containing a "DT"
> format descriptor and enables the generation of the correct code.  The patch
> modifies an existing test case as a future check on this.
>
> Regression tested on x86_64-linux.
>
> OK for trunk and backport to 7?
>
> Regards,
>
> Jerry
>
> 2017-08-21  Jerry DeLisle  
>
> PR fortran/81296
> * trans-io.c (get_dtio_proc): Add check for format label and set
> formatted flag accordingly. Reorganize the code a little.



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: RFC: [PATCH] Add warn_if_not_aligned attribute

2017-08-21 Thread H.J. Lu
On Mon, Aug 21, 2017 at 3:59 AM, Szabolcs Nagy  wrote:
> On 17/08/17 15:56, H.J. Lu wrote:
>> On Thu, Aug 17, 2017 at 6:52 AM, Joseph Myers  
>> wrote:
>>> On Sat, 8 Jul 2017, H.J. Lu wrote:
>>>
 +@item -Wpacked-not-aligned @r{(C, C++, Objective-C and Objective-C++ 
 only)}
 +@opindex Wpacked-not-aligned
 +@opindex Wno-packed-not-aligned
 +Warn if a structure field with explicitly specified alignment in a
 +packed struct or union is misaligned.  For example, a warning will
 +be issued on @code{struct S}, like, @code{warning: alignment 1 of
 +'struct S' is less than 8}, in this code:
>>>
>>> Use @samp for warnings quoted in the manual, as previously discussed.
>>>
>>> OK with that change, in the absence of C++ maintainer objections within 48
>>> hours.
>>>
>>
>> Here is the updated patch.  I moved c++ changes to merge_decls, where
>> alignment is merged,  and check_bitfield_type_and_width, where bit-fields
>> are checked.
>>
>> Tested on x86-64 and i686.
>>
>
> i assume packed semantics is same on arm so these
> should warn on arm too ?
>
> on arm i see:
>
> FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 8)
> FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 16)
> FAIL: gcc.dg/pr53037-2.c  (test for warnings, line 32)
> FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 8)
> FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 16)
> FAIL: gcc.dg/pr53037-3.c  (test for warnings, line 32)
>
> FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++98  (test for warnings, line 29)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++11  (test for warnings, line 29)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-2.C  -std=gnu++14  (test for warnings, line 29)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++98  (test for warnings, line 29)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++11  (test for warnings, line 29)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 6)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 16)
> FAIL: g++.dg/pr53037-3.C  -std=gnu++14  (test for warnings, line 29)
>

See:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53037#c29

-- 
H.J.


[PING][PATCH][PR sanitizer/77631] Support separate debug info in libbacktrace

2017-08-21 Thread Denis Khalikov

Hello,
this is a ping for that patch:
https://gcc.gnu.org/ml/gcc-patches/2017-07/msg01958.html

Thanks.


Re: Add a full_integral_type_p helper function

2017-08-21 Thread Richard Biener
On Mon, Aug 21, 2017 at 11:58 AM, Richard Sandiford
 wrote:
> Richard Biener  writes:
>> On Fri, Aug 18, 2017 at 1:04 PM, Richard Sandiford
>>  wrote:
>>> Richard Biener  writes:
 On Fri, Aug 18, 2017 at 10:10 AM, Richard Sandiford
  wrote:
> There are several places that test whether:
>
> TYPE_PRECISION (t) == GET_MODE_PRECISION (TYPE_MODE (t))
>
> for some integer type T.  With SVE variable-length modes, this would
> need to become:
>
> TYPE_PRECISION (t) == GET_MODE_PRECISION (SCALAR_TYPE_MODE (t))
>
> (or SCALAR_INT_TYPE_MODE, it doesn't matter which in this case).
> But rather than add the "SCALAR_" everywhere, it seemed neater to
> introduce a new helper function that tests whether T is an integral
> type that has the same number of bits as its underlying mode.  This
> patch does that, calling it full_integral_type_p.
>
> It isn't possible to use TYPE_MODE in tree.h because vector_type_mode
> is defined in stor-layout.h, so for now the function accesses the mode
> field directly.  After the 77-patch machine_mode series (thanks again
> Jeff for the reviews) it would use SCALAR_TYPE_MODE instead.
>
> Of the changes that didn't previously have an INTEGRAL_TYPE_P check:
>
> - for fold_single_bit_test_into_sign_test it is obvious from the
>   integer_foop tests that this is restricted to integral types.
>
> - vect_recog_vector_vector_shift_pattern is inherently restricted
>   to integral types.
>
> - the register_edge_assert_for_2 hunk is dominated by:
>
>   TREE_CODE (val) == INTEGER_CST
>
> - the ubsan_instrument_shift hunk is preceded by an early exit:
>
>   if (!INTEGRAL_TYPE_P (type0))
> return NULL_TREE;
>
> - the second and third match.pd hunks are from:
>
> /* Fold (X << C1) & C2 into (X << C1) & (C2 | ((1 << C1) - 1))
> (X >> C1) & C2 into (X >> C1) & (C2 | ~((type) -1 >> C1))
>if the new mask might be further optimized.  */
>
> I'm a bit confused about:
>
> /* Try to fold (type) X op CST -> (type) (X op ((type-x) CST))
>when profitable.
>For bitwise binary operations apply operand conversions to the
>binary operation result instead of to the operands.  This allows
>to combine successive conversions and bitwise binary operations.
>We combine the above two cases by using a conditional convert.  */
> (for bitop (bit_and bit_ior bit_xor)
>  (simplify
>   (bitop (convert @0) (convert? @1))
>   (if (((TREE_CODE (@1) == INTEGER_CST
>  && INTEGRAL_TYPE_P (TREE_TYPE (@0))
>  && int_fits_type_p (@1, TREE_TYPE (@0)))
> || types_match (@0, @1))
>/* ???  This transform conflicts with fold-const.c doing
>   Convert (T)(x & c) into (T)x & (T)c, if c is an integer
>   constants (if x has signed type, the sign bit cannot be set
>   in c).  This folds extension into the BIT_AND_EXPR.
>   Restrict it to GIMPLE to avoid endless recursions.  */
>&& (bitop != BIT_AND_EXPR || GIMPLE)
>&& (/* That's a good idea if the conversion widens the operand, 
> thus
> after hoisting the conversion the operation will be narrower.  */
>TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type)
>/* It's also a good idea if the conversion is to a non-integer
>   mode.  */
>|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
>/* Or if the precision of TO is not the same as the precision
>   of its mode.  */
> || TYPE_PRECISION (type) != GET_MODE_PRECISION (TYPE_MODE (type
>(convert (bitop @0 (convert @1))
>
> though.  The "INTEGRAL_TYPE_P (TREE_TYPE (@0))" suggests that we can't
> rely on @0 and @1 being integral (although conversions from float would
> use FLOAT_EXPR), but then what is:

 bit_and is valid on POINTER_TYPE and vector integer types

>
>/* It's also a good idea if the conversion is to a non-integer
>   mode.  */
>|| GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
>
> letting through?  MODE_PARTIAL_INT maybe, but that's a sort of integer
> mode too.  MODE_COMPLEX_INT or MODE_VECTOR_INT?  I thought for those
> it would be better to apply the scalar rules to the element type.

 I suppose extra caution ;)  I think I have seen BLKmode for not naturally
 aligned integer types at least on strict-align targets?  The code is
 a copy from original code in tree-ssa-forwprop.c.

> Either way, having allowed all non-INT modes, using full_integral_type_p
> for the remaining condition seems correct.
>
> If the feeling is that this isn't a useful abstraction, I can just update
> e

Re: Remove the frame size argument from function_prologue/epilogue

2017-08-21 Thread Richard Biener
On Mon, Aug 21, 2017 at 1:12 PM, Richard Sandiford
 wrote:
> Later patches will add support for frame sizes that are a run-time
> invariant but not a compile-time constant.  This then raised the
> question of whether the frame size argument to the function_prologue/
> epilogue hooks should be updated accordingly.
>
> It turned out that only two targets actually used this argument, and
> even they got other frame-related information from the cached machine
> structure.  This patch therefore removes the argument and makes the two
> targets use get_frame_size () instead.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu, and by building
> one target per CPU and checking that there were no differences in
> assembly for the testsuite.  OK to install?

Ok.

Richard.

> Richard
>
>
> 2017-08-21  Richard Sandiford  
> Alan Hayward  
> David Sherwood  
>
> gcc/
> * target.def (function_prologue): Remove frame size argument.
> (function_epilogue): Likewise.
> * doc/tm.texi: Regenerate.
> * final.c (final_start_function): Update call to function_prologue.
> (final_end_function): Update call to function_epilogue.
> (default_function_pro_epilogue): Remove frame size argument.
> * output.h (default_function_pro_epilogue): Likewise.
> * config/arm/arm.c (arm_output_function_epilogue): Likewise.
> (arm_output_function_prologue): Likewise.
> * config/frv/frv.c (frv_function_prologue): Likewise.
> (frv_function_epilogue): Likewise.
> * config/i386/i386.c (ix86_output_function_epilogue): Likewise.
> * config/ia64/ia64.c (ia64_output_function_prologue): Likewise.
> (ia64_output_function_epilogue): Likewise.
> * config/m32r/m32r.c (m32r_output_function_prologue): Likewise.
> (m32r_output_function_epilogue): Likewise.
> * config/microblaze/microblaze.c (microblaze_function_prologue)
> (microblaze_function_epilogue): Likewise.
> * config/mips/mips.c (mips_output_function_prologue): Likewise.
> (mips_output_function_epilogue): Likewise.
> * config/mmix/mmix.c (mmix_target_asm_function_prologue): Likewise.
> (mmix_target_asm_function_epilogue): Likewise.
> * config/msp430/msp430.c (msp430_start_function): Likewise.
> * config/nds32/nds32.c (nds32_asm_function_prologue): Likewise.
> (nds32_asm_function_epilogue): Likewise.
> * config/nios2/nios2.c (nios2_asm_function_prologue): Likewise.
> * config/pa/pa.c (pa_output_function_prologue): Likewise.
> (pa_output_function_epilogue): Likewise.
> * config/powerpcspe/powerpcspe.c (rs6000_output_function_prologue)
> (rs6000_output_function_epilogue): Likewise.
> * config/rl78/rl78.c (rl78_start_function): Likewise.
> * config/rs6000/rs6000.c (rs6000_output_function_prologue): Likewise.
> (rs6000_output_function_epilogue): Likewise.
> * config/rx/rx.c (rx_output_function_prologue): Likewise.
> * config/sh/sh.c (sh_output_function_epilogue): Likewise.
> * config/sparc/sparc.c (sparc_asm_function_prologue): Likewise.
> (sparc_asm_function_epilogue): Likewise.
>
> Index: gcc/target.def
> ===
> --- gcc/target.def  2017-07-27 10:37:54.747032158 +0100
> +++ gcc/target.def  2017-08-21 11:54:34.198323780 +0100
> @@ -306,8 +306,8 @@ DEFHOOK
>  function.  The prologue is responsible for setting up the stack frame,\n\
>  initializing the frame pointer register, saving registers that must be\n\
>  saved, and allocating @var{size} additional bytes of storage for the\n\
> -local variables.  @var{size} is an integer.  @var{file} is a stdio\n\
> -stream to which the assembler code should be output.\n\
> +local variables.  @var{file} is a stdio stream to which the assembler\n\
> +code should be output.\n\
>  \n\
>  The label for the beginning of the function need not be output by this\n\
>  macro.  That has already been done when the macro is run.\n\
> @@ -344,7 +344,7 @@ for a machine if doing so is more conven
>  compatibility reasons.  Except in cases where required by standard\n\
>  or by a debugger, there is no reason why the stack layout used by GCC\n\
>  need agree with that used by other compilers for a machine.",
> - void, (FILE *file, HOST_WIDE_INT size),
> + void, (FILE *file),
>   default_function_pro_epilogue)
>
>  /* Output the assembler code for end of prologue.  */
> @@ -374,7 +374,7 @@ DEFHOOK
>  function.  The epilogue is responsible for restoring the saved\n\
>  registers and stack pointer to their values when the function was\n\
>  called, and returning control to the caller.  This macro takes the\n\
> -same arguments as the macro @code{TARGET_ASM_FUNCTION_PROLOGUE}, and the\n\
> +same argument as the macro @code{TARGET_ASM_FUNCTION_PROLOGUE}, and the\n\
>  registers to restore are determined from @c

Re: Simplify pad_below implementation

2017-08-21 Thread Richard Biener
On Mon, Aug 21, 2017 at 1:14 PM, Richard Sandiford
 wrote:
> This patch simplifies the alignment calculations in pad_below.
> The first arm of the "if" was:
>
> - taking GET_MODE_BITSIZE (always equal to GET_MODE_SIZE * BITS_PER_UNIT),
> - rounding up to the next multiple of PARM_BOUNDARY
> - converting bits to bytes and
> - subtracting the GET_MODE_SIZE
>
> so was in effect calculating the number of bytes needed to round
> GET_MODE_SIZE up to (PARM_BOUNDARY / BITS_PER_UNIT).  That can be
> done more directly as -size & (align - 1), which is easier to
> convert to variable-sized modes.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu, and by building
> one target per CPU and checking that there were no differences in
> assembly for the testsuite.  OK to install?

Ok.

Richard.

> Richard
>
>
> 2017-08-21  Richard Sandiford  
> Alan Hayward  
> David Sherwood  
>
> gcc/
> * function.c (pad_below): Simplify padding calculation.
>
> Index: gcc/function.c
> ===
> --- gcc/function.c  2017-08-21 10:42:34.185530464 +0100
> +++ gcc/function.c  2017-08-21 11:55:41.018148268 +0100
> @@ -4322,21 +4322,16 @@ pad_to_arg_alignment (struct args_size *
>  static void
>  pad_below (struct args_size *offset_ptr, machine_mode passed_mode, tree 
> sizetree)
>  {
> +  unsigned int align = PARM_BOUNDARY / BITS_PER_UNIT;
>if (passed_mode != BLKmode)
> -{
> -  if (GET_MODE_BITSIZE (passed_mode) % PARM_BOUNDARY)
> -   offset_ptr->constant
> - += (((GET_MODE_BITSIZE (passed_mode) + PARM_BOUNDARY - 1)
> -  / PARM_BOUNDARY * PARM_BOUNDARY / BITS_PER_UNIT)
> - - GET_MODE_SIZE (passed_mode));
> -}
> +offset_ptr->constant += -GET_MODE_SIZE (passed_mode) & (align - 1);
>else
>  {
>if (TREE_CODE (sizetree) != INTEGER_CST
> - || (TREE_INT_CST_LOW (sizetree) * BITS_PER_UNIT) % PARM_BOUNDARY)
> + || (TREE_INT_CST_LOW (sizetree) & (align - 1)) != 0)
> {
>   /* Round the size up to multiple of PARM_BOUNDARY bits.  */
> - tree s2 = round_up (sizetree, PARM_BOUNDARY / BITS_PER_UNIT);
> + tree s2 = round_up (sizetree, align);
>   /* Add it in.  */
>   ADD_PARM_SIZE (*offset_ptr, s2);
>   SUB_PARM_SIZE (*offset_ptr, sizetree);


Re: [PATCH] PR c++/80287 add new testcase

2017-08-21 Thread Nathan Sidwell

On 08/18/2017 04:28 AM, Yvan Roux wrote:


https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00730.html


Thanks,
Yvan

gcc/testsuite
2017-07-13  Yvan Roux  

 PR c++/80287
 * g++.dg/pr80287.C: New test.


ok

--
Nathan Sidwell


Re: [PATCH GCC][01/06]New interface returning all adjacent vertices in graph

2017-08-21 Thread Richard Biener
On Mon, Aug 14, 2017 at 11:19 AM, Bin Cheng  wrote:
> Hi,
> This simple patch adds new interface returning adjacent vertices for a vertex 
> in graph.
> Bootstrap and test in series.  Is it OK?

The comment of the function doesn't match its implementation.  Why did
you choose
to use the dfs helpers instead of (more clearly IMHO) using

 e = v->succ;
 while (e)
   {
 adj->safe_push (e->dest);
 e = e->succ_next;

given you do not expose the direction as argument to adjacent_vertices?  Btw,
is this "adjacent" a term understood in the context of (directed) graphs?

Richard.

> Thanks,
> bin
> 2017-08-10  Bin Cheng  
>
> * graphds.c (adjacent_vertices): New function.
> * graphds.h (adjacent_vertices): New declaration.


Re: [AArch64], patch] PR71727 fix -mstrict-align

2017-08-21 Thread Christophe Lyon
ping ?
https://gcc.gnu.org/ml/gcc-patches/2017-07/msg01063.html

Christophe


On 18 July 2017 at 14:50, Christophe Lyon  wrote:
> Hello,
>
> I've received a complaint that GCC for AArch64 would generate
> vectorized code relying on unaligned memory accesses even when using
> -mstrict-align. This is a problem for code where such accesses lead to
> memory faults.
>
> A previous patch (r24) introduced
> aarch64_builtin_support_vector_misalignment, which rejects such
> accesses when the element size is 64 bits, and accepts them otherwise,
> which I think it shouldn't. The testcase added at that time only used
> 64 bits elements, and therefore didn't fully test the patch.
>
> The report I received is about vectorized accesses to an array of
> unsigned chars, whose start address is not aligned on a 128 bits
> boundary.
>
> The attached patch fixes the problem by making
> aarch64_builtin_support_vector_misalignment always return false when
> the misalignment is not known at compile time.
>
> I've also added a testcase, which tries to check if the array start
> address alignment is checked (using %16, and-ing with #15), so that
> loop peeling is performed *before* using vectorized accesses. Without
> the patch, vectorized accesses are used at the beginning of the array,
> and byte accesses are used for the remainder at the end, and there is
> no such 'and wX,wX,15'.
>
> BTW, I'm not sure about the same hook for arm... it seems to me it has
> a similar problem.
>
> OK?
>
> Thanks,
>
> Christophe


Re: [PATCH GCC][02/06]New field in struct dependence_info indicating fixed length access

2017-08-21 Thread Richard Biener
On Mon, Aug 14, 2017 at 11:19 AM, Bin Cheng  wrote:
> Hi,
> This simple patch adds new field in struct dependence_info.  The new field
> indicates if non-dependence information is only valid for fixed memory access
> length of this reference.  There is a concern that this costs an additional
> byte for all tree nodes, but I do not know easy way out because we need to
> differentiate dependence_info derived from runtime alias check with others
> derived from restrict pointer.
> Bootstrap and test in series.  any comment?

This increases each tree node by 8 bytes, so no.  You'd have to carve the
bit away from either clique or base or find a bit elsewhere.

Note I do not understand the comment.

IIRC we discussed this in the context of versioning for aliasing where
for example vectorizer alias checks are performed with a min. dependence
distance in mind.  So when marking refs as non-aliasing we have to ensure
the info still holds when unrolling the loop n > min. dependence distance times
which is ensured by clearing the non-alias info.

Basically the versioned loop copy now adheres to safelen = vect-factor and
the question is what safelen translates to (see also PR81877 for some fun
around safelen).

Richard.

> Thanks,
> bin
> 2017-08-10  Bin Cheng  
>
> * tree-core.h (struct tree_base.dependence_info): New field.
> * tree.c (copy_node): Reset dependence info for fixed length
> memory access.
> * tree.h (MR_DEPENDENCE_FIXED_LENGTH_P): New macro.


[PATCH] Fix PR81900

2017-08-21 Thread Richard Biener

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2017-08-21  Richard Biener  

PR tree-optimization/81900
* tree-ssa-pre.c (compute_antic_aux): Properly compute changed
for blocks with abnormal predecessors.
(compute_antic): Do not set visited flag prematurely.

* gcc.dg/torture/pr81900.c: New testcase.

Index: gcc/tree-ssa-pre.c
===
--- gcc/tree-ssa-pre.c  (revision 251217)
+++ gcc/tree-ssa-pre.c  (working copy)
@@ -2119,16 +2170,15 @@ static sbitmap has_abnormal_preds;
 static bool
 compute_antic_aux (basic_block block, bool block_has_abnormal_pred_edge)
 {
-  bool changed = false;
   bitmap_set_t S, old, ANTIC_OUT;
   bitmap_iterator bi;
   unsigned int bii;
   edge e;
   edge_iterator ei;
-  bool was_visited = BB_VISITED (block);
 
-  old = ANTIC_OUT = S = NULL;
+  bool changed = ! BB_VISITED (block);
   BB_VISITED (block) = 1;
+  old = ANTIC_OUT = S = NULL;
 
   /* If any edges from predecessors are abnormal, antic_in is empty,
  so do nothing.  */
@@ -2217,7 +2267,7 @@ compute_antic_aux (basic_block block, bo
   /* clean (ANTIC_IN (block)) is defered to after the iteration converged
  because it can cause non-convergence, see for example PR81181.  */
 
-  if (!was_visited || !bitmap_set_equal (old, ANTIC_IN (block)))
+  if (!bitmap_set_equal (old, ANTIC_IN (block)))
 changed = true;
 
  maybe_dump_sets:
@@ -2396,9 +2446,6 @@ compute_antic (void)
if (e->flags & EDGE_ABNORMAL)
  {
bitmap_set_bit (has_abnormal_preds, block->index);
-
-   /* We also anticipate nothing.  */
-   BB_VISITED (block) = 1;
break;
  }
 
Index: gcc/testsuite/gcc.dg/torture/pr81900.c
===
--- gcc/testsuite/gcc.dg/torture/pr81900.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr81900.c  (working copy)
@@ -0,0 +1,19 @@
+/* PR/81900 */
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } { "" } } */
+/* { dg-options "-fdump-tree-optimized" } */
+
+void Perl_croak() __attribute__((noreturn));
+char *Perl_sv_gets();
+void __attribute__((returns_twice)) __sigsetjmp ();
+void a() {
+__sigsetjmp();
+char *b;
+if ((b = Perl_sv_gets()) == 0)
+  Perl_croak("No Perl script found in input\n");
+if (*b == '#')
+  __asm__("" : : ""("badbad"));
+}
+
+/* Do not elide the test against zero.  */
+/* { dg-final { scan-tree-dump " == 0B" "optimized" } } */


Make more use of paradoxical_subreg_p

2017-08-21 Thread Richard Sandiford
This patch makes more use of the existing paradoxical_subreg_p
predicate and also adds a version that operates on outer and
inner modes.

Some of the affected tests were based on GET_MODE_SIZE rather than
GET_MODE_PRECISION and so the patch could change the result for modes
that have the same size but different precisions.  I think in each
case the change should be a no-op or more correct, since a mode with
precision N bits can't be expected to hold all of a mode with precision
M>N bits.

The patch changes the branch taken in simplify_subreg for modes with
equal precision, but the new form matches the commentary more closely.
Both branches should be equally good in that situation.

Tested on aarch64-linux-gnu and x86_64-linux-gnu, and by checking that
there were no changes in the testsuite assembly output for one target
per CPU.  OK to install?

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (paradoxical_subreg_p): Define inline, and add a version
that takes the outer and inner modes.
* doc/rtl.texi: Use paradoxical_subreg_p instead of a GET_MODE_SIZE
comparison as the canonical test for a paradoxical subreg.
* combine.c (simplify_set): Use paradoxical_subreg_p.
(make_extraction): Likewise.
(force_to_mode): Likewise.
(rtx_equal_for_field_assignment_p): Likewise.
(gen_lowpart_for_combine): Likewise.
(simplify_comparison): Likewise.
* cse.c (equiv_constant): Likewise.
* expmed.c (store_bit_field_1): Likewise.
* final.c (alter_subreg): Likewise.
* fwprop.c (propagate_rtx): Likewise.
(forward_propagate_subreg): Likewise.
* ira-conflicts.c (ira_build_conflicts): Likewise.
* lower-subreg.c (simplify_gen_subreg_concatn): Likewise.
* lra-constraints.c (curr_insn_transform): Likewise.
(split_reg): Likewise.
* lra-eliminations.c (move_plus_up): Likewise.
(lra_eliminate_regs_1): Likewise.
* recog.c (general_operand): Likewise.
* ree.c (combine_reaching_defs): Likewise.
* reload.c (push_reload): Likewise.
(find_reloads): Likewise.
* reload1.c (elimination_effects): Likewise.
(compute_reload_subreg_offset): Likewise.
(choose_reload_regs): Likewise.
* rtlanal.c (subreg_lsb_1): Likewise.
* simplify-rtx.c (simplify_unary_operation_1): Likewise.
(simplify_subreg): Likewise.
* var-tracking.c (track_loc_p): Likewise.
* emit-rtl.c (byte_lowpart_offset): Likewise.
(paradoxical_subreg_p): Delete out-of-line definition.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-08-21 10:42:34.185530464 +0100
+++ gcc/rtl.h   2017-08-21 14:20:43.099964655 +0100
@@ -2784,10 +2784,28 @@ extern rtx operand_subword (rtx, unsigne
 
 /* In emit-rtl.c */
 extern rtx operand_subword_force (rtx, unsigned int, machine_mode);
-extern bool paradoxical_subreg_p (const_rtx);
 extern int subreg_lowpart_p (const_rtx);
 extern unsigned int subreg_size_lowpart_offset (unsigned int, unsigned int);
 
+/* Return true if a subreg with the given outer and inner modes is
+   paradoxical.  */
+
+inline bool
+paradoxical_subreg_p (machine_mode outermode, machine_mode innermode)
+{
+  return GET_MODE_PRECISION (outermode) > GET_MODE_PRECISION (innermode);
+}
+
+/* Return true if X is a paradoxical subreg, false otherwise.  */
+
+inline bool
+paradoxical_subreg_p (const_rtx x)
+{
+  if (GET_CODE (x) != SUBREG)
+return false;
+  return paradoxical_subreg_p (GET_MODE (x), GET_MODE (SUBREG_REG (x)));
+}
+
 /* Return the SUBREG_BYTE for an OUTERMODE lowpart of an INNERMODE value.  */
 
 inline unsigned int
Index: gcc/doc/rtl.texi
===
--- gcc/doc/rtl.texi2017-07-27 10:37:54.486030028 +0100
+++ gcc/doc/rtl.texi2017-08-21 14:20:43.094947435 +0100
@@ -1872,7 +1872,7 @@ expression is called @dfn{paradoxical}.
 class of @code{subreg} is:
 
 @smallexample
-GET_MODE_SIZE (@var{m1}) > GET_MODE_SIZE (@var{m2})
+paradoxical_subreg_p (@var{m1}, @var{m2})
 @end smallexample
 
 Paradoxical @code{subreg}s can be used as both lvalues and rvalues.
Index: gcc/combine.c
===
--- gcc/combine.c   2017-08-21 10:42:34.185530464 +0100
+++ gcc/combine.c   2017-08-21 14:20:43.092940547 +0100
@@ -6809,9 +6809,7 @@ simplify_set (rtx x)
   / UNITS_PER_WORD)
  == ((GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)))
   + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD))
-  && (WORD_REGISTER_OPERATIONS
- || (GET_MODE_SIZE (GET_MODE (src))
- <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (src)
+  && (WORD_REGISTER_OPERATIONS || !paradoxical_subreg_p (src))
 #ifdef CANNOT_CHANGE_MODE_CLASS
   && ! (REG_P (dest) && REGNO (dest) < FIRST

[c++/81899] bound tpl-tpl-parm ICE

2017-08-21 Thread Nathan Sidwell
This patch fixes pr 81899.  It was an underlying bug exposed by my 
removal of LANG_TYPE_SPECIFIC from BOUND_TEMPLATE_TEMPLATE_PARM.


B_T_T_P happened to not set CLASSTYPE_USE_TEMPLATE, even though, by 
definition, they are template instantiations.  However they also 
happened to not be DECL_HIDDEN, so we lucked out.


This patch adds an explicit check for not being a B_T_T_P before 
checking CLASSTYPE_USE_TEMPLATE.


applied to trunk.

nathan
--
Nathan Sidwell
2017-08-21  Nathan Sidwell  

	PR c++/81899
	* pt.c (instantiate_class_template_1):
	BOUND_TEMPLATE_TEMPLATE_PARM is never friend-injected.

	PR c++/81899
	* g++.dg/template/pr81899.C: New.

Index: cp/pt.c
===
--- cp/pt.c	(revision 251221)
+++ cp/pt.c	(working copy)
@@ -10724,10 +10724,15 @@ instantiate_class_template_1 (tree type)
 		adjust_processing_template_decl = true;
 		  --processing_template_decl;
 		}
-	  else if (!CLASSTYPE_USE_TEMPLATE (friend_type)
+	  else if (TREE_CODE (friend_type) != BOUND_TEMPLATE_TEMPLATE_PARM
+		   && !CLASSTYPE_USE_TEMPLATE (friend_type)
 		   && TYPE_HIDDEN_P (friend_type))
 		{
 		  /* friend class C;
Index: testsuite/g++.dg/template/pr81899.C
===
--- testsuite/g++.dg/template/pr81899.C	(revision 0)
+++ testsuite/g++.dg/template/pr81899.C	(working copy)
@@ -0,0 +1,11 @@
+// PR 81899 we tried to treat a bound-tpl-tpl-parm as-if a real record-type
+
+template <template <typename> class FunctorData>
+struct functor {
+  friend FunctorData<int>;
+  void foo();
+};
+
+template <typename> struct data;
+
+template<> void functor<data>::foo();


Add a partial_subreg_p predicate

2017-08-21 Thread Richard Sandiford
This patch adds a partial_subreg_p predicate to go alongside
paradoxical_subreg_p.

The first two changes to cse_insn preserve the current behaviour,
but the condition seems strange.  Shouldn't we be able to continue
to cse if the inner modes of the two subregs have the same size?

The patch also preserves the existing condition in
simplify_operand_subreg, but perhaps it should be using
a df_read_modify_subreg_p-style check instead.  We don't
need to reload the inner value first if the inner value
is no bigger than a word, for example.

The use in curr_insn_transform also seemed strange.  Surely the
modes of the SET_DEST and SET_SRC will be the same, given that
this code isn't meant for constants?

Like the paradoxical_subreg_p patch, this one replaces some tests that
were based on GET_MODE_SIZE rather than GET_MODE_PRECISION.  In each
case the change should be a no-op or an improvement.

Doing this in regcprop.c prevents some replacements of the 82-bit RFmode
with the 80-bit XFmode on ia64.  I don't understand the target details
here particularly well, but from the way the modes are described in
ia64-modes.def, it isn't valid to assume that an XFmode can carry an
RFmode payload.  A comparison of the testsuite assembly output for one
target per CPU showed no other differences.

Some of the places changed here are tracking the widest access mode
found for a register.  The series tries to standardise on:

  if (partial_subreg_p (widest_seen, new_mode))
widest_seen = new_mode;

rather than:

  if (paradoxical_subreg_p (new_mode, widest_seen))
widest_seen = new_mode;

Either would have been OK.

Tested on aarch64-linux-gnu and x86_64-linux-gnu in addition to the above.
OK to install?

Richard


2017-08-21  Richard Sandiford  
Alan Hayward  
David Sherwood  

gcc/
* rtl.h (partial_subreg_p): New function.
* caller-save.c (save_call_clobbered_regs): Use it.
* calls.c (expand_call): Likewise.
* combine.c (combinable_i3pat): Likewise.
(simplify_set): Likewise.
(make_extraction): Likewise.
(make_compound_operation_int): Likewise.
(gen_lowpart_or_truncate): Likewise.
(force_to_mode): Likewise.
(make_field_assignment): Likewise.
(reg_truncated_to_mode): Likewise.
(record_truncated_value): Likewise.
(move_deaths): Likewise.
* cse.c (record_jump_cond): Likewise.
(cse_insn): Likewise.
* cselib.c (cselib_lookup_1): Likewise.
* df-scan.c (df_read_modify_subreg_p): Likewise.
* expmed.c (extract_bit_field_using_extv): Likewise.
* function.c (assign_parm_setup_reg): Likewise.
* ifcvt.c (noce_convert_multiple_sets): Likewise.
* ira-build.c (create_insn_allocnos): Likewise.
* lra-coalesce.c (merge_pseudos): Likewise.
* lra-constraints.c (match_reload): Likewise.
(simplify_operand_subreg): Likewise.
(curr_insn_transform): Likewise.
* lra-lives.c (process_bb_lives): Likewise.
* lra.c (new_insn_reg): Likewise.
(lra_substitute_pseudo): Likewise.
* regcprop.c (mode_change_ok): Likewise.
(maybe_mode_change): Likewise.
(copyprop_hardreg_forward_1): Likewise.
* reload.c (push_reload): Likewise.
(find_reloads): Likewise.
(find_reloads_subreg_address): Likewise.
* reload1.c (alter_reg): Likewise.
(eliminate_regs_1): Likewise.
* simplify-rtx.c (simplify_unary_operation_1): Likewise.

Index: gcc/rtl.h
===
--- gcc/rtl.h   2017-08-21 14:20:43.099964655 +0100
+++ gcc/rtl.h   2017-08-21 14:21:03.728890033 +0100
@@ -2787,6 +2787,30 @@ extern rtx operand_subword_force (rtx, u
 extern int subreg_lowpart_p (const_rtx);
 extern unsigned int subreg_size_lowpart_offset (unsigned int, unsigned int);
 
+/* Return true if a subreg of mode OUTERMODE would only access part of
+   an inner register with mode INNERMODE.  The other bits of the inner
+   register would then be "don't care" on read.  The behavior for writes
+   depends on REGMODE_NATURAL_SIZE; bits in the same REGMODE_NATURAL_SIZE-d
+   chunk would be clobbered but other bits would be preserved.  */
+
+inline bool
+partial_subreg_p (machine_mode outermode, machine_mode innermode)
+{
+  return GET_MODE_PRECISION (outermode) < GET_MODE_PRECISION (innermode);
+}
+
+/* Likewise return true if X is a subreg that is smaller than the inner
+   register.  Use df_read_modify_subreg_p to test whether writing to such
+   a subreg preserves any part of the inner register.  */
+
+inline bool
+partial_subreg_p (const_rtx x)
+{
+  if (GET_CODE (x) != SUBREG)
+return false;
+  return partial_subreg_p (GET_MODE (x), GET_MODE (SUBREG_REG (x)));
+}
+
 /* Return true if a subreg with the given outer and inner modes is
paradoxical.  */
 
Index: gcc/caller-save.c
===

Re: [PATCH 2/2] C: use full locations within c_parser_expr_list's vec

2017-08-21 Thread Joseph Myers
These two patches are OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Remove -feliminate-dwarf2-dups

2017-08-21 Thread Richard Biener

This was agreed upon and now that early LTO debug landed here it is.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

In case the gcc_unreachable () gets through I'll instead remove the
if as noted in my followup TODO.

Richard.

2017-08-21  Richard Biener  

* common.opt (feliminate-dwarf2-dups): Ignore.
* doc/invoke.texi (feliminate-dwarf2-dups): Remove documentation.
* dwarf2out.c (push_new_compile_unit, pop_compile_unit,
same_die_p_wrap, compute_section_prefix,
is_symbol_die, assign_symbol_names, break_out_includes): Remove.
(comdat_symbol_id, comdat_symbol_number): Likewise.
(cu_hash_table_entry, cu_hash_table_entry_hasher, cu_hash_type):
Likewise.
(check_duplicate_cu, record_comdat_symbol_number): Likewise.
(output_die): Mark unreachable path unreachable.
(dwarf2out_start_source_file): Do not create DW_TAG_GNU_BINCL.
(dwarf2out_end_source_file): Do not create DW_TAG_GNU_EINCL.
(dwarf2out_init): Remove code handling flag_eliminate_dwarf2_dups.
(dwarf2out_early_finish): Likewise.

* g++.dg/debug/dwarf2/dwarf2-1.C: Remove -feliminate-dwarf2-dups.
* g++.dg/debug/dwarf2/dwarf2-2.C: Likewise.
* g++.dg/debug/dwarf2/pr46123-2.C: Likewise.
* g++.dg/debug/dwarf2/typedef5.C: Likewise.
* gcc.dg/debug/dwarf2/dwarf2-3.c: Likewise.
* gcc.dg/debug/dwarf2/dwarf2-3.h: Likewise.
* gcc.dg/debug/dwarf2/dups-types.c: Remove.
* gcc.dg/debug/dwarf2/dups-types.h: Likewise.

Index: gcc/common.opt
===
--- gcc/common.opt  (revision 251222)
+++ gcc/common.opt  (working copy)
@@ -1303,8 +1303,8 @@ Common Report Var(flag_early_inlining) I
 Perform early inlining.
 
 feliminate-dwarf2-dups
-Common Report Var(flag_eliminate_dwarf2_dups)
-Perform DWARF duplicate elimination.
+Common Ignore
+Does nothing.  Preserved for backward compatibility.
 
 fipa-sra
 Common Report Var(flag_ipa_sra) Init(0) Optimization
Index: gcc/doc/invoke.texi
===
--- gcc/doc/invoke.texi (revision 251222)
+++ gcc/doc/invoke.texi (working copy)
@@ -347,7 +347,7 @@ Objective-C and Objective-C++ Dialects}.
 -gcolumn-info  -gno-column-info @gol
 -gvms  -gxcoff  -gxcoff+  -gz@r{[}=@var{type}@r{]} @gol
 -fdebug-prefix-map=@var{old}=@var{new}  -fdebug-types-section @gol
--feliminate-dwarf2-dups  -fno-eliminate-unused-debug-types @gol
+-fno-eliminate-unused-debug-types @gol
 -femit-struct-debug-baseonly  -femit-struct-debug-reduced @gol
 -femit-struct-debug-detailed@r{[}=@var{spec-list}@r{]} @gol
 -feliminate-unused-debug-symbols  -femit-class-debug-always @gol
@@ -6834,8 +6834,8 @@ for maximum benefit.
 
 GCC no longer supports DWARF Version 1, which is substantially
 different than Version 2 and later.  For historical reasons, some
-other DWARF-related options (including @option{-feliminate-dwarf2-dups} 
-and @option{-fno-dwarf2-cfi-asm}) retain a reference to DWARF Version 2
+other DWARF-related options such as
+@option{-fno-dwarf2-cfi-asm} retain a reference to DWARF Version 2
 in their names, but apply to all currently-supported versions of DWARF.
 
 @item -gstabs
@@ -7027,12 +7027,6 @@ writing compressed debug sections, the o
 if the assembler does not support them, @option{-gz} is silently ignored
 when producing object files.
 
-@item -feliminate-dwarf2-dups
-@opindex feliminate-dwarf2-dups
-Compress DWARF debugging information by eliminating duplicated
-information about each symbol.  This option only makes sense when
-generating DWARF debugging information.
-
 @item -femit-struct-debug-baseonly
 @opindex femit-struct-debug-baseonly
 Emit debug information for struct-like types
Index: gcc/dwarf2out.c
===
--- gcc/dwarf2out.c (revision 251222)
+++ gcc/dwarf2out.c (working copy)
@@ -3422,8 +3422,6 @@ static void equate_decl_number_to_die (t
 static struct var_loc_node *add_var_loc_to_decl (tree, rtx, const char *);
 static void print_spaces (FILE *);
 static void print_die (dw_die_ref, FILE *);
-static dw_die_ref push_new_compile_unit (dw_die_ref, dw_die_ref);
-static dw_die_ref pop_compile_unit (dw_die_ref);
 static void loc_checksum (dw_loc_descr_ref, struct md5_ctx *);
 static void attr_checksum (dw_attr_node *, struct md5_ctx *, int *);
 static void die_checksum (dw_die_ref, struct md5_ctx *, int *);
@@ -3441,14 +3439,9 @@ static int same_loc_p (dw_loc_descr_ref,
 static int same_dw_val_p (const dw_val_node *, const dw_val_node *, int *);
 static int same_attr_p (dw_attr_node *, dw_attr_node *, int *);
 static int same_die_p (dw_die_ref, dw_die_ref, int *);
-static int same_die_p_wrap (dw_die_ref, dw_die_ref);
-static void compute_section_prefix (dw_die_ref);
 static int is_type_die (dw_die_ref);
 static int is_comdat_die (dw_die_ref);
-static int is_symbol_die (d

[PATCH] PR libstdc++/81912 make std::__iterator_category constexpr

2017-08-21 Thread Jonathan Wakely

We made the iterator range access functions constexpr for C++17, but
didn't test it, and so didn't notice that std::__iterator_category
isn't constexpr. This fixes it, and improves the tests a bit.

PR libstdc++/81912
* include/bits/stl_iterator_base_types.h (__iterator_category): Add
constexpr for C++11 and later.
* testsuite/24_iterators/container_access.cc: Add target selector.
* testsuite/24_iterators/range_access.cc: Fix clause number in
comment.
* testsuite/24_iterators/range_access_cpp14.cc: Likewise.
* testsuite/24_iterators/range_access_cpp17.cc: New.

Tested x86_64-linux, committed to trunk. Will backport to gcc-7-branch
too.
commit b9061802f6c78de5182da956933754c9545c94e8
Author: Jonathan Wakely 
Date:   Mon Aug 21 11:48:52 2017 +0100

PR libstdc++/81912 make std::__iterator_category constexpr

PR libstdc++/81912
* include/bits/stl_iterator_base_types.h (__iterator_category): Add
constexpr for C++11 and later.
* testsuite/24_iterators/container_access.cc: Add target selector.
* testsuite/24_iterators/range_access.cc: Fix clause number in
comment.
* testsuite/24_iterators/range_access_cpp14.cc: Likewise.
* testsuite/24_iterators/range_access_cpp17.cc: New.

diff --git a/libstdc++-v3/include/bits/stl_iterator_base_types.h 
b/libstdc++-v3/include/bits/stl_iterator_base_types.h
index 24ed016b91e..d2c36ed2ac3 100644
--- a/libstdc++-v3/include/bits/stl_iterator_base_types.h
+++ b/libstdc++-v3/include/bits/stl_iterator_base_types.h
@@ -200,7 +200,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
*  sugar for internal library use only.
   */
   template<typename _Iter>
-inline typename iterator_traits<_Iter>::iterator_category
+inline _GLIBCXX_CONSTEXPR
+typename iterator_traits<_Iter>::iterator_category
 __iterator_category(const _Iter&)
 { return typename iterator_traits<_Iter>::iterator_category(); }
 
diff --git a/libstdc++-v3/testsuite/24_iterators/container_access.cc 
b/libstdc++-v3/testsuite/24_iterators/container_access.cc
index 7f60d2bda48..b8d0e80a2de 100644
--- a/libstdc++-v3/testsuite/24_iterators/container_access.cc
+++ b/libstdc++-v3/testsuite/24_iterators/container_access.cc
@@ -1,4 +1,4 @@
-// { dg-do run }
+// { dg-do run { target c++1z } }
 // { dg-options "-std=gnu++17" }
 
 // Copyright (C) 2015-2017 Free Software Foundation, Inc.
@@ -62,7 +62,6 @@ test03()
   static_assert(s == 3);
   constexpr auto e = std::empty(il3);
   static_assert(!e);
-
 }
 
 void
diff --git a/libstdc++-v3/testsuite/24_iterators/range_access.cc 
b/libstdc++-v3/testsuite/24_iterators/range_access.cc
index 6ae5499b0d8..5b978b630e1 100644
--- a/libstdc++-v3/testsuite/24_iterators/range_access.cc
+++ b/libstdc++-v3/testsuite/24_iterators/range_access.cc
@@ -17,7 +17,7 @@
 // with this library; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.
 
-// 24.6.5, range access [iterator.range]
+// C++ 2011 24.6.5, range access [iterator.range]
 
 #include 
 
diff --git a/libstdc++-v3/testsuite/24_iterators/range_access_cpp14.cc 
b/libstdc++-v3/testsuite/24_iterators/range_access_cpp14.cc
index 138b189aaa6..eb75d92a463 100644
--- a/libstdc++-v3/testsuite/24_iterators/range_access_cpp14.cc
+++ b/libstdc++-v3/testsuite/24_iterators/range_access_cpp14.cc
@@ -17,7 +17,7 @@
 // with this library; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.
 
-// 24.6.5, range access [iterator.range]
+// C++ 2014 24.7, range access [iterator.range]
 
 #include 
 #include 
diff --git a/libstdc++-v3/testsuite/24_iterators/range_access_cpp17.cc 
b/libstdc++-v3/testsuite/24_iterators/range_access_cpp17.cc
new file mode 100644
index 000..1d5b0739007
--- /dev/null
+++ b/libstdc++-v3/testsuite/24_iterators/range_access_cpp17.cc
@@ -0,0 +1,57 @@
+// { dg-do compile { target c++1z } }
+// { dg-options "-std=gnu++17" }
+
+// Copyright (C) 2017 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+// C++ 2017 27.7, range access [iterator.range]
+
+#include <iterator>
+
+void
+test01()
+{
+  using std::reverse_iterator;
+  static int i[1];
+  static_assert(std::cbegin(i) == i);
+  static_assert(std::cend(i) == i+1);
+  static_assert(std::rbegin(i) == reverse_iterator

[PATCH, i386]: Use btr/bts/btc some more (PR target/46091)

2017-08-21 Thread Uros Bizjak
Hello!

Attached patch adds btr/bts/btc patterns with variable count operands,
so combine is able to synthesize these instructions from shift+logic
operations.

2017-08-21  Uros Bizjak  

PR target/46091
* config/i386/i386.md (*btsq_imm): Rename from *btsq.
(*btrq_imm): Rename from *btrq.
(*btcq_imm): Rename from *btcq.
(btsc): New code attribute.
(*<btsc><mode>): New insn pattern.
(*btr<mode>): Ditto.
(*<btsc><mode>_mask): New insn_and_split pattern.
(*btr<mode>_mask): Ditto.

testsuite/ChangeLog:

2017-08-21  Uros Bizjak  

PR target/46091
* gcc.target/i386/pr46091-4.c: New test.
* gcc.target/i386/pr46091-4a.c: Ditto.
* gcc.target/i386/pr46091-5.c: Ditto.
* gcc.target/i386/pr46091-5a.c: Ditto.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 251233)
+++ config/i386/i386.md (working copy)
@@ -1081,6 +1081,9 @@
 ;; Immediate operand constraint for shifts.
 (define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
 
+;; Print register name in the specified mode.
+(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
+
 ;; General operand predicate for integer modes.
 (define_mode_attr general_operand
[(QI "general_operand")
@@ -10998,20 +11001,103 @@
 
 ;; Bit set / bit test instructions
 
-;; %%% bts, btr, btc, bt.
-;; In general these instructions are *slow* with variable operand
-;; when applied to memory.  When applied to registers, it depends
-;; on the cpu implementation.  They're never faster than the
-;; corresponding and/ior/xor operations, so with 32-bit there's
-;; no point.  But in 64-bit, we can't hold the relevant immediates
-;; within the instruction itself, so operating on bits in the high
-;; 32-bits of a register becomes easier.
+;; %%% bts, btr, btc
+
+;; These instructions are *slow* when applied to memory.
+
+(define_code_attr btsc [(ior "bts") (xor "btc")])
+
+(define_insn "*"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (any_or:SWI48
+ (ashift:SWI48 (const_int 1)
+   (match_operand:QI 1 "register_operand" "r"))
+ (match_operand:SWI48 2 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT"
+  "{}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "znver1_decode" "double")
+   (set_attr "mode" "")])
+
+;; Avoid useless masking of count operand.
+(define_insn_and_split "*_mask"
+  [(set (match_operand:SWI48 0 "register_operand")
+   (any_or:SWI48
+ (ashift:SWI48
+   (const_int 1)
+   (subreg:QI
+ (and:SI
+   (match_operand:SI 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")) 0))
+ (match_operand:SWI48 3 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode)-1))
+   == GET_MODE_BITSIZE (mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+ [(set (match_dup 0)
+  (any_or:SWI48
+(ashift:SWI48 (const_int 1)
+  (match_dup 1))
+(match_dup 3)))
+  (clobber (reg:CC FLAGS_REG))])]
+  "operands[1] = gen_lowpart (QImode, operands[1]);")
+
+(define_insn "*btr"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+   (and:SWI48
+ (rotate:SWI48 (const_int -2)
+   (match_operand:QI 1 "register_operand" "r"))
+   (match_operand:SWI48 2 "nonimmediate_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_USE_BT"
+  "btr{}\t{%1, %0|%0, %1}"
+  [(set_attr "type" "alu1")
+   (set_attr "prefix_0f" "1")
+   (set_attr "znver1_decode" "double")
+   (set_attr "mode" "")])
+
+;; Avoid useless masking of count operand.
+(define_insn_and_split "*btr_mask"
+  [(set (match_operand:SWI48 0 "register_operand")
+   (and:SWI48
+ (rotate:SWI48
+   (const_int -2)
+   (subreg:QI
+ (and:SI
+   (match_operand:SI 1 "register_operand")
+   (match_operand:SI 2 "const_int_operand")) 0))
+ (match_operand:SWI48 3 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(INTVAL (operands[2]) & (GET_MODE_BITSIZE (mode)-1))
+   == GET_MODE_BITSIZE (mode)-1
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(parallel
+ [(set (match_dup 0)
+  (and:SWI48
+(rotate:SWI48 (const_int -2)
+  (match_dup 1))
+(match_dup 3)))
+  (clobber (reg:CC FLAGS_REG))])]
+  "operands[1] = gen_lowpart (QImode, operands[1]);")
+
+;; These instructions are never faster than the corresponding
+;; and/ior/xor operations when using immediate operand, so with
+;; 32-bit there's no point.  But in 64-bit, we can't hold the
+;; relevant immediates within the instruction itself, so operating
+;; on bits in the h

Re: [PATCH] Don't override user alignment with the same value

2017-08-21 Thread H.J. Lu
On Mon, Aug 21, 2017 at 12:59 AM, Richard Biener
 wrote:
> On Sat, Aug 19, 2017 at 10:18 PM, H.J. Lu  wrote:
>> Don't override alignment specified by user with the same value to
>> preserve TYPE_USER_ALIGN.  This fixes PR 53037 tests on Sparc.
>>
>> Does it look right?
>
> Doesn't match do_type_align so it introduces inconsistencies.  The 
> documentation
> for TYPE_USER_ALIGN doesn't specify when both cases conflict:
>
> /* 1 if the alignment for this type was requested by "aligned" attribute,
>0 if it is the default for this type.  */
>
> Note that for example the vectorizer looks at DECL_USER_ALIGN  (for
> non-field-decls)
> to decide whether it can increase alignment.
>
> Richard.
>
>>
>> H.J.
>> --
>> * stor-layout.c (finalize_type_size): Don't override alignment
>> specified by user with the same value.
>> ---
>>  gcc/stor-layout.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
>> index 3028d55773a..6dd605810ac 100644
>> --- a/gcc/stor-layout.c
>> +++ b/gcc/stor-layout.c
>> @@ -1784,7 +1784,7 @@ finalize_type_size (tree type)
>>
>>/* Don't override a larger alignment requirement coming from a user
>>  alignment of one of the fields.  */
>> -  if (mode_align >= TYPE_ALIGN (type))
>> +  if (mode_align > TYPE_ALIGN (type))
>> {
>>   SET_TYPE_ALIGN (type, mode_align);
>>   TYPE_USER_ALIGN (type) = 0;
>> --
>> 2.13.5
>>

According to Eric:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53037#c32

"if (mode_align >= TYPE_ALIGN (type))" was intentional.  I am
not familiar with STRICT_ALIGNMENT target and there is no
testcase to show why it is needed.


-- 
H.J.


[PATCH, gcc-7-branch] Backport PR80038

2017-08-21 Thread Xi Ruoyao
On 2017-04-25 09:30 -0600, Jeff Law wrote:
> On 04/14/2017 06:44 PM, Xi Ruoyao wrote:
> > On 2017-04-14 15:00 +0800, Xi Ruoyao wrote:
> > > On 2017-04-13 09:05 +0200, Richard Biener wrote:
> > > 
> > > > Did you verify LTO bootstrap still works with the patch?
> > > 
> > > I've just done a LTO bootstrapp (boarding a train :) ).
> > > It works with my patch.
> > 
> > I've done dejagnu tests in lto.exp and built a Linux kernel
> > with lto bootstrapped GCC.   They seem good.
> 
> Given Richi's general agreement around the in_lto_p, let's go with the 
> patch on the trunk only.
> 
> If you get positive feedback from Jan, then this can be backported to 
> gcc-7 after it's been on the trunk for at least a week.

We have 15 weeks now :)

Re-tested on gcc-7-branch with lto-bootstrap.  No regressions.  Is it
OK to backport this to gcc-7-branch?
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH, gcc-7-branch] Backport PR80038

2017-08-21 Thread Xi Ruoyao
On 2017-08-21 23:37 +0800, Xi Ruoyao wrote:
> On 2017-04-25 09:30 -0600, Jeff Law wrote:
> > On 04/14/2017 06:44 PM, Xi Ruoyao wrote:
> > > On 2017-04-14 15:00 +0800, Xi Ruoyao wrote:
> > > > On 2017-04-13 09:05 +0200, Richard Biener wrote:
> > > > 
> > > > > Did you verify LTO bootstrap still works with the patch?
> > > > 
> > > > I've just done a LTO bootstrapp (boarding a train :) ).
> > > > It works with my patch.
> > > 
> > > I've done dejagnu tests in lto.exp and built a Linux kernel
> > > with lto bootstrapped GCC.   They seem good.
> > 
> > Given Richi's general agreement around the in_lto_p, let's go with the 
> > patch on the trunk only.
> > 
> > If you get positive feedback from Jan, then this can be backported to 
> > gcc-7 after it's been on the trunk for at least a week.
> 
> We have 15 weeks now :)
> 
> Re-tested on gcc-7-branch with lto-bootstrap.  No regressions.  Is it
> OK to backport this to gcc-7-branch?

I was too stupid so I didn't attach the patch :(

It's attached here.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University

From dc5359834ea62ea547cee6608db59b45c25b3263 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao 
Date: Mon, 21 Aug 2017 09:41:59 +0800
Subject: [PATCH] Destroy temps for _Cilk_spawn calling in the child (PR
 c++/80038)

Backport r247446 and r247508 from trunk.

gcc/ChangeLog:

2017-08-21  Xi Ruoyao  

	PR c++/80038
	* cilk_common.c (expand_builtin_cilk_detach): Move pedigree
	operations here.
	* gimplify.c (gimplify_cilk_detach): New function.
	(gimplify_call_expr, gimplify_modify_expr): Call it as needed.
	* tree-core.h: Document EXPR_CILK_SPAWN.
	* tree.h (EXPR_CILK_SPAWN): Define.

gcc/c-family/ChangeLog:

2017-08-21  Xi Ruoyao 

	PR c++/80038
	* c-common.h (cilk_gimplify_call_params_in_spawned_fn): Remove
	prototype.
	(cilk_install_body_pedigree_operations): Likewise.
	* cilk.c (cilk_set_spawn_marker): Mark functions that should be
	detached.
	(cilk_gimplify_call_params_in_spawned_fn): Remove.
	(cilk_install_body_pedigree_operations): Likewise.
	(gimplify_cilk_spawn): Add EXPR_STMT and CLEANUP_POINT_EXPR
	unwrapping.

gcc/c/ChangeLog:

2017-08-21  Xi Ruoyao 

	PR c++/80038
	* c-gimplify.c (c_gimplify_expr): Remove calls to
	cilk_gimplify_call_params_in_spawned_fn.

gcc/cp/ChangeLog:

2017-08-21  Xi Ruoyao 

	PR c++/80038
	* cp-cilkplus.c (cilk_install_body_with_frame_cleanup): Don't
	add pedigree operation and detach call here.
	* cp-gimplify.c (cp_gimplify_expr): Remove the calls to
	cilk_cp_gimplify_call_params_in_spawned_fn.
	(cilk_cp_gimplify_call_params_in_spawned_fn): Remove function.
	* semantics.c (simplify_aggr_init_expr): Copy EXPR_CILK_SPAWN.

gcc/lto/ChangeLog:

2017-08-21  Xi Ruoyao 

	PR c++/80038
	* lto-lang.c (lto_init): Set in_lto_p earlier.

gcc/testsuite/ChangeLog:

2017-08-21  Xi Ruoyao 

	PR c++/80038
	* g++.dg/cilk-plus/CK/pr80038.cc: New test.
---
 gcc/c-family/c-common.h  |   2 -
 gcc/c-family/c-gimplify.c|  10 +--
 gcc/c-family/cilk.c  | 102 +++
 gcc/c/c-typeck.c |   8 +--
 gcc/cilk-common.c|  49 +
 gcc/cp/cp-cilkplus.c |   6 +-
 gcc/cp/cp-gimplify.c |  40 ++-
 gcc/cp/semantics.c   |   2 +
 gcc/gimplify.c   |  21 ++
 gcc/lto/lto-lang.c   |   6 +-
 gcc/testsuite/g++.dg/cilk-plus/CK/pr80038.cc |  47 
 gcc/tree-core.h  |   4 ++
 gcc/tree.h   |   6 ++
 13 files changed, 150 insertions(+), 153 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cilk-plus/CK/pr80038.cc

diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index b933342..138a0a6 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1463,7 +1463,6 @@ extern bool is_cilkplus_vector_p (tree);
 extern tree insert_cilk_frame (tree);
 extern void cilk_init_builtins (void);
 extern int gimplify_cilk_spawn (tree *);
-extern void cilk_gimplify_call_params_in_spawned_fn (tree *, gimple_seq *);
 extern void cilk_install_body_with_frame_cleanup (tree, tree, void *);
 extern bool cilk_detect_spawn_and_unwrap (tree *);
 extern bool cilk_set_spawn_marker (location_t, tree);
@@ -1471,7 +1470,6 @@ extern tree build_cilk_sync (void);
 extern tree build_cilk_spawn (location_t, tree);
 extern tree make_cilk_frame (tree);
 extern tree create_cilk_function_exit (tree, bool, bool);
-extern tree cilk_install_body_pedigree_operations (tree);
 extern void cilk_outline (tree, tree *, void *);
 extern bool contains_cilk_spawn_stmt (tree);
 extern tree cilk_for_number_of_iterations (tree);
diff --git a/gcc/c-family/c-gimplify.c b/gcc/c-family/c-gimplify.c
index 57edb41..1ae75d2 100644
--- a/gcc/c-family/c-gimplify.c
+++ b/gcc/c-family/c-gimplify.c
@@ -280,10 +280,7 @@ c_gimplify_expr (tree *expr_p, gimpl

Re: [c++/81899] bound tpl-tpl-parm ICE

2017-08-21 Thread Nathan Sidwell

I missed a c++-03 requirement about friends.  Fixed thusly.

Also fixed the recent debug9 failure for stabs debugging.  That test 
appears to be a dwarf test, but as there's no dg-requires-dwarf the 
simplest fix was turning off block partitioning optimization (enabled on 
x86 automatically at O2 and above).


nathan
--
Nathan Sidwell
2017-08-21  Nathan Sidwell  

	* g++.dg/template/pr81899.C: Fix c++03.
	* g++.dg/debug/debug9.C: Add -fno-reorder-blocks-and-partition.

Index: g++.dg/debug/debug9.C
===
--- g++.dg/debug/debug9.C	(revision 251221)
+++ g++.dg/debug/debug9.C	(working copy)
@@ -1,4 +1,9 @@
 /* { dg-do assemble } */
+/* Partitioning causes hot/cold section emission and breaks stabs
+   debugging.  */
+/* { dg-additional-options "-fno-reorder-blocks-and-partition" } */
+
+
 /* This testcase requires entries in the debug_range section in DWARF which
refer to a vague linkage function.  */
 
Index: g++.dg/template/pr81899.C
===
--- g++.dg/template/pr81899.C	(revision 251227)
+++ g++.dg/template/pr81899.C	(working copy)
@@ -2,7 +2,7 @@
 
 template  class FunctorData>
 struct functor {
-  friend FunctorData;
+  friend class FunctorData;
   void foo();
 };
 


[C++ PATCH] class member lookup

2017-08-21 Thread Nathan Sidwell
This small patch adds asserts that we never get to look for a field in a 
non-class.  I'm moving this code around on the name-lookup branch, but 
as Jakub found & fixed a previous surprise of this ilk, I thought I'd put 
this straight on trunk to get wider exposure of the invariant.


I also noticed that fuzzy lookup no longer needed to look at METHOD_VEC, 
as member fns are on TYPE_FIELDS now.  David, FWIW it looks like that 
lookup could be less twisty if fuzzy_lookup_fields was a static member 
fn taking a void *?


nathan

--
Nathan Sidwell
2017-08-21  Nathan Sidwell  

	* search.c (lookup_field_1): Assert TYPE is a class and VFIELD
	isn't special.
	(lookup_field_fuzzy_info::fuzzy_lookup_fnfields): Delete.
	(lookup_field_fuzzy_r): Adjust.

Index: search.c
===
--- search.c	(revision 251221)
+++ search.c	(working copy)
@@ -371,18 +371,7 @@ lookup_field_1 (tree type, tree name, bo
 {
   tree field;
 
-  gcc_assert (identifier_p (name));
-
-  if (TREE_CODE (type) == TEMPLATE_TYPE_PARM
-  || TREE_CODE (type) == BOUND_TEMPLATE_TEMPLATE_PARM
-  || TREE_CODE (type) == TYPENAME_TYPE)
-/* The TYPE_FIELDS of a TEMPLATE_TYPE_PARM and
-   BOUND_TEMPLATE_TEMPLATE_PARM are not fields at all;
-   instead TYPE_FIELDS is the TEMPLATE_PARM_INDEX.  (Miraculously,
-   the code often worked even when we treated the index as a list
-   of fields!)
-   The TYPE_FIELDS of TYPENAME_TYPE is its TYPENAME_TYPE_FULLNAME.  */
-return NULL_TREE;
+  gcc_assert (identifier_p (name) && RECORD_OR_UNION_TYPE_P (type));
 
   if (CLASSTYPE_SORTED_FIELDS (type))
 {
@@ -474,13 +463,11 @@ lookup_field_1 (tree type, tree name, bo
 	  && (!want_type || DECL_DECLARES_TYPE_P (decl)))
 	return decl;
 }
-  /* Not found.  */
-  if (name == vptr_identifier)
-{
-  /* Give the user what s/he thinks s/he wants.  */
-  if (TYPE_POLYMORPHIC_P (type))
-	return TYPE_VFIELD (type);
-}
+
+  /* We used to special-case vptr_identifier.  Make sure it's not
+ special any more.  */
+  gcc_assert (name != vptr_identifier || !TYPE_VFIELD (type));
+
   return NULL_TREE;
 }
 
@@ -1374,7 +1361,6 @@ class lookup_field_fuzzy_info
   lookup_field_fuzzy_info (bool want_type_p) :
 m_want_type_p (want_type_p), m_candidates () {}
 
-  void fuzzy_lookup_fnfields (tree type);
   void fuzzy_lookup_field (tree type);
 
   /* If true, we are looking for types, not data members.  */
@@ -1383,27 +1369,6 @@ class lookup_field_fuzzy_info
   auto_vec m_candidates;
 };
 
-/* Locate all methods within TYPE, append them to m_candidates.  */
-
-void
-lookup_field_fuzzy_info::fuzzy_lookup_fnfields (tree type)
-{
-  vec *method_vec;
-  tree fn;
-  size_t i;
-
-  if (!CLASS_TYPE_P (type))
-return;
-
-  method_vec = CLASSTYPE_METHOD_VEC (type);
-  if (!method_vec)
-return;
-
-  for (i = 0; vec_safe_iterate (method_vec, i, &fn); ++i)
-if (fn)
-  m_candidates.safe_push (OVL_NAME (fn));
-}
-
 /* Locate all fields within TYPE, append them to m_candidates.  */
 
 void
@@ -1432,11 +1397,6 @@ lookup_field_fuzzy_r (tree binfo, void *
   lookup_field_fuzzy_info *lffi = (lookup_field_fuzzy_info *) data;
   tree type = BINFO_TYPE (binfo);
 
-  /* First, look for functions.  */
-  if (!lffi->m_want_type_p)
-lffi->fuzzy_lookup_fnfields (type);
-
-  /* Look for data member and types.  */
   lffi->fuzzy_lookup_field (type);
 
   return NULL_TREE;


Re: Clobbers and Scratch Registers

2017-08-21 Thread Richard Sandiford
Thanks for doing this.

Alan Modra  writes:
> This is a revised version of
> https://gcc.gnu.org/ml/gcc-patches/2017-03/msg01562.html limited to
> showing just the scratch register aspect, as a followup to
> https://gcc.gnu.org/ml/gcc-patches/2017-08/msg01174.html 
>
>   * doc/extend.texi (Extended Asm <Clobbers>): Rename to
>   "Clobbers and Scratch Registers".  Add paragraph on
>   alternative to clobbers for scratch registers and OpenBLAS
>   example.
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 940490e..0637672 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -8075,7 +8075,7 @@ A comma-separated list of C expressions read by the 
> instructions in the
>  @item Clobbers
>  A comma-separated list of registers or other values changed by the 
>  @var{AssemblerTemplate}, beyond those listed as outputs.
> -An empty list is permitted.  @xref{Clobbers}.
> +An empty list is permitted.  @xref{Clobbers and Scratch Registers}.
>  
>  @item GotoLabels
>  When you are using the @code{goto} form of @code{asm}, this section contains 
> @@ -8435,7 +8435,7 @@ The enclosing parentheses are a required part of the 
> syntax.
>  
>  When the compiler selects the registers to use to 
>  represent the output operands, it does not use any of the clobbered 
> registers 
> -(@pxref{Clobbers}).
> +(@pxref{Clobbers and Scratch Registers}).
>  
>  Output operand expressions must be lvalues. The compiler cannot check 
> whether 
>  the operands have data types that are reasonable for the instruction being 
> @@ -8671,7 +8671,8 @@ as input.  The enclosing parentheses are a required 
> part of the syntax.
>  @end table
>  
>  When the compiler selects the registers to use to represent the input 
> -operands, it does not use any of the clobbered registers (@pxref{Clobbers}).
> +operands, it does not use any of the clobbered registers
> +(@pxref{Clobbers and Scratch Registers}).
>  
>  If there are no output operands but there are input operands, place two 
>  consecutive colons where the output operands would go:
> @@ -8722,9 +8723,10 @@ asm ("cmoveq %1, %2, %[result]"
> : "r" (test), "r" (new), "[result]" (old));
>  @end example
>  
> -@anchor{Clobbers}
> -@subsubsection Clobbers
> +@anchor{Clobbers and Scratch Registers}
> +@subsubsection Clobbers and Scratch Registers
>  @cindex @code{asm} clobbers
> +@cindex @code{asm} scratch registers
>  
>  While the compiler is aware of changes to entries listed in the output 
>  operands, the inline @code{asm} code may modify more than just the outputs. 
> For 
> @@ -8853,6 +8855,65 @@ dscal (size_t n, double *x, double alpha)
>  @}
>  @end smallexample
>  
> +Rather than allocating fixed registers via clobbers to provide scratch
> +registers for an @code{asm} statement, an alternative is to define a
> +variable and make it an early-clobber output as with @code{a2} and
> +@code{a3} in the example below.  This gives the compiler register
> +allocator more freedom.  You can also define a variable and make it an
> +output tied to an input as with @code{a0} and @code{a1}, tied
> +respectively to @code{ap} and @code{lda}.

I think it's worth emphasising that tying operands doesn't change
whether an output needs an earlyclobber or not.  E.g. for:

  asm ("%0 = f(%1); use %2"
   : "=r" (a) : "0" (b), "r" (c));

the compiler can assign the same register to all three operands if
it can prove that b == c on entry.  Since %0 is being modified before
%2 is used, it needs to be:

  asm ("%0 = f(%1); use %2"
   : "=&r" (a) : "0" (b), "r" (c));

instead.

Thanks,
Richard

> Of course, with tied
> +outputs your @code{asm} can't use the input value after modifying the
> +output register since they are one and the same register.  Note also
> +that tying an input to an output is the way to set up an initialized
> +temporary register modified by an @code{asm} statement.  An input not
> +tied to an output is assumed by GCC to be unchanged, for example
> +@code{"b" (16)} below sets up @code{%11} to 16, and GCC might use that
> +register in following code if the value 16 happened to be needed.  You
> +can even use a normal @code{asm} output for a scratch if all inputs
> +that might share the same register are consumed before the scratch is
> +used.  The VSX registers clobbered by the @code{asm} statement could
> +have used this technique except for GCC's limit on the number of
> +@code{asm} parameters.
> +
> +@smallexample
> +static void
> +dgemv_kernel_4x4 (long n, const double *ap, long lda,
> +  const double *x, double *y, double alpha)
> +@{
> +  double *a0;
> +  double *a1;
> +  double *a2;
> +  double *a3;
> +
> +  __asm__
> +(
> + /* lots of asm here */
> + "#n=%1 ap=%8=%12 lda=%13 x=%7=%10 y=%0=%2 alpha=%9 o16=%11\n"
> + "#a0=%3 a1=%4 a2=%5 a3=%6"
> + :
> +   "+m" (*(double (*)[n]) y),
> +   "+r" (n), // 1
> +   "+b" (y), // 2
> +   "=b" (a0),// 3
> +   "=b" (a1),// 4
> +   "

Re: [PATCH] [Aarch64] Optimize subtract in shift counts

2017-08-21 Thread Richard Sandiford
Richard Biener  writes:
> On Tue, Aug 8, 2017 at 10:20 PM, Richard Kenner
>  wrote:
>>> Correct. It is truncated for integer shift, but not simd shift
>>> instructions. We generate a pattern in the split that only generates
>>> the integer shift instructions.
>>
>> That's unfortunate, because it would be nice to do this in simplify_rtx,
>> since it's machine-independent, but that has to be conditioned on
>> SHIFT_COUNT_TRUNCATED, so you wouldn't get the benefit of it.
>
> SHIFT_COUNT_TRUNCATED should go ... you should express this in
> the patterns, like for example with
>
> (define_insn ashlSI3
>   [(set (match_operand 0 "")
>  (ashl:SI (match_operand ... )
>  (subreg:QI (match_operand:SI ...)))]
>
> or an explicit and:SI and combine / simplify_rtx should apply the magic
> optimization we expect.

The problem with the explicit AND is that you'd end up with either
an AND of two constants for constant shifts, or with two separate patterns,
one for constant shifts and one for variable shifts.  (And the problem in
theory with two patterns is that it reduces the RA's freedom, although in
practice I guess we'd always want a constant shift where possible for
cost reasons, and so the RA would never need to replace pseudos with
constants itself.)

I think all useful instances of this optimisation will be exposed by
the gimple optimisers, so maybe expand could do it based on
TARGET_SHIFT_TRUNCATION_MASK?  That describes the optab rather than
the rtx code and it does take the mode into account.

Thanks,
Richard


Re: [PATCH] [Aarch64] Optimize subtract in shift counts

2017-08-21 Thread Richard Biener
On August 21, 2017 7:46:09 PM GMT+02:00, Richard Sandiford 
 wrote:
>Richard Biener  writes:
>> On Tue, Aug 8, 2017 at 10:20 PM, Richard Kenner
>>  wrote:
>>>> Correct. It is truncated for integer shift, but not simd shift
>>>> instructions. We generate a pattern in the split that only generates
>>>> the integer shift instructions.
>>>
>>> That's unfortunate, because it would be nice to do this in
>simplify_rtx,
>>> since it's machine-independent, but that has to be conditioned on
>>> SHIFT_COUNT_TRUNCATED, so you wouldn't get the benefit of it.
>>
>> SHIFT_COUNT_TRUNCATED should go ... you should express this in
>> the patterns, like for example with
>>
>> (define_insn ashlSI3
>>   [(set (match_operand 0 "")
>>  (ashl:SI (match_operand ... )
>>  (subreg:QI (match_operand:SI ...)))]
>>
>> or an explicit and:SI and combine / simplify_rtx should apply the
>magic
>> optimization we expect.
>
>The problem with the explicit AND is that you'd end up with either
>an AND of two constants for constant shifts, or with two separate
>patterns,
>one for constant shifts and one for variable shifts.  (And the problem
>in
>theory with two patterns is that it reduces the RA's freedom, although
>in
>practice I guess we'd always want a constant shift where possible for
>cost reasons, and so the RA would never need to replace pseudos with
>constants itself.)
>
>I think all useful instances of this optimisation will be exposed by
>the gimple optimisers, so maybe expand could do it based on
>TARGET_SHIFT_TRUNCATION_MASK?  That describes the optab rather than
>the rtx code and it does take the mode into account.

Sure, that could work as well and also take into account range info. But we'd 
then need named expanders and the result would still have the explicit and or 
need to be an unspec or a different RTL operation. 

Richard. 

>Thanks,
>Richard



[C++ PATCH]: Dead code-ectomy

2017-08-21 Thread Nathan Sidwell
Some more dead code in class lookup.  It's not been well formed to allow 
class fields to be the type-name for a very long time now.


I booted with a gcc-unreachable inside the elided loop condition, before 
ripping the loop out completely.


Again, applied to trunk to get wider exposure.

nathan
--
Nathan Sidwell
2017-08-21  Nathan Sidwell  

	* search.c (lookup_field_r): Remove obsolete code for type-named
	field in PoD.

Index: search.c
===
--- search.c	(revision 251241)
+++ search.c	(working copy)
@@ -,39 +,22 @@ lookup_field_r (tree binfo, void *data)
 	nval = dep_using;
 }
 
-  /* If there is no declaration with the indicated name in this type,
- then there's nothing to do.  */
-  if (!nval)
-goto done;
-
   /* If we're looking up a type (as with an elaborated type specifier)
  we ignore all non-types we find.  */
-  if (lfi->want_type && !DECL_DECLARES_TYPE_P (nval))
+  if (lfi->want_type && nval && !DECL_DECLARES_TYPE_P (nval))
 {
-  if (lfi->name == TYPE_IDENTIFIER (type))
-	{
-	  /* If the aggregate has no user defined constructors, we allow
-	 it to have fields with the same name as the enclosing type.
-	 If we are looking for that name, find the corresponding
-	 TYPE_DECL.  */
-	  for (nval = TREE_CHAIN (nval); nval; nval = TREE_CHAIN (nval))
-	if (DECL_NAME (nval) == lfi->name
-		&& TREE_CODE (nval) == TYPE_DECL)
-	  break;
-	}
-  else
-	nval = NULL_TREE;
-  if (!nval && CLASSTYPE_NESTED_UTDS (type) != NULL)
-	{
-	  binding_entry e = binding_table_find (CLASSTYPE_NESTED_UTDS (type),
-		lfi->name);
-	  if (e != NULL)
-	nval = TYPE_MAIN_DECL (e->type);
-	  else
-	goto done;
-	}
+  nval = NULL_TREE;
+  if (CLASSTYPE_NESTED_UTDS (type))
+	if (binding_entry e = binding_table_find (CLASSTYPE_NESTED_UTDS (type),
+		  lfi->name))
+	  nval = TYPE_MAIN_DECL (e->type);
 }
 
+  /* If there is no declaration with the indicated name in this type,
+ then there's nothing to do.  */
+  if (!nval)
+goto done;
+
   /* If the lookup already found a match, and the new value doesn't
  hide the old one, we might have an ambiguity.  */
   if (lfi->rval_binfo


Re: [PATCH] Don't override user alignment with the same value

2017-08-21 Thread Eric Botcazou
> According to Eric:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53037#c32
> 
> "if (mode_align >= TYPE_ALIGN (type))" was intentional.  I am
> not familiar with STRICT_ALIGNMENT target and there is no
> testcase to show why it is needed.

Yes, the mode promotion triggers the alignment promotion on strict-alignment 
targets and the specified alignment is thus equal to the default one, so the 
code works as intended and clears DECL_USER_ALIGN.  Not very clear what to do, 
but the most robust approach would be to use lookup_attribute on the type.
 
-- 
Eric Botcazou


Re: [patch,avr] Fix PR81910: ICE for bad attribute "address".

2017-08-21 Thread Denis Chertykov
2017-08-21 16:35 GMT+04:00 Georg-Johann Lay :
> "address" attribute must only be specified with VARs,
> yet the compiler dived into attribute analysis for
> non-VARs, resulting in ICE.
>
> This patch also adds OPT_Wattributes as warning filter.
>
> Ok to apply?

Approved.
Please apply.


>
> Johann
>
> gcc/
> PR target/81910
> * config/avr/avr.c (avr_handle_addr_attribute): Early return if
> not VAR_P. Filter attribute warnings with OPT_Wattributes.
> (avr_attribute_table) : Initialize
> .decl_required with true.


Re: std::list optimizations

2017-08-21 Thread François Dumont

On 18/08/2017 22:04, Jonathan Wakely wrote:

On 28/07/17 18:42 +0200, François Dumont wrote:

Hi

   Completing execution of the testsuite revealed a bug. So here is 
the correct version of this patch.


François

On 21/07/2017 19:14, François Dumont wrote:

Hi

   Here is a proposal for 2 optimizations in the std::list 
implementation.


   Optimization on the move constructor taking an allocator for 
always equal allocators. Compared to the version in my previous 
std::list patch, I am now doing it at std::list level rather than at 
_List_base level. This way we won't instantiate the insert call and 
we won't check for an empty list when the allocator always compares equal.


   2nd optimization, I replace the _S_distance method by the 
std::distance algo which benefits from the nice [begin(), end()) 
range optimization when cxx11 abi is being used.


   Note that I am proposing the 2 changes in 1 patch to save some 
review time but I can commit those separately.


Tested under x86_64 Linux normal mode.


   * include/bits/stl_list.h
   (_List_base<>::_S_distance): Remove.
   (_List_impl(_List_impl&&, _Node_alloc_type&&)): New.
   (_List_base(_List_base&&, _Node_alloc_type&&)): Use latter.
   (_List_base(_Node_alloc_type&&)): New.
   (_List_base<>::_M_distance, _List_base<>::_M_node_count): Move...
   (list<>::_M_distance, list<>::_M_node_count): ...here. Replace 
calls to

   _S_distance with calls to std::distance.
   (list(list&&, const allocator_type&, true_type)): New.
   (list(list&&, const allocator_type&, false_type)): New.
   (list(list&&, const allocator_type&)): Adapt to call latters.

Ok to commit ?

François






  _List_base(_List_base&&) = default;

  _List_base(_List_base&& __x, _Node_alloc_type&& __a)
+  : _M_impl(std::move(__x._M_impl), std::move(__a))
+  { }
+
+  _List_base(_Node_alloc_type&& __a)
  : _M_impl(std::move(__a))
-  {
-if (__x._M_get_Node_allocator() == _M_get_Node_allocator())
-  _M_move_nodes(std::move(__x));
-// else caller must move individual elements.
-  }
+  { }



I like this change in principle, but it alters the behaviour of an
existing constructor. Existing code might use the constructor and get
broken by this.

You can avoid that by leaving the existing constructor alone and
adding two new ones for new code to use. Reordering the parameters
will make the new one distinct from the old one:

 // Used when is_always_equal
 _List_base(_Node_alloc_type&& __a, _List_base&& __x)
 : _M_impl(std::move(__x._M_impl), std::move(__a))
 { }


I have chosen this approach and also adapted the _List_impl class to have 
the same signature, which moreover corresponds to the order of members, so 
maybe not so bad.




_M_distance could be static though, neither version uses the 'this'
pointer, so it would be called _S_distance.


Applied.



Do those suggestions make sense? The idea is to ensure that a given
function signature continues to have the same effects. To introduce
new effects, use a new signature.


Sure, I didn't consider the explicit instantiation use case, makes sense.

So here is a new version. I propose to "mark" code kept for backward abi 
compatibility with the _GLIBCXX_INLINE_VERSION. Next time we decide to 
break abi we will just need to look for this macro to know what code can 
be removed.


 Ok to commit if tests are successful ?

François


diff --git a/libstdc++-v3/include/bits/stl_list.h b/libstdc++-v3/include/bits/stl_list.h
index cef94f7..e545996 100644
--- a/libstdc++-v3/include/bits/stl_list.h
+++ b/libstdc++-v3/include/bits/stl_list.h
@@ -364,6 +364,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	rebind<_List_node<_Tp> >::other _Node_alloc_type;
   typedef __gnu_cxx::__alloc_traits<_Node_alloc_type> _Node_alloc_traits;
 
+#if !_GLIBCXX_INLINE_VERSION
   static size_t
   _S_distance(const __detail::_List_node_base* __first,
 		  const __detail::_List_node_base* __last)
@@ -376,6 +377,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 	  }
 	return __n;
   }
+#endif
 
   struct _List_impl
   : public _Node_alloc_type
@@ -393,6 +395,10 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 #if __cplusplus >= 201103L
 	_List_impl(_List_impl&&) = default;
 
+	_List_impl(_Node_alloc_type&& __a, _List_impl&& __x)
+	: _Node_alloc_type(std::move(__a)), _M_node(std::move(__x._M_node))
+	{ }
+
 	_List_impl(_Node_alloc_type&& __a) noexcept
 	: _Node_alloc_type(std::move(__a))
 	{ }
@@ -410,6 +416,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
   void _M_dec_size(size_t __n) { _M_impl._M_node._M_size -= __n; }
 
+# if !_GLIBCXX_INLINE_VERSION
   size_t
   _M_distance(const __detail::_List_node_base* __first,
 		  const __detail::_List_node_base* __last) const
@@ -417,12 +424,15 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
 
   // return the stored size
   size_t _M_node_count() const { return _M_get_size(); }
+# endif
 #else
   // dummy implementations used when the size is not stored
   size_t _M_get_size() const { return 0; }
   

Re: std::vector default default and move constructors

2017-08-21 Thread François Dumont
Following feedback on std::list patch this one had the same problem of 
unused code being deleted. So here is a new version.


Ok to commit ?

François

On 28/07/2017 18:45, François Dumont wrote:

Hi

There was a little issue in this patch, here is the correct version.

François


On 23/07/2017 19:41, François Dumont wrote:

Hi

Is it time now to consider this patch ?

* include/bits/stl_vector.h
(_Vector_impl_data): New.
(_Vector_impl): Inherit from latter.
(_Vector_impl(_Vector_impl&&, _Tp_alloc_type&&)): New.
(_Vector_base(_Vector_base&&, const allocator_type&)): Use latter.
(_Vector_base()): Default.
(_Vector_base(size_t)): Delete.
(_Vector_base(_Tp_alloc_type&&)): Delete.
(_Vector_base(_Vector_base&&)): Default.
(vector()): Default.
(vector(vector&&, const allocator_type&, true_type)): New.
(vector(vector&&, const allocator_type&, false_type)): New.
(vector(vector&&, const allocator_type&)): Use latters.

Tested under linux x86_64.

François






diff --git a/libstdc++-v3/include/bits/stl_vector.h b/libstdc++-v3/include/bits/stl_vector.h
index 69cb803..2fd7e2f 100644
--- a/libstdc++-v3/include/bits/stl_vector.h
+++ b/libstdc++-v3/include/bits/stl_vector.h
@@ -85,34 +85,54 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
   typedef typename __gnu_cxx::__alloc_traits<_Tp_alloc_type>::pointer
	pointer;
 
-  struct _Vector_impl
-  : public _Tp_alloc_type
+  struct _Vector_impl_data
   {
 	pointer _M_start;
 	pointer _M_finish;
 	pointer _M_end_of_storage;
 
-	_Vector_impl()
-	: _Tp_alloc_type(), _M_start(), _M_finish(), _M_end_of_storage()
-	{ }
-
-	_Vector_impl(_Tp_alloc_type const& __a) _GLIBCXX_NOEXCEPT
-	: _Tp_alloc_type(__a), _M_start(), _M_finish(), _M_end_of_storage()
+	_Vector_impl_data() _GLIBCXX_NOEXCEPT
+	: _M_start(), _M_finish(), _M_end_of_storage()
 	{ }
 
 #if __cplusplus >= 201103L
-	_Vector_impl(_Tp_alloc_type&& __a) noexcept
-	: _Tp_alloc_type(std::move(__a)),
-	  _M_start(), _M_finish(), _M_end_of_storage()
-	{ }
+	_Vector_impl_data(_Vector_impl_data&& __x) noexcept
+	: _M_start(__x._M_start), _M_finish(__x._M_finish),
+	  _M_end_of_storage(__x._M_end_of_storage)
+	{ __x._M_start = __x._M_finish = __x._M_end_of_storage = pointer(); }
 #endif
 
-	void _M_swap_data(_Vector_impl& __x) _GLIBCXX_NOEXCEPT
+	void
+	_M_swap_data(_Vector_impl_data& __x) _GLIBCXX_NOEXCEPT
 	{
 	  std::swap(_M_start, __x._M_start);
 	  std::swap(_M_finish, __x._M_finish);
 	  std::swap(_M_end_of_storage, __x._M_end_of_storage);
 	}
+  };
+
+  struct _Vector_impl
+	: public _Tp_alloc_type, public _Vector_impl_data
+  {
+	_Vector_impl() _GLIBCXX_NOEXCEPT_IF( noexcept(_Tp_alloc_type()) )
+	: _Tp_alloc_type()
+	{ }
+
+	_Vector_impl(_Tp_alloc_type const& __a) _GLIBCXX_NOEXCEPT
+	: _Tp_alloc_type(__a)
+	{ }
+
+#if __cplusplus >= 201103L
+	_Vector_impl(_Vector_impl&&) = default;
+
+	_Vector_impl(_Tp_alloc_type&& __a) noexcept
+	  : _Tp_alloc_type(std::move(__a))
+	{ }
+
+	_Vector_impl(_Tp_alloc_type&& __a, _Vector_impl&& __rv) noexcept
+	: _Tp_alloc_type(std::move(__a)), _Vector_impl_data(std::move(__rv))
+	{ }
+#endif
 
 #if _GLIBCXX_SANITIZE_STD_ALLOCATOR && _GLIBCXX_SANITIZE_VECTOR
 	template
@@ -235,38 +255,42 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
   _Tp_alloc_type&
   _M_get_Tp_allocator() _GLIBCXX_NOEXCEPT
-  { return *static_cast<_Tp_alloc_type*>(&this->_M_impl); }
+  { return this->_M_impl; }
 
   const _Tp_alloc_type&
   _M_get_Tp_allocator() const _GLIBCXX_NOEXCEPT
-  { return *static_cast(&this->_M_impl); }
+  { return this->_M_impl; }
 
   allocator_type
   get_allocator() const _GLIBCXX_NOEXCEPT
   { return allocator_type(_M_get_Tp_allocator()); }
 
-  _Vector_base()
-  : _M_impl() { }
+#if __cplusplus >= 201103L
+  _Vector_base() = default;
+#else
+  _Vector_base() { }
+#endif
 
   _Vector_base(const allocator_type& __a) _GLIBCXX_NOEXCEPT
   : _M_impl(__a) { }
 
+#if !_GLIBCXX_INLINE_VERSION
   _Vector_base(size_t __n)
   : _M_impl()
   { _M_create_storage(__n); }
+#endif
 
   _Vector_base(size_t __n, const allocator_type& __a)
   : _M_impl(__a)
   { _M_create_storage(__n); }
 
 #if __cplusplus >= 201103L
+  _Vector_base(_Vector_base&&) = default;
+
+# if !_GLIBCXX_INLINE_VERSION
   _Vector_base(_Tp_alloc_type&& __a) noexcept
   : _M_impl(std::move(__a)) { }
 
-  _Vector_base(_Vector_base&& __x) noexcept
-  : _M_impl(std::move(__x._M_get_Tp_allocator()))
-  { this->_M_impl._M_swap_data(__x._M_impl); }
-
   _Vector_base(_Vector_base&& __x, const allocator_type& __a)
   : _M_impl(__a)
   {
@@ -278,6 +302,11 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 	_M_create_storage(__n);
 	  }
   }
+# endif
+
+  _Vector_base(const allocator_type& __a, _Vector_base&& __x)
+  : _M_impl(_Tp_alloc_type(__a), std::move(__x._M_impl))
+  { }
 #endif
 
   ~_Vector_base() _GLIBCXX_NOEXCEPT
@@ -3

Re: [PATCH] Fix fallout from VRP strict-overflow changes

2017-08-21 Thread Martin Sebor

On 08/21/2017 01:51 AM, Richard Biener wrote:

On Sat, 19 Aug 2017, Andreas Schwab wrote:


On Aug 17 2017, Richard Biener  wrote:


I was notified I broke proper handling of undefined overflow in
multiplicative ops handling.  The following resurrects previous
behavior (and adds a testcase).

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.


This breaks gfortran.dg/alloc_comp_auto_array_2.f90 on aarch64 with
-mabi=ilp32 (only for -O3):

FAIL: gfortran.dg/alloc_comp_auto_array_2.f90   -O3 -g  (test for excess errors)
Excess errors:
/opt/gcc/gcc-20170818/gcc/testsuite/gfortran.dg/alloc_comp_auto_array_2.f90:33:0:
 Warning: '__builtin_memcpy' specified size between 2147483648 and 4294967295 
exceeds maximum object size 2147483647 [-Wstringop-overflow=]


I believe this is an issue that went latent when I broke VRP earlier.

I have opened PR81908, will amend with some initial analysis.


FWIW, the core of the problem is that the warning tends to either
expose limitations in optimizations that were not written to make
use of range information, or indicate additional optimization
opportunities due to range information.  In this case, since
the only valid value in the range the memcpy argument is in (i.e.,
~[0, INT_MAX]) is zero, the call could be eliminated.  But this
isn't noticed by any pass until the expander checks the call for
validity.

It seems to me that this could be handled by enhancing gimple-fold
in two ways: 1) fold arguments whose range contains only one valid
value into constants, and 2) transform calls with one or more
arguments entirely in invalid ranges into __builtin_unreachable.

I have been thinking of prototyping this solution for a while but
haven't gotten around to it yet so I can't say what problems it
might run into.

Martin


Re: [PING #2] [PATCH] enhance -Wrestrict to handle string built-ins (PR 78918)

2017-08-21 Thread Martin Sebor

On 08/09/2017 10:14 AM, Jeff Law wrote:

On 08/06/2017 05:08 PM, Martin Sebor wrote:



Well, simply because the way as implemented isn't a must-alias query
but maybe one that's good enough for warnings (reduces false positives
but surely doesn't eliminate them).


I'm very interested in reducing the rate of false positives in
these and all other warnings.  As I mentioned in my comments,
I did my best to weed them out of the implementation by building
GDB, Glibc, Busybox, and the Linux kernel.  That of course isn't
a guarantee that there aren't any.  But the first implementation
of any non-trivial feature is never perfect, and hardly any
warning of sufficient complexity is free of false positives, no
matter where it's implemented (the middle-end, front-end, or
a standalone static analysis tool).  If you spotted some cases
I had missed I'd certainly be grateful for examples.  Otherwise,
they will undoubtedly be reported as more software is exposed
to the warning and, if possible, fixed, as happens with all
other warnings.

I think Richi is saying that the must alias query you've built isn't
proper/correct.  It's less about false positives for Richi and more
about building a proper must alias query if I understand him correctly.

I suspect he's also saying that you can't reasonably build must-alias on
top of a may-alias query framework.  They're pretty different queries.

If you need something that is close to, but not quite a must alias
query, then you're going to have to make an argument for that and you
can't call it a must alias query.


Attached is an updated and simplified patch that avoids making
changes to any of the may-alias functions.  It turns out that
all the information the logic needs to determine the overlap
is already in the ao_ref structures populated by
ao_ref_init_from_ptr_and_size.  The only changes to the pass
are the enhancement to ao_ref_init_from_ptr_and_size to make
use of range info and the addition of the two new functions
used by the -Wrestrict clients outside the pass.

Martin
PR middle-end/78918 - missing -Wrestrict on memcpy copying over self

gcc/ChangeLog:

	PR middle-end/78918
	* builtins.c (warn_for_overlap, maybe_warn_for_overlap): New.
	(check_sizes): Add argument and call maybe_warn_for_overlap.
	Rename function arguments for clarity.
	(check_memop_sizes): Adjust.
	(expand_builtin_memchr): Ditto.
	(expand_builtin_strcat): Ditto.
	(expand_builtin_strcpy): Ditto.
	(expand_builtin_stpcpy): Ditto.
	(expand_builtin_stpncpy): Ditto.
	(expand_builtin_strncpy): Ditto.
	(expand_builtin_memcmp): Ditto.
	(expand_builtin_memory_chk): Ditto.
	(check_strncat_sizes): Ditto.  Rename locals for clarity.
	(expand_builtin_strncat): Ditto.
	(maybe_emit_chk_warning): Ditto.
	(maybe_emit_sprintf_chk_warning): Adjust.
	* cfgexpand.c (expand_call_stmt): Set TREE_NO_WARNING.
	* gimple-fold.c (gimple_fold_builtin_memory_op): Handle -Wrestrict.
	(gimple_fold_builtin_strcpy): Ditto.
	(gimple_fold_builtin_memory_chk): Ditto.
	(gimple_fold_builtin_stxcpy_chk): Ditto.
	* gimple.c (gimple_build_call_from_tree): Set call location.
	* tree-ssa-alias.h (refs_overlap, detect_overlap): New functions.
	* tree-ssa-alias.c (refs_overlap, detect_overlap): Define.
	* tree-ssa-strlen.c (handle_builtin_strcpy): Handle -Wrestrict.
	(handle_builtin_strcat): Ditto.

gcc/c-family/ChangeLog:

	PR middle-end/78918
	* c-common.c (check_function_restrict): Suppress warning for
	built-in functions.
	* c.opt (-Wrestrict): Include in -Wall.

gcc/testsuite/ChangeLog:

	PR middle-end/78918
	* c-c++-common/Wrestrict.c: New test.
	* gcc.dg/Walloca-1.c: Suppress macro expansion tracking.
	* gcc.dg/pr69172.c: Prune -Wrestrict.

diff --git a/gcc/builtins.c b/gcc/builtins.c
index 2deef72..e0fd9a7 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3036,39 +3036,303 @@ expand_builtin_memcpy_args (tree dest, tree src, tree len, rtx target, tree exp)
   return dest_addr;
 }
 
+/* Issue a warning for a restricted copy call expression EXP to a built-in
+   function FUNC, with a destination of size DSTSIZE, size of copy in RANGE,
+   and with OVERLAP bytes at offset OFFRANGE.  MUST_OVERLAP is true when
+   the overlap is certain, false when it is likely.  */
+
+static void
+warn_for_overlap (tree exp, tree func, bool must_overlap, tree dstsize,
+		  const tree range[2], unsigned HOST_WIDE_INT overlap,
+		  const unsigned HOST_WIDE_INT offrange[2])
+{
+  location_t loc = tree_nonartificial_location (exp);
+  loc = expansion_point_location_if_in_system_header (loc);
+
+  /* To avoid combinatorial explosion of diagnostics format the offset
+ or its range as a string and use it in the warning calls below.  */
+  char offstr[64];
+  if (offrange[0] == offrange[1] || offrange[1] > HOST_WIDE_INT_MAX)
+sprintf (offstr, "%llu", (long long) offrange[0]);
+  else
+sprintf (offstr, "%llu - %llu", (long long) offrange[0],
+	 (long long) offrange[1]);
+
+  /* The text uses the term "writing N bytes" even though most operations
+ in

[PING][PATCH][compare-elim] Merge zero-comparisons with normal ops

2017-08-21 Thread Michael Collison
Ping. Original patch here:

https://gcc.gnu.org/ml/gcc-patches/2017-08/msg00766.html





[PATCH] [docs] Explain how to use multiple file-name patterns in RUNTESTFLAGS

2017-08-21 Thread Daniel Santos
It took me a while to figure out how to do this so I figured that it should be
in the docs.  OK for trunk?

* doc/install.texi: Add more details on selecting multiple tests.

Thanks,
Daniel

Signed-off-by: Daniel Santos 
---
 gcc/doc/install.texi | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 7c9e2f25d44..6aefd213901 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -2737,6 +2737,16 @@ the testsuite with filenames matching @samp{9805*}, you 
would use
 make check-g++ RUNTESTFLAGS="old-deja.exp=9805* @var{other-options}"
 @end smallexample
 
+The file-matching expression following @var{filename}@command{.exp=} is treated
+as a series of whitespace-delimited glob expressions so that multiple patterns
+may be passed, although any whitespace must either be escaped or surrounded by
+tick marks if multiple expressions are desired. For example,
+
+@smallexample
+make check-g++ RUNTESTFLAGS="old-deja.exp=9805*\ virtual2.c 
@var{other-options}"
+make check-g++ RUNTESTFLAGS="'old-deja.exp=9805* virtual2.c' 
@var{other-options}"
+@end smallexample
+
 The @file{*.exp} files are located in the testsuite directories of the GCC
 source, the most important ones being @file{compile.exp},
 @file{execute.exp}, @file{dg.exp} and @file{old-deja.exp}.
-- 
2.13.3



[PATCH] [i386, testsuite] [PR 71958] Error on -mx32 with -mabi=ms

2017-08-21 Thread Daniel Santos
We currently error when -mx32 and -mabi=sysv and we encounter a function
with attribute ms_abi, but we are not erroring on -mx32 and -mabi=ms
(either explicitly or when it is the default on Windows).  In fact, it
generates code that runs, but is of an undefined ABI.

I'm also changing pr64409.c because if you explicitly supply -m64, then
the test became ineffective.  This is because the -mx32 parameter passed
in dg-options is later overridden by the explicit -m64 parameter.

I've bootstrapped and tested on
*  an x86_64-pc-linux-gnux32 system building gcc with --with-abi=mx32,
*  a "normal" x86_64-pc-linux-gnu testing with
   --target_board=unix/\{,-m32\}, and
*  on Windows.

OK for trunk?

gcc/ChangeLog:
2017-08-11  Daniel Santos  

* config/i386/i386.c (ix86_option_override_internal): Error when
-mx32 is combined with -mabi=ms.
(ix86_function_type_abi): Limit errors for mixing -mx32 with
attribute ms_abi.

gcc/testsuite/ChangeLog:
2017-08-11  Daniel Santos  

* gcc.target/i386/pr71958.c: New test to verify error on -mx32
and -mabi=ms
* gcc.target/i386/pr64409.c: Modify to only run on x32.
* gcc.target/i386/pr46470.c: Modify to skip x32 target.
* gcc.target/i386/pr66275.c: Likewise.
* gcc.target/i386/pr68018.c: Likewise.

Thanks,
Daniel

Signed-off-by: Daniel Santos 
---
 gcc/config/i386/i386.c  | 12 ++--
 gcc/testsuite/gcc.target/i386/pr46470.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr64409.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr66275.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr68018.c |  2 +-
 gcc/testsuite/gcc.target/i386/pr71958.c |  7 +++
 6 files changed, 21 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr71958.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1d88e4f247a..3b537f2608f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -5684,6 +5684,10 @@ ix86_option_override_internal (bool main_args_p,
   if (!opts_set->x_ix86_abi)
 opts->x_ix86_abi = DEFAULT_ABI;
 
+  if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
+error ("-mabi=ms not supported with X32 ABI");
+  gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
+
   /* For targets using ms ABI enable ms-extensions, if not
  explicit turned off.  For non-ms ABI we turn off this
  option.  */
@@ -8777,8 +8781,12 @@ ix86_function_type_abi (const_tree fntype)
   if (abi == SYSV_ABI
   && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
 {
-  if (TARGET_X32)
-   error ("X32 does not support ms_abi attribute");
+  static int warned;
+  if (TARGET_X32 && !warned)
+   {
+ error ("X32 does not support ms_abi attribute");
+ warned = 1;
+   }
 
   abi = MS_ABI;
 }
diff --git a/gcc/testsuite/gcc.target/i386/pr46470.c 
b/gcc/testsuite/gcc.target/i386/pr46470.c
index 9e8e731188e..c66a378a1ad 100644
--- a/gcc/testsuite/gcc.target/i386/pr46470.c
+++ b/gcc/testsuite/gcc.target/i386/pr46470.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target { ! x32 } } } */
 /* The pic register save adds unavoidable stack pointer references.  */
 /* { dg-skip-if "" { ia32 && { ! nonpic } } } */
 /* These options are selected to ensure 1 word needs to be allocated
diff --git a/gcc/testsuite/gcc.target/i386/pr64409.c 
b/gcc/testsuite/gcc.target/i386/pr64409.c
index 917472653f4..7bf9d1e398d 100644
--- a/gcc/testsuite/gcc.target/i386/pr64409.c
+++ b/gcc/testsuite/gcc.target/i386/pr64409.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do compile { target x32 } } */
 /* { dg-require-effective-target maybe_x32 } */
 /* { dg-options "-O0 -mx32" } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr66275.c 
b/gcc/testsuite/gcc.target/i386/pr66275.c
index b8759aeb5ec..51ae1f6859c 100644
--- a/gcc/testsuite/gcc.target/i386/pr66275.c
+++ b/gcc/testsuite/gcc.target/i386/pr66275.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
 /* { dg-options "-mabi=ms -fdump-rtl-dfinit" } */
 
 void
diff --git a/gcc/testsuite/gcc.target/i386/pr68018.c 
b/gcc/testsuite/gcc.target/i386/pr68018.c
index a0fa21e0b00..04929c6c13c 100644
--- a/gcc/testsuite/gcc.target/i386/pr68018.c
+++ b/gcc/testsuite/gcc.target/i386/pr68018.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
+/* { dg-do compile { target { *-*-linux* && lp64 } } } */
 /* { dg-options "-O -mabi=ms -mstackrealign" } */
 
 typedef float V __attribute__ ((vector_size (16)));
diff --git a/gcc/testsuite/gcc.target/i386/pr71958.c 
b/gcc/testsuite/gcc.target/i386/pr71958.c
new file mode 100644
index 000..c5a109767c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr71958.c
@@ -0,0 +1,7 @@
+/* { dg-do compile { target { x32 } } } */
+/* { dg-options "-mabi=ms" } */
+/* { dg-error 

[PATCH] [i386] PR 81850 Don't ignore -mabi=sysv on Cygwin/MinGW

2017-08-21 Thread Daniel Santos
This is a problem that occurred because of this code in
ix86_option_override_internal:

  if (!opts_set->x_ix86_abi)
opts->x_ix86_abi = DEFAULT_ABI;

I tested this along with my other patches.  OK for trunk?

* config/i386/i386-opts.h (enum calling_abi): Modify so that no legal
values are equivalent to zero.

Thanks,
Daniel

Signed-off-by: Daniel Santos 
---
 gcc/config/i386/i386-opts.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 542cd0f3d67..8c2b5380e49 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -44,8 +44,8 @@ last_alg
 /* Available call abi.  */
 enum calling_abi
 {
-  SYSV_ABI = 0,
-  MS_ABI = 1
+  SYSV_ABI = 1,
+  MS_ABI = 2
 };
 
 enum fpmath_unit
-- 
2.13.3



Re: Clobbers and Scratch Registers

2017-08-21 Thread Alan Modra
On Mon, Aug 21, 2017 at 06:33:09PM +0100, Richard Sandiford wrote:
> I think it's worth emphasising that tying operands doesn't change
> whether an output needs an earlyclobber or not.  E.g. for:

Thanks for noticing this.  It turns out that my OpenBLAS example
actually ought to have an early-clobber on one of the tied outputs, so
you've also alerted me to another bug in the power8 code.  (Well, only
if the dgemv kernel was called directly from user code with a 16*N A
matrix, or I suppose if LTO was used.)  So I now have a real-world
example of the situation where you need an early-clobber on tied
outputs, and also where an early-clobber is undesirable.

Revised and expanded.

* doc/extend.texi (Extended Asm ): Rename to
"Clobbers and Scratch Registers".  Add paragraph on
alternative to clobbers for scratch registers and OpenBLAS
example.

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 940490e..cef6c57 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -8075,7 +8075,7 @@ A comma-separated list of C expressions read by the 
instructions in the
 @item Clobbers
 A comma-separated list of registers or other values changed by the 
 @var{AssemblerTemplate}, beyond those listed as outputs.
-An empty list is permitted.  @xref{Clobbers}.
+An empty list is permitted.  @xref{Clobbers and Scratch Registers}.
 
 @item GotoLabels
 When you are using the @code{goto} form of @code{asm}, this section contains 
@@ -8435,7 +8435,7 @@ The enclosing parentheses are a required part of the 
syntax.
 
 When the compiler selects the registers to use to 
 represent the output operands, it does not use any of the clobbered registers 
-(@pxref{Clobbers}).
+(@pxref{Clobbers and Scratch Registers}).
 
 Output operand expressions must be lvalues. The compiler cannot check whether 
 the operands have data types that are reasonable for the instruction being 
@@ -8671,7 +8671,8 @@ as input.  The enclosing parentheses are a required part 
of the syntax.
 @end table
 
 When the compiler selects the registers to use to represent the input 
-operands, it does not use any of the clobbered registers (@pxref{Clobbers}).
+operands, it does not use any of the clobbered registers
+(@pxref{Clobbers and Scratch Registers}).
 
 If there are no output operands but there are input operands, place two 
 consecutive colons where the output operands would go:
@@ -8722,9 +8723,10 @@ asm ("cmoveq %1, %2, %[result]"
: "r" (test), "r" (new), "[result]" (old));
 @end example
 
-@anchor{Clobbers}
-@subsubsection Clobbers
+@anchor{Clobbers and Scratch Registers}
+@subsubsection Clobbers and Scratch Registers
 @cindex @code{asm} clobbers
+@cindex @code{asm} scratch registers
 
 While the compiler is aware of changes to entries listed in the output 
 operands, the inline @code{asm} code may modify more than just the outputs. 
For 
@@ -8853,6 +8855,75 @@ dscal (size_t n, double *x, double alpha)
 @}
 @end smallexample
 
+Rather than allocating fixed registers via clobbers to provide scratch
+registers for an @code{asm} statement, an alternative is to define a
+variable and make it an early-clobber output as with @code{a2} and
+@code{a3} in the example below.  This gives the compiler register
+allocator more freedom.  You can also define a variable and make it an
+output tied to an input as with @code{a0} and @code{a1}, tied
+respectively to @code{ap} and @code{lda}.  Of course, with tied
+outputs your @code{asm} can't use the input value after modifying the
+output register since they are one and the same register.  What's
+more, if you omit the early-clobber on the output, it is possible that
+GCC might allocate the same register to another of the inputs if GCC
+could prove they had the same value on entry to the @code{asm}.  This
+is why @code{a1} has an early-clobber.  Its tied input, @code{lda}
+might conceivably be known to have the value 16 and without an
+early-clobber share the same register as @code{%11}.  On the other
+hand, @code{ap} can't be the same as any of the other inputs, so an
+early-clobber on @code{a0} is not needed.  It is also not desirable in
+this case.  An early-clobber on @code{a0} would cause GCC to allocate
+a separate register for the @code{"m" (*(const double (*)[]) ap)}
+input.  Note that tying an input to an output is the way to set up an
+initialized temporary register modified by an @code{asm} statement.
+An input not tied to an output is assumed by GCC to be unchanged, for
+example @code{"b" (16)} below sets up @code{%11} to 16, and GCC might
+use that register in following code if the value 16 happened to be
+needed.  You can even use a normal @code{asm} output for a scratch if
+all inputs that might share the same register are consumed before the
+scratch is used.  The VSX registers clobbered by the @code{asm}
+statement could have used this technique except for GCC's limit on the
+number of @code{asm} parameters.
+
+@smallexample
+static void
+dgemv_kernel_4x4 (long n,

Re: Clobbers and Scratch Registers

2017-08-21 Thread Alan Modra
On Tue, Aug 22, 2017 at 01:41:21PM +0930, Alan Modra wrote:
> + "#n=%1 ap=%8=%12 lda=%13 x=%7=%10 y=%0=%2 alpha=%9 o16=%11\n"
> + "#a0=%3 a1=%4 a2=%5 a3=%6"
> + :
> +   "+m" (*(double (*)[n]) y),
> +   "+r" (n), // 1

Another small revision.  That needs to be "+&r" (n), in case n can be
deduced to be 16, matching one of the other inputs.

-- 
Alan Modra
Australia Development Lab, IBM


Re: [PATCH] [i386, testsuite] [PR 71958] Error on -mx32 with -mabi=ms

2017-08-21 Thread Uros Bizjak
On Tue, Aug 22, 2017 at 4:00 AM, Daniel Santos  wrote:
> We currently error when -mx32 and -mabi=sysv and we encounter a function
> with attribute ms_abi, but we are not erroring on -mx32 and -mabi=ms
> (either explicitly or when it is the default on Windows).  In fact, it
> generates code that runs, but is of an undefined ABI.
>
> I'm also changing pr64409.c because if you explicitly supply -m64, then
> the test became ineffective.  This is because the -mx32 parameter passed
> in dg-options is later overridden by the explicit -m64 parameter.
>
> I've bootstrapped and tested on
> *  an x86_64-pc-linux-gnux32 system building gcc with --with-abi=mx32,
> *  a "normal" x86_64-pc-linux-gnu testing with
>--target_board=unix/\{,-m32\}, and
> *  on Windows.
>
> OK for trunk?
>
> gcc/ChangeLog:
> 2017-08-11  Daniel Santos  
>
> * config/i386/i386.c (ix86_option_override_internal): Error when
> -mx32 is combined with -mabi=ms.
> (ix86_function_type_abi): Limit errors for mixing -mx32 with
> attribute ms_abi.
>
> gcc/testsuite/ChangeLog:
> 2017-08-11  Daniel Santos  
>
> * gcc.target/i386/pr71958.c: New test to verify error on -mx32
> and -mabi=ms
> * gcc.target/i386/pr64409.c: Modify to only run on x32.
> * gcc.target/i386/pr46470.c: Modify to skip x32 target.
> * gcc.target/i386/pr66275.c: Likewise.
> * gcc.target/i386/pr68018.c: Likewise.

OK.

Thanks,
Uros.

> Thanks,
> Daniel
>
> Signed-off-by: Daniel Santos 
> ---
>  gcc/config/i386/i386.c  | 12 ++--
>  gcc/testsuite/gcc.target/i386/pr46470.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr64409.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr66275.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr68018.c |  2 +-
>  gcc/testsuite/gcc.target/i386/pr71958.c |  7 +++
>  6 files changed, 21 insertions(+), 6 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr71958.c
>
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 1d88e4f247a..3b537f2608f 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -5684,6 +5684,10 @@ ix86_option_override_internal (bool main_args_p,
>if (!opts_set->x_ix86_abi)
>  opts->x_ix86_abi = DEFAULT_ABI;
>
> +  if (opts->x_ix86_abi == MS_ABI && TARGET_X32_P (opts->x_ix86_isa_flags))
> +error ("-mabi=ms not supported with X32 ABI");
> +  gcc_assert (opts->x_ix86_abi == SYSV_ABI || opts->x_ix86_abi == MS_ABI);
> +
>/* For targets using ms ABI enable ms-extensions, if not
>   explicit turned off.  For non-ms ABI we turn off this
>   option.  */
> @@ -8777,8 +8781,12 @@ ix86_function_type_abi (const_tree fntype)
>if (abi == SYSV_ABI
>&& lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
>  {
> -  if (TARGET_X32)
> -   error ("X32 does not support ms_abi attribute");
> +  static int warned;
> +  if (TARGET_X32 && !warned)
> +   {
> + error ("X32 does not support ms_abi attribute");
> + warned = 1;
> +   }
>
>abi = MS_ABI;
>  }
> diff --git a/gcc/testsuite/gcc.target/i386/pr46470.c 
> b/gcc/testsuite/gcc.target/i386/pr46470.c
> index 9e8e731188e..c66a378a1ad 100644
> --- a/gcc/testsuite/gcc.target/i386/pr46470.c
> +++ b/gcc/testsuite/gcc.target/i386/pr46470.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile } */
> +/* { dg-do compile { target { ! x32 } } } */
>  /* The pic register save adds unavoidable stack pointer references.  */
>  /* { dg-skip-if "" { ia32 && { ! nonpic } } } */
>  /* These options are selected to ensure 1 word needs to be allocated
> diff --git a/gcc/testsuite/gcc.target/i386/pr64409.c 
> b/gcc/testsuite/gcc.target/i386/pr64409.c
> index 917472653f4..7bf9d1e398d 100644
> --- a/gcc/testsuite/gcc.target/i386/pr64409.c
> +++ b/gcc/testsuite/gcc.target/i386/pr64409.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-do compile { target x32 } } */
>  /* { dg-require-effective-target maybe_x32 } */
>  /* { dg-options "-O0 -mx32" } */
>
> diff --git a/gcc/testsuite/gcc.target/i386/pr66275.c 
> b/gcc/testsuite/gcc.target/i386/pr66275.c
> index b8759aeb5ec..51ae1f6859c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr66275.c
> +++ b/gcc/testsuite/gcc.target/i386/pr66275.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
> +/* { dg-do compile { target { *-*-linux* && lp64 } } } */
>  /* { dg-options "-mabi=ms -fdump-rtl-dfinit" } */
>
>  void
> diff --git a/gcc/testsuite/gcc.target/i386/pr68018.c 
> b/gcc/testsuite/gcc.target/i386/pr68018.c
> index a0fa21e0b00..04929c6c13c 100644
> --- a/gcc/testsuite/gcc.target/i386/pr68018.c
> +++ b/gcc/testsuite/gcc.target/i386/pr68018.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile { target { *-*-linux* && { ! ia32 } } } } */
> +/* { dg-do compile { target { *-*-linux* && lp64 } } } */
>  /* { dg-options "-O -mabi=ms -mstackrealign" } */
>
>  typedef float V __attribute__ ((vector_size (16)));
> diff --git a/g

Re: [PATCH] [i386] PR 81850 Don't ignore -mabi=sysv on Cygwin/MinGW

2017-08-21 Thread Andreas Schwab
On Aug 21 2017, Daniel Santos  wrote:

> This is a problem that occurred because of this code in
> ix86_option_override_internal:
>
>   if (!opts_set->x_ix86_abi)
> opts->x_ix86_abi = DEFAULT_ABI;

Why is that a problem?  Note opts_set vs opts.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] [i386] PR 81850 Don't ignore -mabi=sysv on Cygwin/MinGW

2017-08-21 Thread Uros Bizjak
On Tue, Aug 22, 2017 at 4:10 AM, Daniel Santos  wrote:

> This is a problem that occurred because of this code in
> ix86_option_override_internal:
>
>   if (!opts_set->x_ix86_abi)
> opts->x_ix86_abi = DEFAULT_ABI;
>
> I tested this along with my other patches.  OK for trunk?
>
> * config/i386/i386-opts.h (enum calling_abi): Modify so that no legal
> values are equivalent to zero.

Please add UNKNOWN_ABI to the enum and initialize -mabi in i386.opt to
UNKNOWN_ABI.

Then change the above condition to

if (opts_set->x_ix86_abi == UNKNOWN_ABI)

We can't just init -mabi to DEFAULT_ABI, since this is selected at
runtime. Maybe a comment should be added for UNKNOWN_ABI, that it is
overridden in ix86_option_override_internal.

Uros.

> Thanks,
> Daniel
>
> Signed-off-by: Daniel Santos 
> ---
>  gcc/config/i386/i386-opts.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
> index 542cd0f3d67..8c2b5380e49 100644
> --- a/gcc/config/i386/i386-opts.h
> +++ b/gcc/config/i386/i386-opts.h
> @@ -44,8 +44,8 @@ last_alg
>  /* Available call abi.  */
>  enum calling_abi
>  {
> -  SYSV_ABI = 0,
> -  MS_ABI = 1
> +  SYSV_ABI = 1,
> +  MS_ABI = 2
>  };
>
>  enum fpmath_unit
> --
> 2.13.3
>


Re: [PATCH] [i386] PR 81850 Don't ignore -mabi=sysv on Cygwin/MinGW

2017-08-21 Thread Uros Bizjak
On Tue, Aug 22, 2017 at 8:26 AM, Andreas Schwab  wrote:
> On Aug 21 2017, Daniel Santos  wrote:
>
>> This is a problem that occurred because of this code in
>> ix86_option_override_internal:
>>
>>   if (!opts_set->x_ix86_abi)
>> opts->x_ix86_abi = DEFAULT_ABI;
>
> Why is that a problem?  Note opts_set vs opts.

Uh, indeed. Need to clean my glasses. The above is a flag that
something is specified at command line.

Uros.

> Andreas.
>
> --
> Andreas Schwab, SUSE Labs, sch...@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."