[committed] Small inclusive scan SSE2 vectorization improvement

2019-06-20 Thread Jakub Jelinek
Hi!

This is a small improvement over the previous patch, the decision to use
whole vector left shift + optional VEC_COND_EXPR doesn't have to be binary
for the whole scan that contains several permutations, e.g. SSE2 can't do
non-whole vector left shift { 0, 4, 5, 6 } permutation, but can do
{ 0, 1, 4, 5 } and especially if the initializer is not 0, that saves some
instructions.

The following patch changes the code, so that it remembers what to do for
each of the permutations.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-20  Jakub Jelinek  

* tree-vect-stmts.c (enum scan_store_kind): New type.
(scan_store_can_perm_p): Change last argument from int * to
vec *, record precisely which permutations
need whole vector left shift or that plus VEC_COND_EXPR.
(vectorizable_scan_store): Adjust caller, use whole vector left shift
and additional VEC_COND_EXPR only for those iterations that need it.

--- gcc/tree-vect-stmts.c.jj2019-06-19 11:58:53.161238429 +0200
+++ gcc/tree-vect-stmts.c   2019-06-19 12:40:50.675838267 +0200
@@ -6354,13 +6354,27 @@ scan_operand_equal_p (tree ref1, tree re
 }
 
 
+enum scan_store_kind {
+  /* Normal permutation.  */
+  scan_store_kind_perm,
+
+  /* Whole vector left shift permutation with zero init.  */
+  scan_store_kind_lshift_zero,
+
+  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
+  scan_store_kind_lshift_cond
+};
+
 /* Function check_scan_store.
 
Verify if we can perform the needed permutations or whole vector shifts.
-   Return -1 on failure, otherwise exact log2 of vectype's nunits.  */
+   Return -1 on failure, otherwise exact log2 of vectype's nunits.
+   USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
+   to do at each step.  */
 
 static int
-scan_store_can_perm_p (tree vectype, tree init, int *use_whole_vector_p = NULL)
+scan_store_can_perm_p (tree vectype, tree init,
+  vec *use_whole_vector = NULL)
 {
   enum machine_mode vec_mode = TYPE_MODE (vectype);
   unsigned HOST_WIDE_INT nunits;
@@ -6371,50 +6385,59 @@ scan_store_can_perm_p (tree vectype, tre
 return -1;
 
   int i;
+  enum scan_store_kind whole_vector_shift_kind = scan_store_kind_perm;
   for (i = 0; i <= units_log2; ++i)
 {
   unsigned HOST_WIDE_INT j, k;
+  enum scan_store_kind kind = scan_store_kind_perm;
   vec_perm_builder sel (nunits, nunits, 1);
   sel.quick_grow (nunits);
-  if (i == 0)
+  if (i == units_log2)
{
  for (j = 0; j < nunits; ++j)
sel[j] = nunits - 1;
}
   else
{
- for (j = 0; j < (HOST_WIDE_INT_1U << (i - 1)); ++j)
+ for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
sel[j] = j;
  for (k = 0; j < nunits; ++j, ++k)
sel[j] = nunits + k;
}
-  vec_perm_indices indices (sel, i == 0 ? 1 : 2, nunits);
+  vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
   if (!can_vec_perm_const_p (vec_mode, indices))
-   break;
-}
-
-  if (i == 0)
-return -1;
-
-  if (i <= units_log2)
-{
-  if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
-   return -1;
-  int kind = 1;
-  /* Whole vector shifts shift in zeros, so if init is all zero constant,
-there is no need to do anything further.  */
-  if ((TREE_CODE (init) != INTEGER_CST
-  && TREE_CODE (init) != REAL_CST)
- || !initializer_zerop (init))
{
- tree masktype = build_same_sized_truth_vector_type (vectype);
- if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
+ if (i == units_log2)
return -1;
- kind = 2;
+
+ if (whole_vector_shift_kind == scan_store_kind_perm)
+   {
+ if (optab_handler (vec_shl_optab, vec_mode) == CODE_FOR_nothing)
+   return -1;
+ whole_vector_shift_kind = scan_store_kind_lshift_zero;
+ /* Whole vector shifts shift in zeros, so if init is all zero
+constant, there is no need to do anything further.  */
+ if ((TREE_CODE (init) != INTEGER_CST
+  && TREE_CODE (init) != REAL_CST)
+ || !initializer_zerop (init))
+   {
+ tree masktype = build_same_sized_truth_vector_type (vectype);
+ if (!expand_vec_cond_expr_p (vectype, masktype, VECTOR_CST))
+   return -1;
+ whole_vector_shift_kind = scan_store_kind_lshift_cond;
+   }
+   }
+ kind = whole_vector_shift_kind;
+   }
+  if (use_whole_vector)
+   {
+ if (kind != scan_store_kind_perm && use_whole_vector->is_empty ())
+   use_whole_vector->safe_grow_cleared (i);
+ if (kind != scan_store_kind_perm || !use_whole_vector->is_empty ())
+   use_whole_vector->safe_push (kind);
}
-  if

Re: [PATCH, v2] PowerPC: Add 'prefix' to the 'isa' attribute

2019-06-20 Thread Segher Boessenkool
Hi Mike,

On Wed, Jun 19, 2019 at 10:43:43AM -0400, Michael Meissner wrote:
> Here is version 2:
> 
> 2019-06-19  Michael Meissner  
> 
>   * config/rs6000/rs6000.md (isa attribute): Add support for
>   a future processor.
> 
> Index: gcc/config/rs6000/rs6000.md
> ===
> --- gcc/config/rs6000/rs6000.md   (revision 272439)
> +++ gcc/config/rs6000/rs6000.md   (working copy)
> @@ -267,7 +267,8 @@ (define_attr "cpu"
>(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
>  
>  ;; The ISA we implement.
> -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v,p9kf,p9tf" (const_string "any"))
> +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9v,p9kf,p9tf,futp"
> +  (const_string "any"))

Note to self: I should add a comment explaining what the values here mean.

> @@ -306,6 +307,10 @@ (define_attr "enabled" ""
>   (and (eq_attr "isa" "p9tf")
> (match_test "FLOAT128_VECTOR_P (TFmode)"))
>   (const_int 1)
> +
> + (and (eq_attr "isa" "futp")
> +   (match_test "TARGET_FUTURE"))
> + (const_int 1)
>  ] (const_int 0)))

Hrm, so maybe this should just be called "fut"?  (We don't yet have values
for just "p8" and "p9", nothing uses those yet, but of course we should
have those too, eventually.)

Okay for trunk with that maybe fixed.  Thanks!


Segher


Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread Uros Bizjak
On Mon, Jun 17, 2019 at 6:27 PM H.J. Lu  wrote:
>
> processor_costs has costs of RTL expressions and costs of moves:
>
> 1. Costs of RTL expressions is computed as COSTS_N_INSNS which are used
> to generate RTL expressions with the lowest costs.  Costs of RTL memory
> operation can be very close to costs of fast instructions to indicate
> fast memory operations.
>
> 2. After RTL expressions have been generated, costs of moves are used by
> TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute move
> costs for register allocator.  Costs of load and store are higher than
> costs of register moves to reduce stack usages by register allocator.
>
> We should separate costs of RTL expressions from costs of moves so that
> they can be adjusted independently.  This patch moves costs of moves to
> the new used_by_ra field and duplicates costs of moves which are also
> used for costs of RTL expressions.

Actually, I think that the current separation is OK. Before reload, we
actually don't know which register set will perform the move (not even
if float mode will be moved in integer registers), the only thing we
can estimate is the number of move instructions. The real cost of
register moves is later calculated by the register allocator, where
the register class is taken into account when calculating the cost.

Uros.

>
> All cost models have been checked with
>
> static void
> check_one (const struct processor_costs *p)
> {
>   if (p->used_by_ra.int_load[2] != p->int_load)
> abort ();
>   if (p->used_by_ra.int_store[2] != p->int_store)
> abort ();
>   if (p->used_by_ra.xmm_move != p->xmm_move)
> abort ();
>   if (p->used_by_ra.sse_to_integer != p->sse_to_integer)
> abort ();
>   if (p->used_by_ra.integer_to_sse != p->integer_to_sse)
> abort ();
>   if (memcmp (p->used_by_ra.sse_load, p->sse_load, sizeof (p->sse_load)))
> abort ();
>   if (memcmp (p->used_by_ra.sse_store, p->sse_store, sizeof (p->sse_store)))
> abort ();
> }
>
> static void
> check_cost ()
> {
>  check_one (&ix86_size_cost);
>   for (unsigned int i = 0; i < ARRAY_SIZE (processor_cost_table); i++)
> check_one (processor_cost_table[i]);
> }
>
> by calling check_cost from ix86_option_override_internal.
>
> PR target/90878
> * config/i386/i386-features.c
> (dimode_scalar_chain::compute_convert_gain): Replace int_store[2]
> and int_load[2] with int_store and int_load.
> * config/i386/i386.c (inline_memory_move_cost): Use used_by_ra
> for costs of moves.
> (ix86_register_move_cost): Likewise.
> (ix86_builtin_vectorization_cost): Replace int_store[2] and
> int_load[2] with int_store and int_load.
> * config/i386/i386.h (processor_costs): Move costs of moves to
> used_by_ra.  Add int_load, int_store, xmm_move, sse_to_integer,
> integer_to_sse, sse_load, sse_store, sse_unaligned_load and
> sse_unaligned_store for costs of RTL expressions.
> * config/i386/x86-tune-costs.h: Duplicate int_load, int_store,
> xmm_move, sse_to_integer, integer_to_sse, sse_load, sse_store
> for costs of RTL expressions.  Use sse_unaligned_load and
> sse_unaligned_store only for costs of RTL expressions.
>
> --
> H.J.


Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread Uros Bizjak
On Thu, Jun 20, 2019 at 9:40 AM Uros Bizjak  wrote:
>
> On Mon, Jun 17, 2019 at 6:27 PM H.J. Lu  wrote:
> >
> > processor_costs has costs of RTL expressions and costs of moves:
> >
> > 1. Costs of RTL expressions is computed as COSTS_N_INSNS which are used
> > to generate RTL expressions with the lowest costs.  Costs of RTL memory
> > operation can be very close to costs of fast instructions to indicate
> > fast memory operations.
> >
> > 2. After RTL expressions have been generated, costs of moves are used by
> > TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute move
> > costs for register allocator.  Costs of load and store are higher than
> > costs of register moves to reduce stack usages by register allocator.
> >
> > We should separate costs of RTL expressions from costs of moves so that
> > they can be adjusted independently.  This patch moves costs of moves to
> > the new used_by_ra field and duplicates costs of moves which are also
> > used for costs of RTL expressions.
>
> Actually, I think that the current separation is OK. Before reload, we
> actually don't know which register set will perform the move (not even
> if float mode will be moved in integer registers), the only thing we
> can estimate is the number of move instructions. The real cost of
> register moves is later calculated by the register allocator, where
> the register class is taken into account when calculating the cost.

Forgot to say that due to the above reasoning, cost of moves should
not be used in the calculation of costs of RTL expressions, as we are
talking about two different cost functions. RTL expressions should
know nothing about register classes.

Uros.
>
> >
> > All cost models have been checked with
> >
> > static void
> > check_one (const struct processor_costs *p)
> > {
> >   if (p->used_by_ra.int_load[2] != p->int_load)
> > abort ();
> >   if (p->used_by_ra.int_store[2] != p->int_store)
> > abort ();
> >   if (p->used_by_ra.xmm_move != p->xmm_move)
> > abort ();
> >   if (p->used_by_ra.sse_to_integer != p->sse_to_integer)
> > abort ();
> >   if (p->used_by_ra.integer_to_sse != p->integer_to_sse)
> > abort ();
> >   if (memcmp (p->used_by_ra.sse_load, p->sse_load, sizeof (p->sse_load)))
> > abort ();
> >   if (memcmp (p->used_by_ra.sse_store, p->sse_store, sizeof (p->sse_store)))
> > abort ();
> > }
> >
> > static void
> > check_cost ()
> > {
> >  check_one (&ix86_size_cost);
> >   for (unsigned int i = 0; i < ARRAY_SIZE (processor_cost_table); i++)
> > check_one (processor_cost_table[i]);
> > }
> >
> > by calling check_cost from ix86_option_override_internal.
> >
> > PR target/90878
> > * config/i386/i386-features.c
> > (dimode_scalar_chain::compute_convert_gain): Replace int_store[2]
> > and int_load[2] with int_store and int_load.
> > * config/i386/i386.c (inline_memory_move_cost): Use used_by_ra
> > for costs of moves.
> > (ix86_register_move_cost): Likewise.
> > (ix86_builtin_vectorization_cost): Replace int_store[2] and
> > int_load[2] with int_store and int_load.
> > * config/i386/i386.h (processor_costs): Move costs of moves to
> > used_by_ra.  Add int_load, int_store, xmm_move, sse_to_integer,
> > integer_to_sse, sse_load, sse_store, sse_unaligned_load and
> > sse_unaligned_store for costs of RTL expressions.
> > * config/i386/x86-tune-costs.h: Duplicate int_load, int_store,
> > xmm_move, sse_to_integer, integer_to_sse, sse_load, sse_store
> > for costs of RTL expressions.  Use sse_unaligned_load and
> > sse_unaligned_store only for costs of RTL expressions.
> >
> > --
> > H.J.


Re: [PATCH] Adding RBIT gcc builtin for ARM

2019-06-20 Thread Kyrill Tkachov

Hi Ayan,

On 6/20/19 4:40 AM, Ayan Shafqat wrote:

The attached patch contains __builtin_arm_rbit which generates RBIT
instruction for ARM targets.

Please let me know if you have any questions or comments, or commit this
patch for me as I do not have write access to SVN.


Thanks for the patch.

Before we can review this, do you have a copyright assignment in place 
as described at https://gcc.gnu.org/contribute.html ?


Thanks,

Kyrill


Thanks
Ayan

commit a692b5b4965840babbdaf5e2b9b1feb1995d351d
Author: Ayan Shafqat 
Date:   Mon Jun 17 21:46:54 2019 -0400

 Implementing RBIT builtin as described in ACLE doc

 ARM's RBIT instruction is used to reverse the bit order
 of a word. This is present in ARMv6 and above in both
 ARM and Thumb modes. This is also specified as an intrinsic
 function in ACLE documentation.

 This commit implements the GCC builtin for ARM target for
 RBIT instruction, __builtin_arm_rbit. Also, this implements
 the intrinsic functions as stated in ARM ACLE documentation,
 which are listed below:

 uint32_t __rbit(uint32_t x);
 unsigned long __rbitl(unsigned long x);
 uint64_t __rbitll(uint64_t x);

 Note: __rbitll is implemented as two calls to __rbit. I know
 this is not how it's done in AArch64, but this is what I can
 do for now.

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index ae582172ab9..83dcb7b411c 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -11568,6 +11568,13 @@
    [(set_attr "predicable" "yes")
 (set_attr "type" "clz")])

+(define_insn "rbit"
+  [(set (match_operand:SI 0 "s_register_operand" "=r")
+   (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] 
UNSPEC_RBIT))]

+  "TARGET_32BIT && arm_arch_thumb2"
+  "rbit%?\\t%0, %1"
+  [(set_attr "predicable" "yes")])
+
  (define_insn "rbitsi2"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
 (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")] 
UNSPEC_RBIT))]

diff --git a/gcc/config/arm/arm_acle.h b/gcc/config/arm/arm_acle.h
index 2c7acc698ea..ce1b102444b 100644
--- a/gcc/config/arm/arm_acle.h
+++ b/gcc/config/arm/arm_acle.h
@@ -168,6 +168,29 @@ __arm_mrrc2 (const unsigned int __coproc, const
unsigned int __opc1,
  {
    return __builtin_arm_mrrc2 (__coproc, __opc1,  __CRm);
  }
+
+__extension__ static __inline uint32_t __attribute__ 
((__always_inline__))

+__rbit(uint32_t __op1)
+{
+  return __builtin_arm_rbit(__op1);
+}
+
+__extension__ static __inline uint64_t __attribute__ 
((__always_inline__))

+__rbitll(uint64_t __op1)
+{
+  return (((uint64_t)__rbit(__op1)) << 32U) | __rbit(__op1 >> 32U);
+}
+
+__extension__ static __inline unsigned long __attribute__
((__always_inline__))
+__rbitl(unsigned long __op1)
+{
+#if __SIZEOF_LONG__ == 4
+  return __rbit(__op1);
+#else
+  return __rbitll(__op1);
+#endif
+}
+
  #endif /* __ARM_ARCH >= 6.  */
  #endif /* __ARM_ARCH >= 6 ||  defined (__ARM_ARCH_5TE__).  */
  #endif /*  __ARM_ARCH >= 5.  */
diff --git a/gcc/config/arm/arm_acle_builtins.def
b/gcc/config/arm/arm_acle_builtins.def
index b2438d66da2..ecb3be491fc 100644
--- a/gcc/config/arm/arm_acle_builtins.def
+++ b/gcc/config/arm/arm_acle_builtins.def
@@ -24,6 +24,7 @@ VAR1 (UBINOP, crc32w, si)
  VAR1 (UBINOP, crc32cb, si)
  VAR1 (UBINOP, crc32ch, si)
  VAR1 (UBINOP, crc32cw, si)
+VAR1 (UBINOP, rbit, si)
  VAR1 (CDP, cdp, void)
  VAR1 (CDP, cdp2, void)
  VAR1 (LDC, ldc, void)
diff --git a/gcc/testsuite/gcc.target/arm/acle/rbit.c
b/gcc/testsuite/gcc.target/arm/acle/rbit.c
new file mode 100644
index 000..7803dd33615
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/acle/rbit.c
@@ -0,0 +1,18 @@
+/* Test the __rbit ACLE intrinsic.  */
+
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_crc_ok } */
+/* { dg-options "-save-temps -O0" } */
+/* { dg-add-options arm_crc } */
+
+#include "arm_acle.h"
+
+void test_rbit (void)
+{
+  uint32_t out_uint32_t;
+  uint32_t arg0_uint32_t;
+
+  out_uint32_t = __rbit (arg0_uint32_t);
+}
+
+/* { dg-final { scan-assembler-times "rbit\t...?, ...?\n" 2 } } */


RE: Deque fill/copy/move/copy_backward/move_backward/equal overloads

2019-06-20 Thread Morwenn Ed
That's actually a solution to bug 90409, thanks for it :)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90409

Morwenn


De : libstdc++-ow...@gcc.gnu.org  de la part de 
François Dumont 
Envoyé : mercredi 19 juin 2019 19:32
À : libstd...@gcc.gnu.org; gcc-patches
Objet : Deque fill/copy/move/copy_backward/move_backward/equal overloads

I wanted to implement Debug overloads for those already existing
overloads but then realized that those algos could be generalized. This
way we will benefit from the memmove replacement when operating with C
array or std::array or std::vector iterators.

I might do the same for lexicographical_compare one day.

The ChangeLog below is quite huge so I attached it. I wonder if I could
use deque::iterator and deque::const_iterator in place of the
_Deque_iterator<> to reduce it ?

Tested under Linux x86_64 normal and debug modes, ok to commit ?

François



Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Martin Liška
Hello.

As mentioned by Honza, it's using cmake and to be honest I prefer to use a 
shared
library rather than a statically built library. Moreover, it's an optional 
requirement, so
we don't have to add it to contrib/download_prerequisites.

I like the idea of marking of compression algorithm in 'LTO_header'. However,
we do compress the header as well. Proper solution would be to make a new
section .gnu.lto_.header where we'll put:
struct lto_header
{
  int16_t major_version;
  int16_t minor_version;
};

I don't see a reason why we should have that information in each LTO ELF 
section.

For the time being, I've written the code so that decompression falls back to 
zlib
if ZSTD detects that the LTO bytecode was compressed with zlib. Conversely, 
decompression
of zstd-compressed data with zlib will end with:
lto1: internal compiler error: compressed stream: data error

I'm sending updated version of the patch that can properly detect zstd.

Martin

>From 869b630139676fb740fb5296d68086a8ef7f03ae Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 19 Jun 2019 09:40:35 +0200
Subject: [PATCH 2/2] Add optional support for zstd.

---
 gcc/common.opt |   4 +-
 gcc/lto-compress.c | 139 ++---
 gcc/timevar.def|   4 +-
 3 files changed, 122 insertions(+), 25 deletions(-)

diff --git a/gcc/common.opt b/gcc/common.opt
index a1544d06824..3b71a36552b 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1888,8 +1888,8 @@ Specify the algorithm to partition symbols and vars at linktime.
 
 ; The initial value of -1 comes from Z_DEFAULT_COMPRESSION in zlib.h.
 flto-compression-level=
-Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 9)
--flto-compression-level=	Use zlib compression level  for IL.
+Common Joined RejectNegative UInteger Var(flag_lto_compression_level) Init(-1) IntegerRange(0, 19)
+-flto-compression-level=	Use zlib/zstd compression level  for IL.
 
 flto-odr-type-merging
 Common Ignore
diff --git a/gcc/lto-compress.c b/gcc/lto-compress.c
index 3287178f257..327ff9e07b7 100644
--- a/gcc/lto-compress.c
+++ b/gcc/lto-compress.c
@@ -35,6 +35,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "lto-compress.h"
 #include "timevar.h"
 
+#ifdef HAVE_ZSTD_H
+#include 
+#endif
+
 /* Compression stream structure, holds the flush callback and opaque token,
the buffered data, and a note of whether compressing or uncompressing.  */
 
@@ -92,6 +96,95 @@ lto_normalized_zlib_level (void)
   return level;
 }
 
+/* Free the buffer and memory associated with STREAM.  */
+
+static void
+lto_destroy_compression_stream (struct lto_compression_stream *stream)
+{
+  free (stream->buffer);
+  free (stream);
+}
+
+#ifdef HAVE_ZSTD_H
+/* Return a zstd compression level that zstd will not reject.  Normalizes
+   the compression level from the command line flag, clamping non-default
+   values to the appropriate end of their valid range.  */
+
+static int
+lto_normalized_zstd_level (void)
+{
+  int level = flag_lto_compression_level;
+
+  if (level != ZSTD_CLEVEL_DEFAULT)
+{
+  if (level < 1)
+	level = 1;
+  else if (level > ZSTD_maxCLevel ())
+	level = ZSTD_maxCLevel ();
+}
+
+  return level;
+}
+
+/* Compress STREAM using ZSTD algorithm.  */
+
+static void
+lto_compression_zstd (struct lto_compression_stream *stream)
+{
+  unsigned char *cursor = (unsigned char *) stream->buffer;
+  size_t size = stream->bytes;
+
+  timevar_push (TV_IPA_LTO_COMPRESS);
+  size_t const outbuf_length = ZSTD_compressBound (size);
+  char *outbuf = (char *) xmalloc (outbuf_length);
+
+  size_t const csize = ZSTD_compress (outbuf, outbuf_length, cursor, size,
+  lto_normalized_zstd_level ());
+
+  if (ZSTD_isError (csize))
+internal_error ("compressed stream: %s", ZSTD_getErrorName (csize));
+
+  stream->callback (outbuf, csize, NULL);
+
+  lto_destroy_compression_stream (stream);
+  free (outbuf);
+  timevar_pop (TV_IPA_LTO_COMPRESS);
+}
+
+/* Uncompress STREAM using ZSTD algorithm.  */
+
+static bool
+lto_uncompression_zstd (struct lto_compression_stream *stream)
+{
+  unsigned char *cursor = (unsigned char *) stream->buffer;
+  size_t size = stream->bytes;
+
+  timevar_push (TV_IPA_LTO_DECOMPRESS);
+  unsigned long long const rsize = ZSTD_getFrameContentSize (cursor, size);
+  if (rsize == ZSTD_CONTENTSIZE_ERROR)
+{
+  /* The content is probably using zlib.  */
+  return false;
+}
+  else if (rsize == ZSTD_CONTENTSIZE_UNKNOWN)
+internal_error ("original size unknown");
+
+  char *outbuf = (char *) xmalloc (rsize);
+  size_t const dsize = ZSTD_decompress (outbuf, rsize, cursor, size);
+
+  if (ZSTD_isError (dsize))
+internal_error ("decompressed stream: %s", ZSTD_getErrorName (dsize));
+
+  stream->callback (outbuf, dsize, stream->opaque);
+
+  lto_destroy_compression_stream (stream);
+  free (outbuf);
+  timevar_pop (TV_IPA_LTO_DECOMPRESS);
+  return true;
+}
+
+#endif
+
 /* Create a new compression

Re: PR libstdc++/90945 Patch to have pretty printer for std::vector return bool instead of int for elements

2019-06-20 Thread Jonathan Wakely

On 20/06/19 08:12 +0200, Stephan Bergmann wrote:

On 19/06/2019 21:54, Jonathan Wakely wrote:

On 19/06/19 21:49 +0200, Michael Weghorn wrote:

On 19/06/2019 21.37, Jonathan Wakely wrote:

+  std::vector vb;
+  vb.reserve(100);
+  vb.push_back(true);
+  vb.push_back(true);
+  vb.push_back(false);
+  vb.push_back(false);
+  vb.push_back(true);
+  vb.erase(vb.begin());
+// { dg-final { regexp-test vb {std::(__debug::)?vector of 
length 4, capacity 100 = \\{true, false, false, true\\}} } }

+


This inserts 5 elements, so I'd expect that either "vector of length 5"
and an additional "true" element at the beginning need to be added for
the expected result or one of the two first 'vb.push_back(true)' needs
to be removed.


It inserts five then erases one, the test is right.


Just one thought that occurred to me while idly browsing this thread: 
Wouldn't it be better in general to have non-symmetric content to test 
against, to check that the printer doesn't print it in reverse?


It certainly would, good idea! It's not inconceivable that the
bit-shifting code in the printer could be backwards, or affected by
endianness.

Ideally we'd also test a vector with more than 64 elements, but
I don't have the patience to add it to the test ;-)

Tested x86_64-linux, committed to trunk.

commit 4025cc174a76ac9bdc1a77dbae88598f73ae458d
Author: Jonathan Wakely 
Date:   Thu Jun 20 09:59:47 2019 +0100

Improve tests for std::vector printer

The current tests wouldn't notice if the vector contents were
printed in reverse, because it would read the same forwards and
backwards. Change the content so the tests would fail if that happened.

* testsuite/libstdc++-prettyprinters/simple.cc: Use non-palindromic
vector for test.
* testsuite/libstdc++-prettyprinters/simple11.cc: Likewise.

diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple.cc b/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple.cc
index 04c1ef683a6..2e7f07b649f 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple.cc
@@ -120,11 +120,12 @@ main()
   vb.reserve(100);
   vb.push_back(true);
   vb.push_back(true);
+  vb.push_back(true);
   vb.push_back(false);
   vb.push_back(false);
   vb.push_back(true);
   vb.erase(vb.begin());
-// { dg-final { regexp-test vb {std::(__debug::)?vector of length 4, capacity 128 = \\{true, false, false, true\\}} } }
+// { dg-final { regexp-test vb {std::(__debug::)?vector of length 5, capacity 128 = \\{true, true, false, false, true\\}} } }
 
   __gnu_cxx::slist sll;
   sll.push_front(23);
diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple11.cc b/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple11.cc
index ace217cc9e8..24c871f6fba 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple11.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/simple11.cc
@@ -113,11 +113,12 @@ main()
   vb.reserve(100);
   vb.push_back(true);
   vb.push_back(true);
+  vb.push_back(true);
   vb.push_back(false);
   vb.push_back(false);
   vb.push_back(true);
   vb.erase(vb.begin());
-// { dg-final { regexp-test vb {std::(__debug::)?vector of length 4, capacity 128 = \\{true, false, false, true\\}} } }
+// { dg-final { regexp-test vb {std::(__debug::)?vector of length 5, capacity 128 = \\{true, true, false, false, true\\}} } }
 
   __gnu_cxx::slist sll;
   sll.push_front(23);


Re: [PATCH v3 3/3] PR80791 Consider doloop cmp use in ivopts

2019-06-20 Thread Segher Boessenkool
Hi Kewen,

On Wed, Jun 19, 2019 at 07:47:34PM +0800, Kewen.Lin wrote:
> +/* Return true if count register for branch is supported.  */
> +
> +static bool
> +rs6000_have_count_reg_decr_p ()
> +{
> +  return flag_branch_on_count_reg;
> +}

rs6000 unconditionally supports these instructions, not just when that
flag is set.  If you need to look at the flag, the *caller* of this new
hook should, not every implementation of the hook.  So just "return true"
here?

>  DEFHOOK
> +(have_count_reg_decr_p,
> + "Return true if the target supports hardware count register for decrement\n\
> +and branch.\n\
> +The default version of this hook returns false.",
> + bool, (void),
> + hook_bool_void_false)

Is it important here that you cannot use that register as a GPR, that any
use of it is expensive because it has to be moved to/from a GPR?  The doc
should say something like that; a little more context, what the hook is
meant to be used for.

> +/* For doloop use, if the algothrim selects some candidate which invalid for
> +   later rewrite, fix it up with bind_cand.  */

"algorithm", "which is invalid".

> +/* Find doloop comparison use and set its related bind_cand.  We adjust the
> +   doloop use group cost against various IV cands, it's possible to assign
> +   some cost like zero rather than original inifite cost.  The point is to

"infinite"

Looks good :-)


Segher


[PATCH] Fix outdated reference to C++17 draft in the docs

2019-06-20 Thread Jonathan Wakely

* doc/xml/manual/status_cxx2017.xml: Fix outdated reference to
C++17 working draft.

Committed to trunk.

commit 31b4df091273169a8ebe042c229526012509809c
Author: redi 
Date:   Thu Jun 20 09:13:03 2019 +

Fix outdated reference to C++17 draft in the docs

* doc/xml/manual/status_cxx2017.xml: Fix outdated reference to
C++17 working draft.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272500 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml 
b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
index 9aba079c251..d154d725391 100644
--- a/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
+++ b/libstdc++-v3/doc/xml/manual/status_cxx2017.xml
@@ -25,8 +25,8 @@ not in any particular release.
 
 
 
-The following table lists new library features that have been accepted into
-the C++17 working draft. The "Proposal" column provides a link to the
+The following table lists new library features that are included in
+the C++17 standard. The "Proposal" column provides a link to the
 ISO C++ committee proposal that describes the feature, while the "Status"
 column indicates the first version of GCC that contains an implementation of
 this feature (if it has been implemented).


RE: [PATCH] improve ifcvt optimization (PR rtl-optimization/89430)

2019-06-20 Thread JiangNing OS
Hi Jeff,

Appreciate your effort to review my patch! I've updated my patch as attached. 
See my answers below.

> in current function, so the store speculation can be avoided.
> So at a high level should we be doing this in gimple rather than RTL?
> We're going to have a lot more information about types, better
> infrastructure for looking at uses/defs, access to the alias oracle, we should
> be able to accurately distinguish between potentially shared objects vs those
> which are local to the thread, etc.  We lose the low level costing information
> though.
> 
> I'm still going to go through the patch and do some level of review, but I do
> think we need to answer the higher level question though.
> 
I have the following reasons,

1) Following the clue Richard B gave me before about parameter --param 
allow-store-data-races,
I did check the middle-end pass tree-if-conv, but I think this pass at the 
moment doesn't work
for the issue I'm trying to solve. Tree-if-conv is to do if conversion for 
loop, and its final goal is to
help loop vectorization, while my case doesn't have a loop at all. 
2) My current solution fits into current back-end if-conversion pass very well. 
I don't want to invent
a new framework to solve this relatively small issue. Besides, this back-end 
patch doesn't only
enhance store speculation detection, but also fixes a bug in the original code. 

> Nits: We typically refer to parameters, variables, etc in comments using
> upper case.  You'll need to review the entire patch for these nits.
> 
> So perhaps the comment should be something like:
> 
> /* Return true if X, a MEM expression, is on the stack.  A_INSN contains
>    X if A_INSN exists.  */
> 
Fixed in attached new patch.

> 
> Just from a design standpoint, what are the consequences if this function
> returns true for something that isn't actually in the stack or false for
> something that is on the stack?
> 
If noce_mem_is_on_stack returns true for something that isn't actually in the 
stack, 
it could potentially introduce store speculation, then the if-conversion 
optimization
will be incorrect. If this function returns false for something that is on 
stack, it doesn't
matter, because the optimization will not be triggered. 

> Nit: Space between the function name and its open paren for arguments.  ie
> 
> if (fixed_base_plus_p (a)
>  ^
> I see other instances of this nit, please review the patch and correct them.
> 
Fixed in attached new patch.

> 
> > +
> > +  if (!a_insn)
> > +return false;
> I'm not sure what the calling profile is for this function, but this is a 
> cheaper
> test, so you might consider moving it before the test of fixed_base_plus_p.
> 
Fixed in attached new patch.

> 
> > +
> > +  if (!reg_mentioned_p (x, a_insn))
> > +return false;
> > +
> > +  /* Check if x is on stack. Assume a mem expression using registers
> > + related to stack register is always on stack. */
> > + FOR_EACH_INSN_USE (use, a_insn)
> > +if (reg_mentioned_p (DF_REF_REG (use), x)
> > +&& bitmap_bit_p (bba_sets_must_be_sfp, DF_REF_REGNO (use)))
> > +  return true;
> > +
> > +  return false;
> > +}
> So is X always a MEM?  Just wanted to make sure I understand.
> reg_mentioned_p will do the right thing (using rtx_equal_p) for the
> comparison.
> 
Yes. X is always a MEM. There is an assertion for this in the code above.

> 
> > +
> > +/* Always return true, if there is a dominating write.
> > +
> > +   When there is a dominating read from memory on stack,
> > +   1) if x = a is a memory read, return true.
> > +   2) if x = a is a memory write, return true if the memory is on stack.
> > +  This is the guarantee the memory is *not* readonly. */
> > +
> > +static bool
> > +noce_valid_for_dominating (basic_block bb, rtx_insn *a_insn,
> > +   const_rtx x, bool is_store) {
> > +  rtx_insn *insn;
> > +  rtx set;
> > +
> > +  gcc_assert (MEM_P (x));
> > +
> > +  FOR_BB_INSNS (bb, insn)
> > +{
> > +  set = single_set (insn);
> > +  if (!set)
> > +continue;
> > +
> > +  /* Dominating store */
> > +  if (rtx_equal_p (x, SET_DEST (set)))
> > +return true;
> > +
> > +  /* Dominating load */
> > +  if (rtx_equal_p (x, SET_SRC (set)))
> > +if (is_store && noce_mem_is_on_stack (a_insn, x))
> > +  return true;
> > +}
> > +
> > +  return false;
> > +}
> So what would be the consequences here of returning false when in fact
> there was a dominating read or write?  That could easily happen if the
> dominating read or write was not a single_set.
If it returns false when in fact there is a dominating read or write, the 
optimization will not
be triggered, because noce_mem_maybe_invalid_p will be true, which is playing 
the same
role as may_trap_or_fault_p.

> 
> I'm guessing that from a design standpoint you're trying to find cases where
> you know an object was written to within the block and does not escape.  So
> return

Re: AARCH64 configure check for gas -mabi support

2019-06-20 Thread Thomas Schwinge
Hi!

I was just building an aarch64 cross-compiler (indeed compiler only:
'make all-gcc'), and then wanted to check something in gimplification
('-S -fdump-tree-gimple'), with '-mabi=ilp32', which told me: "cc1:
error: assembler does not support '-mabi=ilp32'".  That's unexpected, as
for '-S' GCC isn't even going to invoke the assembler.  It's coming from
this change:

On Wed, 11 Dec 2013 13:57:59 +0100, Christophe Lyon 
 wrote:
> Committed on Kugan's behalf as rev 205891.
> 
> On 11 December 2013 13:27, Marcus Shawcroft  wrote:
> > On 10/12/13 20:23, Kugan wrote:
> >
> >> gcc/
> >>
> >> +2013-12-11  Kugan Vivekanandarajah  
> >> +   * configure.ac: Add check for aarch64 assembler -mabi support.
> >> +   * configure: Regenerate.
> >> +   * config.in: Regenerate.
> >> +   * config/aarch64/aarch64-elf.h (ASM_MABI_SPEC): New define.
> >> +   (ASM_SPEC): Update to substitute -mabi with ASM_MABI_SPEC.
> >> +   * config/aarch64/aarch64.h (aarch64_override_options):  Issue
> >> error if
> >> +   assembler does not support -mabi and option ilp32 is selected.
> >> +   * doc/install.texi: Added note that building gcc 4.9 and after
> >> with pre
> >> +   2.24 binutils will not support -mabi=ilp32.
> >> +
> >>
> >
> > Kugan, Thanks for sorting this out. OK to commit.
> >
> > /Marcus

Specifically:

--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5187,6 +5187,13 @@ aarch64_override_options (void)
   aarch64_parse_tune ();
 }

+#ifndef HAVE_AS_MABI_OPTION
+  /* The compiler may have been configured with 2.23.* binutils, which does
+ not have support for ILP32.  */
+  if (TARGET_ILP32)
+error ("Assembler does not support -mabi=ilp32");
+#endif

Why is that necessary?  Won't the assembler itself tell the user that it
"does not support -mabi=ilp32", thus this check can be removed?  If not,
can a condition simply be added here to only emit this error if we're
indeed going to invoke the assembler?

(For my own testing, I just locally disabled that, of course.)


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH], PowerPC PR90822 (cleanup lfiwax, lfiwzx generation)

2019-06-20 Thread Segher Boessenkool
Hi!

On Tue, Jun 18, 2019 at 01:53:36PM -0400, Michael Meissner wrote:
> On Tue, Jun 18, 2019 at 06:37:54AM -0500, Segher Boessenkool wrote:
> > On Mon, Jun 17, 2019 at 05:24:37PM -0400, Michael Meissner wrote:
> > > I wrote the code to generate LFIWAX and LFIWZX originally for the power7 
> > > in the
> > > 2010 time frame.  At the time, we did not allow SImode to go into floating
> > > point and vector registers.  As part of the power9 work, we now allow 
> > > SImode to
> > > go into FP/vector registers for 64-bit code targeting -mcpu=power8 
> > > or
> > > higher.  But we never went back and tweaked the LFIWAX/LFIWZX support.
> > 
> > Why do we allow it only in 64-bit mode?  I mean, it sounds like only
> > handling 64-bit mode causes us to have more code and more complexity
> > instead of less.
> 
> The main reason is extendsidi2 and zero_extendsidi2.  These are not enabled on
> 32-bit (due to the EXTSI mode iterator),

That's no reason.  Just change that iterator; see below.

> so that the common code is done to do
> sign/zero extension.  And I felt that if you allowed it, the compiler would
> move the extensions to the fp/vector unit.  Note that direct moves of 64-bit
> items to/from the GPRs is somewhat messy.

Yes, you need to set up costing properly (but you have to *anyway*); and
you might want to expand things to fit GPRs, so that GPRs are used
preferably, and VSRs are only used if there is a benefit to that.  E.g.
even if you allow DImode in VSRs, don't expand normal DImode ops to one
pseudo.

> > > In general, the 32-bit code seems to generate a lot less instructions,
> > > including fewer lfiwax/lfiwzx instructions.  On power8/power9 32-bit code,
> > > there was more mtvsrwz mtvsrwa instructions.
> > 
> > Interesting.  Is that caused by less register pressure?

?

> > > --- gcc/config/rs6000/rs6000.md   (revision 272166)
> > > +++ gcc/config/rs6000/rs6000.md   (working copy)
> > 
> > This patch is very hard to read.  It mixes insertions and deletions of
> > different definitions, where the only thing they have in common is some
> > braces or parens or whitespace usually.
> 
> I was trying to move things so that related things were together (i.e. the
> basic lfiwax and lfiwzx patterns and the two define_insn_and_splits that
> generate it).  I tend to think that when you look at the code and not the
> patches, that it makes more sense.

There are 14k+ lines of rs6000.md...  I prefer looking at diffs ;-)

> > > +; On 32-bit systems, we need to have special versions of LFIWAX and 
> > > LFIWZX because
> > > +; the sign/zero extend insns are not defined.
> > 
> > I don't understand what this means.
> 
> See above about EXTSI.  For reference here is the code from rs6000.md.
> 
> ; Everything we can extend SImode to.
> (define_mode_iterator EXTSI [(DI "TARGET_POWERPC64")])

So change it?  Condition "TARGET_POWERPC64 || TARGET_VSX", perhaps.

If things have proper costs, and you expand the named patterns to just
the GPR version, all should work fine.

> (define_insn "zero_extendsi2"

So you probably want a separate define_expand for this name, and maybe
only have that for TARGET_POWERPC64 even?

> > > +(define_insn_and_split "lfiwax"
> > 
> > This could use a better name?  Why is it separate from extendsidi2 anyway?
> 
> I was using the name that is currently in the code, i.e. the instruction.

But it is not, it is one of four different insns, _or_ a split even.

> > > +  [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wa,wa,v,v")
> > > + (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r,v,v")]
> > >  UNSPEC_LFIWAX))]
> > >"TARGET_HARD_FLOAT && TARGET_LFIWAX"
> > >"@
> > > lfiwax %0,%y1
> > > lxsiwax %x0,%y1
> > > mtvsrwa %x0,%1
> > > +   vextsw2d %0,%1
> > > +   #"
> > > +  "&& reload_completed && TARGET_P8_VECTOR && !TARGET_P9_VECTOR
> > > +   && altivec_register_operand (operands[1], SImode)"
> > 
> > "&& reload_completed && which_alternative == 3" works fine for that; but
> > just "&& reload_completed" should work as well, this is the only alternative
> > with "#" template.
> 
> No, it has been my experience that if you do not limit the split, that it will
> be done.  It does not look for the '#' code.

Ah yes, that's for the split-during-output only.  So use which_alternative
please.

> > > +;; Keep the SImode -> DImode conversion along with DImode -> SF/DFmode 
> > > through
> > > +;; register allocation so that the register allocator generates a LFIWAX 
> > > or
> > > +;; LXSIWAX instruction instead of a LWA instruction plus a MTVSRD* 
> > > instruction
> > > +;; on power8 and LWA + STD + LFD on power7/power6 systems.
> > > +
> > > +;; LFIWAX LFIWAX LXSIWAX MTVSRWA VEXTSW2D VUPKLSW+SPLAT
> > 
> > Not sure what this line means?
> 
> Those are the instructions generated by the different alternatives.  Similar 
> to
> the lines we have in front of the moves when grouping alternatives and
> attribute setting.

Ah.  Not sure if it helps anything, 

[committed][testsuite] Add missing dg-require-effective-target alloca

2019-06-20 Thread Tom de Vries
Hi,

Add missing dg-require-effective-target alloca.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[testsuite] Add missing dg-require-effective-target alloca

2019-06-20  Tom de Vries  

* gcc.c-torture/compile/pr77754-1.c: Require alloca.
* gcc.c-torture/compile/pr77754-2.c: Same.
* gcc.c-torture/compile/pr77754-3.c: Same.
* gcc.c-torture/compile/pr77754-4.c: Same.
* gcc.c-torture/compile/pr77754-5.c: Same.
* gcc.c-torture/compile/pr77754-6.c: Same.
* gcc.c-torture/compile/pr87110.c: Same.
* gcc.c-torture/execute/pr86528.c: Same.
* gcc.dg/Walloca-larger-than-2.c: Same.
* gcc.dg/Walloca-larger-than.c: Same.
* gcc.dg/Warray-bounds-41.c: Same.
* gcc.dg/Wrestrict-17.c: Same.
* gcc.dg/Wstrict-overflow-27.c: Same.
* gcc.dg/Wstringop-truncation-3.c: Same.
* gcc.dg/pr78902.c: Same.
* gcc.dg/pr87099.c: Same.
* gcc.dg/pr87320.c: Same.
* gcc.dg/pr89045.c: Same.
* gcc.dg/strlenopt-62.c: Same.
* gcc.dg/tree-ssa/alias-37.c: Same.

---
 gcc/testsuite/gcc.c-torture/compile/pr77754-1.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr77754-2.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr77754-3.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr77754-4.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr77754-5.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr77754-6.c | 1 +
 gcc/testsuite/gcc.c-torture/compile/pr87110.c   | 1 +
 gcc/testsuite/gcc.c-torture/execute/pr86528.c   | 1 +
 gcc/testsuite/gcc.dg/Walloca-larger-than-2.c| 1 +
 gcc/testsuite/gcc.dg/Walloca-larger-than.c  | 1 +
 gcc/testsuite/gcc.dg/Warray-bounds-41.c | 1 +
 gcc/testsuite/gcc.dg/Wrestrict-17.c | 1 +
 gcc/testsuite/gcc.dg/Wstrict-overflow-27.c  | 1 +
 gcc/testsuite/gcc.dg/Wstringop-truncation-3.c   | 1 +
 gcc/testsuite/gcc.dg/pr78902.c  | 1 +
 gcc/testsuite/gcc.dg/pr87099.c  | 1 +
 gcc/testsuite/gcc.dg/pr87320.c  | 1 +
 gcc/testsuite/gcc.dg/pr89045.c  | 1 +
 gcc/testsuite/gcc.dg/strlenopt-62.c | 1 +
 gcc/testsuite/gcc.dg/tree-ssa/alias-37.c| 1 +
 20 files changed, 20 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-1.c
index 48587f7ca69..be7ee303dd2 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-1.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-2.c
index 411f270a16a..d088961963d 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-2.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-2.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-3.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-3.c
index c6ff9606200..fb25e234fe2 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-3.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-3.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-4.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-4.c
index f0274051ad0..1c5c4619a33 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-4.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-4.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-5.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-5.c
index a9440ddb1fa..82c93d9debd 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-5.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-5.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr77754-6.c 
b/gcc/testsuite/gcc.c-torture/compile/pr77754-6.c
index 2e7df3b6774..1b4304381ce 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr77754-6.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr77754-6.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /* PR c/77754 */
 
 int fn3();
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr87110.c 
b/gcc/testsuite/gcc.c-torture/compile/pr87110.c
index 8428d3d120a..c2eba54a5c7 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr87110.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr87110.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 enum a { b, c };
 struct d {
   _Bool e;
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr86528.c 
b/gcc/testsuite/gcc.c-torture/execute/pr86528.c
index 2a7b0110d64..9992faf249d 100644
--- a/gcc/testsuite/gcc.c-torture/execute/pr86528.c
+++ b/gcc/testsuite/gcc.c-torture/execute/pr86528.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target alloca }
 /*

[committed][testsuite] Add missing dg-require-effective-target label_values

2019-06-20 Thread Tom de Vries
Hi,

Add missing dg-require-effective-target label_values.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[testsuite] Add missing dg-require-effective-target label_values

2019-06-20  Tom de Vries  

* gcc.c-torture/compile/pr89280.c: Require label_values.
* gcc.dg/pr89737.c: Same.
* gcc.dg/pr90082.c: Same.
* gcc.dg/torture/pr89135.c: Same.
* gcc.dg/torture/pr89247.c: Same.
* gcc.dg/torture/pr90071.c: Same.

---
 gcc/testsuite/gcc.c-torture/compile/pr89280.c | 1 +
 gcc/testsuite/gcc.dg/pr89737.c| 1 +
 gcc/testsuite/gcc.dg/pr90082.c| 1 +
 gcc/testsuite/gcc.dg/torture/pr89135.c| 1 +
 gcc/testsuite/gcc.dg/torture/pr89247.c| 1 +
 gcc/testsuite/gcc.dg/torture/pr90071.c| 1 +
 6 files changed, 6 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr89280.c 
b/gcc/testsuite/gcc.c-torture/compile/pr89280.c
index 9db9965172f..ceb15387522 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr89280.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr89280.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target label_values }
 /* PR tree-optimization/89280 */
 
 int a;
diff --git a/gcc/testsuite/gcc.dg/pr89737.c b/gcc/testsuite/gcc.dg/pr89737.c
index cd3dc81769e..5f9ecbda08d 100644
--- a/gcc/testsuite/gcc.dg/pr89737.c
+++ b/gcc/testsuite/gcc.dg/pr89737.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target label_values } */
 /* { dg-options "-O2 -fdump-tree-profile_estimate" } */
 
 int a, b;
diff --git a/gcc/testsuite/gcc.dg/pr90082.c b/gcc/testsuite/gcc.dg/pr90082.c
index 663a171821b..af741ff3b4c 100644
--- a/gcc/testsuite/gcc.dg/pr90082.c
+++ b/gcc/testsuite/gcc.dg/pr90082.c
@@ -1,5 +1,6 @@
 /* PR rtl-optimization/90082 */
 /* { dg-do compile } */
+/* { dg-require-effective-target label_values } */
 /* { dg-options "-O1 -fnon-call-exceptions -ftrapv" } */
 
 void *buf[5];
diff --git a/gcc/testsuite/gcc.dg/torture/pr89135.c 
b/gcc/testsuite/gcc.dg/torture/pr89135.c
index 8a93e8983df..f1c6a5d5473 100644
--- a/gcc/testsuite/gcc.dg/torture/pr89135.c
+++ b/gcc/testsuite/gcc.dg/torture/pr89135.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target label_values } */
 
 typedef __INTPTR_TYPE__ intptr_t;
 intptr_t a, b, c, d;
diff --git a/gcc/testsuite/gcc.dg/torture/pr89247.c 
b/gcc/testsuite/gcc.dg/torture/pr89247.c
index 558e89e222a..3b60f91d93b 100644
--- a/gcc/testsuite/gcc.dg/torture/pr89247.c
+++ b/gcc/testsuite/gcc.dg/torture/pr89247.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target label_values } */
 
 int *a;
 void b()
diff --git a/gcc/testsuite/gcc.dg/torture/pr90071.c 
b/gcc/testsuite/gcc.dg/torture/pr90071.c
index bfa7239139a..15d27899319 100644
--- a/gcc/testsuite/gcc.dg/torture/pr90071.c
+++ b/gcc/testsuite/gcc.dg/torture/pr90071.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target label_values } */
 
 int a;
 static int b;


[committed][testsuite] Add missing dg-require-effective-target indirect_jumps

2019-06-20 Thread Tom de Vries
Hi,

Add missing dg-require-effective-target indirect_jumps.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[testsuite] Add missing dg-require-effective-target indirect_jumps

2019-06-20  Tom de Vries  

* gcc.dg/pr89737.c: Require indirect_jumps.
* gcc.dg/torture/pr87693.c: Same.
* gcc.dg/torture/pr89135.c: Same.
* gcc.dg/torture/pr90071.c: Same.

---
 gcc/testsuite/gcc.dg/pr89737.c | 1 +
 gcc/testsuite/gcc.dg/torture/pr87693.c | 1 +
 gcc/testsuite/gcc.dg/torture/pr89135.c | 1 +
 gcc/testsuite/gcc.dg/torture/pr90071.c | 1 +
 4 files changed, 4 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/pr89737.c b/gcc/testsuite/gcc.dg/pr89737.c
index 5f9ecbda08d..7dc48cdce98 100644
--- a/gcc/testsuite/gcc.dg/pr89737.c
+++ b/gcc/testsuite/gcc.dg/pr89737.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target indirect_jumps } */
 /* { dg-require-effective-target label_values } */
 /* { dg-options "-O2 -fdump-tree-profile_estimate" } */
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr87693.c 
b/gcc/testsuite/gcc.dg/torture/pr87693.c
index 802560dd347..b4ff59421c1 100644
--- a/gcc/testsuite/gcc.dg/torture/pr87693.c
+++ b/gcc/testsuite/gcc.dg/torture/pr87693.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target indirect_jumps } */
 
 void f (void);
 void g (void);
diff --git a/gcc/testsuite/gcc.dg/torture/pr89135.c 
b/gcc/testsuite/gcc.dg/torture/pr89135.c
index f1c6a5d5473..278303f98a6 100644
--- a/gcc/testsuite/gcc.dg/torture/pr89135.c
+++ b/gcc/testsuite/gcc.dg/torture/pr89135.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target indirect_jumps } */
 /* { dg-require-effective-target label_values } */
 
 typedef __INTPTR_TYPE__ intptr_t;
diff --git a/gcc/testsuite/gcc.dg/torture/pr90071.c 
b/gcc/testsuite/gcc.dg/torture/pr90071.c
index 15d27899319..702c143170e 100644
--- a/gcc/testsuite/gcc.dg/torture/pr90071.c
+++ b/gcc/testsuite/gcc.dg/torture/pr90071.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
+/* { dg-require-effective-target indirect_jumps } */
 /* { dg-require-effective-target label_values } */
 
 int a;


[committed][testsuite] Add missing dg-require-effective-target global_constructor

2019-06-20 Thread Tom de Vries
Hi,

Add missing dg-require-effective-target global_constructor.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[testsuite] Add missing dg-require-effective-target global_constructor

2019-06-20  Tom de Vries  

* gcc.dg/pr90866-2.c: Require global_constructor.

---
 gcc/testsuite/gcc.dg/pr90866-2.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.dg/pr90866-2.c b/gcc/testsuite/gcc.dg/pr90866-2.c
index 8c110498154..38eb8742859 100644
--- a/gcc/testsuite/gcc.dg/pr90866-2.c
+++ b/gcc/testsuite/gcc.dg/pr90866-2.c
@@ -1,6 +1,8 @@
 /* PR tree-optimization/90866 - ICE in fold_binary_loc, at fold-const.c:9827
{ dg-do compile  }
-   { dg-options "-O2 -fsanitize=thread" } */
+   { dg-require-effective-target global_constructor }
+   { dg-options "-O2 -fsanitize=thread" }
+ */
 
 typedef enum { a } b;
 typedef struct {


[committed][testsuite] Add missing dg-require-effective-target nonlocal_goto

2019-06-20 Thread Tom de Vries
Hi,

Add missing dg-require-effective-target nonlocal_goto.

Tested on nvptx.

Committed to trunk.

Thanks,
- Tom

[testsuite] Add missing dg-require-effective-target nonlocal_goto

2019-06-20  Tom de Vries  

* gcc.c-torture/compile/pr89280.c: Require nonlocal_goto.
* gcc.dg/pr88870.c: Same.
* gcc.dg/pr90082.c: Same.

---
 gcc/testsuite/gcc.c-torture/compile/pr89280.c | 1 +
 gcc/testsuite/gcc.dg/pr88870.c| 1 +
 gcc/testsuite/gcc.dg/pr90082.c| 1 +
 3 files changed, 3 insertions(+)

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr89280.c 
b/gcc/testsuite/gcc.c-torture/compile/pr89280.c
index ceb15387522..15b6e7051f8 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr89280.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr89280.c
@@ -1,3 +1,4 @@
+// { dg-require-effective-target nonlocal_goto }
 // { dg-require-effective-target label_values }
 /* PR tree-optimization/89280 */
 
diff --git a/gcc/testsuite/gcc.dg/pr88870.c b/gcc/testsuite/gcc.dg/pr88870.c
index 3f46f32f3ee..81f686bd972 100644
--- a/gcc/testsuite/gcc.dg/pr88870.c
+++ b/gcc/testsuite/gcc.dg/pr88870.c
@@ -1,5 +1,6 @@
 /* PR rtl-optimization/88870 */
 /* { dg-do compile } */
+/* { dg-require-effective-target nonlocal_goto } */
 /* { dg-options "-O1 -fexceptions -fnon-call-exceptions -ftrapv 
-fno-tree-dominator-opts" } */
 
 int a, b;
diff --git a/gcc/testsuite/gcc.dg/pr90082.c b/gcc/testsuite/gcc.dg/pr90082.c
index af741ff3b4c..a8d76bbc78e 100644
--- a/gcc/testsuite/gcc.dg/pr90082.c
+++ b/gcc/testsuite/gcc.dg/pr90082.c
@@ -1,5 +1,6 @@
 /* PR rtl-optimization/90082 */
 /* { dg-do compile } */
+/* { dg-require-effective-target nonlocal_goto } */
 /* { dg-require-effective-target label_values } */
 /* { dg-options "-O1 -fnon-call-exceptions -ftrapv" } */
 


Re: AARCH64 configure check for gas -mabi support

2019-06-20 Thread Kugan Vivekanandarajah
Hi Thomas,

On Thu, 20 Jun 2019 at 20:04, Thomas Schwinge  wrote:
>
> Hi!
>
> I was just building an aarch64 cross-compiler (indeed compiler only:
> 'make all-gcc'), and then wanted to check something in gimplification
> ('-S -fdump-tree-gimple'), with '-mabi=ilp32', which told me: "cc1:
> error: assembler does not support '-mabi=ilp32'".  That's unexpected, as
> for '-S' GCC isn't even going to invoke the assembler.  It's coming from
> this change:
>
> On Wed, 11 Dec 2013 13:57:59 +0100, Christophe Lyon 
>  wrote:
> > Committed on Kugan's behalf as rev 205891.
> >
> > On 11 December 2013 13:27, Marcus Shawcroft  
> > wrote:
> > > On 10/12/13 20:23, Kugan wrote:
> > >
> > >> gcc/
> > >>
> > >> +2013-12-11  Kugan Vivekanandarajah  
> > >> +   * configure.ac: Add check for aarch64 assembler -mabi support.
> > >> +   * configure: Regenerate.
> > >> +   * config.in: Regenerate.
> > >> +   * config/aarch64/aarch64-elf.h (ASM_MABI_SPEC): New define.
> > >> +   (ASM_SPEC): Update to substitute -mabi with ASM_MABI_SPEC.
> > >> +   * config/aarch64/aarch64.h (aarch64_override_options):  Issue
> > >> error if
> > >> +   assembler does not support -mabi and option ilp32 is selected.
> > >> +   * doc/install.texi: Added note that building gcc 4.9 and after
> > >> with pre
> > >> +   2.24 binutils will not support -mabi=ilp32.
> > >> +
> > >>
> > >
> > > Kugan, Thanks for sorting this out. OK to commit.
> > >
> > > /Marcus
>
> Specifically:
>
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -5187,6 +5187,13 @@ aarch64_override_options (void)
>aarch64_parse_tune ();
>  }
>
> +#ifndef HAVE_AS_MABI_OPTION
> +  /* The compiler may have been configured with 2.23.* binutils, which 
> does
> + not have support for ILP32.  */
> +  if (TARGET_ILP32)
> +error ("Assembler does not support -mabi=ilp32");
> +#endif
>
> Why is that necessary?  Won't the assembler itself tell the user that it
> "does not support -mabi=ilp32", thus this check can be removed?  If not,
> can a condition simply be added here to only emit this error if we're
> indeed going to invoke the assembler?
Current binutils will, but binutils 2.23 and before didn't.
Specifically, with  2.23.2, bootstrap was failing. That is why we
needed this.

Thanks,
Kugan


>
> (For my own testing, I just locally disabled that, of course.)
>
>
> Grüße
>  Thomas


Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Hongtao Liu
On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
>
> On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> >
> > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > >
> > > On 6/7/19, H.J. Lu  wrote:
> > >
> > > >> > > +/* Register pair.  */
> > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > >> > >
> > > >> > > I think
> > > >> > >
> > > >> > > INT_MODE (P2QI, 16);
> > > >> > > INT_MODE (P2HI, 32);
> > > >> > >
> > > >> > > with the above subreg approach should work.
Yes, it works.

But I haven't figured out how pass_reload correctly handles such subregs.
Do you have suggestions, such as "which function I should dig into first" or
"which piece of code handles subregs"?
> > > >> > >
> > > >> >
> > > >> > I don't think subreg works on pseudo registers with non-zero
> > > >> > offset.  validate_subreg has
> > > >> >
> > > >> >  if (maybe_lt (osize, regsize)
> > > >> >   && ! (lra_in_progress && (FLOAT_MODE_P (imode) || FLOAT_MODE_P
> > > > >> > (omode))))
> > > >> > {
> > > >> >   /* It is invalid for the target to pick a register size for a
> > > >> > mode
> > > >> >  that isn't ordered wrt to the size of that mode.  */
> > > >> >   poly_uint64 block_size = ordered_min (isize, regsize);
> > > >> >   unsigned int start_reg;
> > > >> >   poly_uint64 offset_within_reg;
> > > >> >   if (!can_div_trunc_p (offset, block_size, &start_reg,
> > > >> > &offset_within_reg)
> > > >> >   || (BYTES_BIG_ENDIAN
> > > >> >   ? maybe_ne (offset_within_reg, block_size - osize)
> > > >> >   : maybe_ne (offset_within_reg, 0U)))
> > > >> > return false;
> > > >>
> > > >> It works with SImode subregs of DImode values on 32bit targets. Please
> > > >> look for calls to gen_highpart, one concrete example is in
> > > >> atomic_compare_and_swap.
> > > >>
> > > >
> > > > It works because of
> > > >
> > > > #define REGMODE_NATURAL_SIZE(MODE) UNITS_PER_WORD
> > > >
> > > > and only works for the high part of SImode of DImode.
> > > >
> > > > P2QI and P2HI are 2 special modes of mask register pair for
> > > > 2 instructions.   Do we want to make them more generic?
> > >
> > > If enhancing the referred define means that we don't need two
> > > artificial instructions and leave all heavy lifting to the existing
> > Do you mean that we take P2HI and P2QI as normal vector modes,
> > and reuse ix86_expand_vector_* things?
> > But still two artificial instructions can't be avoided.
> > > generic functionality, then this is the way to go.
>
> No, declare them as integer modes and use subregs to access high and
> low register. This should work in the same way as SImode hard
> registers are accessed in DImode pair for 32bit targets.
>
> Uros.

Update patch.

-- 
BR,
Hongtao
Index: gcc/ChangeLog
===
--- gcc/ChangeLog	(revision 271984)
+++ gcc/ChangeLog	(working copy)
@@ -1,3 +1,53 @@
+2019-06-06  Hongtao Liu  
+	H.J. Lu  
+	Olga Makhotina  
+
+	* common/config/i386/i386-common.c
+	(OPTION_MASK_ISA_AVX512VP2INTERSECT_SET,
+	OPTION_MASK_ISA_AVX512VP2INTERSECT_UNSET): New macros.
+	(OPTION_MASK_ISA2_AVX512F_UNSET): Add
+	OPTION_MASK_ISA_AVX512VP2INTERSECT_UNSET.
+	(ix86_handle_option): Handle -mavx512vp2intersect.
+	* config/i386/avx512vp2intersectintrin.h: New.
+	* config/i386/avx512vp2intersectvlintrin.h: New.
+	* config/i386/cpuid.h (bit_AVX512VP2INTERSECT): New.
+	* config/i386/driver-i386.c (host_detect_local_cpu): Detect
+	AVX512VP2INTERSECT.
+	* config/i386/i386-builtin-types.def: Add new types.
+	* config/i386/i386-builtin.def: Add new builtins.
+	* config/i386/i386-builtins.c: (enum processor_features): Add
+	F_AVX512VP2INTERSECT.
+	(static const _isa_names_table isa_names_table): Ditto.
+	* config/i386/i386-c.c (ix86_target_macros_internal): Define
+	__AVX512VP2INTERSECT__.
+	* config/i386/i386-expand.c (ix86_expand_builtin): Expand
+	IX86_BUILTIN_2INTERSECTD512, IX86_BUILTIN_2INTERSECTQ512,
+	IX86_BUILTIN_2INTERSECTD256, IX86_BUILTIN_2INTERSECTQ256,
+	IX86_BUILTIN_2INTERSECTD128, IX86_BUILTIN_2INTERSECTQ128.
+	* config/i386/i386-modes.def (P2QI, P2HI): New modes.
+	* config/i386/i386-options.c (ix86_target_string): Add
+	-mavx512vp2intersect.
+	(ix86_option_override_internal): Handle AVX512VP2INTERSECT.
+	* config/i386/i386.c (ix86_hard_regno_nregs): Allocate two regs for
+	P2HImode and P2QImode.
+	(ix86_hard_regno_mode_ok): Register pair only starts at even hardreg
+	number for P2QImode and P2HImode.
+	* config/i386/i386.h (TARGET_AVX512VP2INTERSECT,
+	TARGET_AVX512VP2INTERSECT_P): New.
+	(PTA_AVX512VP2INTERSECT): Ditto.
+	* config/i386/i386.opt: Add -mavx512vp2intersect.
+	* config/i386/immintrin.h: Include avx512vp2intersectintrin.h and
+	avx512vp2intersectvlintrin.h.
+	* config/i386/sse.md (define_c_enum "unspec"): Add UNSPEC_VP2INTERSECT.
+	(define_mode_iterator VI48_AVX512VP2VL): New.
+	(avx512vp2intersect

Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Thomas Koenig

Am 20.06.19 um 11:07 schrieb Martin Liška:

On the contrary, decompression
of zstd with zlib will end with:
lto1: internal compiler error: compressed stream: data error


So generating object files on one system and trying to read them
on another system which does not happen to have a particular
library installed would lead to failure?  If that's the case,
I am not sure that this is a good way of handling things.


Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Nathan Sidwell

On 6/20/19 12:34 AM, Jason Merrill wrote:

On Wed, Jun 19, 2019 at 2:47 PM Nathan Sidwell  wrote:



This simple (untested) patch doesn't avoid creating the unnecessary
as-base types, but it should avoid using them in a way that causes
them to be streamed, and should let them be discarded by GC.
Thoughts?


I was thinking something like that might work.

nathan

--
Nathan Sidwell


Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Uros Bizjak
On Thu, Jun 20, 2019 at 12:54 PM Hongtao Liu  wrote:
>
> On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> >
> > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> > >
> > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > > >
> > > > On 6/7/19, H.J. Lu  wrote:
> > > >
> > > > >> > > +/* Register pair.  */
> > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > >> > >
> > > > >> > > I think
> > > > >> > >
> > > > >> > > INT_MODE (P2QI, 16);
> > > > >> > > INT_MODE (P2HI, 32);
> > > > >> > >
> > > > >> > > with the above subreg approach should work.
> Yes, it works.
>
> But i didn't figure out how did pass_reload correctly handle such subreg,
> do you have suggestions such as "which function i can dig into first" or
> "which piece of codes handle subreg"?

I'm really not an expert in this part of the compiler, so I'll leave
the answer for someone else.

> > > > >> > >
> > > > >> >
> > > > >> > I don't think subreg works on pseudo registers with non-zero
> > > > >> > offset.  validate_subreg has
> > > > >> >
> > > > >> >  if (maybe_lt (osize, regsize)
> > > > >> >   && ! (lra_in_progress && (FLOAT_MODE_P (imode) || 
> > > > >> > FLOAT_MODE_P
> > > > >> > (omode
> > > > >> > {
> > > > >> >   /* It is invalid for the target to pick a register size for a
> > > > >> > mode
> > > > >> >  that isn't ordered wrt to the size of that mode.  */
> > > > >> >   poly_uint64 block_size = ordered_min (isize, regsize);
> > > > >> >   unsigned int start_reg;
> > > > >> >   poly_uint64 offset_within_reg;
> > > > >> >   if (!can_div_trunc_p (offset, block_size, &start_reg,
> > > > >> > &offset_within_reg)
> > > > >> >   || (BYTES_BIG_ENDIAN
> > > > >> >   ? maybe_ne (offset_within_reg, block_size - osize)
> > > > >> >   : maybe_ne (offset_within_reg, 0U)))
> > > > >> > return false;
> > > > >>
> > > > >> It works with SImode subregs of DImode values on 32bit targets. 
> > > > >> Please
> > > > >> look for calls to gen_highpart, one concrete example is in
> > > > >> atomic_compare_and_swap.
> > > > >>
> > > > >
> > > > > It works because of
> > > > >
> > > > > #define REGMODE_NATURAL_SIZE(MODE) UNITS_PER_WORD
> > > > >
> > > > > and only works for the high part of SImode of DImode.
> > > > >
> > > > > P2QI and P2HI are 2 special modes of mask register pair for
> > > > > 2 instructions.   Do we want to make them more generic?
> > > >
> > > > If enhancing the referred define means that we don't need two
> > > > artificial instructions and leave all heavy lifting to the existing
> > > Do you mean that we take P2HI and P2QI as normal vector modes,
> > > and reuse ix86_expand_vector_* things?
> > > But still two artificial instructions can't be avoided.
> > > > generic functionality, then this is the way to go.
> >
> > No, declare them as integer modes and use subregs to access high and
> > low register. This should work in the same way as SImode hard
> > registers are accessed in DImode pair for 32bit targets.
> >
> > Uros.
>
> Update patch.

Does gen_lowpart/gen_highpart instead of simplify_gen_subreg work?
These two are just a handy wrapper for simplify_gen_subreg. Other than
that, patch LGTM.

Uros.


Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Martin Liška
On 6/20/19 12:58 PM, Thomas Koenig wrote:
> Am 20.06.19 um 11:07 schrieb Martin Liška:
>> On the contrary, decompression
>> of zstd with zlib will end with:
>> lto1: internal compiler error: compressed stream: data error
> 
> So generating object files on one system and trying to read them
> on another system which does not happen to have a particular
> library installed would lead to failure?  If that's the case,
> I am not sure that this is a good way of handling things.

Yes, but LTO bytecode is not supposed to be a distributable format.

Martin


Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Jan Hubicka
> On 6/20/19 12:58 PM, Thomas Koenig wrote:
> > Am 20.06.19 um 11:07 schrieb Martin Liška:
> >> On the contrary, decompression
> >> of zstd with zlib will end with:
> >> lto1: internal compiler error: compressed stream: data error
> > 
> > So generating object files on one system and trying to read them
> > on another system which does not happen to have a particular
> > library installed would lead to failure?  If that's the case,
> > I am not sure that this is a good way of handling things.
> 
> Yes, but LTO bytecode is not supposed to be a distributable format.

In longer term it should be.  We ought to make it host independent and
stable at least within major releases.
I guess it is still OK to make zstd enabled host build require zstd
enabled gcc elsewhere. Just the error message should be more informative
which I think is not hard to do - both zstd and zlib should have
recognizable header.

Other option is to put this into some common place per file.

Honza
> 
> Martin


[patch, committed] Fix for PR 90937

2019-06-20 Thread Thomas Koenig

Hello world,

I have committed the attached patch to trunk as obvious and simple, to fix
another of the regressions stemming from the "make up formal from actual
arglist" patch.

I will backport this patch to the other affected branches, probably
over the weekend.

Regards

Thomas

2019-06-20  Thomas Koenig  

PR fortran/90937
* trans-types.c (get_formal_from_actual_arglist): Get symbol from
current namespace so it will be freed later.  If symbol is of type
character, get an empty character length.

2019-06-20  Thomas Koenig  

PR fortran/90937
* gfortran.dg/external_procedure_4.f90: New test.
Index: trans-types.c
===
--- trans-types.c	(Revision 272479)
+++ trans-types.c	(Arbeitskopie)
@@ -2997,7 +2997,7 @@ get_formal_from_actual_arglist (gfc_symbol *sym, g
   if (a->expr)
 	{
 	  snprintf (name, GFC_MAX_SYMBOL_LEN, "_formal_%d", var_num ++);
-	  gfc_get_symbol (name, NULL, &s);
+	  gfc_get_symbol (name, gfc_current_ns, &s);
 	  if (a->expr->ts.type == BT_PROCEDURE)
 	{
 	  s->attr.flavor = FL_PROCEDURE;
@@ -3005,6 +3005,10 @@ get_formal_from_actual_arglist (gfc_symbol *sym, g
 	  else
 	{
 	  s->ts = a->expr->ts;
+
+	  if (s->ts.type == BT_CHARACTER)
+		  s->ts.u.cl = gfc_get_charlen ();
+
 	  s->ts.deferred = 0;
 	  s->ts.is_iso_c = 0;
 	  s->ts.is_c_interop = 0;
! { dg-do compile }
! PR fortran/90937 - this used to cause an ICE.
! Original test case by Toon Moene.
subroutine lfidiff

   implicit none

   contains 

  subroutine grlfi(cdnom)

 character(len=*) cdnom(:)
 character(len=len(cdnom)) clnoma

 call lficas(clnoma)

  end subroutine grlfi

end subroutine lfidiff


Re: [PATCH] Adding RBIT gcc builtin for ARM

2019-06-20 Thread Wilco Dijkstra
Hi Ayan,

Have you seen https://gcc.gnu.org/bugzilla/show_bug.cgi?id=50481?

Adding support for a generic bitreverse builtin would be very useful
since LLVM already supports this.

Wilco


Re: [PATCH v3 3/3] PR80791 Consider doloop cmp use in ivopts

2019-06-20 Thread Kewen.Lin
Hi Segher,

> On Wed, Jun 19, 2019 at 07:47:34PM +0800, Kewen.Lin wrote:
>> +/* Return true if count register for branch is supported.  */
>> +
>> +static bool
>> +rs6000_have_count_reg_decr_p ()
>> +{
>> +  return flag_branch_on_count_reg;
>> +}
> 
> rs6000 unconditionally supports these instructions, not just when that
> flag is set.  If you need to look at the flag, the *caller* of this new
> hook should, not every implementation of the hook.  So just "return true"
> here?

Good point!  Updated it as hookpod.

>> +/* For doloop use, if the algothrim selects some candidate which invalid for
> 
> "algorithm", "which is invalid".

>> +   some cost like zero rather than original inifite cost.  The point is to
> 
> "infinite"
> 

Thanks for catching!  I should run spelling check next time.  :)

New version attached with comments addressed.


Thanks,
Kewen
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 12f1dfd..e98aba9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1913,7 +1913,7 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
 #undef TARGET_HAVE_COUNT_REG_DECR_P
-#define TARGET_HAVE_COUNT_REG_DECR_P rs6000_have_count_reg_decr_p
+#define TARGET_HAVE_COUNT_REG_DECR_P true
 
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
@@ -39440,14 +39440,6 @@ rs6000_predict_doloop_p (struct loop *loop)
   return true;
 }
 
-/* Return true if count register for branch is supported.  */
-
-static bool
-rs6000_have_count_reg_decr_p ()
-{
-  return flag_branch_on_count_reg;
-}
-
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 46e488f..5477294 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11618,11 +11618,13 @@ loops, and will help ivopts to make some decisions.
 The default version of this hook returns false.
 @end deftypefn
 
-@deftypefn {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P (void)
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
 Return true if the target supports hardware count register for decrement
-and branch.
-The default version of this hook returns false.
-@end deftypefn
+and branch.  This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
 
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int 
@var{&iterations}, const widest_int @var{&iterations_max}, unsigned int 
@var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
diff --git a/gcc/target.def b/gcc/target.def
index ec15a6d..8a64e5b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4246,13 +4246,15 @@ The default version of this hook returns false.",
  bool, (struct loop *loop),
  default_predict_doloop_p)
 
-DEFHOOK
+DEFHOOKPOD
 (have_count_reg_decr_p,
  "Return true if the target supports hardware count register for decrement\n\
-and branch.\n\
-The default version of this hook returns false.",
- bool, (void),
- hook_bool_void_false)
+and branch.  This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
 
 DEFHOOK
 (can_use_doloop_p,
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 7d5859b..71d7f67 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n)
   while (i < n);
 }
 
-/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI current_loop;
@@ -6749,7 +6749,7 @@ find_optimal_iv_set_1 (struct ivopts_data *data, bool 
originalp)
   return set;
 }
 
-/* For doloop use, if the algothrim selects some candidate which invalid for
+/* For doloop use, if the algorithm selects some candidate which is invalid for
later rewrite, fix it up with bind_cand.  */
 
 static void
@@ -7622,7 +7622,7 @@ determine_scaling_factor (struct ivopts_data *data, 
basic_block *body)
 
 /* Find doloop comparison use and set its related bind_cand.  We adjust the
doloop use group cost against various IV cands, it's possible to assign
-   some cost like zero rather than original inifite cost.  The point is to
+   some cost like zero rather than original infinite cost.  The point is to
give more chances to consider other IV cands instead of BIV.  The cost
originally given on doloop use can affect optimal decision because it can
become dead and get eliminated but considered too much here.
@

Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Thomas Koenig

Hi Martin,


LTO bytecode is not supposed to be a distributable format.


One of my dreams is to make libgfortran LTO-clean.  There is
a lot of performance to be gained both in I/O (where a huge
number of special cases could be shortcut by LTO, because
hardly any program uses them all) and in array intrinsics,
where seeing through the array descriptors can also lead
to large benefits.  This is PR 77278.

Once this is achieved, it would make sense to distribute
libgfortran.a as a library of fat object files.

Regards

Thomas





Re: [PATCH v3 3/3] PR80791 Consider doloop cmp use in ivopts

2019-06-20 Thread Kewen.Lin
Hi,

Sorry, the previous patch is incomplete.
New one attached.  Sorry for the inconvenience.

on 2019/6/20 下午8:08, Kewen.Lin wrote:
> Hi Segher,
> 
>> On Wed, Jun 19, 2019 at 07:47:34PM +0800, Kewen.Lin wrote:
>>> +/* Return true if count register for branch is supported.  */
>>> +
>>> +static bool
>>> +rs6000_have_count_reg_decr_p ()
>>> +{
>>> +  return flag_branch_on_count_reg;
>>> +}
>>
>> rs6000 unconditionally supports these instructions, not just when that
>> flag is set.  If you need to look at the flag, the *caller* of this new
>> hook should, not every implementation of the hook.  So just "return true"
>> here?
> 
> Good point!  Updated it as hookpod.
> 
>>> +/* For doloop use, if the algothrim selects some candidate which invalid 
>>> for
>>
>> "algorithm", "which is invalid".
> 
>>> +   some cost like zero rather than original inifite cost.  The point is to
>>
>> "infinite"
>>
> 
> Thanks for catching!  I should run spelling check next time.  :)
> 
> New version attached with comments addressed.
> 
> 
> Thanks,
> Kewen
> 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6667cd0..e98aba9 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1912,6 +1912,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #undef TARGET_PREDICT_DOLOOP_P
 #define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
 
+#undef TARGET_HAVE_COUNT_REG_DECR_P
+#define TARGET_HAVE_COUNT_REG_DECR_P true
+
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
 
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index c2aa4d0..5477294 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11618,6 +11618,14 @@ loops, and will help ivopts to make some decisions.
 The default version of this hook returns false.
 @end deftypefn
 
+@deftypevr {Target Hook} bool TARGET_HAVE_COUNT_REG_DECR_P
+Return true if the target supports hardware count register for decrement
+and branch.  This count register can't be used as general register since
+moving to/from a general register from/to it is very expensive.
+For the targets with this support, ivopts can take doloop use as zero cost.
+The default value is false.
+@end deftypevr
+
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int 
@var{&iterations}, const widest_int @var{&iterations_max}, unsigned int 
@var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index b4d57b8..5f43b27 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7946,6 +7946,8 @@ to by @var{ce_info}.
 
 @hook TARGET_PREDICT_DOLOOP_P
 
+@hook TARGET_HAVE_COUNT_REG_DECR_P
+
 @hook TARGET_CAN_USE_DOLOOP_P
 
 @hook TARGET_INVALID_WITHIN_DOLOOP
diff --git a/gcc/target.def b/gcc/target.def
index 71b6972..8a64e5b 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4246,6 +4246,16 @@ The default version of this hook returns false.",
  bool, (struct loop *loop),
  default_predict_doloop_p)
 
+DEFHOOKPOD
+(have_count_reg_decr_p,
+ "Return true if the target supports hardware count register for decrement\n\
+and branch.  This count register can't be used as general register since\n\
+moving to/from a general register from/to it is very expensive.\n\
+For the targets with this support, ivopts can take doloop use as zero cost.\n\
+The default value is false.",
+ bool, false)
+
+
 DEFHOOK
 (can_use_doloop_p,
  "Return true if it is possible to use low-overhead loops (@code{doloop_end}\n\
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
index 7d5859b..71d7f67 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ivopts-lt.c
@@ -17,6 +17,7 @@ f1 (char *p, uintptr_t i, uintptr_t n)
   while (i < n);
 }
 
-/* { dg-final { scan-tree-dump-times "PHI" 1 "ivopts" } } */
-/* { dg-final { scan-tree-dump-times "PHI  vuses;
 };
@@ -612,6 +614,9 @@ struct ivopts_data
 
   /* Whether the loop body can only be exited via single exit.  */
   bool loop_single_exit_p;
+
+  /* Whether the loop has doloop comparison use.  */
+  bool doloop_use_p;
 };
 
 /* An assignment of iv candidates to uses.  */
@@ -1528,6 +1533,7 @@ record_group (struct ivopts_data *data, enum use_type 
type)
   group->type = type;
   group->related_cands = BITMAP_ALLOC (NULL);
   group->vuses.create (1);
+  group->bind_cand = NULL;
 
   data->vgroups.safe_push (group);
   return group;
@@ -3724,7 +3730,7 @@ prepare_decl_rtl (tree *expr_p, int *ws, void *data)
Some RTL specific checks seems unable to be checked in gimple, if any new
checks or easy checks _are_ missing here, please add them.  */
 
-static bool ATTRIBUTE_UNUSED
+static bool
 generic_predict_doloop_p (struct ivopts_data *data)
 {
   struct loop *loop = data->current_loop;
@@ -5291,6 +5297

C++ PATCH to add test for c++/89873

2019-06-20 Thread Marek Polacek
I didn't realize my change r270319 had fixed this PR.  So adding the test and
closing the PR.

Tested on x86_64-linux, applying to trunk.

2019-06-20  Marek Polacek  

PR c++/89873
* g++.dg/cpp1y/noexcept1.C: New test.

diff --git gcc/testsuite/g++.dg/cpp1y/noexcept1.C 
gcc/testsuite/g++.dg/cpp1y/noexcept1.C
new file mode 100644
index 000..86e46c96148
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1y/noexcept1.C
@@ -0,0 +1,13 @@
+// PR c++/89873
+// { dg-do compile { target c++14 } }
+
+template  bool b;
+
+template  
+struct C {
+  template  friend int foo() noexcept(b<1>); // { dg-error "not 
usable in a constant expression|different exception specifier" }
+};
+
+template  int foo() noexcept(b<1>);
+
+auto a = C();


Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Jan Hubicka
> On 6/19/19 1:53 PM, Jan Hubicka wrote:
> > > > > -ctype = CLASSTYPE_AS_BASE (ctype);
> > > > > +{
> > > > > +  if (!tree_int_cst_equal (TYPE_SIZE (ctype),
> > > > > +TYPE_SIZE (CLASSTYPE_AS_BASE (ctype
> > > > > +ctype = CLASSTYPE_AS_BASE (ctype);
> > > > > +}
> > > > >  tree clobber = build_clobber (ctype);
> > > 
> > > I have noticed we build a distinct as-base type in rather more cases than
> > > strictly necessary.  For instance when there's a member of reference type 
> > > or
> > > we have a non-trivial dtor. (CLASSTYPE_NON_LAYOUT_POD_P gets set by a 
> > > bunch
> > > of things that don't affect ABI layout)
> > 
> > Avoiding the extra copies at first place would be great. In my
> > understanding the types differ by virtual bases and also by their size
> > since the fake types are not padded to multiply of their alignment.
> > I guess this can be tested ahead of producing the copy and saving some
> > memory...
> > 
> > I am not sure if my C++ FE abilities are on par to implement this tough.
> 
> I don't think it's simple to fix there, just unfortunate.  your
> understanding is correct, and I think your workaround will work. However,
> remember it's possible for T == CLASSTYPE_AS_BASE (T), so might be worth
> checking that before doing the size comparison?
> 
> It'd be great to comment on why you're not just using classtype_as_base
> there.  I suppose I'm serializing this stuff too, with the same
> inefficiencies ...

Hi,
here is updated patch.
Bootstrapped/regtested x86_64-linux, OK?

It would still be nice to avoid copies at least in the common cases -
it is easy to generate many types via templates and having basically
every type twice is not very nice (we also copy all the fields, so
overall memory use can be large).

* decl.c (build_clobber_this): Do not use CLASSTYPE_AS_BASE
when possible.

Index: decl.c
===
--- decl.c  (revision 272506)
+++ decl.c  (working copy)
@@ -15229,7 +15229,20 @@ build_clobber_this ()
 
   tree ctype = current_class_type;
   if (!vbases)
-ctype = CLASSTYPE_AS_BASE (ctype);
+{
+  /* When clobbering base type, we need to be careful to not clobber
+ extra padding at the end of structure or virtual bases, which are
+not considered part of the base by the C++ ABI.
+
+However try to avoid using CLASSTYPE_AS_BASE when possible because
+typically this is the only use in the final intermediate language
+where this type is needed.  Doing so avoids need to stream many
+duplicate type copies to LTO.  */
+  if (ctype != CLASSTYPE_AS_BASE (ctype)
+ && !tree_int_cst_equal (TYPE_SIZE (ctype),
+ TYPE_SIZE (CLASSTYPE_AS_BASE (ctype
+ctype = CLASSTYPE_AS_BASE (ctype);
+}
 
   tree clobber = build_clobber (ctype);
 


Re: [PATCH] Handle '\0' in strcmp in RTL expansion (PR tree-optimization/90892).

2019-06-20 Thread Martin Liška
On 6/18/19 11:56 AM, Martin Liška wrote:
> On 6/18/19 10:23 AM, Martin Liška wrote:
>> On 6/18/19 10:11 AM, Jakub Jelinek wrote:
>>> On Tue, Jun 18, 2019 at 10:07:50AM +0200, Martin Liška wrote:
 diff --git a/gcc/builtins.c b/gcc/builtins.c
 index 3463ffb1539..b58e1e58d4d 100644
 --- a/gcc/builtins.c
 +++ b/gcc/builtins.c
 @@ -7142,6 +7142,20 @@ inline_expand_builtin_string_cmp (tree exp, rtx 
 target)
const char *src_str1 = c_getstr (arg1, &len1);
const char *src_str2 = c_getstr (arg2, &len2);
  
 +  if (src_str1 != NULL)
 +{
 +  unsigned HOST_WIDE_INT str_str1_strlen = strnlen (src_str1, len1);
 +  if (str_str1_strlen + 1 < len1)
 +  len1 = str_str1_strlen + 1;
>>>
>>> You really don't need any of this after strnlen.  strnlen is already
>>> guaranteed to return a number from 0 to len1 inclusive, so you can really
>>> just do:
>>>   if (src_str1 != NULL)
>>> len1 = strnlen (src_str1, len1);
>>>
>>> Jakub
>>>
>>
>> Got it, I'm testing that.
>>
>> Martin
>>
> 
> Ok, there's an off-by-one error in the previous patch candidate.
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Ready to be installed?
> Thanks,
> Martin
> 

Patch to this. I hope this version of the patch is correct.
Jakub?

Thanks,
Martin


Re: [PATCH] [RFC, PGO+LTO] Missed function specialization + partial devirtualization

2019-06-20 Thread Martin Liška
On 6/20/19 8:15 AM, luoxhu wrote:
> Hi Martin,
> 
> On 2019/6/20 09:59, luoxhu wrote:
>>
>>
>> On 2019/6/19 20:18, Martin Liška wrote:
>>> On 6/19/19 10:56 AM, Martin Liška wrote:
 Thank you very much for the numbers. Today, I'm going to prepare the 
 generalization of single-value counter to track N values.
>>>
>>> Ok, here's a patch candidate that does tracking of most common N values. 
>>> For your test-case I can see:
>>>
>>> pr69678.gcda:    01a9:  18:COUNTERS indirect_call 9 counts
>>> pr69678.gcda:   0: 35000 1868707024 17500 969338501 
>>> 17500 0 0 0
>>> pr69678.gcda:   8: 0
>>>
>>> So for now, you'll need to generalize get_most_common_single_value to return
>>> N most common values.
>>>
>>> Eventually we'll need to renamed the counter as it won't be tracking just a 
>>> single value
>>> any longer. I can take care of it.
>>>
>>> Can you please verify that the patch candidate works for you?
>> Thanks, the profile data seems good, I will try it.  I need rebase my patch
>> to trunk first, as there are many conflicts with your previous patch.
> 
> The patch works perfect for me, lots of duplicate code can be removed base
> on that.  Hope you can upstream it soon.  :)

Yep, I'll send it in a couple of hours.

> BTW, I don't need call the get_most_common_single_value function to access
> the histogram values & counters, I will loop access it directly one by one.

No, please do not do it. I would like to see get_most_common_single_value being 
used
for your purpose. You'll have to generalize it, but please no direct accesses
to the histogram values.

Thanks,
Martin

> 
> Thanks
> Xionghu
> 
>>
>>
>>> Thanks,
>>> Martin
>>>
> 



Re: [C++ PATCH] PR c++/90875 - added -Wswitch-outside-range option.

2019-06-20 Thread Matthew Beliveau
Sorry for the last update, I guess I didn't notice the other changes, oops!

This should have all the changes. Let me know if anything went wrong!

Thanks,
Matthew Beliveau

On Tue, Jun 18, 2019 at 1:38 PM Marek Polacek  wrote:
>
> On Tue, Jun 18, 2019 at 01:17:10PM -0400, Matthew Beliveau wrote:
> > Hello,
> >
> > This patch should change the formatting, and move the test files into
> > the appropriate directory!
>
> It doesn't address my other comments, though, so please send a new version
> with that fixed.
>
> Marek
Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-06-20  Matthew Beliveau  

	PR c++/90875 - added -Wswitch-outside-range option
	* doc/invoke.texi (Wswitch-outside-range): Document.

	* c-warn.c (c_do_switch_warnings): Implemented new Wswitch-outside-range
	warning option.

	* c.opt (Wswitch-outside-range): Added new option.
	
	* c-c++-common/Wswitch-outside-range-1.C: New test.
	* c-c++-common/Wswitch-outside-range-2.C: New test.
	* c-c++-common/Wswitch-outside-range-3.C: New test.
	* c-c++-common/Wswitch-outside-range-4.C: New test.

diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c
index 5941c10cddb..743099c75ca 100644
--- gcc/c-family/c-warn.c
+++ gcc/c-family/c-warn.c
@@ -1460,8 +1460,9 @@ c_do_switch_warnings (splay_tree cases, location_t switch_location,
    min_value) >= 0)
 	{
 	  location_t loc = EXPR_LOCATION ((tree) node->value);
-	  warning_at (loc, 0, "lower value in case label range"
-  " less than minimum value for type");
+	  warning_at (loc, OPT_Wswitch_outside_range,
+		 "lower value in case label range less than minimum value"
+		 " for type");
 	  CASE_LOW ((tree) node->value) = convert (TREE_TYPE (cond),
 		   min_value);
 	  node->key = (splay_tree_key) CASE_LOW ((tree) node->value);
@@ -1474,8 +1475,8 @@ c_do_switch_warnings (splay_tree cases, location_t switch_location,
 	  if (node == NULL || !node->key)
 		break;
 	  location_t loc = EXPR_LOCATION ((tree) node->value);
-	  warning_at (loc, 0, "case label value is less than minimum "
-  "value for type");
+	  warning_at (loc, OPT_Wswitch_outside_range, "case label value is"
+  " less than minimum value for type");
 	  splay_tree_remove (cases, node->key);
 	}
 	  while (1);
@@ -1491,8 +1492,8 @@ c_do_switch_warnings (splay_tree cases, location_t switch_location,
    max_value) > 0)
 	{
 	  location_t loc = EXPR_LOCATION ((tree) node->value);
-	  warning_at (loc, 0, "upper value in case label range"
-			  " exceeds maximum value for type");
+	  warning_at (loc, OPT_Wswitch_outside_range, "upper value in case"
+			  " label range exceeds maximum value for type");
 	  CASE_HIGH ((tree) node->value)
 	= convert (TREE_TYPE (cond), max_value);
 	  outside_range_p = true;
@@ -1503,7 +1504,7 @@ c_do_switch_warnings (splay_tree cases, location_t switch_location,
 	 != NULL)
 	{
 	  location_t loc = EXPR_LOCATION ((tree) node->value);
-	  warning_at (loc, 0,
+	  warning_at (loc, OPT_Wswitch_outside_range,
 		  "case label value exceeds maximum value for type");
 	  splay_tree_remove (cases, node->key);
 	  outside_range_p = true;
diff --git gcc/c-family/c.opt gcc/c-family/c.opt
index 572cf186262..a4cf3bd623d 100644
--- gcc/c-family/c.opt
+++ gcc/c-family/c.opt
@@ -819,6 +819,10 @@ Wswitch-bool
 C ObjC C++ ObjC++ Var(warn_switch_bool) Warning Init(1)
 Warn about switches with boolean controlling expression.
 
+Wswitch-outside-range
+C ObjC C++ ObjC++ Var(warn_switch_outside_range) Warning Init(1)
+Warn about switch values that are outside of the switch's type range.
+
 Wtemplates
 C++ ObjC++ Var(warn_templates) Warning
 Warn on primary template declaration.
diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
index eaef4cd63d2..210535cb84a 100644
--- gcc/doc/invoke.texi
+++ gcc/doc/invoke.texi
@@ -5390,6 +5390,13 @@ switch ((int) (a == 4))
 @end smallexample
 This warning is enabled by default for C and C++ programs.
 
+@item -Wswitch-outside-range
+@opindex Wswitch-outside-range
+@opindex Wno-switch-outside-range
+Warn whenever a @code{switch} state has a value that is outside of its
+respective type range.  This warning is enabled by default for
+C and C++ programs.
+
 @item -Wswitch-unreachable
 @opindex Wswitch-unreachable
 @opindex Wno-switch-unreachable
diff --git gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C
new file mode 100644
index 000..29e56f3ba2d
--- /dev/null
+++ gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C
@@ -0,0 +1,8 @@
+// PR c++/90875
+
+void f(char c)
+{
+  switch (c)
+case 300: // { dg-warning "case label value exceeds maximum value for type" }
+case -300:; // { dg-warning "case label value is less than minimum value for type" }
+}
diff --git gcc/testsuite/c-c++-common/Wswitch-outside-range-2.C gcc/testsuite/c-c++-common/Wswitch-outside-range-2.C
new file mode 100644
index 000..20cc019b209
--- /

Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Richard Biener
On Thu, 20 Jun 2019, Jason Merrill wrote:

> On Wed, Jun 19, 2019 at 2:47 PM Nathan Sidwell  wrote:
> >
> > On 6/19/19 1:53 PM, Jan Hubicka wrote:
> >  -ctype = CLASSTYPE_AS_BASE (ctype);
> >  +{
> >  +  if (!tree_int_cst_equal (TYPE_SIZE (ctype),
> >  + TYPE_SIZE (CLASSTYPE_AS_BASE (ctype
> >  +ctype = CLASSTYPE_AS_BASE (ctype);
> >  +}
> >   tree clobber = build_clobber (ctype);
> > >>
> > >> I have noticed we build a distinct as-base type in rather more cases than
> > >> strictly necessary.  For instance when there's a member of reference 
> > >> type or
> > >> we have a non-trivial dtor. (CLASSTYPE_NON_LAYOUT_POD_P gets set by a 
> > >> bunch
> > >> of things that don't affect ABI layout)
> > >
> > > Avoiding the extra copies at first place would be great. In my
> > > understanding the types differ by virtual bases and also by their size
> > > since the fake types are not padded to multiply of their alignment.
> > > I guess this can be tested ahead of producing the copy and saving some
> > > memory...
> > >
> > > I am not sure if my C++ FE abilities are on par to implement this tough.
> >
> > I don't think it's simple to fix there, just unfortunate.  your
> > understanding is correct, and I think your workaround will work.
> > However, remember it's possible for T == CLASSTYPE_AS_BASE (T), so might
> > be worth checking that before doing the size comparison?
> >
> > It'd be great to comment on why you're not just using classtype_as_base
> > there.  I suppose I'm serializing this stuff too, with the same
> > inefficiencies ...
> 
> This simple (untested) patch doesn't avoid creating the unnecessary
> as-base types, but it should avoid using them in a way that causes
> them to be streamed, and should let them be discarded by GC.
> Thoughts?

Looks better than Honzas patch fixing a single place.

I've spent some thoughts on this and I wonder whether we can
re-implement classtype-as-base with fake inheritance (which would
also solve the TBAA alias set issue in a natural way).  That is,
we'd lay out structs as-base and make instances of it use a

class as-instance { as-base b; X pad1; Y pad2; };

with either explicit padding fields or with implicit ones
(I didn't check how we trick stor-layout to not pad the as-base
type to its natural alignment...).

I realize that this impacts all code building component-refs ontop
of as-instance typed objects so this might rule out this approach
completely - but maybe that's reasonably well abstracted into common
code so only few places need adjustments.

Regular derived classes would simply derive from the as-base type
(as they do now I guess).

Richard.


Re: [PATCH] [RFC, PGO+LTO] Missed function specialization + partial devirtualization

2019-06-20 Thread Jan Hubicka
Hi,
some comments on the ipa part of the patch
(and thanks for working on it - this was on my TODO list for years)

> diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> index de82316d4b1..0d373a67d1b 100644
> --- a/gcc/cgraph.c
> +++ b/gcc/cgraph.c
> @@ -553,6 +553,7 @@ cgraph_node::get_create (tree decl)
>   fprintf (dump_file, "Introduced new external node "
>"(%s) and turned into root of the clone tree.\n",
>node->dump_name ());
> +  node->profile_id = first_clone->profile_id;
>  }
>else if (dump_file)
>  fprintf (dump_file, "Introduced new external node "

This is independent of the rest of changes.  Do you have example where
this matters? The inline clones are created in ipa-inline while
ipa-profile is run before it, so I can not think of such a scenario.
I see you also copy profile_id from function to clone.  I would like to
know why you needed that.

Also you mention that you hit some ICEs. If fixes are independent of
rest of your changes, send them separately.

> @@ -1110,6 +,7 @@ cgraph_edge::speculative_call_info (cgraph_edge 
> *&direct,
>int i;
>cgraph_edge *e2;
>cgraph_edge *e = this;
> +  cgraph_node *referred_node;
>  
>if (!e->indirect_unknown_callee)
>  for (e2 = e->caller->indirect_calls;
> @@ -1142,8 +1144,20 @@ cgraph_edge::speculative_call_info (cgraph_edge 
> *&direct,
>   && ((ref->stmt && ref->stmt == e->call_stmt)
>   || (!ref->stmt && ref->lto_stmt_uid == e->lto_stmt_uid)))
>{
> - reference = ref;
> - break;
> + if (e2->indirect_info && e2->indirect_info->num_of_ics)
> +   {
> + referred_node = dyn_cast (ref->referred);
> + if (strstr (e->callee->name (), referred_node->name ()))
> +   {
> + reference = ref;
> + break;
> +   }
> +   }
> + else
> +   {
> + reference = ref;
> + break;
> +   }
>}

This function is intended to return everything related to the
speculative call, so if you add multiple direct targets, I would expect
it to take an auto_vec of cgraph_nodes for direct and an auto_vec of
references.
>  
>/* Speculative edge always consist of all three components - direct edge,
> @@ -1199,7 +1213,14 @@ cgraph_edge::resolve_speculation (tree callee_decl)
>   in the functions inlined through it.  */
>  }
>edge->count += e2->count;
> -  edge->speculative = false;
> +  if (edge->indirect_info && edge->indirect_info->num_of_ics)
> +{
> +  edge->indirect_info->num_of_ics--;
> +  if (edge->indirect_info->num_of_ics == 0)
> + edge->speculative = false;
> +}
> +  else
> +edge->speculative = false;
>e2->speculative = false;
>ref->remove_reference ();
>if (e2->indirect_unknown_callee || e2->inline_failed)

This function should turn speculative call into direct call to DECL, so
I think it should remove all the other direct calls associated with stmt
and the indirect one.

There are now two cases - in the first case you want to turn the speculative
call into a direct call or give up on speculation completely, while in the
other case you want to only remove one of the speculations.

I guess we want to have resolve_speculation(decl) for first and 
remove_one_speculation(edge) for the second case?
The second case would be useful for the code below handling type
mismatches and also for inline when one of speculative targets seems not
useful to bother with.
> @@ -1333,7 +1354,14 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
> e->caller->set_call_stmt_including_clones (e->call_stmt, new_stmt,
>false);
> e->count = gimple_bb (e->call_stmt)->count;
> -   e2->speculative = false;
> +   if (e2->indirect_info && e2->indirect_info->num_of_ics)
> + {
> +   e2->indirect_info->num_of_ics--;
> +   if (e2->indirect_info->num_of_ics == 0)
> + e2->speculative = false;
> + }
> +   else
> + e2->speculative = false;
> e2->count = gimple_bb (e2->call_stmt)->count;
> ref->speculative = false;
> ref->stmt = NULL;

>  extern void debuginfo_early_init (void);
>  extern void debuginfo_init (void);
> @@ -1638,11 +1639,17 @@ struct GTY(()) cgraph_indirect_call_info
>int param_index;
>/* ECF flags determined from the caller.  */
>int ecf_flags;
> -  /* Profile_id of common target obtrained from profile.  */
> +  /* Profile_id of common target obtained from profile.  */
>int common_target_id;
>/* Probability that call will land in function with COMMON_TARGET_ID.  */
>int common_target_probability;
>  
> +  /* Profile_id of common target obtained from profile.  */
> +  int common_target_ids[GCOV_ICALL_TOPN_NCOUNTS / 2];
> +  /* Probabilities that call will land in function with COMMON_TARGET_IDS.  
> */
> +  int common_target_probabilities[GCOV_ICALL_TOPN_NCOUNTS / 2];

I would use vec of pairs (profile_id,pro

Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Jan Hubicka
> > This simple (untested) patch doesn't avoid creating the unnecessary
> > as-base types, but it should avoid using them in a way that causes
> > them to be streamed, and should let them be discarded by GC.
> > Thoughts?
> 
> Looks better than Honzas patch fixing a single place.

Indeed, I think in this case it is also possible to drop
 else if (tree_int_cst_equal (TYPE_SIZE (type), TYPE_SIZE (as_base)))
in call.c since they will now be always different.
> 
> I've spent some thoughts on this and I wonder whether we can
> re-implement classtype-as-base with fake inheritance (which would
> also solve the TBAA alias set issue in a natural way).  That is,
> we'd lay out structs as-base and make instances of it use a
> 
> class as-instance { as-base b; X pad1; Y pad2; };
> 
> with either explicit padding fields or with implicit ones
> (I didn't check how we trick stor-layout to not pad the as-base
> type to its natural alignment...).
> 
> I realize that this impacts all code building component-refs ontop
> of as-instance typed objects so this might rule out this approach
> completely - but maybe that's reasonably well abstracted into common
> code so only few places need adjustments.
> 
> Regular derived classes would simply derive from the as-base type
> (as they do now I guess).

I was trying to construct a testcase using this (i.e. have different
access paths, one using CLASSTYPE_AS_BASE and the other the actual type)
and failed to do so.  Having one would be nice.

Honza
> 
> Richard.


Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Nathan Sidwell

On 6/20/19 9:37 AM, Richard Biener wrote:


I've spent some thoughts on this and I wonder whether we can
re-implement classtype-as-base with fake inheritance (which would
also solve the TBAA alias set issue in a natural way).  That is,
we'd lay out structs as-base and make instances of it use a

class as-instance { as-base b; X pad1; Y pad2; };

with either explicit padding fields or with implicit ones
(I didn't check how we trick stor-layout to not pad the as-base
type to its natural alignment...).


I think you might end up with unordered fields?  virtual empty bases 
don't appear in the as-base variant, and I think they could appear in 
the middle of the as-instance variant.  that might or might not be a 
problem?


nathan

--
Nathan Sidwell


Re: RFA: Synchronize top level files with binutils

2019-06-20 Thread Nick Clifton
Hi Richard,

  Please may I apply this patch to the gcc-9, gcc-8 and gcc-7 branches ?

  I have tested it on all three branches and found no problems.

Cheers
  Nick

2019-06-07  Nick Clifton  

Import these changes from the binutils/gdb repository:

2019-05-28  Nick Alcock  

* Makefile.def (dependencies): configure-libctf depends on all-bfd
and all its deps.
* Makefile.in: Regenerated.

2019-05-28  Nick Alcock  

* Makefile.def (host_modules): Add libctf.
* Makefile.def (dependencies): Likewise.
libctf depends on zlib, libiberty, and bfd.
* Makefile.in: Regenerated.
* configure.ac (host_libs): Add libctf.
* configure: Regenerated.
2019-06-07  Nick Clifton  

	Import these changes from the binutils/gdb repository:

	2019-05-28  Nick Alcock  

	* Makefile.def (dependencies): configure-libctf depends on all-bfd
	and all its deps.
	* Makefile.in: Regenerated.

	2019-05-28  Nick Alcock  

	* Makefile.def (host_modules): Add libctf.
	* Makefile.def (dependencies): Likewise.
	libctf depends on zlib, libiberty, and bfd.
	* Makefile.in: Regenerated.
	* configure.ac (host_libs): Add libctf.
	* configure: Regenerated.

Index: Makefile.def
===
--- Makefile.def	(revision 272111)
+++ Makefile.def	(working copy)
@@ -4,7 +4,7 @@
 // Makefile.in is generated from Makefile.tpl by 'autogen Makefile.def'.
 // This file was originally written by Nathanael Nerode.
 //
-//   Copyright 2002-2013 Free Software Foundation
+//   Copyright 2002-2019 Free Software Foundation
 //
 // This file is free software; you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
@@ -128,6 +128,8 @@
 		extra_make_flags='@extra_linker_plugin_flags@'; };
 host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
 host_modules= { module= gotools; };
+host_modules= { module= libctf; no_install=true; no_check=true;
+		bootstrap=true; };
 
 target_modules = { module= libstdc++-v3;
 		   bootstrap=true;
@@ -428,6 +430,7 @@
 dependencies = { module=all-binutils; on=all-build-bison; };
 dependencies = { module=all-binutils; on=all-intl; };
 dependencies = { module=all-binutils; on=all-gas; };
+dependencies = { module=all-binutils; on=all-libctf; };
 
 // We put install-opcodes before install-binutils because the installed
 // binutils might be on PATH, and they might need the shared opcodes
@@ -518,6 +521,14 @@
 dependencies = { module=all-fastjar; on=all-zlib; };
 dependencies = { module=all-fastjar; on=all-build-texinfo; };
 dependencies = { module=all-fastjar; on=all-libiberty; };
+dependencies = { module=all-libctf; on=all-libiberty; hard=true; };
+dependencies = { module=all-libctf; on=all-bfd; };
+dependencies = { module=all-libctf; on=all-zlib; };
+// So that checking for ELF support in BFD from libctf configure is possible.
+dependencies = { module=configure-libctf; on=all-bfd; };
+dependencies = { module=configure-libctf; on=all-intl; };
+dependencies = { module=configure-libctf; on=all-zlib; };
+dependencies = { module=configure-libctf; on=all-libiconv; };
 
 // Warning, these are not well tested.
 dependencies = { module=all-bison; on=all-intl; };
Index: configure.ac
===
--- configure.ac	(revision 272111)
+++ configure.ac	(working copy)
@@ -131,7 +131,7 @@
 
 # these libraries are used by various programs built for the host environment
 #f
-host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libdecnumber gmp mpfr mpc isl libelf libiconv"
+host_libs="intl libiberty opcodes bfd readline tcl tk itcl libgui zlib libbacktrace libcpp libdecnumber gmp mpfr mpc isl libelf libiconv libctf"
 
 # these tools are built for the host environment
 # Note, the powerpc-eabi build depends on sim occurring before gdb in order to





Drop ref2_is_decl from aliasing_component_refs_p

2019-06-20 Thread Jan Hubicka
Hi,
as discussed earlier, this patch drops ref2_is_decl from
aliasing_component_refs_p.  This parameter makes it assume that no
access path may continue through a reference to DECL, which is not true
because in the GIMPLE memory model we allow storing a value of a completely
unrelated type to it.  I have also constructed an (invalid C IMO)
testcase (which has failed with GCC for ages, but works with ICC).

Bootstrapped/regtested x86_64-linux, will commit it shortly.

* tree-ssa-alias.c (aliasing_component_refs_p): Remove ref2_is_decl
parameter; it has no use in gimple memory model.
(indirect_ref_may_alias_decl_p): Update.
* gcc.c-torture/execute/alias-access-path-1.c: New testcase.
Index: tree-ssa-alias.c
===
--- tree-ssa-alias.c(revision 272507)
+++ tree-ssa-alias.c(working copy)
@@ -850,8 +850,7 @@ type_has_components_p (tree type)
 
 /* Determine if the two component references REF1 and REF2 which are
based on access types TYPE1 and TYPE2 and of which at least one is based
-   on an indirect reference may alias.  REF2 is the only one that can
-   be a decl in which case REF2_IS_DECL is true.
+   on an indirect reference may alias.  
REF1_ALIAS_SET, BASE1_ALIAS_SET, REF2_ALIAS_SET and BASE2_ALIAS_SET
are the respective alias sets.  */
 
@@ -863,8 +862,7 @@ aliasing_component_refs_p (tree ref1,
   tree ref2,
   alias_set_type ref2_alias_set,
   alias_set_type base2_alias_set,
-  poly_int64 offset2, poly_int64 max_size2,
-  bool ref2_is_decl)
+  poly_int64 offset2, poly_int64 max_size2)
 {
   /* If one reference is a component references through pointers try to find a
  common base and apply offset based disambiguation.  This handles
@@ -982,7 +980,7 @@ aliasing_component_refs_p (tree ref1,
  if (TREE_CODE (TREE_TYPE (base1)) == ARRAY_TYPE
  && (!TYPE_SIZE (TREE_TYPE (base1))
  || TREE_CODE (TYPE_SIZE (TREE_TYPE (base1))) != INTEGER_CST
- || (ref == base2 && !ref2_is_decl)))
+ || ref == base2))
{
  ++alias_stats.aliasing_component_refs_p_may_alias;
  return true;
@@ -1041,7 +1039,7 @@ aliasing_component_refs_p (tree ref1,
  if (TREE_CODE (TREE_TYPE (base2)) == ARRAY_TYPE
  && (!TYPE_SIZE (TREE_TYPE (base2))
  || TREE_CODE (TYPE_SIZE (TREE_TYPE (base2))) != INTEGER_CST
- || (ref == base1 && !ref2_is_decl)))
+ || ref == base1))
{
  ++alias_stats.aliasing_component_refs_p_may_alias;
  return true;
@@ -1089,8 +1087,7 @@ aliasing_component_refs_p (tree ref1,
   return true;
 }
   /* If this is ptr vs. decl then we know there is no ptr ... decl path.  */
-  if (!ref2_is_decl
-  && compare_type_sizes (TREE_TYPE (ref1), type2) >= 0
+  if (compare_type_sizes (TREE_TYPE (ref1), type2) >= 0
   && (!end_struct_ref2
  || compare_type_sizes (TREE_TYPE (ref1),
 TREE_TYPE (end_struct_ref2)) >= 0)
@@ -1554,13 +1551,7 @@ indirect_ref_may_alias_decl_p (tree ref1
  offset1, max_size1,
  ref2,
  ref2_alias_set, base2_alias_set,
- offset2, max_size2, 
- /* Only if the other reference is actual
-decl we can safely check only toplevel
-part of access path 1.  */
- same_type_for_tbaa (TREE_TYPE (dbase2),
- TREE_TYPE (base2))
- == 1);
+ offset2, max_size2);
 
   return true;
 }
@@ -1675,7 +1666,7 @@ indirect_refs_may_alias_p (tree ref1 ATT
  offset1, max_size1,
  ref2,
  ref2_alias_set, base2_alias_set,
- offset2, max_size2, false);
+ offset2, max_size2);
 
   return true;
 }
Index: testsuite/gcc.c-torture/execute/alias-access-path-1.c
===
--- testsuite/gcc.c-torture/execute/alias-access-path-1.c   (nonexistent)
+++ testsuite/gcc.c-torture/execute/alias-access-path-1.c   (working copy)
@@ -0,0 +1,19 @@
+/* Test that variable
+ int val;
+   may hold value of type "struct c" which has same size.
+   This is valid in GIMPLE memory model.  */
+
+struct a {int val;} a={1},a2;
+struct b {struct a a;};
+int val;
+struct c {struct b b;} *cptr=(void *)&val;
+
+int
+main(voi

[PATCH] Qualify calls to __never_valueless in

2019-06-20 Thread Jonathan Wakely

* include/std/variant (_Variant_storage, _Extra_visit_slot_needed):
Qualify calls to __never_valueless.

Tested x86_64-linux, committed to trunk.


commit f432c00da2ed386e356ac23a41a9cf943a2e2c3a
Author: redi 
Date:   Thu Jun 20 14:17:51 2019 +

Qualify calls to __never_valueless in 

* include/std/variant (_Variant_storage, _Extra_visit_slot_needed):
Qualify calls to __never_valueless.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272508 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index c86b0c8ccf3..d93ea86ea29 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -422,7 +422,7 @@ namespace __variant
   constexpr bool
   _M_valid() const noexcept
   {
-   if constexpr (__never_valueless<_Types...>())
+   if constexpr (__variant::__never_valueless<_Types...>())
  return true;
return this->_M_index != __index_type(variant_npos);
   }
@@ -456,7 +456,7 @@ namespace __variant
   constexpr bool
   _M_valid() const noexcept
   {
-   if constexpr (__never_valueless<_Types...>())
+   if constexpr (__variant::__never_valueless<_Types...>())
  return true;
return this->_M_index != __index_type(variant_npos);
   }
@@ -799,7 +799,7 @@ namespace __variant
 
   template 
struct _Variant_never_valueless>
-   : bool_constant<__never_valueless<_Types...>()> {};
+   : bool_constant<__variant::__never_valueless<_Types...>()> {};
 
   static constexpr bool value =
(is_same_v<_Maybe_variant_cookie, __variant_cookie>


[PATCH] Skip libstdc++ debug build in early bootstrap stages

2019-06-20 Thread Jonathan Wakely

As mentioned in PR 90770, this is a patch that Debian have been carrying
for some time. The additional unoptimized copies of libstdc++ libs that
get built during each stage are never going to be used, so don't bother
building them.

For a profiled bootstrap this means we won't train the compiler on the
unoptimized library code with assertions enabled, but that doesn't seem
like a big problem, as the same code has already been compiled once for
the main libstdc++ library.

* acinclude.m4 (GLIBCXX_ENABLE_DEBUG): Only do debug build for final
stage of bootstrap.
* configure: Regenerate.

Tested x86_64-linux, committed to trunk.


commit 6473c6e506298bba6111df73fc9810642113a321
Author: redi 
Date:   Thu Jun 20 14:17:57 2019 +

Skip libstdc++ debug build in early bootstrap stages

As mentioned in PR 90770, this is a patch that Debian have been carrying
for some time. The additional unoptimized copies of libstdc++ libs that
get built during each stage are never going to be used, so don't bother
building them.

For a profiled bootstrap this means we won't train the compiler on the
unoptimized library code with assertions enabled, but that doesn't seem
like a big problem, as the same code has already been compiled once for
the main libstdc++ library.

* acinclude.m4 (GLIBCXX_ENABLE_DEBUG): Only do debug build for final
stage of bootstrap.
* configure: Regenerate.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272509 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index 19e9f14b0f5..fad390ba322 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -2899,8 +2899,20 @@ dnl   Where DEFAULT is either `yes' or `no'.
 dnl
 AC_DEFUN([GLIBCXX_ENABLE_DEBUG], [
   AC_MSG_CHECKING([for additional debug build])
+  skip_debug_build=
   GLIBCXX_ENABLE(libstdcxx-debug,$1,,[build extra debug library])
-  AC_MSG_RESULT($enable_libstdcxx_debug)
+  if test x$enable_libstdcxx_debug = xyes; then
+if test -f $toplevel_builddir/../stage_final \
+  && test -f $toplevel_builddir/../stage_current; then
+  stage_final=`cat $toplevel_builddir/../stage_final`
+  stage_current=`cat $toplevel_builddir/../stage_current`
+  if test x$stage_current != x$stage_final ; then
+   skip_debug_build=" (skipped for bootstrap stage $stage_current)"
+   enable_libstdcxx_debug=no
+  fi
+fi
+  fi
+  AC_MSG_RESULT($enable_libstdcxx_debug$skip_debug_build)
   GLIBCXX_CONDITIONAL(GLIBCXX_BUILD_DEBUG, test $enable_libstdcxx_debug = yes)
 ])
 


Re: [PATCH] Skip libstdc++ debug build in early bootstrap stages

2019-06-20 Thread Jonathan Wakely

On 20/06/19 15:23 +0100, Jonathan Wakely wrote:

As mentioned in PR 90770, this is a patch that Debian have been carrying


Oops, Ubuntu, not Debian, sorry.



Re: [PATCH] [RFC, PGO+LTO] Missed function specialization + partial devirtualization

2019-06-20 Thread Martin Liška
Hi.

So the first part adds support for N tracked values.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
>From f3e361fb6d799acf538bc76a91bfcc8e265b7cbe Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 19 Jun 2019 14:15:14 +0200
Subject: [PATCH 1/2] Support N values in libgcov for single value counter
 type.

gcc/testsuite/ChangeLog:

2019-06-20  Martin Liska  

	* gcc.dg/tree-prof/val-prof-2.c: Update scanned pattern
	as we do now better.

libgcc/ChangeLog:

2019-06-20  Martin Liska  

	* libgcov-merge.c (merge_single_value_set): Support N values.
	* libgcov-profiler.c (__gcov_one_value_profiler_body): Likewise.
---
 gcc/testsuite/gcc.dg/tree-prof/val-prof-2.c |  5 +--
 libgcc/libgcov-merge.c  | 48 +++--
 libgcc/libgcov-profiler.c   | 42 ++
 3 files changed, 60 insertions(+), 35 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-prof/val-prof-2.c b/gcc/testsuite/gcc.dg/tree-prof/val-prof-2.c
index 8cb3c64fd17..b3bbadfeb40 100644
--- a/gcc/testsuite/gcc.dg/tree-prof/val-prof-2.c
+++ b/gcc/testsuite/gcc.dg/tree-prof/val-prof-2.c
@@ -25,8 +25,5 @@ main ()
   return 0;
 }
 /* autofdo does not do value profiling so far */
-/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: mod power of 2" "profile" } } */
-/* This is part of code checking that n is power of 2, so we are sure that the transformation
-   didn't get optimized out.  */
-/* { dg-final-use-not-autofdo { scan-tree-dump "n_\[0-9\]* \\+ (4294967295|0x0*)" "optimized"} } */
+/* { dg-final-use-not-autofdo { scan-ipa-dump "Transformation done: div/mod by constant 256" "profile" } } */
 /* { dg-final-use { scan-tree-dump-not "Invalid sum" "optimized"} } */
diff --git a/libgcc/libgcov-merge.c b/libgcc/libgcov-merge.c
index f778cc4b6b7..84367005663 100644
--- a/libgcc/libgcov-merge.c
+++ b/libgcc/libgcov-merge.c
@@ -89,49 +89,53 @@ __gcov_merge_time_profile (gcov_type *counters, unsigned n_counters)
 static void
 merge_single_value_set (gcov_type *counters)
 {
-  unsigned j;
-  gcov_type value, counter;
-
   /* First value is number of total executions of the profiler.  */
   gcov_type all = gcov_get_counter_ignore_scaling (-1);
   counters[0] += all;
   ++counters;
 
+  /* Read all part values.  */
+  gcov_type read_counters[2 * GCOV_DISK_SINGLE_VALUES];
+
   for (unsigned i = 0; i < GCOV_DISK_SINGLE_VALUES; i++)
 {
-  value = gcov_get_counter_target ();
-  counter = gcov_get_counter_ignore_scaling (-1);
+  read_counters[2 * i] = gcov_get_counter_target ();
+  read_counters[2 * i + 1] = gcov_get_counter_ignore_scaling (-1);
+}
 
-  if (counter == -1)
-	{
-	  counters[1] = -1;
-	  /* We can't return as we need to read all counters.  */
-	  continue;
-	}
-  else if (counter == 0 || counters[1] == -1)
-	{
-	  /* We can't return as we need to read all counters.  */
-	  continue;
-	}
+  if (read_counters[1] == -1)
+{
+  counters[1] = -1;
+  return;
+}
+
+  for (unsigned i = 0; i < GCOV_DISK_SINGLE_VALUES; i++)
+{
+  if (read_counters[2 * i + 1] == 0)
+	return;
 
+  unsigned j;
   for (j = 0; j < GCOV_DISK_SINGLE_VALUES; j++)
 	{
-	  if (counters[2 * j] == value)
+	  if (counters[2 * j] == read_counters[2 * i])
 	{
-	  counters[2 * j + 1] += counter;
+	  counters[2 * j + 1] += read_counters[2 * i + 1];
 	  break;
 	}
 	  else if (counters[2 * j + 1] == 0)
 	{
-	  counters[2 * j] = value;
-	  counters[2 * j + 1] = counter;
+	  counters[2 * j] += read_counters[2 * i];
+	  counters[2 * j + 1] += read_counters[2 * i + 1];
 	  break;
 	}
 	}
 
-  /* We haven't found a free slot for the value, mark overflow.  */
+  /* We haven't found a slot, bail out.  */
   if (j == GCOV_DISK_SINGLE_VALUES)
-	counters[1] = -1;
+	{
+	  counters[1] = -1;
+	  return;
+	}
 }
 }
 
diff --git a/libgcc/libgcov-profiler.c b/libgcc/libgcov-profiler.c
index 9ba65b90df3..04d6f9c0e40 100644
--- a/libgcc/libgcov-profiler.c
+++ b/libgcc/libgcov-profiler.c
@@ -118,20 +118,44 @@ static inline void
 __gcov_one_value_profiler_body (gcov_type *counters, gcov_type value,
 int use_atomic)
 {
-  if (value == counters[1])
-counters[2]++;
-  else if (counters[2] == 0)
+  if (use_atomic)
+__atomic_fetch_add (&counters[0], 1, __ATOMIC_RELAXED);
+  else
+counters[0]++;
+
+  ++counters;
+
+  /* We have GCOV_DISK_SINGLE_VALUES as we can keep multiple values
+ next to each other.  */
+  unsigned sindex = 0;
+
+  for (unsigned i = 0; i < GCOV_DISK_SINGLE_VALUES; i++)
 {
-  counters[2] = 1;
-  counters[1] = value;
+  if (value == counters[2 * i])
+	{
+	  if (use_atomic)
+	__atomic_fetch_add (&counters[2 * i + 1], 1, __ATOMIC_RELAXED);
+	  else
+	counters[2 * i + 1]++;
+	  return;
+	}
+  else if (counters[2 * i + 1] == 0)
+	{
+	  /* We found an empty slot.  */
+	

C++ PATCH for c++/90490 - fix decltype issues in noexcept-specifier

2019-06-20 Thread Marek Polacek
The first test is rejected because build_noexcept_spec first converts
a TARGET_EXPR using a user-defined conversion, creating

  R::operator bool (&TARGET_EXPR )

which then fails when instantiating ("taking address of rvalue").

The second test ICEs in verify_ctor_sanity because there was no ctx.ctor
when evaluating the TARGET_EXPR.  That is expected, because we create
constructors for class and vector types (and PMFs I guess) only, not for
scalars.  We need finish_compound_literal to turn 

  TARGET_EXPR 

into

  TARGET_EXPR 

Fixed both by handling the noexcept expr like we handle the explicit expr in
build_explicit_specifier.

Bootstrapped/regtested on x86_64-linux, ok for trunk and 9?

2019-06-20  Marek Polacek  

PR c++/90490 - fix decltype issues in noexcept-specifier.
* except.c (build_noexcept_spec): Call
instantiate_non_dependent_expr_sfinae before
build_converted_constant_expr instead of calling
instantiate_non_dependent_expr after it.  Add
processing_template_decl_sentinel.

* g++.dg/cpp0x/noexcept43.C: New test.
* g++.dg/cpp0x/noexcept44.C: New test.

diff --git gcc/cp/except.c gcc/cp/except.c
index 892d5201da9..71f5d609f10 100644
--- gcc/cp/except.c
+++ gcc/cp/except.c
@@ -1285,8 +1285,10 @@ build_noexcept_spec (tree expr, tsubst_flags_t complain)
   if (TREE_CODE (expr) != DEFERRED_NOEXCEPT
   && !value_dependent_expression_p (expr))
 {
+  expr = instantiate_non_dependent_expr_sfinae (expr, complain);
+  /* Don't let convert_like_real create more template codes.  */
+  processing_template_decl_sentinel s;
   expr = build_converted_constant_bool_expr (expr, complain);
-  expr = instantiate_non_dependent_expr (expr);
   expr = cxx_constant_value (expr);
 }
   if (TREE_CODE (expr) == INTEGER_CST)
diff --git gcc/testsuite/g++.dg/cpp0x/noexcept43.C 
gcc/testsuite/g++.dg/cpp0x/noexcept43.C
new file mode 100644
index 000..faa7d146029
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/noexcept43.C
@@ -0,0 +1,10 @@
+// PR c++/90490
+// { dg-do compile { target c++11 } }
+
+struct R { constexpr operator bool() { return false;} };
+
+template 
+struct S {
+  void g() noexcept(decltype(R{ }) { }) {
+  }
+};
diff --git gcc/testsuite/g++.dg/cpp0x/noexcept44.C 
gcc/testsuite/g++.dg/cpp0x/noexcept44.C
new file mode 100644
index 000..78c9d12f2b7
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/noexcept44.C
@@ -0,0 +1,8 @@
+// PR c++/90490
+// { dg-do compile { target c++11 } }
+
+template 
+struct S {
+  void g() noexcept(decltype(int{ }) { }) {
+  }
+};


[PATCH 2/2] Rename SINGE_VALUE to TOPN_VALUES counters.

2019-06-20 Thread Martin Liška
And the second part is a rename so that it reflects the reality
that a single value counter can actually track multiple values.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
>From cc9e93d43941176e92b5821e5a8134a5319a10b4 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Thu, 20 Jun 2019 14:50:23 +0200
Subject: [PATCH 2/2] Rename SINGE_VALUE to TOPN_VALUES counters.

gcc/ChangeLog:

2019-06-20  Martin Liska  

	* gcov-counter.def (GCOV_COUNTER_V_SINGLE): Remove.
	(GCOV_COUNTER_V_TOPN): New.
	(GCOV_COUNTER_V_INDIR): Use _topn.
	* gcov-io.h (GCOV_DISK_SINGLE_VALUES): Remove.
	(GCOV_TOPN_VALUES): New.
	(GCOV_SINGLE_VALUE_COUNTERS): Remove.
	(GCOV_TOPN_VALUES_COUNTERS): New.
	* profile.c (instrument_values): Use HIST_TYPE_TOPN_VALUES.
	* tree-profile.c:
	(gimple_init_gcov_profiler): Rename variables from one_value
	to topn_values.
	(gimple_gen_one_value_profiler): Remove.
	(gimple_gen_topn_values_profiler): New function.
	* value-prof.c (dump_histogram_value): Use TOPN_VALUES
	names instead of SINGLE_VALUE.
	(stream_out_histogram_value): Likewise.
	(stream_in_histogram_value): Likewise.
	(get_most_common_single_value): Likewise.
	(gimple_divmod_fixed_value_transform): Likewise.
	(gimple_stringops_transform): Likewise.
	(gimple_divmod_values_to_profile): Likewise.
	(gimple_stringops_values_to_profile): Likewise.
	(gimple_find_values_to_profile): Likewise.
	* value-prof.h (enum hist_type): Rename to TOPN.
	(gimple_gen_one_value_profiler): Remove.
	(gimple_gen_topn_values_profiler): New.

libgcc/ChangeLog:

2019-06-20  Martin Liska  

	* Makefile.in: Use topn_values instead of one_value names.
	* libgcov-merge.c (__gcov_merge_single): Move to ...
	(__gcov_merge_topn): ... this.
	(merge_single_value_set): Move to ...
	(merge_topn_values_set): ... this.
	* libgcov-profiler.c (__gcov_one_value_profiler_body): Move to
	...
	(__gcov_topn_values_profiler_body): ... this.
	(__gcov_one_value_profiler_v2): Move to ...
	(__gcov_topn_values_profiler): ... this.
	(__gcov_one_value_profiler_v2_atomic): Move to ...
	(__gcov_topn_values_profiler_atomic): ... this.
	(__gcov_indirect_call_profiler_v4): Remove.
	* libgcov-util.c (__gcov_single_counter_op): Move to ...
	(__gcov_topn_counter_op): ... this.
	* libgcov.h (L_gcov_merge_single): Remove.
	(L_gcov_merge_topn): New.
	(__gcov_merge_single): Remove.
	(__gcov_merge_topn): New.
	(__gcov_one_value_profiler_v2): Move to ..
	(__gcov_topn_values_profiler): ... this.
	(__gcov_one_value_profiler_v2_atomic): Move to ...
	(__gcov_topn_values_profiler_atomic): ... this.
---
 gcc/gcov-counter.def  |  4 ++--
 gcc/gcov-io.h |  7 +++
 gcc/profile.c |  4 ++--
 gcc/tree-profile.c| 31 ---
 gcc/value-prof.c  | 35 ---
 gcc/value-prof.h  |  6 +++---
 libgcc/Makefile.in|  6 +++---
 libgcc/libgcov-merge.c| 30 +++---
 libgcc/libgcov-profiler.c | 30 --
 libgcc/libgcov-util.c |  6 +++---
 libgcc/libgcov.h  | 10 +-
 11 files changed, 80 insertions(+), 89 deletions(-)

diff --git a/gcc/gcov-counter.def b/gcc/gcov-counter.def
index b0596c8dc6b..1a2cbb27b31 100644
--- a/gcc/gcov-counter.def
+++ b/gcc/gcov-counter.def
@@ -36,10 +36,10 @@ DEF_GCOV_COUNTER(GCOV_COUNTER_V_INTERVAL, "interval", _add)
 DEF_GCOV_COUNTER(GCOV_COUNTER_V_POW2, "pow2", _add)
 
 /* The most common value of expression.  */
-DEF_GCOV_COUNTER(GCOV_COUNTER_V_SINGLE, "single", _single)
+DEF_GCOV_COUNTER(GCOV_COUNTER_V_TOPN, "topn", _topn)
 
 /* The most common indirect address.  */
-DEF_GCOV_COUNTER(GCOV_COUNTER_V_INDIR, "indirect_call", _single)
+DEF_GCOV_COUNTER(GCOV_COUNTER_V_INDIR, "indirect_call", _topn)
 
 /* Compute average value passed to the counter.  */
 DEF_GCOV_COUNTER(GCOV_COUNTER_AVERAGE, "average", _add)
diff --git a/gcc/gcov-io.h b/gcc/gcov-io.h
index 0f2905c17ec..7df578f8538 100644
--- a/gcc/gcov-io.h
+++ b/gcc/gcov-io.h
@@ -266,12 +266,11 @@ GCOV_COUNTERS
 #define GCOV_N_VALUE_COUNTERS \
   (GCOV_LAST_VALUE_COUNTER - GCOV_FIRST_VALUE_COUNTER + 1)
 
-/* Number of single value histogram values that live
-   on disk representation.  */
-#define GCOV_DISK_SINGLE_VALUES 4
+/* Number of top N value histogram.  */
+#define GCOV_TOPN_VALUES 4
 
 /* Total number of single value counters.  */
-#define GCOV_SINGLE_VALUE_COUNTERS (2 * GCOV_DISK_SINGLE_VALUES + 1)
+#define GCOV_TOPN_VALUES_COUNTERS (2 * GCOV_TOPN_VALUES + 1)
 
 /* Convert a counter index to a tag.  */
 #define GCOV_TAG_FOR_COUNTER(COUNT)\
diff --git a/gcc/profile.c b/gcc/profile.c
index 9aff9ef2b21..e3f8c5542be 100644
--- a/gcc/profile.c
+++ b/gcc/profile.c
@@ -167,8 +167,8 @@ instrument_values (histogram_values values)
 	  gimple_gen_pow2_profiler (hist, t, 0);
 	  break;
 
-	case HIST_TYPE_SINGLE_VALUE:
-	  gimple_gen_one_value_profiler (hist, t, 0);
+	case HIST_TYPE_TOPN_VALUES:
+	  gimple_gen_topn_values_profiler (

Re: [PING][PATCH 0/3] GNAT test suite fixes for build sysroot

2019-06-20 Thread Maciej Rozycki
On Wed, 19 Jun 2019, Arnaud Charlet wrote:

> >  Ping for:
> > 
> > 
> 
> Have you resolved your copyright assignment issues since then?

 The ball is now in FSF's court I'm told.

> The above patch needs to use "or else" instead of "or". OK with this change
> on the above patch.

 OK, I have updated that in my patch.

 Technically both variants of the expression achieve the same effect here 
as there is no problem with evaluating both sides of the OR operation in 
all cases, but your suggestion might help the readers avoid scratching 
their heads.

 Thank you for your review.  I will apply the change in due course.

  Maciej

Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread H.J. Lu
On Thu, Jun 20, 2019 at 3:54 AM Hongtao Liu  wrote:
>
> On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> >
> > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> > >
> > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > > >
> > > > On 6/7/19, H.J. Lu  wrote:
> > > >
> > > > >> > > +/* Register pair.  */
> > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > >> > >
> > > > >> > > I think
> > > > >> > >
> > > > >> > > INT_MODE (P2QI, 16);
> > > > >> > > INT_MODE (P2HI, 32);
> > > > >> > >
> > > > >> > > with the above subreg approach should work.
> Yes, it works.
>
> But I couldn't figure out how pass_reload correctly handles such a subreg;
> do you have suggestions such as "which function I can dig into first" or
> "which piece of code handles subreg"?

You need to define REGMODE_NATURAL_SIZE.

-- 
H.J.


Re: [PING][PATCH 0/3] GNAT test suite fixes for build sysroot

2019-06-20 Thread Arnaud Charlet
> > Have you resolved your copyright assignment issues since then?
> 
>  The ball is now in FSF's court I'm told.

OK

> > The above patch needs to use "or else" instead of "or". OK with this change
> > on the above patch.
> 
>  OK, I have updated that in my patch.
> 
>  Technically both variants of the expression achieve the same effect here 
> as there is no problem with evaluating both sides of the OR operation in 
> all cases, but your suggestion might help the readers avoid scratching 
> their heads.

The performance isn't the same, and more importantly, this is the documented
Ada coding style for GNAT: 
https://gcc.gnu.org/onlinedocs/gnat-style/Statements.html#Statements

Arno


Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread H.J. Lu
On Thu, Jun 20, 2019 at 12:43 AM Uros Bizjak  wrote:
>
> On Thu, Jun 20, 2019 at 9:40 AM Uros Bizjak  wrote:
> >
> > On Mon, Jun 17, 2019 at 6:27 PM H.J. Lu  wrote:
> > >
> > > processor_costs has costs of RTL expressions and costs of moves:
> > >
> > > 1. Costs of RTL expressions is computed as COSTS_N_INSNS which are used
> > > to generate RTL expressions with the lowest costs.  Costs of RTL memory
> > > operation can be very close to costs of fast instructions to indicate
> > > fast memory operations.
> > >
> > > 2. After RTL expressions have been generated, costs of moves are used by
> > > TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute move
> > > costs for register allocator.  Costs of load and store are higher than
> > > costs of register moves to reduce stack usages by register allocator.
> > >
> > > We should separate costs of RTL expressions from costs of moves so that
> > > they can be adjusted independently.  This patch moves costs of moves to
> > > the new used_by_ra field and duplicates costs of moves which are also
> > > used for costs of RTL expressions.
> >
> > Actually, I think that the current separation is OK. Before reload, we
> > actually don't know which register set will perform the move (not even
> > if float mode will be moved in integer registers), the only thing we
> > can estimate is the number of move instructions. The real cost of
> > register moves is later calculated by the register allocator, where
> > the register class is taken into account when calculating the cost.
>
> Forgot to say that due to the above reasoning, cost of moves should
> not be used in the calculation of costs of RTL expressions, as we are
> talking about two different cost functions. RTL expressions should
> know nothing about register classes.
>

Currently, costs of moves are also used for costs of RTL expressions.   This
patch:

https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00405.html

includes:

diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index e943d13..8409a5f 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -1557,7 +1557,7 @@ struct processor_costs skylake_cost = {
   {4, 4, 4}, /* cost of loading integer registers
 in QImode, HImode and SImode.
 Relative to reg-reg move (2).  */
-  {6, 6, 6}, /* cost of storing integer registers */
+  {6, 6, 3}, /* cost of storing integer registers */
   2, /* cost of reg,reg fld/fst */
   {6, 6, 8}, /* cost of loading fp registers
 in SFmode, DFmode and XFmode */

It lowered the cost for SImode store and made it cheaper than SSE<->integer
register move.  It caused a regression:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90878

Since the cost for SImode store is also used to compute scalar_store
in ix86_builtin_vectorization_cost, it changed loop costs in

void
foo (long p2, long *diag, long d, long i)
{
  long k;
  k = p2 < 3 ? p2 + p2 : p2 + 3;
  while (i < k)
diag[i++] = d;
}

As a result, the loop is unrolled 4 times with -O3 -march=skylake,
instead of 3.

My patch separates costs of moves from costs of RTL expressions.  We have
a follow-up patch which restores the cost for SImode store back to 6 and leaves
the cost of scalar_store unchanged.  It keeps loop unrolling unchanged and
improves powf performance in glibc by 30%.  We are collecting SPEC CPU 2017
data now.

-- 
H.J.


Re: [PING][PATCH 0/3] GNAT test suite fixes for build sysroot

2019-06-20 Thread Maciej Rozycki
On Thu, 20 Jun 2019, Arnaud Charlet wrote:

> >  Technically both variants of the expression achieve the same effect here 
> > as there is no problem with evaluating both sides of the OR operation in 
> > all cases, but your suggestion might help the readers avoid scratching 
> > their heads.
> 
> The performance isn't the same, and more importantly, this is the documented
> Ada coding style for GNAT: 
> https://gcc.gnu.org/onlinedocs/gnat-style/Statements.html#Statements

 Thanks for the pointer, good to know!

  Maciej

C++ PATCH to add test for c++/87512

2019-06-20 Thread Marek Polacek
This got fixed by r266055 but we didn't have a test like this.

Tested on x86_64-linux, applying to trunk.

2019-06-20  Marek Polacek  

PR c++/87512
* g++.dg/cpp1z/inline-var7.C: New test.

diff --git gcc/testsuite/g++.dg/cpp1z/inline-var7.C 
gcc/testsuite/g++.dg/cpp1z/inline-var7.C
new file mode 100644
index 000..71fa1d3b7da
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1z/inline-var7.C
@@ -0,0 +1,20 @@
+// PR c++/87512
+// { dg-do compile { target c++17 } }
+
+template  using enable_if_t = int;
+template struct is_pointer { enum { value = 0 }; };
+
+template 
+inline constexpr auto IsPtr = is_pointer::value;
+
+class Foo;
+class Bar;
+
+template 
+void foo(T1, T2);
+
+template 
+enable_if_t> foo(T, Foo);
+
+template <>
+void foo(Bar, Bar);


Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Richard Biener
On June 20, 2019 4:06:58 PM GMT+02:00, Nathan Sidwell  wrote:
>On 6/20/19 9:37 AM, Richard Biener wrote:
>
>> I've spent some thoughts on this and I wonder whether we can
>> re-implement classtype-as-base with fake inheritance (which would
>> also solve the TBAA alias set issue in a natural way).  That is,
>> we'd lay out structs as-base and make instances of it use a
>> 
>> class as-instance { as-base b; X pad1; Y pad2; };
>> 
>> with either explicit padding fields or with implicit ones
>> (I didn't check how we trick stor-layout to not pad the as-base
>> type to its natural alignment...).
>
>I think you might end up with unordered fields?  virtual empty bases 
>don't appear in the as-base variant, and I think they could appear in 
>the middle of the as-instance variant.  that might or might not be a 
>problem?

Certainly interesting, but if they are empty it might not be an issue if they 
are never referred to (address taken?) 

Richard. 

>nathan



Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Joseph Myers
This (commit r272486) introduces an ICE building libstdc++-v3 for 
sh4-linux-gnu.

libtool: compile:  
/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/./gcc/xgcc 
-shared-libgcc 
-B/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/./gcc 
-nostdinc++ 
-L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/src
 
-L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/src/.libs
 
-L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/libsupc++/.libs
 
-B/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/bin/
 
-B/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/lib/
 
-isystem 
/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/include
 
-isystem 
/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/sys-include
 
-I/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/../libgcc 
-I/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/include/sh4-glibc-linux-gnu
 
-I/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/include
 
-I/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++ 
-D_GLIBCXX_SHARED -fno-implicit-templates -Wall -Wextra -Wwrite-strings 
-Wcast-qual -Wabi=2 -fdiagnostics-show-location=once -ffunction-sections 
-fdata-sections -frandom-seed=new_opa.lo -g -O2 -D_GNU_SOURCE -std=gnu++1z 
-c /scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc  
-fPIC -DPIC -D_GLIBCXX_SHARED -o new_opa.o
during RTL pass: final
/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc: In 
function 'void* operator new(std::size_t, std::align_val_t)':
/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc:132:1: 
internal compiler error: tree check: expected identifier_node, have tree_list 
in is_attribute_p, at attribs.h:155
  132 | }
  | ^
0x5b320d tree_check_failed(tree_node const*, char const*, int, char const*, ...)
/scratch/jmyers/glibc-bot/src/gcc/gcc/tree.c:9899
0x5b3f0d tree_check(tree_node const*, char const*, int, char const*, 
tree_code)
/scratch/jmyers/glibc-bot/src/gcc/gcc/tree.h:3453
0x5b3f0d is_attribute_p
/scratch/jmyers/glibc-bot/src/gcc/gcc/attribs.h:155
0x11884c3 is_attribute_p
/scratch/jmyers/glibc-bot/src/gcc/gcc/tree.h:3197
0x11884c3 sh2a_function_vector_p
/scratch/jmyers/glibc-bot/src/gcc/gcc/config/sh/sh.c:8649
0x1188527 sh_encode_section_info
/scratch/jmyers/glibc-bot/src/gcc/gcc/config/sh/sh.c:1570
0x1153ba8 make_decl_rtl(tree_node*)
/scratch/jmyers/glibc-bot/src/gcc/gcc/varasm.c:1524
0x115460c get_fnname_from_decl(tree_node*)
/scratch/jmyers/glibc-bot/src/gcc/gcc/varasm.c:1720
0xab2aa9 rest_of_handle_final
/scratch/jmyers/glibc-bot/src/gcc/gcc/final.c:4648
0xab2aa9 execute
/scratch/jmyers/glibc-bot/src/gcc/gcc/final.c:4737
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See  for instructions.
Makefile:960: recipe for target 'new_opa.lo' failed
make[5]: *** [new_opa.lo] Error 1

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Marek Polacek
On Thu, Jun 20, 2019 at 04:43:58PM +, Joseph Myers wrote:
> This (commit r272486) introduces an ICE building libstdc++-v3 for 
> sh4-linux-gnu.
> 
> libtool: compile:  
> /scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/./gcc/xgcc 
> -shared-libgcc 
> -B/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/./gcc 
> -nostdinc++ 
> -L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/src
>  
> -L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/src/.libs
>  
> -L/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/libsupc++/.libs
>  
> -B/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/bin/
>  
> -B/scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/lib/
>  
> -isystem 
> /scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/include
>  
> -isystem 
> /scratch/jmyers/glibc-bot/install/compilers/sh4-linux-gnu/sh4-glibc-linux-gnu/sys-include
>  
> -I/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/../libgcc 
> -I/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/include/sh4-glibc-linux-gnu
>  
> -I/scratch/jmyers/glibc-bot/build/compilers/sh4-linux-gnu/gcc/sh4-glibc-linux-gnu/libstdc++-v3/include
>  
> -I/scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++ 
> -D_GLIBCXX_SHARED -fno-implicit-templates -Wall -Wextra -Wwrite-strings 
> -Wcast-qual -Wabi=2 -fdiagnostics-show-location=once -ffunction-sections 
> -fdata-sections -frandom-seed=new_opa.lo -g -O2 -D_GNU_SOURCE -std=gnu++1z 
> -c /scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc  
> -fPIC -DPIC -D_GLIBCXX_SHARED -o new_opa.o
> during RTL pass: final
> /scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc: In 
> function 'void* operator new(std::size_t, std::align_val_t)':
> /scratch/jmyers/glibc-bot/src/gcc/libstdc++-v3/libsupc++/new_opa.cc:132:1: 
> internal compiler error: tree check: expected identifier_node, have tree_list 
> in is_attribute_p, at attribs.h:155
>   132 | }
>   | ^
> 0x5b320d tree_check_failed(tree_node const*, char const*, int, char const*, 
> ...)
> /scratch/jmyers/glibc-bot/src/gcc/gcc/tree.c:9899
> 0x5b3f0d tree_check(tree_node const*, char const*, int, char const*, 
> tree_code)
> /scratch/jmyers/glibc-bot/src/gcc/gcc/tree.h:3453
> 0x5b3f0d is_attribute_p
> /scratch/jmyers/glibc-bot/src/gcc/gcc/attribs.h:155
> 0x11884c3 is_attribute_p
> /scratch/jmyers/glibc-bot/src/gcc/gcc/tree.h:3197
> 0x11884c3 sh2a_function_vector_p
> /scratch/jmyers/glibc-bot/src/gcc/gcc/config/sh/sh.c:8649
> 0x1188527 sh_encode_section_info
> /scratch/jmyers/glibc-bot/src/gcc/gcc/config/sh/sh.c:1570
> 0x1153ba8 make_decl_rtl(tree_node*)
> /scratch/jmyers/glibc-bot/src/gcc/gcc/varasm.c:1524
> 0x115460c get_fnname_from_decl(tree_node*)
> /scratch/jmyers/glibc-bot/src/gcc/gcc/varasm.c:1720
> 0xab2aa9 rest_of_handle_final
> /scratch/jmyers/glibc-bot/src/gcc/gcc/final.c:4648
> 0xab2aa9 execute
> /scratch/jmyers/glibc-bot/src/gcc/gcc/final.c:4737
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See  for instructions.
> Makefile:960: recipe for target 'new_opa.lo' failed
> make[5]: *** [new_opa.lo] Error 1

Sorry about that.  Does this patch work?

2019-06-20  Marek Polacek  

* config/sh/sh.c (sh2a_function_vector_p): Use get_attribute_name.

diff --git gcc/config/sh/sh.c gcc/config/sh/sh.c
index 07d5b3c1df5..dfaeab55142 100644
--- gcc/config/sh/sh.c
+++ gcc/config/sh/sh.c
@@ -8646,7 +8646,7 @@ sh2a_function_vector_p (tree func)
 return false;
 
   for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
-if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+if (is_attribute_p ("function_vector", get_attribute_name (list)))
   return true;
 
   return false;


Re: [RFC] zstd as a compression algorithm for LTO

2019-06-20 Thread Joseph Myers
Any use of a host library should come with associated configure options to 
specify header and library paths for that library (and documentation for 
those options).  (See existing --with-gmp*, --with-isl* etc. options.)

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Joseph Myers
On Thu, 20 Jun 2019, Marek Polacek wrote:

> Sorry about that.  Does this patch work?

Yes, that fixes it, thanks.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Marek Polacek
On Thu, Jun 20, 2019 at 05:08:37PM +, Joseph Myers wrote:
> On Thu, 20 Jun 2019, Marek Polacek wrote:
> 
> > Sorry about that.  Does this patch work?
> 
> Yes, that fixes it, thanks.

Great, I've applied the patch.

Marek


Re: Use ODR for canonical types construction in LTO

2019-06-20 Thread Nathan Sidwell

On 6/20/19 11:49 AM, Richard Biener wrote:

On June 20, 2019 4:06:58 PM GMT+02:00, Nathan Sidwell  wrote:

On 6/20/19 9:37 AM, Richard Biener wrote:


I've spent some thoughts on this and I wonder whether we can
re-implement classtype-as-base with fake inheritance (which would
also solve the TBAA alias set issue in a natural way).  That is,
we'd lay out structs as-base and make instances of it use a

class as-instance { as-base b; X pad1; Y pad2; };

with either explicit padding fields or with implicit ones
(I didn't check how we trick stor-layout to not pad the as-base
type to its natural alignment...).


I think you might end up with unordered fields?  virtual empty bases
don't appear in the as-base variant, and I think they could appear in
the middle of the as-instance variant.  that might or might not be a
problem?


Certainly interesting, but if they are empty it might not be an issue if they 
are never referred to (address taken?)


their address could be taken -- the layout rules are such that no two 
empty objects of the same type can have the same offset.  This is 
important to determine if two references are to the same instance or 
not.  It can't be dereferenced though, and I guess emitting a static 
initializer wouldn't have anything to put there -- though varasm might 
barf on non-monotonic offsets regardless.


I don't know how common virtual empty bases are in practice. 
(non-virtual ones would be in the as-base instance, and not have this 
problem)


nathan

--
Nathan Sidwell


[aarch64] Refactor common errata work-around specs

2019-06-20 Thread Richard Earnshaw (lists)
I noticed while adding the AArch64 NetBSD support code that we now had
four ports all using and defining the same errata work-around headers.
That's silly and long-term becomes a maintenance burden.

So this patch factors all that code into a single header to eliminate
all the duplication.

* config/aarch64/aarch64-errata.h: New file.
* config/aarch64/aarch64-elf-raw.h (CA53_ERR_835769_SPEC): Delete.
(CA53_ERR_843419_SPEC): Delete.
(LINK_SPEC): Use AARCH64_ERRATA_LINK_SPEC instead of above.
* config/aarch64/aarch64-linux.h: Likewise.
* config/aarch64/aarch64-netbsd.h: Likewise.
* config/aarch64/aarch64-freebsd.h: Likewise.

Full bootstrap on aarch64 linux. Cross built the other targets and
manually verified that the specs file has the right contents.

Committed to trunk.

R.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index fda048dc12b..33c5ddebc96 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -980,7 +980,7 @@ esac
 case ${target} in
 aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*)
 	tm_file="${tm_file} dbxelf.h elfos.h newlib-stdint.h"
-	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-elf-raw.h"
+	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-elf-raw.h"
 	tmake_file="${tmake_file} aarch64/t-aarch64"
 	case $target in
 	aarch64-*-elf*)
@@ -1017,19 +1017,19 @@ aarch64*-*-elf | aarch64*-*-fuchsia* | aarch64*-*-rtems*)
 	;;
 aarch64*-*-freebsd*)
 	tm_file="${tm_file} dbxelf.h elfos.h ${fbsd_tm_file}"
-	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-freebsd.h"
+	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-freebsd.h"
 	tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-freebsd"
 	tm_defines="${tm_defines}  TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
 	;;
 aarch64*-*-netbsd*)
 	tm_file="${tm_file} dbxelf.h elfos.h ${nbsd_tm_file}"
-	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-netbsd.h"
+	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-netbsd.h"
 	tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-netbsd"
 	extra_options="${extra_options} netbsd.opt netbsd-elf.opt"
 	;;
 aarch64*-*-linux*)
 	tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
-	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
+	tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-errata.h aarch64/aarch64-linux.h"
 	tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
 	tm_defines="${tm_defines}  TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
 	case $target in
diff --git a/gcc/config/aarch64/aarch64-elf-raw.h b/gcc/config/aarch64/aarch64-elf-raw.h
index bbebd0ef029..8fe7b378360 100644
--- a/gcc/config/aarch64/aarch64-elf-raw.h
+++ b/gcc/config/aarch64/aarch64-elf-raw.h
@@ -27,22 +27,6 @@
   " crtend%O%s crtn%O%s " \
   "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"
 
-#if TARGET_FIX_ERR_A53_835769_DEFAULT
-#define CA53_ERR_835769_SPEC \
-  " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}"
-#else
-#define CA53_ERR_835769_SPEC \
-  " %{mfix-cortex-a53-835769:--fix-cortex-a53-835769}"
-#endif
-
-#if TARGET_FIX_ERR_A53_843419_DEFAULT
-#define CA53_ERR_843419_SPEC \
-  " %{!mno-fix-cortex-a53-843419:--fix-cortex-a53-843419}"
-#else
-#define CA53_ERR_843419_SPEC \
-  " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}"
-#endif
-
 #ifndef LINK_SPEC
 #define LINK_SPEC "%{h*}			\
%{static:-Bstatic}\
@@ -51,8 +35,7 @@
%{!static:%{rdynamic:-export-dynamic}}	\
%{mbig-endian:-EB} %{mlittle-endian:-EL} -X	\
   -maarch64elf%{mabi=ilp32*:32}%{mbig-endian:b}" \
-  CA53_ERR_835769_SPEC \
-  CA53_ERR_843419_SPEC
+  AARCH64_ERRATA_LINK_SPEC
 #endif
 
 #endif /* GCC_AARCH64_ELF_RAW_H */
diff --git a/gcc/config/aarch64/aarch64-errata.h b/gcc/config/aarch64/aarch64-errata.h
new file mode 100644
index 000..8f062536e8b
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-errata.h
@@ -0,0 +1,44 @@
+/* Machine description for AArch64 architecture.
+   Copyright (C) 2009-2019 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   .  */
+
+#ifndef GCC_AARCH64_ERRATA_H
+#define GCC_AARCH64_ERRATA_H
+
+#if TARGET_FIX_ERR_A53_835769_DEFAULT
+#define CA53_ERR_835769_SPEC \
+  " %{!mno-fix-cortex-a53-835769:--fix-cortex-a53-835769}"
+#else
+#define CA53

Re: [PATCH] PR libstdc++/71579 assert that type traits are not misused with an incomplete type

2019-06-20 Thread Antony Polukhin
чт, 6 июн. 2019 г. в 15:19, Jonathan Wakely :
> I'm removing some of these assertions again, because they are either
> redundant or wrong.

Thanks for cleaning up!


In attachment there is an additional patch for type traits hardening.

Things that still remain unasserted are type traits with variadic
template arguments. I have to come up with a proper solution for
providing useful and lightweight diagnostics.

-- 
Best regards,
Antony Polukhin
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index ac98c0d..9063fe5 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,28 @@
+2019-06-20  Antony Polukhin  
+
+   PR libstdc++/71579
+   * include/std/type_traits (is_assignable, is_nothrow_assignable)
+   (is_trivially_assignable, is_nothrow_invocable_r): Add static_asserts
+   to make sure that the second argument of the type trait is not misused
+   with incomplete types.
+   (is_convertible, is_nothrow_convertible, is_swappable_with)
+   (is_nothrow_swappable_with): Add static_asserts to make sure that the
+   first and second arguments of the type trait are not misused with
+   incomplete types.
+   (invoke_result): Add static_asserts to make sure that the first argument
+   of the type trait is not misused with incomplete types.
+   * testsuite/20_util/invoke_result/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_assignable/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_convertible/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_nothrow_assignable/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_nothrow_convertible/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_nothrow_swappable/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_nothrow_swappable_with/incomplete_neg.cc: New
+   test.
+   * testsuite/20_util/is_swappable/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_swappable_with/incomplete_neg.cc: New test.
+   * testsuite/20_util/is_trivially_assignable/incomplete_neg.cc: New test.
+
 2019-06-20  Jonathan Wakely  
 
* acinclude.m4 (GLIBCXX_ENABLE_DEBUG): Only do debug build for final
diff --git a/libstdc++-v3/include/std/type_traits 
b/libstdc++-v3/include/std/type_traits
index 7d4deb1..77fc94e 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -1106,7 +1106,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public __bool_constant<__is_assignable(_Tp, _Up)>
 {
   static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
-   "template argument must be a complete class or an unbounded array");
+   "first template argument must be a complete class or an unbounded 
array");
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_Up>{}),
+   "second template argument must be a complete class or an unbounded 
array");
 };
 
   template::value>
@@ -1168,7 +1170,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public __is_nothrow_assignable_impl<_Tp, _Up>
 {
   static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
-   "template argument must be a complete class or an unbounded array");
+   "first template argument must be a complete class or an unbounded 
array");
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_Up>{}),
+   "second template argument must be a complete class or an unbounded 
array");
 };
 
   template::value>
@@ -1313,7 +1317,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 : public __bool_constant<__is_trivially_assignable(_Tp, _Up)>
 {
   static_assert(std::__is_complete_or_unbounded(__type_identity<_Tp>{}),
-   "template argument must be a complete class or an unbounded array");
+   "first template argument must be a complete class or an unbounded 
array");
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_Up>{}),
+   "second template argument must be a complete class or an unbounded 
array");
 };
 
   template::value>
@@ -1474,7 +1480,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_convertible
 : public __is_convertible_helper<_From, _To>::type
-{ };
+{
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_From>{}),
+   "first template argument must be a complete class or an unbounded 
array");
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_To>{}),
+   "second template argument must be a complete class or an unbounded 
array");
+};
 
   template, is_function<_To>,
@@ -1516,7 +1527,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template
 struct is_nothrow_convertible
 : public __is_nt_convertible_helper<_From, _To>::type
-{ };
+{
+  static_assert(std::__is_complete_or_unbounded(__type_identity<_From>{}),
+   "_From must be a complete class or an unbounded array");
+  static_assert(std::__is_complete_or_unbounded(__type_identit

Re: [PATCH] PR libstdc++/71579 assert that type traits are not misused with an incomplete type

2019-06-20 Thread Ville Voutilainen
On Thu, 20 Jun 2019 at 20:49, Antony Polukhin  wrote:
>
> чт, 6 июн. 2019 г. в 15:19, Jonathan Wakely :
> > I'm removing some of these assertions again, because they are either
> > redundant or wrong.
>
> Thanks for cleaning up!
>
>
> In attachment there is an additional patch for type traits hardening.
>
> Things that still remain unasserted are type traits with variadic
> template arguments. I have to come up with a proper solution for
> providing useful and lightweight diagnostics.

I see a
public __bool_constant<__is_trivially_assignable(_Tp, _Up)>
in this patch, followed by a trait-body that static_asserts. In such
cases, I think we want
to
a) be really careful about duplicating compiler diagnostics with library ones
b) look at the compiler diagnostics, and if they are lacking, improve them.

...because that's what Jonathan's cleanup was really about.
In the test modifications of __is_trivially_assignable, this looks bloody
suspicious:

+// { dg-prune-output "invalid use of incomplete type" }
+// { dg-prune-output "must be a complete" }

No. Don't merge. We are not replacing diagnostics A with diagnostics
B, we are ignoring existing
diagnostics and adding more. Which is exactly what Jonathan's cleanup avoided.


Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Jakub Jelinek
On Thu, Jun 20, 2019 at 12:49:13PM -0400, Marek Polacek wrote:
> Sorry about that.  Does this patch work?
> 
> 2019-06-20  Marek Polacek  
> 
>   * config/sh/sh.c (sh2a_function_vector_p): Use get_attribute_name.

Just that?
grep is_attribute_p.*TREE_PURPOSE config/*/*
config/m32c/m32c.c:  if (is_attribute_p ("interrupt", TREE_PURPOSE (list)))
config/m32c/m32c.c:  if (is_attribute_p ("bank_switch", TREE_PURPOSE 
(list)))
config/m32c/m32c.c:  if (is_attribute_p ("fast_interrupt", TREE_PURPOSE 
(list)))
config/m32c/m32c.c:  if (is_attribute_p ("function_vector", TREE_PURPOSE 
(list)))
config/m32c/m32c.c:  if (is_attribute_p ("function_vector", 
TREE_PURPOSE (list)))
config/rl78/rl78.c:  if (is_attribute_p ("saddr", TREE_PURPOSE (list)))
config/sh/sh.c:   if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
config/sh/sh.c:   || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
config/sh/sh.c:   || is_attribute_p ("nosave_low_regs", TREE_PURPOSE 
(attrs))
config/sh/sh.c:   || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
config/sh/sh.c: if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
config/sh/sh.c:if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))

> diff --git gcc/config/sh/sh.c gcc/config/sh/sh.c
> index 07d5b3c1df5..dfaeab55142 100644
> --- gcc/config/sh/sh.c
> +++ gcc/config/sh/sh.c
> @@ -8646,7 +8646,7 @@ sh2a_function_vector_p (tree func)
>  return false;
>  
>for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
> -if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
> +if (is_attribute_p ("function_vector", get_attribute_name (list)))
>return true;
>  
>return false;

Jakub


Re: [C++ PATCH] PR c++/90875 - added -Wswitch-outside-range option.

2019-06-20 Thread Marek Polacek
On Thu, Jun 20, 2019 at 09:33:18AM -0400, Matthew Beliveau wrote:
> Sorry for the last update, I guess I didn't notice the other changes, oops!
> 
> This should have all the changes. Let me know if anything went wrong!
> 
> Thanks,
> Matthew Beliveau
> 
> On Tue, Jun 18, 2019 at 1:38 PM Marek Polacek  wrote:
> >
> > On Tue, Jun 18, 2019 at 01:17:10PM -0400, Matthew Beliveau wrote:
> > > Hello,
> > >
> > > This patch should change the formatting, and move the test files into
> > > the appropriate directory!
> >
> > It doesn't address my other comments, though, so please send a new version
> > with that fixed.
> >
> > Marek

> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2019-06-20  Matthew Beliveau  
> 
>   PR c++/90875 - added -Wswitch-outside-range option
>   * doc/invoke.texi (Wswitch-outside-range): Document.
> 
>   * c-warn.c (c_do_switch_warnings): Implemented new Wswitch-outside-range
>   warning option.
> 
>   * c.opt (Wswitch-outside-range): Added new option.
>   
>   * c-c++-common/Wswitch-outside-range-1.C: New test.
>   * c-c++-common/Wswitch-outside-range-2.C: New test.
>   * c-c++-common/Wswitch-outside-range-3.C: New test.
>   * c-c++-common/Wswitch-outside-range-4.C: New test.
> 
> diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c
> index 5941c10cddb..743099c75ca 100644
> --- gcc/c-family/c-warn.c
> +++ gcc/c-family/c-warn.c
> @@ -1460,8 +1460,9 @@ c_do_switch_warnings (splay_tree cases, location_t 
> switch_location,
>  min_value) >= 0)
>   {
> location_t loc = EXPR_LOCATION ((tree) node->value);
> -   warning_at (loc, 0, "lower value in case label range"
> -   " less than minimum value for type");
> +   warning_at (loc, OPT_Wswitch_outside_range,
> +  "lower value in case label range less than minimum value"
> +  " for type");
> CASE_LOW ((tree) node->value) = convert (TREE_TYPE (cond),
>  min_value);
> node->key = (splay_tree_key) CASE_LOW ((tree) node->value);
> @@ -1474,8 +1475,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> switch_location,
> if (node == NULL || !node->key)
>   break;
> location_t loc = EXPR_LOCATION ((tree) node->value);
> -   warning_at (loc, 0, "case label value is less than minimum "
> -   "value for type");
> +   warning_at (loc, OPT_Wswitch_outside_range, "case label value is"
> +   " less than minimum value for type");
> splay_tree_remove (cases, node->key);
>   }
> while (1);
> @@ -1491,8 +1492,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> switch_location,
>  max_value) > 0)
>   {
> location_t loc = EXPR_LOCATION ((tree) node->value);
> -   warning_at (loc, 0, "upper value in case label range"
> -   " exceeds maximum value for type");
> +   warning_at (loc, OPT_Wswitch_outside_range, "upper value in case"
> +   " label range exceeds maximum value for type");
> CASE_HIGH ((tree) node->value)
>   = convert (TREE_TYPE (cond), max_value);
> outside_range_p = true;

The formatting is still wrong here...

> @@ -1503,7 +1504,7 @@ c_do_switch_warnings (splay_tree cases, location_t 
> switch_location,
>!= NULL)
>   {
> location_t loc = EXPR_LOCATION ((tree) node->value);
> -   warning_at (loc, 0,
> +   warning_at (loc, OPT_Wswitch_outside_range,
> "case label value exceeds maximum value for type");
> splay_tree_remove (cases, node->key);
> outside_range_p = true;

...but is correct here.  So make the other cases above like this one.

> diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
> index eaef4cd63d2..210535cb84a 100644
> --- gcc/doc/invoke.texi
> +++ gcc/doc/invoke.texi
> @@ -5390,6 +5390,13 @@ switch ((int) (a == 4))
>  @end smallexample
>  This warning is enabled by default for C and C++ programs.
>  
> +@item -Wswitch-outside-range
> +@opindex Wswitch-outside-range
> +@opindex Wno-switch-outside-range
> +Warn whenever a @code{switch} state has a value that is outside of its

s/state/case/

> +respective type range.  This warning is enabled by default for
> +C and C++ progarams.

"programs"

> diff --git gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C 
> gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C
> new file mode 100644
> index 000..29e56f3ba2d
> --- /dev/null
> +++ gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C
> @@ -0,0 +1,8 @@
> +// PR c++/90875
> +
> +void f(char c)
> +{
> +  switch (c)
> +case 300: // { dg-warning "case label value exceeds maximum value for 
> type" }
> +case -300:; // { dg-warning "case label valu

Re: [PATCH] PR libstdc++/71579 assert that type traits are not misused with an incomplete type

2019-06-20 Thread Antony Polukhin
чт, 20 июн. 2019 г. в 20:57, Ville Voutilainen :
>
> On Thu, 20 Jun 2019 at 20:49, Antony Polukhin  wrote:
> >
> > чт, 6 июн. 2019 г. в 15:19, Jonathan Wakely :
> > > I'm removing some of these assertions again, because they are either
> > > redundant or wrong.
> >
> > Thanks for cleaning up!
> >
> >
> > In attachment there is an additional patch for type traits hardening.
> >
> > Things that still remain unasserted are type traits with variadic
> > template arguments. I have to come up with a proper solution for
> > providing useful and lightweight diagnostics.
>
> I see a
> public __bool_constant<__is_trivially_assignable(_Tp, _Up)>
> in this patch, followed by a trait-body that static_asserts. In such
> cases, I think we want
> to
> a) be really careful about duplicating compiler diagnostics with library ones
> b) look at the compiler diagnostics, and if they are lacking, improve them.
>
> ...because that's what Jonathan's cleanup was really about.
> In the test modifications of __is_trivially_assignable, this looks bloody
> suspicious:
>
> +// { dg-prune-output "invalid use of incomplete type" }
> +// { dg-prune-output "must be a complete" }
>
> No. Don't merge. We are not replacing diagnostics A with diagnostics
> B, we are ignoring existing
> diagnostics and adding more. Which is exactly what Jonathan's cleanup avoided.

Thanks for the review and clarifications. I'll fix the patch.



-- 
Best regards,
Antony Polukhin


Re: C++ PATCH for c++/60364 - noreturn after first decl not diagnosed (v2)

2019-06-20 Thread Marek Polacek
On Thu, Jun 20, 2019 at 07:57:08PM +0200, Jakub Jelinek wrote:
> On Thu, Jun 20, 2019 at 12:49:13PM -0400, Marek Polacek wrote:
> > Sorry about that.  Does this patch work?
> > 
> > 2019-06-20  Marek Polacek  
> > 
> > * config/sh/sh.c (sh2a_function_vector_p): Use get_attribute_name.
> 
> Just that?
> grep is_attribute_p.*TREE_PURPOSE config/*/*
> config/m32c/m32c.c:  if (is_attribute_p ("interrupt", TREE_PURPOSE 
> (list)))
> config/m32c/m32c.c:  if (is_attribute_p ("bank_switch", TREE_PURPOSE 
> (list)))
> config/m32c/m32c.c:  if (is_attribute_p ("fast_interrupt", TREE_PURPOSE 
> (list)))
> config/m32c/m32c.c:  if (is_attribute_p ("function_vector", TREE_PURPOSE 
> (list)))
> config/m32c/m32c.c:  if (is_attribute_p ("function_vector", 
> TREE_PURPOSE (list)))
> config/rl78/rl78.c:  if (is_attribute_p ("saddr", TREE_PURPOSE (list)))
> config/sh/sh.c: if (is_attribute_p ("sp_switch", TREE_PURPOSE 
> (attrs))
> config/sh/sh.c: || is_attribute_p ("trap_exit", TREE_PURPOSE 
> (attrs))
> config/sh/sh.c: || is_attribute_p ("nosave_low_regs", 
> TREE_PURPOSE (attrs))
> config/sh/sh.c: || is_attribute_p ("resbank", TREE_PURPOSE 
> (attrs)))
> config/sh/sh.c:   if (is_attribute_p ("function_vector", TREE_PURPOSE 
> (list)))
> config/sh/sh.c:if (is_attribute_p ("function_vector", TREE_PURPOSE 
> (list)))

Here's a bunch more; I've audited the uses of TREE_PURPOSE in config/*/*.

I haven't tested it or anything, but... ok for trunk?

2019-06-20  Marek Polacek  

* config/epiphany/epiphany.c (epiphany_compute_function_type): Use
get_attribute_name.
* config/m32c/m32c.c (interrupt_p): Likewise.
(bank_switch_p): Likewise.
(fast_interrupt_p): Likewise.
(m32c_special_page_vector_p): Likewise.
(current_function_special_page_vector): Likewise.
* config/nds32/nds32.c (nds32_asm_function_prologue): Likewise.
* config/rl78/rl78.c (rl78_attrlist_to_encoding): Likewise.
* config/sh/sh.c (sh_insert_attributes): Likewise.
(sh2a_get_function_vector_number): Likewise.

diff --git gcc/config/epiphany/epiphany.c gcc/config/epiphany/epiphany.c
index 657a8886ac7..2cc6c59eae3 100644
--- gcc/config/epiphany/epiphany.c
+++ gcc/config/epiphany/epiphany.c
@@ -1044,7 +1044,7 @@ epiphany_compute_function_type (tree decl)
a;
a = TREE_CHAIN (a))
 {
-  tree name = TREE_PURPOSE (a);
+  tree name = get_attribute_name (a);
 
   if (name == get_identifier ("interrupt"))
fn_type = EPIPHANY_FUNCTION_INTERRUPT;
diff --git gcc/config/m32c/m32c.c gcc/config/m32c/m32c.c
index 1a0d0c681b4..bda56e3beee 100644
--- gcc/config/m32c/m32c.c
+++ gcc/config/m32c/m32c.c
@@ -2858,7 +2858,7 @@ interrupt_p (tree node ATTRIBUTE_UNUSED)
   tree list = M32C_ATTRIBUTES (node);
   while (list)
 {
-  if (is_attribute_p ("interrupt", TREE_PURPOSE (list)))
+  if (is_attribute_p ("interrupt", get_attribute_name (list)))
return 1;
   list = TREE_CHAIN (list);
 }
@@ -2872,7 +2872,7 @@ bank_switch_p (tree node ATTRIBUTE_UNUSED)
   tree list = M32C_ATTRIBUTES (node);
   while (list)
 {
-  if (is_attribute_p ("bank_switch", TREE_PURPOSE (list)))
+  if (is_attribute_p ("bank_switch", get_attribute_name (list)))
return 1;
   list = TREE_CHAIN (list);
 }
@@ -2886,7 +2886,7 @@ fast_interrupt_p (tree node ATTRIBUTE_UNUSED)
   tree list = M32C_ATTRIBUTES (node);
   while (list)
 {
-  if (is_attribute_p ("fast_interrupt", TREE_PURPOSE (list)))
+  if (is_attribute_p ("fast_interrupt", get_attribute_name (list)))
return 1;
   list = TREE_CHAIN (list);
 }
@@ -2915,7 +2915,7 @@ m32c_special_page_vector_p (tree func)
   list = M32C_ATTRIBUTES (func);
   while (list)
 {
-  if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+  if (is_attribute_p ("function_vector", get_attribute_name (list)))
 return 1;
   list = TREE_CHAIN (list);
 }
@@ -2984,7 +2984,7 @@ current_function_special_page_vector (rtx x)
   list = M32C_ATTRIBUTES (t);
   while (list)
 {
-  if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
+  if (is_attribute_p ("function_vector", get_attribute_name (list)))
 {
   num = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
   return num;
diff --git gcc/config/nds32/nds32.c gcc/config/nds32/nds32.c
index eba98126705..ea532ce1eb3 100644
--- gcc/config/nds32/nds32.c
+++ gcc/config/nds32/nds32.c
@@ -2190,7 +2190,7 @@ nds32_asm_function_prologue (FILE *file)
   /* Display all attributes of this function.  */
   while (attrs)
 {
-  name = TREE_PURPOSE (attrs);
+  name = get_attribute_name (attrs);
   fprintf (file, "%s ", IDENTIFIER_POINTER (name));
 
   /* Pick up the next attribute.  */
diff --git gcc/config/rl78/rl78.c gcc/config/rl78/rl78.c
ind

[objective-c/c++, testsuite, committed] Fix stubify tests for -fnext-runtime.

2019-06-20 Thread Iain Sandoe
These tests were broken in the first place, and more broken following
the change to drop pic symbol stubs unless the linker needs them.

Fixed to work for x86 as well as powerpc and amended the options to
force pic symbol stubs on.

tested on i686/powerpc-darwin9, x86-64-darwin16, x86_64-linux-gnu
applied to mainline,
thanks
Iain

gcc/testsuite/

2019-06-20  Iain Sandoe  

* obj-c++.dg/stubify-1.mm: Adjust options and scan-asm checks.
* obj-c++.dg/stubify-2.mm: Likewise.
* objc.dg/stubify-1.m: Likewise.
* objc.dg/stubify-2.m: Likewise.

diff --git a/gcc/testsuite/obj-c++.dg/stubify-1.mm 
b/gcc/testsuite/obj-c++.dg/stubify-1.mm
index c235543..b82167e 100644
--- a/gcc/testsuite/obj-c++.dg/stubify-1.mm
+++ b/gcc/testsuite/obj-c++.dg/stubify-1.mm
@@ -4,7 +4,7 @@
 /* { dg-do compile { target *-*-darwin* } } */
 /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
-/* { dg-options "-Os -mdynamic-no-pic -fno-exceptions 
-mmacosx-version-min=10.4" } */
+/* { dg-options "-Os -mdynamic-no-pic -fno-exceptions 
-mmacosx-version-min=10.4 -mpic-symbol-stubs" } */
 
 typedef struct objc_object { } *id ;
 int x = 41 ;
@@ -32,8 +32,8 @@ extern "C" {
 }
 @end
 
-/* { dg-final { scan-assembler-not "\(bl|call\)\[ \t\]+_objc_msgSend\n" } } */
-/* { dg-final { scan-assembler "\(bl|call\)\[ 
\t\]+L_objc_msgSend\\\$stub\n" } } */
-/* { dg-final { scan-assembler-not "\(bl|call\)\[ \t\]+_bogonic\n" } } */
-/* { dg-final { scan-assembler "\(bl|call\)\[ \t\]+L_bogonic\\\$stub\n" } 
} */
-/* { dg-final { scan-assembler-not "\\\$non_lazy_ptr" } } */
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } 
*/
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_bogonic\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L_bogonic\$stub\n} } } */
+/* { dg-final { scan-assembler-not {\$non_lazy_ptr} } } */
diff --git a/gcc/testsuite/obj-c++.dg/stubify-2.mm 
b/gcc/testsuite/obj-c++.dg/stubify-2.mm
index a9f66ca..ee8e342 100644
--- a/gcc/testsuite/obj-c++.dg/stubify-2.mm
+++ b/gcc/testsuite/obj-c++.dg/stubify-2.mm
@@ -4,7 +4,7 @@
 /* { dg-do compile { target *-*-darwin* } } */
 /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
-/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4" } 
*/
+/* { dg-options "-mdynamic-no-pic -fdump-rtl-jump -mmacosx-version-min=10.4 
-mpic-symbol-stubs" } */
 
 typedef struct objc_object { } *id ;
 int x = 41 ;
@@ -30,4 +30,10 @@ extern int bogonic (int, int, int) ;
 
 /* Any symbol_ref of an un-stubified objc_msgSend is an error; look
for "objc_msgSend" in quotes, without the $stub suffix. */
-/* { dg-final {  scan-rtl-dump-not "symbol_ref.*\"objc_msgSend\"" "jump" } } */
+/* { dg-final {  scan-rtl-dump-not {symbol_ref.*"objc_msgSend"} "jump" { 
target powerpc*-*-darwin* } } } */
+
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } 
*/
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_Z7bogoniciii\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L__Z7bogoniciii\$stub\n} } 
} */
+/* { dg-final { scan-assembler-not {\$non_lazy_ptr} } } */
diff --git a/gcc/testsuite/objc.dg/stubify-1.m 
b/gcc/testsuite/objc.dg/stubify-1.m
index 91bf73a..1e160a1 100644
--- a/gcc/testsuite/objc.dg/stubify-1.m
+++ b/gcc/testsuite/objc.dg/stubify-1.m
@@ -4,7 +4,7 @@
 /* { dg-do compile { target *-*-darwin* } } */
 /* { dg-skip-if "" { *-*-* } { "-fgnu-runtime" } { "" } } */
 /* { dg-require-effective-target ilp32 } */
-/* { dg-options "-Os -mdynamic-no-pic -mmacosx-version-min=10.4" } */
+/* { dg-options "-Os -mdynamic-no-pic -mmacosx-version-min=10.4 
-mpic-symbol-stubs" } */
 
 typedef struct objc_object { } *id ;
 int x = 41 ;
@@ -28,8 +28,8 @@ extern int bogonic (int, int, int) ;
 }
 @end
 
-/* { dg-final { scan-assembler-not "\(bl|call\)\[ \t\]+_objc_msgSend\n" } } */
-/* { dg-final { scan-assembler "\(bl|call\)\[ 
\t\]+L_objc_msgSend\\\$stub\n" } } */
-/* { dg-final { scan-assembler-not "\(bl|call\)\[ \t\]+_bogonic\n" } } */
-/* { dg-final { scan-assembler "\(bl|call\)\[ \t\]+L_bogonic\\\$stub\n" } 
} */
-/* { dg-final { scan-assembler-not "\\\$non_lazy_ptr" } } */
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_objc_msgSend\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L_objc_msgSend\$stub\n} } } 
*/
+/* { dg-final { scan-assembler-not {(bl|call)[ \t]+_bogonic\n} } } */
+/* { dg-final { scan-assembler {(bl|call)[ \t]+L_bogonic\$stub\n} } } */
+/* { dg-final { scan-assembler-not {\$non_lazy_ptr} } } */
diff --git a/gcc/testsuite/objc.dg/stubify-2.m 
b/gcc/testsuite/objc.dg/stubify-2.m
index eaf4b96..1f53b9c 100644
--- a/gcc/testsuite/objc.dg/stubify-2.m
+++ b/gcc/testsuite/objc.dg/stubify-2.m
@@ 

Re: [PATCH] Add --disable-tm-clone-registry libgcc configure option.

2019-06-20 Thread Jim Wilson
On Wed, Jun 12, 2019 at 5:57 PM  wrote:
> This patch adds libgcc configuration option to disable TM clone
> registry. This option helps to reduce code size for embedded targets
> which do not need transactional memory support.

This looks OK to me.  It is worth pointing out that ARM already ships
compilers built this way, but they didn't bother adding a configure
option.  They just override Makefile variables in their build scripts.
I think this is much cleaner as a documented configure option.

One issue here is that I don't know the transactional memory support
well enough to know what harm comes when we drop the tm clone registry
support.  Perhaps a change should be made to the compiler to disable
the transactional memory support, or some subset of it.  Maybe Aldy
can comment on that?  The intent here is to only use this for size
constrained embedded targets (e.g. newlib-nano targets), and such
targets are very unlikely to ever want transactional memory support.
But someone might accidentally use this configure option with an
x86_64-linux toolchain, and then get confusing transactional memory
failures, and we might want to try to prevent that before it happens.

Jim


Re: [PATCH] PR fortran/86587 -- PRIVATE and BIND(C) are allowed for derived type

2019-06-20 Thread Janne Blomqvist
On Thu, Jun 20, 2019 at 12:10 AM Steve Kargl
 wrote:
>
> Revision 126185 introduced ISO C Binding to gfortran.
> In that revision, a check for a conflict between a
> derived type with the PRIVATE attribute and BIND(C) was
> introduced.  After checking the F2003, F2008, and F2018
> standards, I cannot find this restriction.  Thus, the
> check is removed by the attached patch.  Regression
> checked on x86_64-*-freebsd.  OK to commit?
>
> 2019-06-19  Steven G. Kargl  
>
> PR fortran/86587
> * symbol.c (verify_bind_c_derived_type): Remove erroneous error
> checking for BIND(C) and PRIVATE attributes.
>
> 2019-06-19  Steven G. Kargl  
>
> PR fortran/86587
> * gfortran.dg/pr86587.f90: New test.
>
> --
> Steve

Ok.

-- 
Janne Blomqvist


Re: [C++ PATCH] PR c++/90875 - added -Wswitch-outside-range option.

2019-06-20 Thread Matthew Beliveau
Hopefully fixed!

On Thu, Jun 20, 2019 at 2:42 PM Marek Polacek  wrote:
>
> On Thu, Jun 20, 2019 at 09:33:18AM -0400, Matthew Beliveau wrote:
> > Sorry for the last update, I guess I didn't notice the other changes, oops!
> >
> > This should have all the changes. Let me know if anything went wrong!
> >
> > Thanks,
> > Matthew Beliveau
> >
> > On Tue, Jun 18, 2019 at 1:38 PM Marek Polacek  wrote:
> > >
> > > On Tue, Jun 18, 2019 at 01:17:10PM -0400, Matthew Beliveau wrote:
> > > > Hello,
> > > >
> > > > This patch should change the formatting, and move the test files into
> > > > the appropriate directory!
> > >
> > > It doesn't address my other comments, though, so please send a new version
> > > with that fixed.
> > >
> > > Marek
>
> > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> >
> > 2019-06-20  Matthew Beliveau  
> >
> >   PR c++/90875 - added -Wswitch-outside-range option
> >   * doc/invoke.texi (Wswitch-outside-range): Document.
> >
> >   * c-warn.c (c_do_switch_warnings): Implemented new 
> > Wswitch-outside-range
> >   warning option.
> >
> >   * c.opt (Wswitch-outside-range): Added new option.
> >
> >   * c-c++-common/Wswitch-outside-range-1.C: New test.
> >   * c-c++-common/Wswitch-outside-range-2.C: New test.
> >   * c-c++-common/Wswitch-outside-range-3.C: New test.
> >   * c-c++-common/Wswitch-outside-range-4.C: New test.
> >
> > diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c
> > index 5941c10cddb..743099c75ca 100644
> > --- gcc/c-family/c-warn.c
> > +++ gcc/c-family/c-warn.c
> > @@ -1460,8 +1460,9 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > switch_location,
> >  min_value) >= 0)
> >   {
> > location_t loc = EXPR_LOCATION ((tree) node->value);
> > -   warning_at (loc, 0, "lower value in case label range"
> > -   " less than minimum value for type");
> > +   warning_at (loc, OPT_Wswitch_outside_range,
> > +  "lower value in case label range less than minimum value"
> > +  " for type");
> > CASE_LOW ((tree) node->value) = convert (TREE_TYPE (cond),
> >  min_value);
> > node->key = (splay_tree_key) CASE_LOW ((tree) node->value);
> > @@ -1474,8 +1475,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > switch_location,
> > if (node == NULL || !node->key)
> >   break;
> > location_t loc = EXPR_LOCATION ((tree) node->value);
> > -   warning_at (loc, 0, "case label value is less than minimum "
> > -   "value for type");
> > +   warning_at (loc, OPT_Wswitch_outside_range, "case label value 
> > is"
> > +   " less than minimum value for type");
> > splay_tree_remove (cases, node->key);
> >   }
> > while (1);
> > @@ -1491,8 +1492,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > switch_location,
> >  max_value) > 0)
> >   {
> > location_t loc = EXPR_LOCATION ((tree) node->value);
> > -   warning_at (loc, 0, "upper value in case label range"
> > -   " exceeds maximum value for type");
> > +   warning_at (loc, OPT_Wswitch_outside_range, "upper value in case"
> > +   " label range exceeds maximum value for type");
> > CASE_HIGH ((tree) node->value)
> >   = convert (TREE_TYPE (cond), max_value);
> > outside_range_p = true;
>
> The formatting is still wrong here...
>
> > @@ -1503,7 +1504,7 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > switch_location,
> >!= NULL)
> >   {
> > location_t loc = EXPR_LOCATION ((tree) node->value);
> > -   warning_at (loc, 0,
> > +   warning_at (loc, OPT_Wswitch_outside_range,
> > "case label value exceeds maximum value for type");
> > splay_tree_remove (cases, node->key);
> > outside_range_p = true;
>
> ...but is correct here.  So make the other cases above like this one.
>
> > diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
> > index eaef4cd63d2..210535cb84a 100644
> > --- gcc/doc/invoke.texi
> > +++ gcc/doc/invoke.texi
> > @@ -5390,6 +5390,13 @@ switch ((int) (a == 4))
> >  @end smallexample
> >  This warning is enabled by default for C and C++ programs.
> >
> > +@item -Wswitch-outside-range
> > +@opindex Wswitch-outside-range
> > +@opindex Wno-switch-outside-range
> > +Warn whenever a @code{switch} state has a value that is outside of its
>
> s/state/case/
>
> > +respective type range.  This warning is enabled by default for
> > +C and C++ progarams.
>
> "programs"
>
> > diff --git gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C 
> > gcc/testsuite/c-c++-common/Wswitch-outside-range-1.C
> > new file mode 100644
> > index 000..29e56f3ba2

Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread Uros Bizjak
On Thu, Jun 20, 2019 at 5:19 PM H.J. Lu  wrote:
>
> On Thu, Jun 20, 2019 at 12:43 AM Uros Bizjak  wrote:
> >
> > On Thu, Jun 20, 2019 at 9:40 AM Uros Bizjak  wrote:
> > >
> > > On Mon, Jun 17, 2019 at 6:27 PM H.J. Lu  wrote:
> > > >
> > > > processor_costs has costs of RTL expressions and costs of moves:
> > > >
> > > > 1. Costs of RTL expressions is computed as COSTS_N_INSNS which are used
> > > > to generate RTL expressions with the lowest costs.  Costs of RTL memory
> > > > operation can be very close to costs of fast instructions to indicate
> > > > fast memory operations.
> > > >
> > > > 2. After RTL expressions have been generated, costs of moves are used by
> > > > TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute move
> > > > costs for register allocator.  Costs of load and store are higher than
> > > > costs of register moves to reduce stack usages by register allocator.
> > > >
> > > > We should separate costs of RTL expressions from costs of moves so that
> > > > they can be adjusted independently.  This patch moves costs of moves to
> > > > the new used_by_ra field and duplicates costs of moves which are also
> > > > used for costs of RTL expressions.
> > >
> > > Actually, I think that the current separation is OK. Before reload, we
> > > actually don't know which register set will perform the move (not even
> > > if float mode will be moved in integer registers), the only thing we
> > > can estimate is the number of move instructions. The real cost of
> > > register moves is later calculated by the register allocator, where
> > > the register class is taken into account when calculating the cost.
> >
> > Forgot to say that due to the above reasoning, cost of moves should
> > not be used in the calculation of costs of RTL expressions, as we are
> > talking about two different cost functions. RTL expressions should
> > know nothing about register classes.
> >
>
> Currently, costs of moves are also used for costs of RTL expressions.   This
> patch:
>
> https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00405.html
>
> includes:
>
> diff --git a/gcc/config/i386/x86-tune-costs.h 
> b/gcc/config/i386/x86-tune-costs.h
> index e943d13..8409a5f 100644
> --- a/gcc/config/i386/x86-tune-costs.h
> +++ b/gcc/config/i386/x86-tune-costs.h
> @@ -1557,7 +1557,7 @@ struct processor_costs skylake_cost = {
>{4, 4, 4}, /* cost of loading integer registers
>  in QImode, HImode and SImode.
>  Relative to reg-reg move (2).  */
> -  {6, 6, 6}, /* cost of storing integer registers */
> +  {6, 6, 3}, /* cost of storing integer registers */
>2, /* cost of reg,reg fld/fst */
>{6, 6, 8}, /* cost of loading fp registers
>  in SFmode, DFmode and XFmode */
>
> It lowered the cost for SImode store and made it cheaper than SSE<->integer
> register move.  It caused a regression:
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90878
>
> Since the cost for SImode store is also used to compute scalar_store
> in ix86_builtin_vectorization_cost, it changed loop costs in
>
> void
> foo (long p2, long *diag, long d, long i)
> {
>   long k;
>   k = p2 < 3 ? p2 + p2 : p2 + 3;
>   while (i < k)
> diag[i++] = d;
> }
>
> As the result, the loop is unrolled 4 times with -O3 -march=skylake,
> instead of 3.
>
> My patch separates costs of moves from costs of RTL expressions.  We have
> a follow up patch which restores the cost for SImode store back to 6 and leave
> the cost of scalar_store unchanged.  It keeps loop unrolling unchanged and
> improves powf performance in glibc by 30%.  We are collecting SPEC CPU 2017
> data now.

It looks like the x86 costs are one big mess. I suggest you take this
matter to Honza; he knows this part better than I do.

Uros.


Re: Review Hashtable extract node API

2019-06-20 Thread François Dumont

On 6/19/19 12:47 AM, Jonathan Wakely wrote:

On 18/06/19 22:42 +0200, François Dumont wrote:

On 6/18/19 12:54 PM, Jonathan Wakely wrote:

On 18/06/19 07:52 +0200, François Dumont wrote:

A small regression noticed while merging.

We shouldn't keep on using a moved-from key_type instance.

Ok to commit ? Feel free to do it if you prefer, I'll do so at end 
of Europe day otherwise.



diff --git a/libstdc++-v3/include/bits/hashtable_policy.h 
b/libstdc++-v3/include/bits/hashtable_policy.h

index f5809c7443a..7e89e1b44c4 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -743,7 +743,8 @@ namespace __detail
std::tuple<>()
  };
  auto __pos
-    = __h->_M_insert_unique_node(__k, __bkt, __code, __node._M_node);
+    = 
__h->_M_insert_unique_node(__h->_M_extract()(__node._M_node->_M_v()),

+ __bkt, __code, __node._M_node);
  __node._M_node = nullptr;
  return __pos->second;
    }


I can't create an example where this causes a problem, because the key
passed to _M_insert_unique_node is never used. So it doesn't matter
that it's been moved from.

So I have to wonder why we just added the key parameter to that
function, if it's never used.


I think you've been influenced by my patch. I was using a
"_NodeAccessor" which wasn't giving access to the node without taking
ownership, so I needed to pass the key properly to compute the new bucket
index in case of rehash.


But with your approach this change to _M_insert_unique_node was
simply unnecessary, so here is a patch to clean up this part.


Ha! I see, thanks. So I should have removed that key_type parameter
again after removing the NodeAccessor stuff.



Ok to commit ?


No, because that would restore the original signature of the
_M_insert_unique_node function, but it has changed contract. Old
callers who expect that function to delete the node would now leak
memory if an exception is thrown.

Oh, yes, ABI; I tend to forget, even though the recent PR 90920 reminded me
about that, sorry.

If we change the contract of the function we need to change its
mangled name, so that callers expecting the old contract will not use
the new function.

I'll think about the best way to do that ...


Something like what's attached ?

I still use _GLIBCXX_INLINE_VERSION to tag functions that are kept just 
for abi-compatibility.


Ok to commit after having run tests ?

François

diff --git a/libstdc++-v3/include/bits/hashtable.h b/libstdc++-v3/include/bits/hashtable.h
index ab579a7059e..2ea75a24f1c 100644
--- a/libstdc++-v3/include/bits/hashtable.h
+++ b/libstdc++-v3/include/bits/hashtable.h
@@ -693,19 +693,35 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __node_base*
   _M_get_previous_node(size_type __bkt, __node_base* __n);
 
-  // Insert node __n with key __k and hash code __code, in bucket __bkt
-  // if no rehash (assumes no element with same key already present).
+  // Insert node __n with hash code __code, in bucket __bkt if no
+  // rehash (assumes no element with same key already present).
   // Takes ownership of __n if insertion succeeds, throws otherwise.
   iterator
-  _M_insert_unique_node(const key_type& __k, size_type __bkt,
-			__hash_code __code, __node_type* __n,
-			size_type __n_elt = 1);
+  _M_insert_node(true_type, size_type __bkt, __hash_code,
+		 __node_type* __n, size_type __n_elt = 1);
+
+#if !_GLIBCXX_INLINE_VERSION
+  // Insert node with hash code __code, in bucket bkt if no rehash (assumes
+  // no element with its key already present). Take ownership of the node,
+  // deallocate it on exception.
+  iterator
+  _M_insert_unique_node(size_type __bkt, __hash_code __code,
+			__node_type* __n, size_type __n_elt = 1);
+#endif
 
   // Insert node __n with key __k and hash code __code.
   // Takes ownership of __n if insertion succeeds, throws otherwise.
   iterator
-  _M_insert_multi_node(__node_type* __hint, const key_type& __k,
+  _M_insert_node(false_type, __node_type* __hint,
+		 __hash_code __code, __node_type* __n);
+
+#if !_GLIBCXX_INLINE_VERSION
+  // Insert node with hash code __code. Take ownership of the node,
+  // deallocate it on exception.
+  iterator
+  _M_insert_multi_node(__node_type* __hint,
 			   __hash_code __code, __node_type* __n);
+#endif
 
   template
 	std::pair
@@ -831,7 +847,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	else
 	  {
 		__ret.position
-		  = _M_insert_unique_node(__k, __bkt, __code, __nh._M_ptr);
+		  = _M_insert_node(true_type{}, __bkt, __code, __nh._M_ptr);
 		__nh._M_ptr = nullptr;
 		__ret.inserted = true;
 	  }
@@ -851,7 +867,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	const key_type& __k = __nh._M_key();
 	auto __code = this->_M_hash_code(__k);
 	auto __ret
-	  = _M_insert_multi_node(__hint._M_cur, __k, __code, __nh._M_ptr);
+	  = _M_insert_node(false_type{}, __hint._M_cur, __code, __nh._M_ptr);
 	__

[PATCH, alpha]: Introduce parametrized pattern names

2019-06-20 Thread Uros Bizjak
2019-06-20  Uroš Bizjak  

* config/alpha/alpha.md (@unaligned_store):
Rename from unaligned_store.
(@reload_in_aligned): Rename from reload_in_aligned.
* config/alpha/sync.md (@load_locked_): Rename
from load_locked_.
(@store_conditional_): Rename from store_conditional_.
(@atomic_compare_and_swap_1): Rename
from atomic_compare_and_swap_1.
(@atomic_exchange_1): Rename from atomic_exchange_1.
* config/alpha/alpha.c (alpha_expand_mov_nobwx):
Use gen_reload_in_aligned and gen_unaligned_store.
(emit_load_locked): Remove.
(emit_store_conditional): Ditto.
(alpha_split_atomic_op): Use gen_load_locked and gen_store_conditional.
(alpha_split_compare_and_swap): Ditto.
(alpha_expand_compare_and_swap_12): Use gen_atomic_compare_and_swap_1.
(alpha_split_compare_and_swap_12): Use gen_load_locked
and gen_store_conditional.
(alpha_split_atomic_exchange): Ditto.
(alpha_expand_atomic_exchange_12): Use gen_atomic_exchange_1.
(alpha_split_atomic_exchange_12): Use gen_load_locked
and gen_store_conditional.

Bootstrapped and regression tested on alphaev68-linux-gnu.

Committed to mainline SVN.

Uros.
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index db17f7c06e25..10656bea58c0 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -2292,10 +2292,7 @@ alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
{
  if (reload_in_progress)
{
- if (mode == QImode)
-   seq = gen_reload_inqi_aligned (operands[0], operands[1]);
- else
-   seq = gen_reload_inhi_aligned (operands[0], operands[1]);
+ seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
  emit_insn (seq);
}
  else
@@ -2378,10 +2375,8 @@ alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
  rtx temp3 = gen_reg_rtx (DImode);
  rtx ua = get_unaligned_address (operands[0]);
 
- if (mode == QImode)
-   seq = gen_unaligned_storeqi (ua, operands[1], temp1, temp2, temp3);
- else
-   seq = gen_unaligned_storehi (ua, operands[1], temp1, temp2, temp3);
+ seq = gen_unaligned_store
+   (mode, ua, operands[1], temp1, temp2, temp3);
 
  alpha_set_memflags (seq, operands[0]);
  emit_insn (seq);
@@ -4349,34 +4344,6 @@ emit_unlikely_jump (rtx cond, rtx label)
   add_reg_br_prob_note (insn, profile_probability::very_unlikely ());
 }
 
-/* A subroutine of the atomic operation splitters.  Emit a load-locked
-   instruction in MODE.  */
-
-static void
-emit_load_locked (machine_mode mode, rtx reg, rtx mem)
-{
-  rtx (*fn) (rtx, rtx) = NULL;
-  if (mode == SImode)
-fn = gen_load_locked_si;
-  else if (mode == DImode)
-fn = gen_load_locked_di;
-  emit_insn (fn (reg, mem));
-}
-
-/* A subroutine of the atomic operation splitters.  Emit a store-conditional
-   instruction in MODE.  */
-
-static void
-emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
-{
-  rtx (*fn) (rtx, rtx, rtx) = NULL;
-  if (mode == SImode)
-fn = gen_store_conditional_si;
-  else if (mode == DImode)
-fn = gen_store_conditional_di;
-  emit_insn (fn (res, mem, val));
-}
-
 /* Subroutines of the atomic operation splitters.  Emit barriers
as needed for the memory MODEL.  */
 
@@ -4448,7 +4415,7 @@ alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx 
val, rtx before,
 
   if (before == NULL)
 before = scratch;
-  emit_load_locked (mode, before, mem);
+  emit_insn (gen_load_locked (mode, before, mem));
 
   if (code == NOT)
 {
@@ -4463,7 +4430,7 @@ alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx 
val, rtx before,
 emit_insn (gen_rtx_SET (after, copy_rtx (x)));
   emit_insn (gen_rtx_SET (scratch, x));
 
-  emit_store_conditional (mode, cond, mem, scratch);
+  emit_insn (gen_store_conditional (mode, cond, mem, scratch));
 
   x = gen_rtx_EQ (DImode, cond, const0_rtx);
   emit_unlikely_jump (x, label);
@@ -4502,7 +4469,7 @@ alpha_split_compare_and_swap (rtx operands[])
 }
   label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
 
-  emit_load_locked (mode, retval, mem);
+  emit_insn (gen_load_locked (mode, retval, mem));
 
   x = gen_lowpart (DImode, retval);
   if (oldval == const0_rtx)
@@ -4519,7 +4486,8 @@ alpha_split_compare_and_swap (rtx operands[])
   emit_unlikely_jump (x, label2);
 
   emit_move_insn (cond, newval);
-  emit_store_conditional (mode, cond, mem, gen_lowpart (mode, cond));
+  emit_insn (gen_store_conditional
+(mode, cond, mem, gen_lowpart (mode, cond)));
 
   if (!is_weak)
 {
@@ -4542,7 +4510,6 @@ alpha_expand_compare_and_swap_12 (rtx operands[])
   rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f;
   machine_mode mode;
   rtx addr, align, wdst;
-  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
 
   cond = operands[0];
   dst = operands[1];
@@ -4567,12 +4534,9 @@ alpha_expand_com

Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread Jan Hubicka
> > Currently, costs of moves are also used for costs of RTL expressions.   This
> > patch:
> >
> > https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00405.html
> >
> > includes:
> >
> > diff --git a/gcc/config/i386/x86-tune-costs.h 
> > b/gcc/config/i386/x86-tune-costs.h
> > index e943d13..8409a5f 100644
> > --- a/gcc/config/i386/x86-tune-costs.h
> > +++ b/gcc/config/i386/x86-tune-costs.h
> > @@ -1557,7 +1557,7 @@ struct processor_costs skylake_cost = {
> >{4, 4, 4}, /* cost of loading integer registers
> >  in QImode, HImode and SImode.
> >  Relative to reg-reg move (2).  */
> > -  {6, 6, 6}, /* cost of storing integer registers */
> > +  {6, 6, 3}, /* cost of storing integer registers */
> >2, /* cost of reg,reg fld/fst */
> >{6, 6, 8}, /* cost of loading fp registers
> >  in SFmode, DFmode and XFmode */

Well, it seems that the patch was fixing things in the wrong spot - the
tables are intended to be mostly latency based. I think we ought to
document divergences from this, including the benchmarks where the change
helped. Otherwise it is very hard to figure out why an entry does not
match reality.
> >
> > It lowered the cost for SImode store and made it cheaper than SSE<->integer
> > register move.  It caused a regression:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90878
> >
> > Since the cost for SImode store is also used to compute scalar_store
> > in ix86_builtin_vectorization_cost, it changed loop costs in
> >
> > void
> > foo (long p2, long *diag, long d, long i)
> > {
> >   long k;
> >   k = p2 < 3 ? p2 + p2 : p2 + 3;
> >   while (i < k)
> > diag[i++] = d;
> > }
> >
> > As the result, the loop is unrolled 4 times with -O3 -march=skylake,
> > instead of 3.
> >
> > My patch separates costs of moves from costs of RTL expressions.  We have
> > a follow up patch which restores the cost for SImode store back to 6 and 
> > leave
> > the cost of scalar_store unchanged.  It keeps loop unrolling unchanged and
> > improves powf performance in glibc by 30%.  We are collecting SPEC CPU 2017
> > data now.

I have seen the problem with scalar_store with AMD tuning as well.
It seems to make the SLP vectorizer happy about the idea of turning a
sequence of, say, integer stores into code which moves all the values into
an AVX register and then does one vector store.

The cost model basically compares the cost of N scalar stores to 1 scalar
store + vector construction. Vector construction is then costed as
N*sse_op+addss.

With testcase:

short array[8];
test (short a,short b,short c,short d,short e,short f,short g,short h)
{ 
  array[0]=a;
  array[1]=b;
  array[2]=c;
  array[3]=d;
  array[4]=e;
  array[5]=f;
  array[6]=g;
  array[7]=h;
}
int iarray[8];
test2 (int a,int b,int c,int d,int e,int f,int g,int h)
{ 
  iarray[0]=a;
  iarray[1]=b;
  iarray[2]=c;
  iarray[3]=d;
  iarray[4]=e;
  iarray[5]=f;
  iarray[6]=g;
  iarray[7]=h;
}

I get the following codegen:


test:
vmovd   %edi, %xmm0
vmovd   %edx, %xmm2
vmovd   %r8d, %xmm1
vmovd   8(%rsp), %xmm3
vpinsrw $1, 16(%rsp), %xmm3, %xmm3
vpinsrw $1, %esi, %xmm0, %xmm0
vpinsrw $1, %ecx, %xmm2, %xmm2
vpinsrw $1, %r9d, %xmm1, %xmm1
vpunpckldq  %xmm2, %xmm0, %xmm0
vpunpckldq  %xmm3, %xmm1, %xmm1
vpunpcklqdq %xmm1, %xmm0, %xmm0
vmovaps %xmm0, array(%rip)
ret

test2:
vmovd   %r8d, %xmm5
vmovd   %edx, %xmm6
vmovd   %edi, %xmm7
vpinsrd $1, %r9d, %xmm5, %xmm1
vpinsrd $1, %ecx, %xmm6, %xmm3
vpinsrd $1, %esi, %xmm7, %xmm0
vpunpcklqdq %xmm3, %xmm0, %xmm0
vmovd   16(%rbp), %xmm4
vpinsrd $1, 24(%rbp), %xmm4, %xmm2
vpunpcklqdq %xmm2, %xmm1, %xmm1
vinserti128 $0x1, %xmm1, %ymm0, %ymm0
vmovdqu %ymm0, iarray(%rip)
vzeroupper
ret

which is about 20% slower on my skylake notebook than the
non-SLP-vectorized variant.

I wonder if the vec_construct costs should be made more realistic.
It is computed as:

  case vec_construct:
{
  /* N element inserts into SSE vectors.  */
  int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
  /* One vinserti128 for combining two SSE vectors for AVX256.  */
  if (GET_MODE_BITSIZE (mode) == 256)
cost += ix86_vec_cost (mode, ix86_cost->addss);
  /* One vinserti64x4 and two vinserti128 for combining SSE
 and AVX256 vectors to AVX512.  */
  else if (GET_MODE_BITSIZE (mode) == 512)
cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
  return cost;

So it expects 8 simple SSE operations + one SSE FP arithmetic
operation, while the code above has 8 inter-unit moves + 3 SSE integer
operations to shuffle things around, not to mention the increased
register pressure.

I would say that for integer constructs it is a common case that things
need to be moved from the integer unit to SSE.

Overall the problem is deeper since vect

Re: [C++ PATCH] PR c++/90875 - added -Wswitch-outside-range option.

2019-06-20 Thread Matthew Beliveau
Sorry, the last version had some problems.

On Thu, Jun 20, 2019 at 4:26 PM Matthew Beliveau  wrote:
>
> Hopefully fixed!
>
> On Thu, Jun 20, 2019 at 2:42 PM Marek Polacek  wrote:
> >
> > On Thu, Jun 20, 2019 at 09:33:18AM -0400, Matthew Beliveau wrote:
> > > Sorry for the last update, I guess I didn't notice the other changes, 
> > > oops!
> > >
> > > This should have all the changes. Let me know if anything went wrong!
> > >
> > > Thanks,
> > > Matthew Beliveau
> > >
> > > On Tue, Jun 18, 2019 at 1:38 PM Marek Polacek  wrote:
> > > >
> > > > On Tue, Jun 18, 2019 at 01:17:10PM -0400, Matthew Beliveau wrote:
> > > > > Hello,
> > > > >
> > > > > This patch should change the formatting, and move the test files into
> > > > > the appropriate directory!
> > > >
> > > > It doesn't address my other comments, though, so please send a new 
> > > > version
> > > > with that fixed.
> > > >
> > > > Marek
> >
> > > Bootstrapped/regtested on x86_64-linux, ok for trunk?
> > >
> > > 2019-06-20  Matthew Beliveau  
> > >
> > >   PR c++/90875 - added -Wswitch-outside-range option
> > >   * doc/invoke.texi (Wswitch-outside-range): Document.
> > >
> > >   * c-warn.c (c_do_switch_warnings): Implemented new 
> > > Wswitch-outside-range
> > >   warning option.
> > >
> > >   * c.opt (Wswitch-outside-range): Added new option.
> > >
> > >   * c-c++-common/Wswitch-outside-range-1.C: New test.
> > >   * c-c++-common/Wswitch-outside-range-2.C: New test.
> > >   * c-c++-common/Wswitch-outside-range-3.C: New test.
> > >   * c-c++-common/Wswitch-outside-range-4.C: New test.
> > >
> > > diff --git gcc/c-family/c-warn.c gcc/c-family/c-warn.c
> > > index 5941c10cddb..743099c75ca 100644
> > > --- gcc/c-family/c-warn.c
> > > +++ gcc/c-family/c-warn.c
> > > @@ -1460,8 +1460,9 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > > switch_location,
> > >  min_value) >= 0)
> > >   {
> > > location_t loc = EXPR_LOCATION ((tree) node->value);
> > > -   warning_at (loc, 0, "lower value in case label range"
> > > -   " less than minimum value for type");
> > > +   warning_at (loc, OPT_Wswitch_outside_range,
> > > +  "lower value in case label range less than minimum 
> > > value"
> > > +  " for type");
> > > CASE_LOW ((tree) node->value) = convert (TREE_TYPE (cond),
> > >  min_value);
> > > node->key = (splay_tree_key) CASE_LOW ((tree) node->value);
> > > @@ -1474,8 +1475,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > > switch_location,
> > > if (node == NULL || !node->key)
> > >   break;
> > > location_t loc = EXPR_LOCATION ((tree) node->value);
> > > -   warning_at (loc, 0, "case label value is less than minimum "
> > > -   "value for type");
> > > +   warning_at (loc, OPT_Wswitch_outside_range, "case label value 
> > > is"
> > > +   " less than minimum value for type");
> > > splay_tree_remove (cases, node->key);
> > >   }
> > > while (1);
> > > @@ -1491,8 +1492,8 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > > switch_location,
> > >  max_value) > 0)
> > >   {
> > > location_t loc = EXPR_LOCATION ((tree) node->value);
> > > -   warning_at (loc, 0, "upper value in case label range"
> > > -   " exceeds maximum value for type");
> > > +   warning_at (loc, OPT_Wswitch_outside_range, "upper value in case"
> > > +   " label range exceeds maximum value for 
> > > type");
> > > CASE_HIGH ((tree) node->value)
> > >   = convert (TREE_TYPE (cond), max_value);
> > > outside_range_p = true;
> >
> > The formatting is still wrong here...
> >
> > > @@ -1503,7 +1504,7 @@ c_do_switch_warnings (splay_tree cases, location_t 
> > > switch_location,
> > >!= NULL)
> > >   {
> > > location_t loc = EXPR_LOCATION ((tree) node->value);
> > > -   warning_at (loc, 0,
> > > +   warning_at (loc, OPT_Wswitch_outside_range,
> > > "case label value exceeds maximum value for type");
> > > splay_tree_remove (cases, node->key);
> > > outside_range_p = true;
> >
> > ...but is correct here.  So make the other cases above like this one.
> >
> > > diff --git gcc/doc/invoke.texi gcc/doc/invoke.texi
> > > index eaef4cd63d2..210535cb84a 100644
> > > --- gcc/doc/invoke.texi
> > > +++ gcc/doc/invoke.texi
> > > @@ -5390,6 +5390,13 @@ switch ((int) (a == 4))
> > >  @end smallexample
> > >  This warning is enabled by default for C and C++ programs.
> > >
> > > +@item -Wswitch-outside-range
> > > +@opindex Wswitch-outside-range
> > > +@opindex Wno-switch-outside-range
> > > +Warn whenever a @code{switch

Re: [PATCH] i386: Separate costs of RTL expressions from costs of moves

2019-06-20 Thread H.J. Lu
On Thu, Jun 20, 2019 at 2:10 PM Jan Hubicka  wrote:
>
> > > Currently, costs of moves are also used for costs of RTL expressions.   
> > > This
> > > patch:
> > >
> > > https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00405.html
> > >
> > > includes:
> > >
> > > diff --git a/gcc/config/i386/x86-tune-costs.h 
> > > b/gcc/config/i386/x86-tune-costs.h
> > > index e943d13..8409a5f 100644
> > > --- a/gcc/config/i386/x86-tune-costs.h
> > > +++ b/gcc/config/i386/x86-tune-costs.h
> > > @@ -1557,7 +1557,7 @@ struct processor_costs skylake_cost = {
> > >{4, 4, 4}, /* cost of loading integer registers
> > >  in QImode, HImode and SImode.
> > >  Relative to reg-reg move (2).  */
> > > -  {6, 6, 6}, /* cost of storing integer registers */
> > > +  {6, 6, 3}, /* cost of storing integer registers */
> > >2, /* cost of reg,reg fld/fst */
> > >{6, 6, 8}, /* cost of loading fp registers
> > >  in SFmode, DFmode and XFmode */
>
> Well, it seems that the patch was fixing things on wrong spot - the
> tables are intended to be mostly latency based. I think we ought to
> document divergences from these including benchmarks where the change
> helped. Otherwise it is very hard to figure out why the entry does not
> match the reality.
> > >
> > > It lowered the cost for SImode store and made it cheaper than 
> > > SSE<->integer
> > > register move.  It caused a regression:
> > >
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90878
> > >
> > > Since the cost for SImode store is also used to compute scalar_store
> > > in ix86_builtin_vectorization_cost, it changed loop costs in
> > >
> > > void
> > > foo (long p2, long *diag, long d, long i)
> > > {
> > >   long k;
> > >   k = p2 < 3 ? p2 + p2 : p2 + 3;
> > >   while (i < k)
> > > diag[i++] = d;
> > > }
> > >
> > > As the result, the loop is unrolled 4 times with -O3 -march=skylake,
> > > instead of 3.
> > >
> > > My patch separates costs of moves from costs of RTL expressions.  We have
> > > a follow up patch which restores the cost for SImode store back to 6 and 
> > > leave
> > > the cost of scalar_store unchanged.  It keeps loop unrolling unchanged and
> > > improves powf performance in glibc by 30%.  We are collecting SPEC CPU 
> > > 2017
> > > data now.
>
> I have seen the problem with scalar_store with AMD tuning as well.
> It seems to make SLP vectorizer to be happy about idea of turning
> sequence of say integer tores into code which moves all the values into
> AVX register and then does one vector store.
>
> The cost basically compare cost of N scalar stores to 1 scalar store +
> vector construction. Vector construction then N*sse_op+addss.
>
> With testcase:
>
> short array[8];
> test (short a,short b,short c,short d,short e,short f,short g,short h)
> {
>   array[0]=a;
>   array[1]=b;
>   array[2]=c;
>   array[3]=d;
>   array[4]=e;
>   array[5]=f;
>   array[6]=g;
>   array[7]=h;
> }
> int iarray[8];
> test2 (int a,int b,int c,int d,int e,int f,int g,int h)
> {
>   iarray[0]=a;
>   iarray[1]=b;
>   iarray[2]=c;
>   iarray[3]=d;
>   iarray[4]=e;
>   iarray[5]=f;
>   iarray[6]=g;
>   iarray[7]=h;
> }
>
> I get the following codegen:
>
>
> test:
> vmovd   %edi, %xmm0
> vmovd   %edx, %xmm2
> vmovd   %r8d, %xmm1
> vmovd   8(%rsp), %xmm3
> vpinsrw $1, 16(%rsp), %xmm3, %xmm3
> vpinsrw $1, %esi, %xmm0, %xmm0
> vpinsrw $1, %ecx, %xmm2, %xmm2
> vpinsrw $1, %r9d, %xmm1, %xmm1
> vpunpckldq  %xmm2, %xmm0, %xmm0
> vpunpckldq  %xmm3, %xmm1, %xmm1
> vpunpcklqdq %xmm1, %xmm0, %xmm0
> vmovaps %xmm0, array(%rip)
> ret
>
> test2:
> vmovd   %r8d, %xmm5
> vmovd   %edx, %xmm6
> vmovd   %edi, %xmm7
> vpinsrd $1, %r9d, %xmm5, %xmm1
> vpinsrd $1, %ecx, %xmm6, %xmm3
> vpinsrd $1, %esi, %xmm7, %xmm0
> vpunpcklqdq %xmm3, %xmm0, %xmm0
> vmovd   16(%rbp), %xmm4
> vpinsrd $1, 24(%rbp), %xmm4, %xmm2
> vpunpcklqdq %xmm2, %xmm1, %xmm1
> vinserti128 $0x1, %xmm1, %ymm0, %ymm0
> vmovdqu %ymm0, iarray(%rip)
> vzeroupper
> ret
>
> which is about 20% slower on my skylake notebook than the
> non-SLP-vectorized variant.
>
> I wonder if the vec_construct costs should be made more realistic.
> It is computed as:
>
>   case vec_construct:
> {
>   /* N element inserts into SSE vectors.  */
>   int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
>   /* One vinserti128 for combining two SSE vectors for AVX256.  */
>   if (GET_MODE_BITSIZE (mode) == 256)
> cost += ix86_vec_cost (mode, ix86_cost->addss);
>   /* One vinserti64x4 and two vinserti128 for combining SSE
>  and AVX256 vectors to AVX512.  */
>   else if (GET_MODE_BITSIZE (mode) == 512)
> cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
>   return cost;
>
> So it expects 8 simple SSE o

C++ PATCH to add test for c++/68265

2019-06-20 Thread Marek Polacek
This funny one got fixed by r258549.

Tested x86_64-linux, applying to trunk.

2019-06-20  Marek Polacek  

PR c++/68265
* g++.dg/parse/error62.C: New test.

diff --git gcc/testsuite/g++.dg/parse/error62.C 
gcc/testsuite/g++.dg/parse/error62.C
new file mode 100644
index 000..8dac0ffc0d5
--- /dev/null
+++ gcc/testsuite/g++.dg/parse/error62.C
@@ -0,0 +1,10 @@
+// PR c++/68265
+
+int main()
+{
+  int (*) {} // { dg-error "expected primary-expression" }
+ any amount of syntactic nonsense // { dg-error "not declared in this 
scope" }
+ on multiple lines, with *punctuation* and ++operators++ even...
+ will be silently discarded
+ until the next close brace
+}


[Committed] A target in pointer initialization requires SAVE

2019-06-20 Thread Steve Kargl
While taking a look at PR fortran/77632, I found that
the testcase in the attached patch causes an ICE.  I originally
thought that it was related to the topic of the PR, but it is
in fact an unrelated bug.

If a variable is a target in a pointer initialization, then 
it must have the SAVE attribute.  A variable in PROGRAM, 
MODULE, or SUBMODULE scope is implicitly SAVEd.  So, the
patch explicitly sets the save attribute to SAVE_IMPLICIT.

2019-06-20  Steven G. Kargl  

PR fortran/77632
* decl.c (variable_decl): Mark a variable that is a target in pointer
initialization when in PROGRAM, MODULE, or SUBMODULE scope with an
implicit save.

2019-06-20  Steven G. Kargl  

PR fortran/77632
* gfortran.dg/pr77632_1.f90: New test.

-- 
Steve
Index: gcc/fortran/decl.c
===
--- gcc/fortran/decl.c	(revision 272523)
+++ gcc/fortran/decl.c	(working copy)
@@ -2779,6 +2779,16 @@ variable_decl (int elem)
 	  m = match_pointer_init (&initializer, 0);
 	  if (m != MATCH_YES)
 	goto cleanup;
+
+	  /* The target of a pointer initialization must have the SAVE
+	 attribute.  A variable in PROGRAM, MODULE, or SUBMODULE scope
+	 is implicit SAVEd.  Explicitly, set the SAVE_IMPLICIT value.  */
+	  if (initializer->expr_type == EXPR_VARIABLE
+	  && initializer->symtree->n.sym->attr.save == SAVE_NONE
+	  && (gfc_current_state () == COMP_PROGRAM
+		  || gfc_current_state () == COMP_MODULE
+		  || gfc_current_state () == COMP_SUBMODULE))
+	initializer->symtree->n.sym->attr.save = SAVE_IMPLICIT;
 	}
   else if (gfc_match_char ('=') == MATCH_YES)
 	{
Index: gcc/testsuite/gfortran.dg/pr77632_1.f90
===
--- gcc/testsuite/gfortran.dg/pr77632_1.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr77632_1.f90	(working copy)
@@ -0,0 +1,7 @@
+! { dg-do run }
+program foo
+   implicit none
+   real, target :: a
+   real, pointer :: b => a
+   if (associated(b, a) .eqv. .false.) stop 1
+end program foo


[RFA] [PR tree-optimization/90949] Don't propagate context sensitive non-nullness when copy-propagating pointers

2019-06-20 Thread Jeff Law
As outlined in the BZ, our alias analysis code is context insensitive.
So when we copy-propagate pointers, we can and do copy PTA
information from members to the representative pointer in the copy-of
chain (we do this when the representative pointer has no associated PTA
information).

However, [E]VRP can set the non-nullness of a pointer using context
sensitive information.  So we have to be more careful when copying PTA
information.

We already have similar issues with alignment information as well.  This
patch just extends the hack to avoid copying alignment information in
some circumstances to also avoid copying the non-nullness property.

Bootstrapped and regression tested on x86_64-linux-gnu.  OK for the trunk?

Jeff
* tree-ssa-copy.c (fini_copy_prop): Call clear_ptr_nonnull as needed.
* tree-ssanames.c (clear_ptr_nonnull): New function.
* tree-ssanames.h (clear_ptr_nonnull): Declare.

* gcc.c-torture/execute/pr90949.c: New test.

diff --git a/gcc/testsuite/gcc.c-torture/execute/pr90949.c 
b/gcc/testsuite/gcc.c-torture/execute/pr90949.c
new file mode 100644
index 000..12ae31d97a5
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr90949.c
@@ -0,0 +1,33 @@
+void __attribute__((noipa,noinline)) my_puts (const char *str) { }
+void __attribute__((noipa,noinline)) my_free (void *p) { }
+
+
+struct Node
+{
+struct Node* child;
+};
+
+char *space[sizeof (struct Node) * 2] = { };
+
+void * __attribute__((noipa,noinline)) my_malloc (int bytes) { return &space;} 
+
+void walk(struct Node* module, int cleanup)
+{
+if (module == 0) {
+return;
+}
+if (!cleanup) {
+my_puts("No cleanup");
+}
+walk(module->child, cleanup);
+if (cleanup) {
+my_free(module);
+}
+}
+
+int main()
+{
+struct Node* node = my_malloc(sizeof(struct Node));
+node->child = 0;
+walk(node, 1);
+}
diff --git a/gcc/tree-ssa-copy.c b/gcc/tree-ssa-copy.c
index 89532633e42..ccb95bf18b7 100644
--- a/gcc/tree-ssa-copy.c
+++ b/gcc/tree-ssa-copy.c
@@ -548,10 +548,28 @@ fini_copy_prop (void)
 but alignment info might be cfg sensitive, if it
 e.g. is derived from VRP derived non-zero bits.
 So, do not copy alignment info if the two SSA_NAMEs
-aren't defined in the same basic block.  */
+aren't defined in the same basic block.
+
+Similarly, we may have a context sensitive non-NULL
+state for an SSA_NAME (call it A), which in turn is
+used to derive a global non-NULL state for a different
+SSA_NAME (call it B) via a PHI node.
+
+That PHI node also represents a copy which we will try
+to eliminate here.  We will copy the alias info to the
+representative element in the copy-of chains.  If A is
+the representative element, then we just made A globally
+non-NULL which is incorrect.
+
+Arguably one might claim this is too fragile and that we
+should never dupicate the points-to information if the
+objects are in different blocks.  */
  if (var_bb != copy_of_bb)
-   mark_ptr_info_alignment_unknown
-   (SSA_NAME_PTR_INFO (copy_of[i].value));
+   {
+ mark_ptr_info_alignment_unknown
+ (SSA_NAME_PTR_INFO (copy_of[i].value));
+ clear_ptr_nonnull (copy_of[i].value);
+   }
}
  else if (!POINTER_TYPE_P (TREE_TYPE (var))
   && SSA_NAME_RANGE_INFO (var)
diff --git a/gcc/tree-ssanames.c b/gcc/tree-ssanames.c
index 5bac799e9a3..2114a9584ba 100644
--- a/gcc/tree-ssanames.c
+++ b/gcc/tree-ssanames.c
@@ -451,6 +451,16 @@ get_range_info (const_tree name, value_range_base &vr)
   return kind;
 }
 
+/* Clear nonnull attribute to pointer NAME.  */
+
+void
+clear_ptr_nonnull (tree name)
+{
+  gcc_assert (POINTER_TYPE_P (TREE_TYPE (name)));
+  struct ptr_info_def *pi = get_ptr_info (name);
+  pi->pt.null = 1;
+}
+
 /* Set nonnull attribute to pointer NAME.  */
 
 void
diff --git a/gcc/tree-ssanames.h b/gcc/tree-ssanames.h
index 6e6cffbce6a..6470d491deb 100644
--- a/gcc/tree-ssanames.h
+++ b/gcc/tree-ssanames.h
@@ -91,6 +91,7 @@ extern void set_ptr_info_alignment (struct ptr_info_def *, 
unsigned int,
 extern void adjust_ptr_info_misalignment (struct ptr_info_def *, poly_uint64);
 extern struct ptr_info_def *get_ptr_info (tree);
 extern void set_ptr_nonnull (tree);
+extern void clear_ptr_nonnull (tree);
 extern bool get_ptr_nonnull (const_tree);
 
 extern tree copy_ssa_name_fn (struct function *, tree, gimple *);


C++ PATCH to add test for c++/79781

2019-06-20 Thread Marek Polacek
This one was fixed by r246607.  Not adding it to c-c++-common/ because
the C FE generates an error.

Tested x86_64-linux, applying to trunk.

2019-06-20  Marek Polacek  

PR c++/79781
* g++.dg/ext/goto1.C: New test.

diff --git gcc/testsuite/g++.dg/ext/goto1.C gcc/testsuite/g++.dg/ext/goto1.C
new file mode 100644
index 000..84bbfce6376
--- /dev/null
+++ gcc/testsuite/g++.dg/ext/goto1.C
@@ -0,0 +1,9 @@
+// PR c++/79781
+// { dg-do compile { target int128 } }
+// { dg-options "" }
+
+void c() {
+  static __int128_t d = (long)&&a - (long)&&b;
+a:
+b:;
+}


Re: [PATCH] Wrap 'expand_all_functions' and 'ipa_passes' around timevars

2019-06-20 Thread Jeff Law
On 6/19/19 2:26 PM, Giuliano Belinassi wrote:

> On 06/19, Jeff Law wrote:
>> On 1/24/19 12:51 PM, Giuliano Belinassi wrote:
>>> This patch adds two variables named 'TV_CGRAPH_FUNC_EXPANSION' and
>>> 'TV_CGRAPH_IPA_PASSES' that count the elapsed time of the functions
>>> 'expand_all_functions' and 'ipa_passes', respectively.
>>>
>>> The main point of this is that these functions take a very long time
>>> when compiling the 'gimple-match.c' file, and therefore may also take
>>> a long time when compiling other large files.
>>>
>>> I also accept suggestions about how to improve this :-)
>>>
>>> ChangeLog:
>>>
>>> 2019-01-24  Giuliano Belinassi 
>>>
>>> * cgraph_unit.c (compile): TV_CGRAPH_FUNC_EXPANSION and
>>> TV_CGRAPH_IPA_PASSES start, stop.
>>> * timevar.def (TV_CGRAPH_IPA_PASSES, TV_CGRAPH_FUNC_EXPANSION): New.
>> So I'm guessing you want the accumulated time for the ipa_passes and
>> expansion.  So independent counters using timevar_{start,stop} seem right.
> 
> Yes, my point was to accumulate the total time spent in those functions,
> including everything these functions call.
OK.  Then let's go with your patch as-is if you'd still like it included
on the trunk.

> 
> With regard to breaking the timevar with IPA, GIMPLE and RTL expansion
> passes, I can do this but it will require splitting `all_passes` into
> `all_passes` and `all_rtl_passes`, as suggested by richi in the
> parallelization thread. I can do this fairly easily since I have
> already done it in my branch. Is it OK?
I think that'll be fine when you're ready to merge from your branch to
the trunk.

jeff


Re: [PATCH] improve ifcvt optimization (PR rtl-optimization/89430)

2019-06-20 Thread Jeff Law
On 6/20/19 3:53 AM, JiangNing OS wrote:
> Hi Jeff,
> 
> Appreciate your effort to review my patch! I've updated my patch as attached. 
> See my answers below.
> 
>> in current function, so the store speculation can be avoided.
>> So at a high level should we be doing this in gimple rather than RTL?
>> We're going to have a lot more information about types, better
>> infrastructure for looking at uses/defs, access to the alias oracle, we 
>> should
>> be able to accurately distinguish between potentially shared objects vs those
>> which are local to the thread, etc.  We lose the low level costing 
>> information
>> though.
>>
>> I'm still going to go through the patch and do some level of review, but I do
>> think we need to answer the higher level question though.
>>
> I have the following reasons,
> 
> 1) Following the clue Richard B gave me before about parameter --param 
> allow-store-data-races,
> I did check the middle-end pass tree-if-conv, but I think this pass at the 
> moment doesn't work
> for the issue I'm trying to solve. Tree-if-conv is to do if conversion for 
> loop, and its final goal is to
> help loop vectorization, while my case doesn't have a loop at all. 
I think the fact that it's focused so much on loops is a historical
accident.  We certainly have a variety of places in the gimple
optimizers that do if-conversion, and they're not all in tree-if-conv :(
 For example, some are done in tree-ssa-phiopt.

In the gimple optimizers the testcase from 89430 is going to look
something like this:


> ;   basic block 2, loop depth 0, count 1073741824 (estimated locally), maybe 
> hot
> ;;prev block 0, next block 3, flags: (NEW, REACHABLE, VISITED)
> ;;pred:   ENTRY [always]  count:1073741824 (estimated locally) 
> (FALLTHRU,EXECUTABLE)
>   a.0_1 = a;
>   _2 = (long unsigned int) k_8(D);
>   _3 = _2 * 4;
>   _4 = a.0_1 + _3;
>   _5 = *_4;
>   if (_5 > b_9(D))
> goto ; [50.00%]
>   else
> goto ; [50.00%]
> ;;succ:   3 [50.0% (guessed)]  count:536870912 (estimated locally) 
> (TRUE_VALUE,EXECUTABLE)
> ;;4 [50.0% (guessed)]  count:536870912 (estimated locally) 
> (FALSE_VALUE,EXECUTABLE)
> 
> ;;   basic block 3, loop depth 0, count 536870913 (estimated locally), maybe 
> hot
> ;;prev block 2, next block 4, flags: (NEW, REACHABLE, VISITED)
> ;;pred:   2 [50.0% (guessed)]  count:536870912 (estimated locally) 
> (TRUE_VALUE,EXECUTABLE)
>   *_4 = b_9(D);
> ;;succ:   4 [always]  count:536870913 (estimated locally) 
> (FALLTHRU,EXECUTABLE)
> 
> ;;   basic block 4, loop depth 0, count 1073741824 (estimated locally), maybe 
> hot
> ;;prev block 3, next block 1, flags: (NEW, REACHABLE, VISITED)
> ;;pred:   3 [always]  count:536870913 (estimated locally) 
> (FALLTHRU,EXECUTABLE)
> ;;2 [50.0% (guessed)]  count:536870912 (estimated locally) 
> (FALSE_VALUE,EXECUTABLE)
>   return;

That looks like a pretty easy form to analyze.  I'd suggest looking
through tree-ssa-phiopt.c closely.  There's several transformations in
there that share similarities with yours.










> 2) My current solution fits into current back-end if-conversion pass very 
> well. I don't want to invent
> a new framework to solve this relatively small issue. Besides, this back-end 
> patch doesn't only
> enhance store speculation detection, but also fix a bug in the original code. 
Understood, but I still wonder if we're better off addressing this in
gimple.


>> Just from a design standpoint, what are the consequences if this function
>> returns true for something that isn't actually in the stack or false for
>> something that is on the stack?
>>
> If noce_mem_is_on_stack returns true for something that isn't actually in the 
> stack, 
> it could potentially introduce store speculation, then the if-conversion 
> optimization
> will be incorrect. If this function returns false for something that is on 
> stack, it doesn't
> matter, because the optimization will not be triggered. 
OK.  That's what I expected.
> 
> 
> 
> 
>>
>>> +
>>> +/* Always return true, if there is a dominating write.
>>> +
>>> +   When there is a dominating read from memory on stack,
>>> +   1) if x = a is a memory read, return true.
>>> +   2) if x = a is a memory write, return true if the memory is on stack.
>>> +  This is the guarantee the memory is *not* readonly. */
>>> +
>>> +static bool
>>> +noce_valid_for_dominating (basic_block bb, rtx_insn *a_insn,
>>> +   const_rtx x, bool is_store) {
>>> +  rtx_insn *insn;
>>> +  rtx set;
>>> +
>>> +  gcc_assert (MEM_P (x));
>>> +
>>> +  FOR_BB_INSNS (bb, insn)
>>> +{
>>> +  set = single_set (insn);
>>> +  if (!set)
>>> +continue;
>>> +
>>> +  /* Dominating store */
>>> +  if (rtx_equal_p (x, SET_DEST (set)))
>>> +return true;
>>> +
>>> +  /* Dominating load */
>>> +  if (rtx_equal_p (x, SET_SRC (set)))
>>> +if (is_store && noce_mem_is_on_stack (a_insn, x)

Re: [RFA] [PR tree-optimization/90949] Don't propagate context sensitive non-nullness when copy-propagating pointers

2019-06-20 Thread Martin Sebor

On 6/20/19 4:23 PM, Jeff Law wrote:

As outlined in the BZ, our alias analysis code is context insensitive.
So when we copy-propagate pointers, we can and do copy PTA
information from members to the representative pointer in the copy-of
chain (we do this when the representative pointer has no associated PTA
information).

However, [E]VRP can set the non-nullness of a pointer using context
sensitive information.  So we have to be more careful when copying PTA
information.

We already have similar issues with alignment information as well.  This
patch just extends the hack to avoid copying alignment information in
some circumstances to also avoid copying the non-nullness property.

Bootstrapped and regression tested on x86_64-linux-gnu.  OK for the trunk?

Jeff



Just a question/comment about the test:

--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr90949.c
@@ -0,0 +1,33 @@
+void __attribute__((noipa,noinline)) my_puts (const char *str) { }
+void __attribute__((noipa,noinline)) my_free (void *p) { }
+
+
+struct Node
+{
+struct Node* child;
+};
+
+char *space[sizeof (struct Node) * 2] = { };
+
+void * __attribute__((noipa,noinline)) my_malloc (int bytes) { return 
&space;}


Shouldn't space be declared as an array of char rather than char*?
(As it is, strictly speaking, accessing it via an lvalue of type
Node is undefined, even if it's carefully hidden from the compiler
by the attributes.)

Martin


Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Hongtao Liu
On Thu, Jun 20, 2019 at 10:58 PM H.J. Lu  wrote:
>
> On Thu, Jun 20, 2019 at 3:54 AM Hongtao Liu  wrote:
> >
> > On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> > >
> > > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> > > >
> > > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > > > >
> > > > > On 6/7/19, H.J. Lu  wrote:
> > > > >
> > > > > >> > > +/* Register pair.  */
> > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > > >> > >
> > > > > >> > > I think
> > > > > >> > >
> > > > > >> > > INT_MODE (P2QI, 16);
> > > > > >> > > INT_MODE (P2HI, 32);
Why P2QI need 16 bytes but not 2 bytes?
Same question with P2HI.
> > > > > >> > >
> > > > > >> > > with the above subreg approach should work.
> > Yes, it works.
> >
> > But i didn't figure out how did pass_reload correctly handle such subreg,
> > do you have suggestions such as "which function i can dig into first" or
> > "which piece of codes handle subreg"?
>
> You need to define REGMODE_NATURAL_SIZE.
>
> --
> H.J.



-- 
BR,
Hongtao


Re: Deque fiil/copy/move/copy_backward/move_backward/equal overloads

2019-06-20 Thread François Dumont
And thanks for noticing that not only user code will be improved but 
also our own algos !


I'll reference this PR if accepted.


On 6/20/19 10:38 AM, Morwenn Ed wrote:

That's actually a solution to bug 90409, thanks for it :)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90409

Morwenn


*De :* libstdc++-ow...@gcc.gnu.org  de la 
part de François Dumont 

*Envoyé :* mercredi 19 juin 2019 19:32
*À :* libstd...@gcc.gnu.org; gcc-patches
*Objet :* Deque fiil/copy/move/copy_backward/move_backward/equal 
overloads

I wanted to implement Debug overloads for those already existing
overloads but then realized that those algos could be generalized. This
way we will benefit from the memmove replacement when operating with C
array or std::array or std::vector iterators.

I might do the same for lexicographical_compare one day.

The ChangeLog below is quite huge so I attached it. I wonder if I could
use deque::iterator and deque::const_iterator in place of the
_Deque_iterator<> to reduce it ?

Tested under Linux x86_64 normal and debug modes, ok to commit ?

François





Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Hongtao Liu
On Thu, Jun 20, 2019 at 7:37 PM Uros Bizjak  wrote:
>
> On Thu, Jun 20, 2019 at 12:54 PM Hongtao Liu  wrote:
> >
> > On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> > >
> > > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> > > >
> > > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > > > >
> > > > > On 6/7/19, H.J. Lu  wrote:
> > > > >
> > > > > >> > > +/* Register pair.  */
> > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > > >> > >
> > > > > >> > > I think
> > > > > >> > >
> > > > > >> > > INT_MODE (P2QI, 16);
> > > > > >> > > INT_MODE (P2HI, 32);
> > > > > >> > >
> > > > > >> > > with the above subreg approach should work.
> > Yes, it works.
> >
> > But i didn't figure out how did pass_reload correctly handle such subreg,
> > do you have suggestions such as "which function i can dig into first" or
> > "which piece of codes handle subreg"?
>
> I'm really not an expert in this part of the compiler, so I'll leave
> the answer for someone else.
>
> > > > > >> > >
> > > > > >> >
> > > > > >> > I don't think subreg works on pseudo registers with non-zero
> > > > > >> > offset.  validate_subreg has
> > > > > >> >
> > > > > >> >  if (maybe_lt (osize, regsize)
> > > > > >> >   && ! (lra_in_progress && (FLOAT_MODE_P (imode) || 
> > > > > >> > FLOAT_MODE_P
> > > > > >> > (omode
> > > > > >> > {
> > > > > >> >   /* It is invalid for the target to pick a register size 
> > > > > >> > for a
> > > > > >> > mode
> > > > > >> >  that isn't ordered wrt to the size of that mode.  */
> > > > > >> >   poly_uint64 block_size = ordered_min (isize, regsize);
> > > > > >> >   unsigned int start_reg;
> > > > > >> >   poly_uint64 offset_within_reg;
> > > > > >> >   if (!can_div_trunc_p (offset, block_size, &start_reg,
> > > > > >> > &offset_within_reg)
> > > > > >> >   || (BYTES_BIG_ENDIAN
> > > > > >> >   ? maybe_ne (offset_within_reg, block_size - osize)
> > > > > >> >   : maybe_ne (offset_within_reg, 0U)))
> > > > > >> > return false;
> > > > > >>
> > > > > >> It works with SImode subregs of DImode values on 32bit targets. 
> > > > > >> Please
> > > > > >> look for calls to gen_highpart, one concrete example is in
> > > > > >> atomic_compare_and_swap.
> > > > > >>
> > > > > >
> > > > > > It works because of
> > > > > >
> > > > > > #define REGMODE_NATURAL_SIZE(MODE) UNITS_PER_WORD
> > > > > >
> > > > > > and only works for the high part of SImode of DImode.
> > > > > >
> > > > > > P2QI and P2HI are 2 special modes of mask register pair for
> > > > > > 2 instructions.   Do we want to make them more generic?
> > > > >
> > > > > If enhancing the referred define means that we don't need two
> > > > > artificial instructions and leave all heavy lifting to the existing
> > > > Do you mean that we take P2HI and P2QI as normal vector modes,
> > > > and reuse ix86_expand_vector_* things?
> > > > But still two artificial instructions can't be avoided.
> > > > > generic functionality, then this is the way to go.
> > >
> > > No, declare them as integer modes and use subregs to access high and
> > > low register. This should work in the same way as SImode hard
> > > registers are accessed in DImode pair for 32bit targets.
> > >
> > > Uros.
> >
> > Update patch.
>
> Does gen_lowpart/gen_highpart instead of simplify_gen_subreg work?
Nope.
gen_highpart(QImode, op) calls simplify_gen_subreg (QImode, op, P2QImode, 15)
which failed to produce subreg operand.
> These two are just a handy wrapper for simplify_gen_subreg. Other than
> that, patch LGTM.
>
> Uros.



-- 
BR,
Hongtao


Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Uros Bizjak
On Fri, Jun 21, 2019 at 4:21 AM Hongtao Liu  wrote:
>
> On Thu, Jun 20, 2019 at 10:58 PM H.J. Lu  wrote:
> >
> > On Thu, Jun 20, 2019 at 3:54 AM Hongtao Liu  wrote:
> > >
> > > On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> > > >
> > > > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  wrote:
> > > > >
> > > > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  wrote:
> > > > > >
> > > > > > On 6/7/19, H.J. Lu  wrote:
> > > > > >
> > > > > > >> > > +/* Register pair.  */
> > > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > > > >> > >
> > > > > > >> > > I think
> > > > > > >> > >
> > > > > > >> > > INT_MODE (P2QI, 16);
> > > > > > >> > > INT_MODE (P2HI, 32);
> Why P2QI need 16 bytes but not 2 bytes?
> Same question with P2HI.

Because we made a mistake. It should be 2 and 4, since these arguments
are bytes, not bits.

This will also fix gen_highpart issue.

Uros.

> > > > > > >> > >
> > > > > > >> > > with the above subreg approach should work.
> > > Yes, it works.
> > >
> > > But i didn't figure out how did pass_reload correctly handle such subreg,
> > > do you have suggestions such as "which function i can dig into first" or
> > > "which piece of codes handle subreg"?
> >
> > You need to define REGMODE_NATURAL_SIZE.
> >
> > --
> > H.J.
>
>
>
> --
> BR,
> Hongtao


[committed] Support 1-byte elements of "omp simd array" arrays during vectorization

2019-06-20 Thread Jakub Jelinek
Hi!

The following testcase which I wrote in order to test ncopies > 1
handling of the inclusive scan vectorization reveals we don't vectorize
that, because we required MULT_EXPR on the DR_OFFSET, but obviously for
1 byte elements there is none.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

2019-06-21  Jakub Jelinek  

* tree-vect-data-refs.c (vect_find_stmt_data_reference): Handle
"omp simd array" arrays with one byte elements.

* gcc.dg/vect/vect-simd-11.c: New test.
* gcc.target/i386/sse2-vect-simd-11.c: New test.
* gcc.target/i386/avx2-vect-simd-11.c: New test.
* gcc.target/i386/avx512bw-vect-simd-11.c: New test.

--- gcc/tree-vect-data-refs.c.jj2019-06-20 13:26:29.071150988 +0200
+++ gcc/tree-vect-data-refs.c   2019-06-20 13:55:35.421150589 +0200
@@ -4075,14 +4075,17 @@ vect_find_stmt_data_reference (loop_p lo
  && integer_zerop (DR_STEP (newdr)))
{
  tree off = DR_OFFSET (newdr);
+ tree step = ssize_int (1);
  STRIP_NOPS (off);
- if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
- && TREE_CODE (off) == MULT_EXPR
+ if (TREE_CODE (off) == MULT_EXPR
  && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
{
- tree step = TREE_OPERAND (off, 1);
+ step = TREE_OPERAND (off, 1);
  off = TREE_OPERAND (off, 0);
  STRIP_NOPS (off);
+   }
+ if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST)
+   {
  if (CONVERT_EXPR_P (off)
  && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
  < TYPE_PRECISION (TREE_TYPE (off
--- gcc/testsuite/gcc.dg/vect/vect-simd-11.c.jj 2019-06-20 13:49:16.322081280 
+0200
+++ gcc/testsuite/gcc.dg/vect/vect-simd-11.c2019-06-20 12:58:52.516069619 
+0200
@@ -0,0 +1,186 @@
+/* { dg-require-effective-target size32plus } */
+/* { dg-additional-options "-fopenmp-simd" } */
+/* { dg-additional-options "-mavx" { target avx_runtime } } */
+/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { 
target i?86-*-* x86_64-*-* } } } */
+
+#ifndef main
+#include "tree-vect.h"
+#endif
+
+int r, a[1024], b[1024];
+unsigned short r2, b2[1024];
+unsigned char r3, b3[1024];
+
+__attribute__((noipa)) void
+foo (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3)
+  for (int i = 0; i < 1024; i++)
+{
+  { r += a[i]; r2 += a[i]; r3 += a[i]; }
+  #pragma omp scan inclusive(r, r2, r3)
+  {
+   b[i] = r;
+   b2[i] = r2;
+   b3[i] = r3;
+  }
+}
+}
+
+__attribute__((noipa)) int
+bar (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3)
+  for (int i = 0; i < 1024; i++)
+{
+  {
+   s += 2 * a[i];
+   s2 += 2 * a[i];
+   s3 += 2 * a[i];
+  }
+  #pragma omp scan inclusive(s, s2, s3)
+  { b[i] = s; b2[i] = s2; b3[i] = s3; }
+}
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+__attribute__((noipa)) void
+baz (int *a, int *b, unsigned short *b2, unsigned char *b3)
+{
+  #pragma omp simd reduction (inscan, +:r, r2, r3) if (simd: 0)
+  for (int i = 0; i < 1024; i++)
+{
+  {
+   r += a[i];
+   r2 += a[i];
+   r3 += a[i];
+  }
+  #pragma omp scan inclusive(r, r2, r3)
+  {
+   b[i] = r;
+   b2[i] = r2;
+   b3[i] = r3;
+  }
+}
+}
+
+__attribute__((noipa)) int
+qux (unsigned short *s2p, unsigned char *s3p)
+{
+  int s = 0;
+  unsigned short s2 = 0;
+  unsigned char s3 = 0;
+  #pragma omp simd reduction (inscan, +:s, s2, s3) simdlen (1)
+  for (int i = 0; i < 1024; i++)
+{
+  { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; }
+  #pragma omp scan inclusive(s, s2, s3)
+  { b[i] = s; b2[i] = s2; b3[i] = s3; }
+}
+  *s2p = s2;
+  *s3p = s3;
+  return s;
+}
+
+int
+main ()
+{
+  int s = 0;
+  unsigned short s2;
+  unsigned char s3;
+#ifndef main
+  check_vect ();
+#endif
+  for (int i = 0; i < 1024; ++i)
+{
+  a[i] = i;
+  b[i] = -1;
+  b2[i] = -1;
+  b3[i] = -1;
+  asm ("" : "+g" (i));
+}
+  foo (a, b, b2, b3);
+  if (r != 1024 * 1023 / 2
+  || r2 != (unsigned short) r
+  || r3 != (unsigned char) r)
+abort ();
+  for (int i = 0; i < 1024; ++i)
+{
+  s += i;
+  if (b[i] != s
+ || b2[i] != (unsigned short) s
+ || b3[i] != (unsigned char) s)
+   abort ();
+  else
+   {
+ b[i] = 25;
+ b2[i] = 24;
+ b3[i] = 26;
+   }
+}
+  if (bar (&s2, &s3) != 1024 * 1023)
+abort ();
+  if (s2 != (unsigned short) (1024 * 1023)
+  || s3 != (unsigned char) (1024 * 1023))
+abort ();
+  s = 0;
+  for (int i = 0; i < 1024; ++i)
+{
+  s += 2 * i;
+  if (b[i] != s
+ || b2[i] != (unsigned short) s
+   

Re: [PATCH] Enable GCC support for AVX512_VP2INTERSECT.

2019-06-20 Thread Hongtao Liu
On Fri, Jun 21, 2019 at 1:56 PM Uros Bizjak  wrote:
>
> On Fri, Jun 21, 2019 at 4:21 AM Hongtao Liu  wrote:
> >
> > On Thu, Jun 20, 2019 at 10:58 PM H.J. Lu  wrote:
> > >
> > > On Thu, Jun 20, 2019 at 3:54 AM Hongtao Liu  wrote:
> > > >
> > > > On Thu, Jun 20, 2019 at 2:13 PM Uros Bizjak  wrote:
> > > > >
> > > > > On Thu, Jun 20, 2019 at 7:36 AM Hongtao Liu  
> > > > > wrote:
> > > > > >
> > > > > > On Sat, Jun 8, 2019 at 4:12 AM Uros Bizjak  
> > > > > > wrote:
> > > > > > >
> > > > > > > On 6/7/19, H.J. Lu  wrote:
> > > > > > >
> > > > > > > >> > > +/* Register pair.  */
> > > > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 2); /* P2QI */
> > > > > > > >> > > +VECTOR_MODES_WITH_PREFIX (P, INT, 4); /* P2HI P4QI */
> > > > > > > >> > >
> > > > > > > >> > > I think
> > > > > > > >> > >
> > > > > > > >> > > INT_MODE (P2QI, 16);
> > > > > > > >> > > INT_MODE (P2HI, 32);
> > Why P2QI need 16 bytes but not 2 bytes?
> > Same question with P2HI.
>
> Because we made a mistake. It should be 2 and 4, since these arguments
Then it will run into an internal compiler error when building libgcc.
I'm still investigating it.
> are bytes, not bits.
>
> This will also fix gen_highpart issue.
>
> Uros.
>
> > > > > > > >> > >
> > > > > > > >> > > with the above subreg approach should work.
> > > > Yes, it works.
> > > >
> > > > But i didn't figure out how did pass_reload correctly handle such 
> > > > subreg,
> > > > do you have suggestions such as "which function i can dig into first" or
> > > > "which piece of codes handle subreg"?
> > >
> > > You need to define REGMODE_NATURAL_SIZE.
> > >
> > > --
> > > H.J.
> >
> >
> >
> > --
> > BR,
> > Hongtao



-- 
BR,
Hongtao


[committed] Fix OpenMP reference handling to incomplete type during template processing (PR c++/90950)

2019-06-20 Thread Jakub Jelinek
Hi!

As the testcase shows, if we have a clause with reference to dependent type,
during the processing_template_decl finish_omp_clauses we would reject it
rather than deferring checking of that until instantiation.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk, queued for backporting to release branches.

2019-06-21  Jakub Jelinek  

PR c++/90950
* semantics.c (finish_omp_clauses): Don't reject references to
incomplete types if processing_template_decl.

* g++.dg/gomp/lastprivate-1.C: New test.

--- gcc/cp/semantics.c.jj   2019-06-17 23:18:53.621850057 +0200
+++ gcc/cp/semantics.c  2019-06-20 16:34:11.111784663 +0200
@@ -7831,7 +7831,8 @@ finish_omp_clauses (tree clauses, enum c
  t = require_complete_type (t);
  if (t == error_mark_node)
remove = true;
- else if (TYPE_REF_P (TREE_TYPE (t))
+ else if (!processing_template_decl
+  && TYPE_REF_P (TREE_TYPE (t))
   && !complete_type_or_else (TREE_TYPE (TREE_TYPE (t)), t))
remove = true;
}
--- gcc/testsuite/g++.dg/gomp/lastprivate-1.C.jj2019-06-20 
16:33:28.980441681 +0200
+++ gcc/testsuite/g++.dg/gomp/lastprivate-1.C   2019-06-20 16:37:05.420066376 
+0200
@@ -0,0 +1,16 @@
+// PR c++/90950
+// { dg-do compile }
+
+template 
+T
+foo (void)
+{
+  T y = 0;
+  T &x = y;
+  #pragma omp parallel for lastprivate (x)
+  for (int i = 0; i < 8; ++i)
+x = i;
+  return x;
+}
+
+int a = foo ();

Jakub


[committed] Add OpenMP 5 exclusive scan support for simd constructs

2019-06-20 Thread Jakub Jelinek
Hi!

The following patch adds exclusive scan support for simd, it is similar to
the inclusive scan, just we need to swap the input and scan phases and
use slightly different pattern at the start of the scan phase, so that it
computes what we need.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-06-21  Jakub Jelinek  

* omp-low.c (lower_rec_simd_input_clauses): Add rvar2 argument,
create another "omp scan inscan exclusive" array if
!ctx->scan_inclusive.
(lower_rec_input_clauses): Handle exclusive scan inscan reductions.
(lower_omp_scan): Likewise.
* tree-vectorizer.h (struct _stmt_vec_info): Use 3-bit instead of
2-bit bitfield for simd_lane_access_p member.
* tree-vect-data-refs.c (vect_analyze_data_refs): Also handle
aux == (void *)-4 as simd lane access.
* tree-vect-stmts.c (check_scan_store): Handle exclusive scan.  Update
comment with permutations to show the canonical permutation order.
(vectorizable_scan_store): Handle exclusive scan.
(vectorizable_store): Call vectorizable_scan_store even for
STMT_VINFO_SIMD_LANE_ACCESS_P > 3.

* gcc.dg/vect/vect-simd-12.c: New test.
* gcc.dg/vect/vect-simd-13.c: New test.
* gcc.dg/vect/vect-simd-14.c: New test.
* gcc.dg/vect/vect-simd-15.c: New test.
* gcc.target/i386/sse2-vect-simd-12.c: New test.
* gcc.target/i386/sse2-vect-simd-13.c: New test.
* gcc.target/i386/sse2-vect-simd-14.c: New test.
* gcc.target/i386/sse2-vect-simd-15.c: New test.
* gcc.target/i386/avx2-vect-simd-12.c: New test.
* gcc.target/i386/avx2-vect-simd-13.c: New test.
* gcc.target/i386/avx2-vect-simd-14.c: New test.
* gcc.target/i386/avx2-vect-simd-15.c: New test.
* gcc.target/i386/avx512f-vect-simd-12.c: New test.
* gcc.target/i386/avx512f-vect-simd-13.c: New test.
* gcc.target/i386/avx512f-vect-simd-14.c: New test.
* gcc.target/i386/avx512bw-vect-simd-15.c: New test.
* g++.dg/vect/simd-6.cc: New test.
* g++.dg/vect/simd-7.cc: New test.
* g++.dg/vect/simd-8.cc: New test.
* g++.dg/vect/simd-9.cc: New test.
* c-c++-common/gomp/scan-2.c: Don't expect any diagnostics.

--- gcc/omp-low.c.jj2019-06-20 13:26:29.085150770 +0200
+++ gcc/omp-low.c   2019-06-20 15:46:25.964253058 +0200
@@ -3692,7 +3692,8 @@ struct omplow_simd_context {
 static bool
 lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
  omplow_simd_context *sctx, tree &ivar,
- tree &lvar, tree *rvar = NULL)
+ tree &lvar, tree *rvar = NULL,
+ tree *rvar2 = NULL)
 {
   if (known_eq (sctx->max_vf, 0U))
 {
@@ -3767,6 +3768,25 @@ lower_rec_simd_input_clauses (tree new_v
  *rvar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar,
  sctx->lastlane, NULL_TREE, NULL_TREE);
  TREE_THIS_NOTRAP (*rvar) = 1;
+
+ if (!ctx->scan_inclusive)
+   {
+ /* And for exclusive scan yet another one, which will
+hold the value during the scan phase.  */
+ tree savar = create_tmp_var_raw (atype);
+ if (TREE_ADDRESSABLE (new_var))
+   TREE_ADDRESSABLE (savar) = 1;
+ DECL_ATTRIBUTES (savar)
+   = tree_cons (get_identifier ("omp simd array"), NULL,
+tree_cons (get_identifier ("omp simd inscan "
+   "exclusive"), NULL,
+   DECL_ATTRIBUTES (savar)));
+ gimple_add_tmp_var (savar);
+ ctx->cb.decl_map->put (iavar, savar);
+ *rvar2 = build4 (ARRAY_REF, TREE_TYPE (new_var), savar,
+  sctx->idx, NULL_TREE, NULL_TREE);
+ TREE_THIS_NOTRAP (*rvar2) = 1;
+   }
}
   ivar = build4 (ARRAY_REF, TREE_TYPE (new_var), iavar, sctx->idx,
 NULL_TREE, NULL_TREE);
@@ -5185,14 +5205,15 @@ lower_rec_input_clauses (tree clauses, g
  new_vard = TREE_OPERAND (new_var, 0);
  gcc_assert (DECL_P (new_vard));
}
- tree rvar = NULL_TREE, *rvarp = NULL;
+ tree rvar = NULL_TREE, *rvarp = NULL, rvar2 = NULL_TREE;
  if (is_simd
  && OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION
  && OMP_CLAUSE_REDUCTION_INSCAN (c))
rvarp = &rvar;
  if (is_simd
  && lower_rec_simd_input_clauses (new_var, ctx, &sctx,
-  ivar, lvar, rvarp))
+  ivar, lvar, rvarp,
+  &rva