Re: Backports to 8.x branch

2019-02-09 Thread Jakub Jelinek
On Thu, Feb 07, 2019 at 04:02:54PM +0100, Jakub Jelinek wrote:
> Another month has passed since my last 8.x backporting effort,
> so I've backported the following 32 patches from trunk to 8.x,
> bootstrapped/regtested on x86_64-linux and i686-linux and committed.

And two further ones now, bootstrapped/regtested on powerpc64{,le}-linux
and committed.

Jakub
2019-02-09  Jakub Jelinek  

PR middle-end/89243
* g++.dg/opt/pr89188.C: Include ../torture/pr88861.C.

Backported from mainline
2019-01-16  David Malcolm  

PR target/88861
* combine.c (delete_noop_moves): Convert to "bool" return,
returning true if any edges are eliminated.
(combine_instructions): Also return true if delete_noop_moves
returns true.

* g++.dg/torture/pr88861.C: New test.

--- gcc/combine.c   (revision 267983)
+++ gcc/combine.c   (revision 267984)
@@ -983,14 +983,17 @@ combine_validate_cost (rtx_insn *i0, rtx
 }
 
 
-/* Delete any insns that copy a register to itself.  */
+/* Delete any insns that copy a register to itself.
+   Return true if the CFG was changed.  */
 
-static void
+static bool
 delete_noop_moves (void)
 {
   rtx_insn *insn, *next;
   basic_block bb;
 
+  bool edges_deleted = false;
+
   FOR_EACH_BB_FN (bb, cfun)
 {
   for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next)
@@ -1001,10 +1004,12 @@ delete_noop_moves (void)
  if (dump_file)
fprintf (dump_file, "deleting noop move %d\n", INSN_UID (insn));
 
- delete_insn_and_edges (insn);
+ edges_deleted |= delete_insn_and_edges (insn);
}
}
 }
+
+  return edges_deleted;
 }
 
 
@@ -1143,8 +1148,8 @@ insn_a_feeds_b (rtx_insn *a, rtx_insn *b
 /* Main entry point for combiner.  F is the first insn of the function.
NREGS is the first unused pseudo-reg number.
 
-   Return nonzero if the combiner has turned an indirect jump
-   instruction into a direct jump.  */
+   Return nonzero if the CFG was changed (e.g. if the combiner has
+   turned an indirect jump instruction into a direct jump).  */
 static int
 combine_instructions (rtx_insn *f, unsigned int nregs)
 {
@@ -1529,7 +1534,7 @@ retry:
   default_rtl_profile ();
   clear_bb_flags ();
   new_direct_jump_p |= purge_all_dead_edges ();
-  delete_noop_moves ();
+  new_direct_jump_p |= delete_noop_moves ();
 
   /* Clean up.  */
   obstack_free (&insn_link_obstack, NULL);
--- gcc/testsuite/g++.dg/torture/pr88861.C  (nonexistent)
+++ gcc/testsuite/g++.dg/torture/pr88861.C  (revision 267984)
@@ -0,0 +1,11 @@
+// { dg-options "-fnon-call-exceptions" }
+
+struct Ax {
+  int n, a[];
+};
+
+int i = 12345678;
+int main() {
+  static Ax s{456, i};
+  ((s.a[0]) ? (void)0 : (void)0);
+}
--- gcc/testsuite/g++.dg/opt/pr89188.C  2019-02-07 15:55:14.595876158 +0100
+++ gcc/testsuite/g++.dg/opt/pr89188.C  2019-02-08 19:37:02.497288425 +0100
@@ -2,12 +2,4 @@
 // { dg-do compile { target c++11 } }
 // { dg-options "-Og -flive-range-shrinkage -fnon-call-exceptions" }
 
-struct Ax {
-  int n, a[];
-};
-
-int i = 12345678;
-int main() {
-  static Ax s{456, i};
-  ((s.a[0]) ? (void)0 : (void)0);
-}
+#include "../torture/pr88861.C"
2019-02-09  Jakub Jelinek  

Backported from mainline
2019-02-08  Jakub Jelinek  

PR rtl-optimization/89234
* except.c (copy_reg_eh_region_note_forward): Return if note_or_insn
is a NOTE, CODE_LABEL etc. - rtx_insn * other than INSN_P.
(copy_reg_eh_region_note_backward): Likewise.

* g++.dg/ubsan/pr89234.C: New test.

--- gcc/except.c    (revision 268668)
+++ gcc/except.c    (revision 268669)
@@ -1756,6 +1756,8 @@ copy_reg_eh_region_note_forward (rtx not
   if (note == NULL)
return;
 }
+  else if (is_a <rtx_insn *> (note_or_insn))
+return;
   note = XEXP (note, 0);
 
   for (insn = first; insn != last ; insn = NEXT_INSN (insn))
@@ -1778,6 +1780,8 @@ copy_reg_eh_region_note_backward (rtx no
   if (note == NULL)
return;
 }
+  else if (is_a <rtx_insn *> (note_or_insn))
+return;
   note = XEXP (note, 0);
 
   for (insn = last; insn != first; insn = PREV_INSN (insn))
--- gcc/testsuite/g++.dg/ubsan/pr89234.C    (nonexistent)
+++ gcc/testsuite/g++.dg/ubsan/pr89234.C    (revision 268669)
@@ -0,0 +1,11 @@
+// PR rtl-optimization/89234
+// { dg-do compile { target dfp } }
+// { dg-options "-O2 -fnon-call-exceptions -fsanitize=null" }
+
+typedef float __attribute__((mode (SD))) _Decimal32;
+
+void
+foo (_Decimal32 *b, _Decimal32 c)
+{
+  *b = c + 1.5;
+}


[committed] Fix OpenMP declare simd handling with externals (PR middle-end/89246)

2019-02-09 Thread Jakub Jelinek
Hi!

As the following testcase shows, if we only check DECL_ARGUMENTS even on
external decls, where the !node->definition declarations usually have NULL
DECL_ARGUMENTS, we don't actually check the arguments at all.  So if there
is any non-suitable argument type, we not only fail to warn about it but,
worse, we clone the function and allow it to be used in vectorization code,
which in some rare cases, as in the following testcase, actually is
possible.

The following patch grabs the arguments similarly to how
  if (node->definition)
args = ipa_get_vector_of_formal_parms (node->decl);
  else
args = simd_clone_vector_of_formal_parm_types (node->decl);
grabs those.  Bootstrapped/regtested on x86_64-linux and i686-linux,
committed to trunk.

aarch64 will need similar changes.
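
For reference, the distinction the patch relies on (definition vs. bare
declaration) can be sketched like this; it is a generic illustration using
GCC's tree/cgraph API and assumes the usual internal headers, with
check_type being a hypothetical helper rather than the committed i386 code:

  /* Sketch only: how parameter types are reached for a definition vs. a
     bare declaration.  */
  static void
  walk_parm_types (struct cgraph_node *node)
  {
    if (node->definition)
      {
        /* Definitions carry PARM_DECLs on DECL_ARGUMENTS.  */
        for (tree p = DECL_ARGUMENTS (node->decl); p; p = DECL_CHAIN (p))
          check_type (TREE_TYPE (p));
      }
    else
      {
        /* Externs usually have NULL DECL_ARGUMENTS; fall back to the
           prototype's TYPE_ARG_TYPES list.  */
        for (tree t = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
             t && t != void_list_node; t = TREE_CHAIN (t))
          check_type (TREE_VALUE (t));
      }
  }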

2019-02-09  Jakub Jelinek  

PR middle-end/89246
* config/i386/i386.c (ix86_simd_clone_compute_vecsize_and_simdlen):
If !node->definition and TYPE_ARG_TYPES is non-NULL, use
TYPE_ARG_TYPES instead of DECL_ARGUMENTS.

* gcc.dg/gomp/pr89246-1.c: New test.
* gcc.dg/gomp/pr89246-2.c: New test.

--- gcc/config/i386/i386.c.jj   2019-02-07 17:33:38.374676821 +0100
+++ gcc/config/i386/i386.c  2019-02-08 16:40:33.645381631 +0100
@@ -50447,28 +50447,34 @@ ix86_simd_clone_compute_vecsize_and_simd
 
   tree t;
   int i;
+  tree type_arg_types = TYPE_ARG_TYPES (TREE_TYPE (node->decl));
+  bool decl_arg_p = (node->definition || type_arg_types == NULL_TREE);
 
-  for (t = DECL_ARGUMENTS (node->decl), i = 0; t; t = DECL_CHAIN (t), i++)
-switch (TYPE_MODE (TREE_TYPE (t)))
-  {
-  case E_QImode:
-  case E_HImode:
-  case E_SImode:
-  case E_DImode:
-  case E_SFmode:
-  case E_DFmode:
-  /* case E_SCmode: */
-  /* case E_DCmode: */
-   if (!AGGREGATE_TYPE_P (TREE_TYPE (t)))
- break;
-   /* FALLTHRU */
-  default:
-   if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
- break;
-   warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
-   "unsupported argument type %qT for simd", TREE_TYPE (t));
-   return 0;
-  }
+  for (t = (decl_arg_p ? DECL_ARGUMENTS (node->decl) : type_arg_types), i = 0;
+   t && t != void_list_node; t = TREE_CHAIN (t), i++)
+{
+  tree arg_type = decl_arg_p ? TREE_TYPE (t) : TREE_VALUE (t);
+  switch (TYPE_MODE (arg_type))
+   {
+   case E_QImode:
+   case E_HImode:
+   case E_SImode:
+   case E_DImode:
+   case E_SFmode:
+   case E_DFmode:
+   /* case E_SCmode: */
+   /* case E_DCmode: */
+ if (!AGGREGATE_TYPE_P (arg_type))
+   break;
+ /* FALLTHRU */
+   default:
+ if (clonei->args[i].arg_type == SIMD_CLONE_ARG_TYPE_UNIFORM)
+   break;
+ warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
+ "unsupported argument type %qT for simd", arg_type);
+ return 0;
+   }
+}
 
   if (!TREE_PUBLIC (node->decl))
 {
--- gcc/testsuite/gcc.dg/gomp/pr89246-1.c.jj    2019-02-08 16:44:33.362428659 +0100
+++ gcc/testsuite/gcc.dg/gomp/pr89246-1.c       2019-02-08 16:46:01.195979218 +0100
@@ -0,0 +1,19 @@
+/* PR middle-end/89246 */
+/* { dg-do link { target { int128 && vect_simd_clones } } } */
+/* { dg-options "-O2 -fopenmp-simd -w" } */
+/* { dg-additional-sources "pr89246-2.c" } */
+
+#pragma omp declare simd
+int foo (__int128 x)
+{
+  return x;
+}
+
+#pragma omp declare simd
+extern int bar (int x);
+
+int
+main ()
+{
+  return foo (0) + bar (0);
+}
--- gcc/testsuite/gcc.dg/gomp/pr89246-2.c.jj    2019-02-08 16:44:39.287330957 +0100
+++ gcc/testsuite/gcc.dg/gomp/pr89246-2.c       2019-02-08 16:02:16.425237007 +0100
@@ -0,0 +1,13 @@
+/* PR middle-end/89246 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O0 -fno-openmp -fno-openmp-simd" } */
+
+#pragma omp declare simd
+extern int foo (__int128 x);
+
+#pragma omp declare simd
+int
+bar (int x)
+{
+  return x + foo (0);
+}

Jakub


Re: [PATCH] Add target-zlib to top-level configure, use zlib from libphobos

2019-02-09 Thread Iain Buclaw
On Mon, 28 Jan 2019 at 13:10, Richard Biener  wrote:
>
> On Mon, Jan 21, 2019 at 7:35 PM Iain Buclaw  wrote:
> >
> > Hi,
> >
> > Following on from the last, this adds target-zlib to target_libraries
> > and updates libphobos build scripts to link to libz_convenience.a.
> > The D front-end already has target-zlib in d/config-lang.in.
> >
> > Is the top-level part OK?  I considered disabling target-zlib if
> > libphobos is not being built, but decided against unless it's
> > requested.
>
> Hmm, you overload --with-system-zlib to apply to both host and target
> (I guess it already applied to build), not sure if that's really desired?
> I suppose libphobos is the first target library linking against zlib?
>

Originally, libgcj linked to zlib.

> You are also falling back to in-tree zlib if --with-system-zlib was
> specified but no zlib was found - I guess for cross builds that
> will easily go unnoticed...  The toplevel --with-system-zlib is much
> stricter about this and simply fails.
>

OK, so keep --with-target-system-zlib to distinguish between the two?

-- 
Iain


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> On Fri, Feb 8, 2019 at 3:28 AM H.J. Lu  wrote:
>>
>> On Fri, Feb 8, 2019 at 1:51 AM Uros Bizjak  wrote:
>> >
>> > On Thu, Feb 7, 2019 at 10:11 PM H.J. Lu  wrote:
>> > >
>> > > OImode and TImode moves must be done in XImode to access upper 16
>> > > vector registers without AVX512VL.  With AVX512VL, we can access
>> > > upper 16 vector registers in OImode and TImode.
>> > >
>> > > PR target/89229
>> > > * config/i386/i386.md (*movoi_internal_avx): Set mode to XI
>> > > for
>> > > upper 16 vector registers without TARGET_AVX512VL.
>> > > (*movti_internal): Likewise.
>> >
>> > Please use (not (match_test "...")) instead of (match_test "!...") and
>> > put the new test as the first argument of the AND rtx.
>> >
>> > LGTM with the above change.
>>
>> This is the patch I am checking in.
>>
>> Thanks.
>>
>> H.J.
>> ---
>> OImode and TImode moves must be done in XImode to access upper 16
>> vector registers without AVX512VL.  With AVX512VL, we can access
>> upper 16 vector registers in OImode and TImode.
>>
>> PR target/89229
>> * config/i386/i386.md (*movoi_internal_avx): Set mode to XI for
>> upper 16 vector registers without TARGET_AVX512VL.
>> (*movti_internal): Likewise.
>> ---
>>  gcc/config/i386/i386.md | 10 ++
>>  1 file changed, 6 insertions(+), 4 deletions(-)
>>
>> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
>> index c1492363bca..3d9141ae450 100644
>> --- a/gcc/config/i386/i386.md
>> +++ b/gcc/config/i386/i386.md
>> @@ -1933,8 +1933,9 @@
>> (set_attr "type" "sselog1,sselog1,ssemov,ssemov")
>> (set_attr "prefix" "vex")
>> (set (attr "mode")
>> - (cond [(ior (match_operand 0 "ext_sse_reg_operand")
>> - (match_operand 1 "ext_sse_reg_operand"))
>> + (cond [(and (not (match_test "TARGET_AVX512VL"))
>> + (ior (match_operand 0 "ext_sse_reg_operand")
>> + (match_operand 1 "ext_sse_reg_operand")))
>>   (const_string "XI")
>>  (and (eq_attr "alternative" "1")
>>   (match_test "TARGET_AVX512VL"))
>> @@ -2012,8 +2013,9 @@
>> (set (attr "mode")
>>   (cond [(eq_attr "alternative" "0,1")
>>   (const_string "DI")
>> -(ior (match_operand 0 "ext_sse_reg_operand")
>> - (match_operand 1 "ext_sse_reg_operand"))
>> +(and (not (match_test "TARGET_AVX512VL"))
>> + (ior (match_operand 0 "ext_sse_reg_operand")
>> + (match_operand 1 "ext_sse_reg_operand")))
>>   (const_string "XI")
>>  (and (eq_attr "alternative" "3")
>>   (match_test "TARGET_AVX512VL"))
>> --
>
> Also need this patch since we no longer set MODE_XI for
> AVX512VL.

No. Please figure out the correct condition to set the mode attribute to XImode instead.

Uros.


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 10:50:43AM +0100, Uros Bizjak wrote:
> > Also need this patch since we no longer set MODE_XI for
> > AVX512VL.
> 
> No. Please figure out the correct condition to set the mode attribute to
> XImode instead.

If it is AVX512VL, isn't MODE_OI or MODE_TI correct in those cases though?
While the instructions need EVEX encoding if they have [xy]mm{16,...31}
operands, they operate just on 256 or 128 bits.

Jakub


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 10:56:38AM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 10:50:43AM +0100, Uros Bizjak wrote:
> > > Also need this patch since we no longer set MODE_XI for
> > > AVX512VL.
> > 
> > No. Please figure out the correct condition to set the mode attribute to
> > XImode instead.
> 
> If it is AVX512VL, isn't MODE_OI or MODE_TI correct in those cases though?
> While the instructions need EVEX encoding if they have [xy]mm{16,...31}
> operands, they operate just on 256 or 128 bits.

That said, mov{oi,ti}_internal is severely broken for avx512f without
avx512vl even after this patch.

I think the following patch, incremental to H.J.'s patch, should fix that.
It is pretty much a copy of what the sse.md (*mov<mode>_internal) pattern does,
just specialized to the particular instructions (i.e. that it is integral,
not floating, and always 32-byte or always 16-byte).  sse.md has:
  /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
     in avx512f, so we need to use workarounds, to access sse registers
     16-31, which are evex-only. In avx512vl we don't need workarounds.  */
  if (TARGET_AVX512F && <MODE_SIZE> < 64 && !TARGET_AVX512VL
      && (EXT_REX_SSE_REG_P (operands[0])
          || EXT_REX_SSE_REG_P (operands[1])))
    {
      if (memory_operand (operands[0], <MODE>mode))
        {
          if (<MODE_SIZE> == 32)
            return "vextract<shuffletype>64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
          else if (<MODE_SIZE> == 16)
            return "vextract<shuffletype>32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
          else
            gcc_unreachable ();
        }
      else if (memory_operand (operands[1], <MODE>mode))
        {
          if (<MODE_SIZE> == 32)
            return "vbroadcast<shuffletype>64x4\t{%1, %g0|%g0, %1}";
          else if (<MODE_SIZE> == 16)
            return "vbroadcast<shuffletype>32x4\t{%1, %g0|%g0, %1}";
          else
            gcc_unreachable ();
        }
      else
        /* Reg -> reg move is always aligned.  Just use wider move.  */
        switch (get_attr_mode (insn))
          {
          case MODE_V8SF:
          case MODE_V4SF:
            return "vmovaps\t{%g1, %g0|%g0, %g1}";
          case MODE_V4DF:
          case MODE_V2DF:
            return "vmovapd\t{%g1, %g0|%g0, %g1}";
          case MODE_OI:
          case MODE_TI:
            return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
          default:
            gcc_unreachable ();
          }
    }
before it tries to handle the normal cases.  Ok for trunk if it passes
bootstrap/regtest?

2019-02-09  Jakub Jelinek  

PR target/89229
* config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
MODE_XI properly.

--- gcc/config/i386/i386.md.jj  2019-02-09 11:18:53.995450055 +0100
+++ gcc/config/i386/i386.md 2019-02-09 11:26:04.364342306 +0100
@@ -1905,6 +1905,18 @@ (define_insn "*movoi_internal_avx"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
+  /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+in avx512f, so we need to use workarounds to access sse registers
+16-31, which are evex-only. In avx512vl we don't need workarounds.  */
+  if (get_attr_mode (insn) == MODE_XI)
+   {
+ if (memory_operand (operands[0], OImode))
+   return "vextracti64x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else if (memory_operand (operands[1], OImode))
+   return "vbroadcasti64x4\t{%1, %g0|%g0, %1}";
+ else
+   return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+   }
   if (misaligned_operand (operands[0], OImode)
  || misaligned_operand (operands[1], OImode))
{
@@ -1968,6 +1980,18 @@ (define_insn "*movti_internal"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
+  /* There is no evex-encoded vmov* for sizes smaller than 64-bytes
+in avx512f, so we need to use workarounds to access sse registers
+16-31, which are evex-only. In avx512vl we don't need workarounds.  */
+  if (get_attr_mode (insn) == MODE_XI)
+   {
+ if (memory_operand (operands[0], TImode))
+   return "vextracti32x4\t{$0x0, %g1, %0|%0, %g1, 0x0}";
+ else if (memory_operand (operands[1], TImode))
+   return "vbroadcasti32x4\t{%1, %g0|%g0, %1}";
+ else
+   return "vmovdqa64\t{%g1, %g0|%g0, %g1}";
+   }
   /* TDmode values are passed as TImode on the stack.  Moving them
 to stack may result in unaligned memory access.  */
   if (misaligned_operand (operands[0], TImode)


Jakub


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 11:40:49AM +0100, Jakub Jelinek wrote:
> 2019-02-09  Jakub Jelinek  
> 
>   PR target/89229
>   * config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
>   MODE_XI properly.

Actually, I believe this shouldn't be needed, basically I think MODE_XI
should never be the case for these instructions, because hard_regno_mode_ok
shouldn't allow that:

  /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
  if (TARGET_AVX512VL
  && (mode == OImode
  || mode == TImode
  || VALID_AVX256_REG_MODE (mode)
  || VALID_AVX512VL_128_REG_MODE (mode)))
return true;

  /* xmm16-xmm31 are only available for AVX-512.  */
  if (EXT_REX_SSE_REGNO_P (regno))
return false;

but then the question is if we really need:
(and (not (match_test "TARGET_AVX512VL"))
(ior (match_operand 0 "ext_sse_reg_operand")
 (match_operand 1 "ext_sse_reg_operand")))
 (const_string "XI")
on both of the instructions; without avx512vl, the above shouldn't allow
ext_sse_reg_operand through with OImode or TImode.
We still need the MODE_XI -> EXT_REX_SSE_REGNO_P patch H.J. posted.

Jakub


[Patch, fortran] PR89200 - [9 Regression] Erroneous copying of a derived type with a deferred-length character array component

2019-02-09 Thread Paul Richard Thomas
Committed as 'obvious' in revision 268721 after bootstrapping and regtesting.

Even if not entirely obvious to the world at large, the patch is, in
the words of The Hitchhiker's Guide to the Galaxy, "mostly harmless".

To explain the 'obviousness': Array and structure constructors make
use of temporary descriptors, which are sometimes copied into the
destination. Array referencing has been tightened up so that more use
is being made of pointer arithmetic involving the 'span' field to
obtain the stride measure. This is especially important in the case of
references to components of derived type array elements, as in the
testcase.
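
The effect of the 'span' can be sketched in plain C (an illustration of
the addressing rule only, not gfortran's actual descriptor code; the
struct and function names are made up for the example):

  #include <stddef.h>

  /* When an array section designates one component of a derived-type
     element, consecutive elements are a whole derived-type's size apart,
     so the descriptor's span must be that size, not the size of the
     component being accessed.  */
  struct elem { double comp; int other[3]; };

  static double *
  component_addr (char *base, size_t index, size_t span)
  {
    /* span == sizeof (struct elem) here, not sizeof (double).  */
    return (double *) (base + index * span);
  }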

It should be noted that the testcase leaks memory as in PR38319. Since
it is an especially clear case, I might take a quick peek to see if I
can fix this PR at long last.

Paul

2019-02-09  Paul Thomas  

PR fortran/89200
* trans-array.c (gfc_trans_create_temp_array): Set the 'span'
field for derived types.

2019-02-09  Paul Thomas  

PR fortran/89200
* gfortran.dg/array_reference_2.f90 : New test.


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 2:50 AM Jakub Jelinek  wrote:
>
> On Sat, Feb 09, 2019 at 11:40:49AM +0100, Jakub Jelinek wrote:
> > 2019-02-09  Jakub Jelinek  
> >
> >   PR target/89229
> >   * config/i386/i386.md (*movoi_internal_avx, *movti_internal): Handle
> >   MODE_XI properly.
>
> Actually, I believe this shouldn't be needed, basically I think MODE_XI
> should never be the case for these instructions, because hard_regno_mode_ok
> shouldn't allow that:
>
>   /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
>   if (TARGET_AVX512VL
>   && (mode == OImode
>   || mode == TImode
>   || VALID_AVX256_REG_MODE (mode)
>   || VALID_AVX512VL_128_REG_MODE (mode)))
> return true;
>
>   /* xmm16-xmm31 are only available for AVX-512.  */
>   if (EXT_REX_SSE_REGNO_P (regno))
> return false;
>
> but then the question is if we really need:
> (and (not (match_test "TARGET_AVX512VL"))
> (ior (match_operand 0 "ext_sse_reg_operand")
>  (match_operand 1 "ext_sse_reg_operand")))
>  (const_string "XI")
> on both of the instructions, not avx512vl, the above shouldn't allow
> ext_sse_reg_operand through with OImode or TImode.
> We still need the MODE_XI -> EXT_REX_SSE_REGNO_P patch H.J. posted.
>
> Jakub

I believe all usages of

(ior (match_operand 0 "ext_sse_reg_operand")
  (match_operand 1 "ext_sse_reg_operand"))

should be checked.  I am not sure if they should be there at all.

-- 
H.J.


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 04:11:43AM -0800, H.J. Lu wrote:
> I believe all usages of
> 
> (ior (match_operand 0 "ext_sse_reg_operand")
>   (match_operand 1 "ext_sse_reg_operand"))
> 
> should be checked.  I am not sure if they should be there at all.

E.g. in i386.md all the other spots look fine, because {DI,SI,DF,SF}mode
is allowed in ext sse regs even with -mavx512f.  And sse.md doesn't use this
at all.  What I'm wondering is if we need the sse.md (*mov_internal)
code I've cited earlier, doing bootstrap/regtest now with gcc_unreachable in
there (and in *mov{o,x}i_internal* for MODE_XI too) too see if it ever
triggers.

Jakub


Re: [PR fortran/89077, patch, part 2] - ICE using * as len specifier for character parameter

2019-02-09 Thread Thomas Koenig

Hi Harald,


OK for trunk?  And for backports to 8/7?


I played around with your patch and found a few problems with
ICEs, but these were all pre-existing as far as I could determine;
I have submitted PR89266 for what I discovered.

I am a bit concerned about cases like a(i:i-2) returning negative
lengths. Could you maybe set the length to zero if it is
calculated to be negative?
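
In other words, something along the lines of the following (a sketch of
the requested clamping only, not the actual gfortran code; 'upper' and
'lower' stand for the section bounds):

  /* For a section like a(i:i-2) the raw length upper - lower + 1 is
     negative; it should be treated as an empty (zero-length) section.  */
  ptrdiff_t len = upper - lower + 1;
  if (len < 0)
    len = 0;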

I'd say that the patch is OK for trunk with that change, even though
we are technically in regression-only mode. It is fairly localized,
and should not have ill effects.  Regarding backport to the other
open branches - well, it does not fix a regression, so I'd be
inclined not to backport it.

Regards

Thomas


[PATCH 08/43] i386: Emulate MMX ashr3/3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ashr3/3 with SSE.  Only SSE register
source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_ashr3): Disallow with
TARGET_MMX_WITH_SSE.
(mmx_3): Likewise.
(ashr3): New.
(3): Likewise.
---
 gcc/config/i386/mmx.md | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ec1c7402603..69c66e968b5 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -995,7 +995,7 @@
 (ashiftrt:MMXMODE24
  (match_operand:MMXMODE24 1 "register_operand" "0")
  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "psra\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
(set (attr "length_immediate")
@@ -1009,7 +1009,7 @@
 (any_lshift:MMXMODE248
  (match_operand:MMXMODE248 1 "register_operand" "0")
  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "p\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
(set (attr "length_immediate")
@@ -1018,6 +1018,40 @@
(const_string "0")))
(set_attr "mode" "DI")])
 
+(define_insn "ashr3"
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=Yx,Yy")
+(ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0,Yy")
+ (match_operand:DI 2 "nonmemory_operand" "YxN,YyN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   psra\t{%2, %0|%0, %2}
+   vpsra\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+   (const_string "1")
+   (const_string "0")))
+   (set_attr "mode" "TI")])
+
+(define_insn "3"
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=Yx,Yy")
+(any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0,Yy")
+ (match_operand:DI 2 "nonmemory_operand" "YxN,YyN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+   (const_string "1")
+   (const_string "0")))
+   (set_attr "mode" "TI")])
+
 ;
 ;;
 ;; Parallel integral comparisons
-- 
2.20.1



[PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

PR target/89021
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
* config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
modes for TARGET_MMX_WITH_SSE.
(SSE_REG_MODE_P): Likewise.
---
 gcc/config/i386/i386.h | 20 ++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..c1df3ec3326 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -585,6 +585,11 @@ extern unsigned char ix86_arch_features[X86_ARCH_LAST];
 
 #define TARGET_FISTTP  (TARGET_SSE3 && TARGET_80387)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+
 extern unsigned char x86_prefetch_sse;
 #define TARGET_PREFETCH_SSEx86_prefetch_sse
 
@@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc, 
const char **argv);
|| (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode   \
|| (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
 #define VALID_SSE2_REG_MODE(MODE)  \
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
-   || (MODE) == V2DImode || (MODE) == DFmode)
+   || (MODE) == V2DImode || (MODE) == DFmode   \
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode
\
+  || (MODE) == V4HImode\
+  || (MODE) == V2SImode\
+  || (MODE) == V2SFmode)))
 
 #define VALID_SSE_REG_MODE(MODE)   \
   ((MODE) == V1TImode || (MODE) == TImode  \
@@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc, 
const char **argv);
|| (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode   \
|| (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode  \
|| (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode \
-   || (MODE) == V16SFmode)
+   || (MODE) == V16SFmode  \
+   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode
\
+  || (MODE) == V4HImode\
+  || (MODE) == V2SImode\
+  || (MODE) == V2SFmode)))
 
 #define X87_FLOAT_MODE_P(MODE) \
   (TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) == XFmode))
-- 
2.20.1



[PATCH 10/43] i386: Emulate MMX mmx_andnot3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_andnot3 with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_andnot3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fae2e43af24..1e235bfcde4 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1093,14 +1093,18 @@
 ;
 
 (define_insn "mmx_andnot3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yy"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1



[PATCH 07/43] i386: Emulate MMX mmx_pmaddwd with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX pmaddwd with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.
(*mmx_pmaddwd): Also allow TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 65883a68531..ec1c7402603 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -855,20 +855,20 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_pmaddwd"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
 (plus:V2SI
  (mult:V2SI
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0")
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
(parallel [(const_int 0) (const_int 2)])))
(sign_extend:V2SI
  (vec_select:V2HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")
(parallel [(const_int 0) (const_int 2)]
  (mult:V2SI
(sign_extend:V2SI
@@ -877,10 +877,15 @@
(sign_extend:V2SI
  (vec_select:V2HI (match_dup 2)
(parallel [(const_int 1) (const_int 3)]))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmaddwd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmaddwd\t{%2, %0|%0, %2}
+   pmaddwd\t{%2, %0|%0, %2}
+   vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmulhrwv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX vec_dupv2si with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (*vec_dupv2si): Changed to
define_insn_and_split and also allow TARGET_MMX_WITH_SSE to
support SSE emulation.
* config/i386/sse.md (*vec_dupv4si): Renamed to ...
(vec_dupv4si): This.
---
 gcc/config/i386/mmx.md | 27 ---
 gcc/config/i386/sse.md |  2 +-
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 73fdef3ba1e..e31c3f5c366 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1420,14 +1420,27 @@
(set_attr "length_immediate" "1")
(set_attr "mode" "DI")])
 
-(define_insn "*vec_dupv2si"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2si"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
(vec_duplicate:V2SI
- (match_operand:SI 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SI 1 "register_operand" "0,0,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
+  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  rtx insn = gen_vec_dupv4si (op0, operands[1]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,y")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5dc0930ac1f..7d2c0367911 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18976,7 +18976,7 @@
(set_attr "prefix" "maybe_evex,maybe_evex,orig")
(set_attr "mode" "V4SF")])
 
-(define_insn "*vec_dupv4si"
+(define_insn "vec_dupv4si"
   [(set (match_operand:V4SI 0 "register_operand" "=v,v,x")
(vec_duplicate:V4SI
  (match_operand:SI 1 "nonimmediate_operand" "Yv,m,0")))]
-- 
2.20.1



[PATCH 11/43] i386: Emulate MMX mmx_eq/mmx_gt3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_eq/mmx_gt3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_eq3): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_eq3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
(mmx_gt3): Likewise.
---
 gcc/config/i386/mmx.md | 39 ---
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1e235bfcde4..73fdef3ba1e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1063,28 +1063,37 @@
 (eq:MMXMODEI
  (match_operand:MMXMODEI 1 "nonimmediate_operand")
  (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (EQ, mode, operands);")
 
 (define_insn "*mmx_eq3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
 (eq:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (EQ, mode, operands)"
-  "pcmpeq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yy")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (EQ, mode, operands)"
+  "@
+   pcmpeq\t{%2, %0|%0, %2}
+   pcmpeq\t{%2, %0|%0, %2}
+   vpcmpeq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_gt3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
 (gt:MMXMODEI
- (match_operand:MMXMODEI 1 "register_operand" "0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pcmpgt\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxcmp")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "register_operand" "0,0,Yy")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pcmpgt\t{%2, %0|%0, %2}
+   pcmpgt\t{%2, %0|%0, %2}
+   vpcmpgt\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcmp,ssecmp,ssecmp")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 09/43] i386: Emulate MMX 3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX 3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (any_logic:3): New.
(any_logic:*mmx_3): Also allow TARGET_MMX_WITH_SSE.
Add SSE support.
---
 gcc/config/i386/mmx.md | 27 ---
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 69c66e968b5..fae2e43af24 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1110,15 +1110,28 @@
   "TARGET_MMX"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (any_logic:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
 (any_logic:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0")
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "%0,0,Yy")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 ;
 ;;
-- 
2.20.1



[PATCH 06/43] i386: Emulate MMX smulv4hi3_highpart with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_smulv4hi3_highpart): Also allow
TARGET_MMX_WITH_SSE.
(*mmx_smulv4hi3_highpart): Also allow TARGET_MMX_WITH_SSE. Add
SSE support.
---
 gcc/config/i386/mmx.md | 21 +
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2712a86ea3c..65883a68531 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -784,23 +784,28 @@
  (sign_extend:V4SI
(match_operand:V4HI 2 "nonimmediate_operand")))
(const_int 16]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_smulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
(const_int 16]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmulhw\t{%2, %0|%0, %2}
+   pmulhw\t{%2, %0|%0, %2}
+   vpmulhw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_umulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 00/43] V2: Emulate MMX intrinsics with SSE

2019-02-09 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE \
  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

SSE emulation is disabled for 3DNOW since 3DNOW patterns haven't been
updated with SSE emulation.
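
To illustrate the ABI point, here is a minimal user-level example (assuming
the standard x86-64 psABI and an -mmmx/-msse2 compilation; it is not part
of the patch series itself):

  #include <mmintrin.h>

  /* Under the x86-64 psABI, __m64 arguments and return values are
     classified as SSE and live in %xmm registers, so with SSE emulation
     this can be compiled as a paddw on %xmm registers and no emms is
     needed.  */
  __m64
  add_pi16 (__m64 a, __m64 b)
  {
    return _mm_add_pi16 (a, b);
  }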

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string 
"base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

(define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
 "@internal Any SSE register if MMX is disabled in 64-bit mode.")

(define_register_constraint "Yy"
 "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS 
: NO_REGS) : NO_REGS"
 "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise any 
SSE register if MMX is disabled in 64-bit mode.")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most MMX instructions have equivalent SSE versions, and the results of some
SSE versions need to be reshuffled into the right order for MMX.  There are
a couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handle unmapped bits 64:127 at memory address by
adjusting source and mask operands together with memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear bit 4 in the shuffle control mask (see the sketch
after this list).

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of the destination XMM register.
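
As an illustration of point 3, a hedged user-level sketch with SSE
intrinsics (assuming SSSE3; this is not the compiler's internal code): when
the 8 data bytes live in the low half of an XMM register, each control byte
may only select among 8 bytes, so the bit with value 0x08 must be cleared
before using the 16-byte SSE pshufb, while bit 7 (the zeroing bit) is kept.

  #include <emmintrin.h>
  #include <tmmintrin.h>

  /* Emulate an 8-byte (MMX-style) pshufb on data held in the low half of
     an XMM register.  */
  static __m128i
  pshufb_8byte_emulated (__m128i src, __m128i ctrl)
  {
    /* Clear 0x08 in each control byte, keep 0x80 and 0x07.  */
    const __m128i keep = _mm_set1_epi8 ((char) 0xF7);
    return _mm_shuffle_epi8 (src, _mm_and_si128 (ctrl, keep));
  }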

Tests are also added to check each SSE emulation of MMX intrinsics.

With SSE emulation in 64-bit mode, the 8-byte vectorizer is enabled with SSE2.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (43):
  i386: Allow 64-bit vector modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr3/3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Add tests for MMX intrinsic emulations with SSE
  i386: Also enable SSSE3 __m64 tests in 64-bit mode
  i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
  i386: Implement V2SF add/sub/mul with SSE
  i386: Implement V2SF <-> V2SI conversions with SSE
  i386: Implement V2SF comparisons with SSE

 gcc/config/i386/constraints.md|  10 +
 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 186 +++-
 gcc/config/i386/i386.h|  20 +-
 gcc/config/i386/i386.md   |  15 +-
 gcc/config/i386/mmintrin.h|  10 +-
 gcc/config/i386/mmx.md| 909 +-
 gcc/config/i386/sse.md| 440 +++--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/te

[PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-09 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/constraints.md (Yx): Any SSE register if MMX is
disabled in 64-bit mode.
(Yy): Any EVEX encodable SSE register for AVX512VL target,
otherwise any SSE register if MMX is disabled in 64-bit mode.
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
---
 gcc/config/i386/constraints.md | 10 +
 gcc/config/i386/i386-protos.h  |  3 ++
 gcc/config/i386/i386.c | 54 +++
 gcc/config/i386/i386.md| 12 ++
 gcc/config/i386/mmx.md | 67 +++---
 5 files changed, 116 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 33921aea267..6e9244ad77f 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -110,6 +110,9 @@
 ;;  v  any EVEX encodable SSE register for AVX512VL target,
 ;; otherwise any SSE register
 ;;  h  EVEX encodable SSE register with number factor of four
+;;  x  SSE register if MMX is disabled in 64-bit mode
+;;  y  any EVEX encodable SSE register for AVX512VL target, otherwise
+;;  any SSE register if MMX is disabled in 64-bit mode
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -146,6 +149,13 @@
  "TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
  "@internal For AVX512VL, any EVEX encodable SSE register 
(@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
+(define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any SSE register if MMX is disabled in 64-bit mode.")
+
+(define_register_constraint "Yy"
+ "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? 
SSE_REGS : NO_REGS) : NO_REGS"
+ "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise any 
SSE register if MMX is disabled in 64-bit mode.")
+
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
 ;;  g  GOT memory operand.
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2d600173917..bb96a420a85 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, 
rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..cab35bb2242 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, 
rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corres

[PATCH 15/43] i386: Emulate MMX sse_cvtpi2ps with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX sse_cvtpi2ps with SSE2 cvtdq2ps, preserving upper 64 bits of
destination XMM register.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (UNSPEC_CVTPI2PS): New.
(sse_cvtpi2ps): Renamed to ...
(*mmx_cvtpi2ps): This.  Disabled for TARGET_MMX_WITH_SSE.
(sse_cvtpi2ps): New.
(mmx_cvtpi2ps_sse): Likewise.
---
 gcc/config/i386/sse.md | 83 +-
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4321c5c46db..4503d393dc9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -18,6 +18,9 @@
 ;; .
 
 (define_c_enum "unspec" [
+  ;; MMX with SSE
+  UNSPEC_CVTPI2PS
+
   ;; SSE
   UNSPEC_MOVNT
 
@@ -4655,14 +4658,90 @@
 ;;
 ;
 
-(define_insn "sse_cvtpi2ps"
+(define_expand "sse_cvtpi2ps"
+  [(set (match_operand:V4SF 0 "register_operand")
+   (vec_merge:V4SF
+ (vec_duplicate:V4SF
+   (float:V2SF (match_operand:V2SI 2 "nonimmediate_operand")))
+ (match_operand:V4SF 1 "register_operand")
+ (const_int 3)))]
+  "TARGET_SSE"
+{
+  if (TARGET_MMX_WITH_SSE)
+{
+  rtx op2 = force_reg (V2SImode, operands[2]);
+  rtx op3 = gen_reg_rtx (V4SFmode);
+  rtx op4 = gen_reg_rtx (V4SFmode);
+  rtx insn = gen_mmx_cvtpi2ps_sse (operands[0], operands[1], op2,
+  op3, op4);
+  emit_insn (insn);
+  DONE;
+}
+})
+
+(define_insn_and_split "mmx_cvtpi2ps_sse"
+  [(set (match_operand:V4SF 0 "register_operand" "=Yx,Yy")
+   (unspec:V4SF [(match_operand:V2SI 2 "register_operand" "Yx,Yy")
+ (match_operand:V4SF 1 "register_operand" "0,Yy")]
+UNSPEC_CVTPI2PS))
+   (set (match_operand:V4SF 3 "register_operand" "=Yx,Yy")
+   (unspec:V4SF [(match_operand:V4SF 4 "register_operand" "3,3")]
+UNSPEC_CVTPI2PS))]
+  "TARGET_MMX_WITH_SSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  /* Generate SSE2 cvtdq2ps.  */
+  rtx insn = gen_floatv4siv4sf2 (operands[3], op2);
+  emit_insn (insn);
+
+  /* Merge operands[3] with operands[0].  */
+  rtx mask, op1;
+  if (TARGET_AVX)
+{
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (1),
+ GEN_INT (6), GEN_INT (7)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[3], operands[1]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+}
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP3 to OP0.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (4), GEN_INT (5)));
+  op1 = gen_rtx_VEC_CONCAT (V8SFmode, operands[0], operands[3]);
+  op2 = gen_rtx_VEC_SELECT (V4SFmode, op1, mask);
+  insn = gen_rtx_SET (operands[0], op2);
+  emit_insn (insn);
+
+  /* Swap bits 0:63 with bits 64:127.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (1)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+}
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "mode" "V4SF")])
+
+(define_insn "*mmx_cvtpi2ps"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_merge:V4SF
  (vec_duplicate:V4SF
(float:V2SF (match_operand:V2SI 2 "nonimmediate_operand" "ym")))
  (match_operand:V4SF 1 "register_operand" "0")
  (const_int 3)))]
-  "TARGET_SSE"
+  "TARGET_SSE && !TARGET_MMX_WITH_SSE"
   "cvtpi2ps\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
(set_attr "mode" "V4SF")])
-- 
2.20.1



[PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.

PR target/89021
* config/i386/mmx.md (sse_cvtps2pi): Add SSE emulation.
(sse_cvttps2pi): Likewise.
---
 gcc/config/i386/sse.md | 26 --
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 7d2c0367911..4321c5c46db 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4668,26 +4668,32 @@
(set_attr "mode" "V4SF")])
 
 (define_insn "sse_cvtps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
(vec_select:V2SI
- (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
+ (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm")]
   UNSPEC_FIX_NOTRUNC)
  (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
-  "cvtps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "@
+   cvtps2pi\t{%1, %0|%0, %q1}
+   %vcvtps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "mode" "DI")])
 
 (define_insn "sse_cvttps2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
(vec_select:V2SI
- (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
+ (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm"))
  (parallel [(const_int 0) (const_int 1)])))]
   "TARGET_SSE"
-  "cvttps2pi\t{%1, %0|%0, %q1}"
-  [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+  "@
+   cvttps2pi\t{%1, %0|%0, %q1}
+   %vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "ssecvt")
+   (set_attr "unit" "mmx,*")
(set_attr "prefix_rep" "0")
(set_attr "mode" "SF")])
 
-- 
2.20.1



[PATCH 13/43] i386: Emulate MMX pshufw with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.

PR target/89021
* config/i386/mmx.md (mmx_pshufw_1): Add SSE emulation.
(*vec_dupv4hi): Likewise.
emulation.
---
 gcc/config/i386/mmx.md | 27 +--
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e31c3f5c366..8a5c5fb93b7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1376,9 +1376,9 @@
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yy")
 (vec_select:V4HI
-  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
+  (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yy")
   (parallel [(match_operand 2 "const_0_to_3_operand")
  (match_operand 3 "const_0_to_3_operand")
  (match_operand 4 "const_0_to_3_operand")
@@ -1392,11 +1392,15 @@
   mask |= INTVAL (operands[5]) << 6;
   operands[2] = GEN_INT (mask);
 
-  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
+  if (TARGET_MMX_WITH_SSE)
+return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
+  else
+return "pshufw\t{%2, %1, %0|%0, %1, %2}";
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn "mmx_pswapdv2si2"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
@@ -1410,15 +1414,18 @@
(set_attr "mode" "DI")])
 
 (define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yy")
(vec_duplicate:V4HI
  (truncate:HI
-   (match_operand:SI 1 "register_operand" "0"]
+   (match_operand:SI 1 "register_operand" "0,Yy"]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pshufw\t{$0, %0, %0|%0, %0, 0}"
-  [(set_attr "type" "mmxcvt")
+  "@
+   pshufw\t{$0, %0, %0|%0, %0, 0}
+   %vpshuflw\t{$0, %1, %0|%0, %1, 0}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_insn_and_split "*vec_dupv2si"
   [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
-- 
2.20.1



[PATCH 05/43] i386: Emulate MMX mulv4hi3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mulv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mulv4hi3): New.
(*mmx_mulv4hi3): Also allow TARGET_MMX_WITH_SSE.  Add SSE
support.
---
 gcc/config/i386/mmx.md | 26 +++---
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 01a71aa128b..2712a86ea3c 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -753,14 +753,26 @@
   "TARGET_MMX"
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
+(define_expand "mulv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand")
+  (match_operand:V4HI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
+
 (define_insn "*mmx_mulv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
-(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0")
-  (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmullw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
+(mult:V4HI (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
+  (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (MULT, V4HImode, operands)"
+  "@
+   pmullw\t{%2, %0|%0, %2}
+   pmullw\t{%2, %0|%0, %2}
+   vpmullw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_smulv4hi3_highpart"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 25/43] i386: Emulate MMX movntq with SSE2 movntidi

2019-02-09 Thread H.J. Lu
Emulate MMX movntq with SSE2 movntidi.  Only SSE register source operand
is allowed.
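
The idea at the source level: the 8-byte non-temporal store that movntq did
from an MMX register is done by movnti from a general register instead.  A
minimal sketch (helper name invented; needs 64-bit mode and SSE2):

  #include <immintrin.h>

  static inline void
  store_nt64 (long long *p, long long v)
  {
    _mm_stream_si64 (p, v);   /* SSE2 movnti with a 64-bit operand */
  }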

PR target/89021
* config/i386/mmx.md (sse_movntq): Renamed to ...
(*sse_movntq): This.
(sse_movntq): New.  Emulate MMX movntq with SSE2 movntidi.
---
 gcc/config/i386/mmx.md | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e5e7c6ec4ce..c52e5b2e393 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -243,7 +243,21 @@
   DONE;
 })
 
-(define_insn "sse_movntq"
+(define_expand "sse_movntq"
+  [(set (match_operand:DI 0 "memory_operand")
+   (unspec:DI [(match_operand:DI 1 "register_operand")]
+  UNSPEC_MOVNTQ))]
+  "TARGET_SSE || TARGET_3DNOW_A"
+{
+  if (TARGET_MMX_WITH_SSE)
+{
+  rtx insn = gen_sse2_movntidi (operands[0], operands[1]);
+  emit_insn (insn);
+  DONE;
+}
+})
+
+(define_insn "*sse_movntq"
   [(set (match_operand:DI 0 "memory_operand" "=m")
(unspec:DI [(match_operand:DI 1 "register_operand" "y")]
   UNSPEC_MOVNTQ))]
-- 
2.20.1



[PATCH 16/43] i386: Emulate MMX mmx_pextrw with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_pextrw with SSE.  Only SSE register source operand is
allowed.
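
Since pextrw only reads one 16-bit lane, the SSE form works directly on the
zero-extended value with the same lane index.  A tiny sketch (helper name
and the fixed index are illustrative only; assumes 64-bit mode and SSE2):

  #include <immintrin.h>

  static inline int
  pextrw2_via_sse (long long a)
  {
    /* Word 2 of the low 64 bits is the lane MMX pextrw would read.  */
    return _mm_extract_epi16 (_mm_cvtsi64_si128 (a), 2);
  }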

PR target/89021
* config/i386/mmx.md (mmx_pextrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 8a5c5fb93b7..fced8fd4a10 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1349,16 +1349,17 @@
(set_attr "mode" "DI")])
 
 (define_insn "mmx_pextrw"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
 (zero_extend:SI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "y")
-   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]]
+   (match_operand:V4HI 1 "register_operand" "y,Yy")
+   (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")]]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pextrw\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "mmxcvt")
+  "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,sselog1")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_pshufw"
   [(match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 18/43] i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (smaxmin:v4hi3): New.
(umaxmin:v8qi3): Likewise.
(smaxmin:*mmx_v4hi3): Add SSE emulation.
(umaxmin:*mmx_v8qi3): Likewise.
---
 gcc/config/i386/mmx.md | 48 +++---
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ad33e587352..ee5acb00a6d 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -960,16 +960,28 @@
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
 
+(define_expand "v4hi3"
+  [(set (match_operand:V4HI 0 "register_operand")
+(smaxmin:V4HI
+ (match_operand:V4HI 1 "nonimmediate_operand")
+ (match_operand:V4HI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, V4HImode, operands);")
+
 (define_insn "*mmx_v4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 (smaxmin:V4HI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0")
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy")
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V4HImode, operands)"
-  "pw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pw\t{%2, %0|%0, %2}
+   pw\t{%2, %0|%0, %2}
+   vpw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v8qi3"
   [(set (match_operand:V8QI 0 "register_operand")
@@ -979,16 +991,28 @@
   "TARGET_SSE || TARGET_3DNOW_A"
   "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
 
+(define_expand "v8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+(umaxmin:V8QI
+ (match_operand:V8QI 1 "nonimmediate_operand")
+ (match_operand:V8QI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, V8QImode, operands);")
+
 (define_insn "*mmx_v8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
 (umaxmin:V8QI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yy")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (, V8QImode, operands)"
-  "pb\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   pb\t{%2, %0|%0, %2}
+   pb\t{%2, %0|%0, %2}
+   vpb\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_ashr3"
   [(set (match_operand:MMXMODE24 0 "register_operand" "=y")
-- 
2.20.1



[PATCH 17/43] i386: Emulate MMX mmx_pinsrw with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_pinsrw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_pinsrw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 27 +++
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index fced8fd4a10..ad33e587352 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1328,25 +1328,36 @@
 })
 
 (define_insn "*mmx_pinsrw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
 (vec_merge:V4HI
   (vec_duplicate:V4HI
-(match_operand:HI 2 "nonimmediate_operand" "rm"))
- (match_operand:V4HI 1 "register_operand" "0")
+(match_operand:HI 2 "nonimmediate_operand" "rm,rm,rm"))
+ (match_operand:V4HI 1 "register_operand" "0,0,Yy")
   (match_operand:SI 3 "const_int_operand")))]
   "(TARGET_SSE || TARGET_3DNOW_A)
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (V4HImode))"
 {
   operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
-  if (MEM_P (operands[2]))
-return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+{
+  if (MEM_P (operands[2]))
+   return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+  else
+   return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+}
   else
-return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+{
+  if (MEM_P (operands[2]))
+   return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+  else
+   return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+}
 }
-  [(set_attr "type" "mmxcvt")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,sselog,sselog")
(set_attr "length_immediate" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_pextrw"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

2019-02-09 Thread H.J. Lu
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in SSE register.  Only SSE register source
operand is allowed.
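
A C-level sketch of the punpckhbw case (the helper name is invented; the
patch does the equivalent at the RTL level): the MMX high-half interleave is
exactly the upper 64 bits of an SSE punpcklbw on the same data, so
interleave with punpcklbw and then move bits 64:127 down to bits 0:63.

  #include <immintrin.h>

  static inline long long
  punpckhbw_via_sse (long long a, long long b)
  {
    __m128i x = _mm_cvtsi64_si128 (a);
    __m128i y = _mm_cvtsi64_si128 (b);
    __m128i t = _mm_unpacklo_epi8 (x, y);  /* 16-byte interleave */
    t = _mm_unpackhi_epi64 (t, t);         /* bits 64:127 -> bits 0:63 */
    return _mm_cvtsi128_si64 (t);
  }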

PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.md (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c|  77 +++
 gcc/config/i386/mmx.md| 138 ++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bb96a420a85..dc7fc38d8e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cab35bb2242..6e67ac346dd 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20009,6 +20009,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code 
code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+{
+case E_V8QImode:
+  sse_mode = V16QImode;
+  double_sse_mode = V32QImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+  break;
+
+case E_V4HImode:
+  sse_mode = V8HImode;
+  double_sse_mode = V16HImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+  break;
+
+case E_V2SImode:
+  sse_mode = V4SImode;
+  double_sse_mode = V8SImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = gen_rtx_REG (sse_mode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_mode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_mode, REGNO (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+{
+  /* Move bits 64:127 to bits 0:63.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+  dest = gen_rtx_REG (V4SImode, REGNO (dest));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+  emit_insn (insn);
+}
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 5c28d935e82..1d5ed83e7b2 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1089,87 +1089,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
(vec_select:V8QI

[PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
(3): New.
(*mmx_3): Changed to define_insn_and_split
to support SSE emulation.
(*mmx_3): Likewise.
(mmx_3): Also allow TARGET_MMX_WITH_SSE.
---
 gcc/config/i386/mmx.md | 51 +-
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 1d5ed83e7b2..01a71aa128b 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -45,7 +45,7 @@
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
 (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
-(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
+(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
 
 ;; All 8-byte vector modes handled by MMX
 (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
@@ -698,34 +698,53 @@
   "TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,Yx,Yy")
 (plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0,0,Yy")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX
+|| TARGET_MMX_WITH_SSE
+|| (TARGET_SSE2 && mode == V1DImode))
&& ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
  (match_operand:MMXMODE12 1 "nonimmediate_operand")
  (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,Yx,Yy")
 (sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "0,0,Yy")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,Yx,Yy")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 21/43] i386: Emulate MMX maskmovq with SSE2 maskmovdqu

2019-02-09 Thread H.J. Lu
Emulate MMX maskmovq with SSE2 maskmovdqu in 64-bit mode by zero-extending
source and mask operands to 128 bits.  Handle unmapped bits 64:127 at
memory address by adjusting source and mask operands together with memory
address.

PR target/89021
* config/i386/xmmintrin.h: Emulate MMX maskmovq with SSE2
maskmovdqu in 64-bit mode.
---
 gcc/config/i386/xmmintrin.h | 61 +
 1 file changed, 61 insertions(+)

diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 58284378514..e797795f127 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1165,7 +1165,68 @@ _m_pshufw (__m64 __A, int const __N)
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P)
 {
+#ifdef __x86_64__
+  /* Emulate MMX maskmovq with SSE2 maskmovdqu and handle unmapped bits
+ 64:127 at address __P.  */
+  typedef long long __v2di __attribute__ ((__vector_size__ (16)));
+  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+  /* Zero-extend __A and __N to 128 bits.  */
+  __v2di __A128 = __extension__ (__v2di) { ((__v1di) __A)[0], 0 };
+  __v2di __N128 = __extension__ (__v2di) { ((__v1di) __N)[0], 0 };
+
+  /* Check the alignment of __P.  */
+  __SIZE_TYPE__ offset = ((__SIZE_TYPE__) __P) & 0xf;
+  if (offset)
+{
+  /* If the misalignment of __P > 8, subtract __P by 8 bytes.
+Otherwise, subtract __P by the misalignment.  */
+  if (offset > 8)
+   offset = 8;
+  __P = (char *) (((__SIZE_TYPE__) __P) - offset);
+
+  /* Shift __A128 and __N128 to the left by the adjustment.  */
+  switch (offset)
+   {
+   case 1:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8);
+ break;
+   case 2:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 2 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 2 * 8);
+ break;
+   case 3:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 3 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 3 * 8);
+ break;
+   case 4:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 4 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 4 * 8);
+ break;
+   case 5:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 5 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 5 * 8);
+ break;
+   case 6:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 6 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 6 * 8);
+ break;
+   case 7:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 7 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 7 * 8);
+ break;
+   case 8:
+ __A128 = __builtin_ia32_pslldqi128 (__A128, 8 * 8);
+ __N128 = __builtin_ia32_pslldqi128 (__N128, 8 * 8);
+ break;
+   default:
+ break;
+   }
+}
+  __builtin_ia32_maskmovdqu ((__v16qi)__A128, (__v16qi)__N128, __P);
+#else
   __builtin_ia32_maskmovq ((__v8qi)__A, (__v8qi)__N, __P);
+#endif
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
-- 
2.20.1



[PATCH 19/43] i386: Emulate MMX mmx_pmovmskb with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_pmovmskb with SSE by zero-extending result of SSE pmovmskb
from QImode to SImode.  Only SSE register source operand is allowed.
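
The zero-extension makes the fixup trivial: bytes 8..15 of the XMM value are
zero, so SSE2 pmovmskb already returns zeros in mask bits 8..15 and only a
QImode-to-SImode zero-extension of the result is needed.  A small sketch
(helper name invented; assumes 64-bit mode and SSE2):

  #include <immintrin.h>

  static inline int
  pmovmskb_via_sse (long long a)
  {
    __m128i x = _mm_cvtsi64_si128 (a);  /* bytes 8..15 are zero */
    return _mm_movemask_epi8 (x);       /* mask bits 8..15 are zero too */
  }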

PR target/89021
* config/i386/mmx.md (mmx_pmovmskb): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/mmx.md | 29 +++--
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ee5acb00a6d..7759e3e1082 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1753,14 +1753,31 @@
   [(set_attr "type" "mmxshft")
(set_attr "mode" "DI")])
 
-(define_insn "mmx_pmovmskb"
-  [(set (match_operand:SI 0 "register_operand" "=r")
-   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y")]
+(define_insn_and_split "mmx_pmovmskb"
+  [(set (match_operand:SI 0 "register_operand" "=r,r")
+   (unspec:SI [(match_operand:V8QI 1 "register_operand" "y,Yx")]
   UNSPEC_MOVMSK))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "pmovmskb\t{%1, %0|%0, %1}"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+  "@
+   pmovmskb\t{%1, %0|%0, %1}
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE pmovmskb.  */
+  rtx op0 = operands[0];
+  rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+  rtx insn = gen_sse2_pmovmskb (op0, op1);
+  emit_insn (insn);
+  /* Zero-extend from QImode to SImode.  */
+  op1 = gen_rtx_REG (QImode, REGNO (operands[0]));
+  insn = gen_zero_extendqisi2 (op0, op1);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "mmxcvt,ssemov")
+   (set_attr "mode" "DI,TI")])
 
 (define_expand "mmx_maskmovq"
   [(set (match_operand:V8QI 0 "memory_operand")
-- 
2.20.1



[PATCH 26/43] i386: Emulate MMX umulv1siv1di3 with SSE2

2019-02-09 Thread H.J. Lu
Emulate MMX umulv1siv1di3 with SSE2.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (*sse2_umulv1siv1di3): Add SSE2 emulation.
---
 gcc/config/i386/mmx.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c52e5b2e393..f5e96ebe3f3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -955,20 +955,24 @@
   "ix86_fixup_binary_operands_no_copy (MULT, V2SImode, operands);")
 
 (define_insn "*sse2_umulv1siv1di3"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
+  [(set (match_operand:V1DI 0 "register_operand" "=y,Yx,Yy")
 (mult:V1DI
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 1 "nonimmediate_operand" "%0")
+ (match_operand:V2SI 1 "nonimmediate_operand" "%0,0,Yy")
  (parallel [(const_int 0)])))
  (zero_extend:V1DI
(vec_select:V1SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy")
  (parallel [(const_int 0)])]
   "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V2SImode, operands)"
-  "pmuludq\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmuludq\t{%2, %0|%0, %2}
+   pmuludq\t{%2, %0|%0, %2}
+   vpmuludq\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_v4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 22/43] i386: Emulate MMX mmx_uavgv8qi3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_uavgv8qi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (*mmx_uavgv8qi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 15 +--
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index c758aac72e5..17776c66d90 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1679,15 +1679,15 @@
   "ix86_fixup_binary_operands_no_copy (PLUS, V8QImode, operands);")
 
 (define_insn "*mmx_uavgv8qi3"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+  [(set (match_operand:V8QI 0 "register_operand" "=y,Yx,Yy")
(truncate:V8QI
  (lshiftrt:V8HI
(plus:V8HI
  (plus:V8HI
(zero_extend:V8HI
- (match_operand:V8QI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V8QI 1 "nonimmediate_operand" "%0,0,Yy"))
(zero_extend:V8HI
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")))
  (const_vector:V8HI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
  (const_int 1) (const_int 1)
@@ -1698,19 +1698,22 @@
 {
   /* These two instructions have the same operation, but their encoding
  is different.  Prefer the one that is de facto standard.  */
-  if (TARGET_SSE || TARGET_3DNOW_A)
+  if (TARGET_MMX_WITH_SSE && TARGET_AVX)
+return "vpavgb\t{%2, %1, %0|%0, %1, %2}";
+  else if (TARGET_SSE || TARGET_3DNOW_A)
 return "pavgb\t{%2, %0|%0, %2}";
   else
 return "pavgusb\t{%2, %0|%0, %2}";
 }
-  [(set_attr "type" "mmxshft")
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
(set (attr "prefix_extra")
  (if_then_else
(not (ior (match_test "TARGET_SSE")
 (match_test "TARGET_3DNOW_A")))
(const_string "1")
(const_string "*")))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_uavgv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 24/43] i386: Emulate MMX mmx_psadbw with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_psadbw with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (mmx_psadbw): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e345b2b8875..e5e7c6ec4ce 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1755,14 +1755,18 @@
(set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
-  [(set (match_operand:V1DI 0 "register_operand" "=y")
-(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0")
- (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
+  [(set (match_operand:V1DI 0 "register_operand" "=y,Yx,Yy")
+(unspec:V1DI [(match_operand:V8QI 1 "register_operand" "0,0,Yy")
+ (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")]
 UNSPEC_PSADBW))]
   "TARGET_SSE || TARGET_3DNOW_A"
-  "psadbw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   psadbw\t{%2, %0|%0, %2}
+   psadbw\t{%2, %0|%0, %2}
+   vpsadbw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_pmovmskb"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
-- 
2.20.1



[PATCH 20/43] i386: Emulate MMX mmx_umulv4hi3_highpart with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_umulv4hi3_highpart with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/mmx.md (*mmx_umulv4hi3_highpart): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 7759e3e1082..c758aac72e5 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -821,20 +821,24 @@
   "ix86_fixup_binary_operands_no_copy (MULT, V4HImode, operands);")
 
 (define_insn "*mmx_umulv4hi3_highpart"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(truncate:V4HI
  (lshiftrt:V4SI
(mult:V4SI
  (zero_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
  (zero_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
  (const_int 16]
   "(TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (MULT, V4HImode, operands)"
-  "pmulhuw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxmul")
-   (set_attr "mode" "DI")])
+  "@
+   pmulhuw\t{%2, %0|%0, %2}
+   pmulhuw\t{%2, %0|%0, %2}
+   vpmulhuw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxmul,ssemul,ssemul")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_pmaddwd"
   [(set (match_operand:V2SI 0 "register_operand")
-- 
2.20.1



[PATCH 27/43] i386: Emulate MMX ssse3_phwv4hi3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ssse3_phwv4hi3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.
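
Concretely for phaddw: the 128-bit instruction leaves the two halves of the
wanted V4HI result in bits 0:31 and 64:95, so one dword shuffle puts them
next to each other.  A C sketch of the same dance (the helper name and the
choice of phaddw rather than phsubw are for illustration; assumes 64-bit
mode and SSSE3):

  #include <immintrin.h>

  static inline long long
  phaddw_v4hi_via_sse (long long a, long long b)
  {
    __m128i x = _mm_cvtsi64_si128 (a);
    __m128i y = _mm_cvtsi64_si128 (b);
    __m128i t = _mm_hadd_epi16 (x, y);   /* sums land in dwords 0 and 2 */
    t = _mm_shuffle_epi32 (t, 0x08);     /* dwords 0,2 -> dwords 0,1 */
    return _mm_cvtsi128_si64 (t);
  }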

PR target/89021
* config/i386/sse.md (ssse3_phwv4hi3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 4503d393dc9..625e1c4cfd9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15358,13 +15358,13 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phwv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phwv4hi3"
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(vec_concat:V4HI
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 1 "register_operand" "0")
+   (match_operand:V4HI 1 "register_operand" "0,0,Yy")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 1) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
@@ -15373,19 +15373,35 @@
  (vec_concat:V2HI
(ssse3_plusminus:HI
  (vec_select:HI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")
(parallel [(const_int 0)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 1)])))
(ssse3_plusminus:HI
  (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
  (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))]
   "TARGET_SSSE3"
-  "phw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   phw\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = gen_rtx_REG (V8HImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V8HImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (V8HImode, REGNO (operands[2]));
+  rtx insn = gen_ssse3_phwv8hi3 (op0, op1, op2);
+  emit_insn (insn);
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_phdv8si3"
   [(set (match_operand:V8SI 0 "register_operand" "=x")
-- 
2.20.1



[PATCH 37/43] i386: Allow MMX intrinsic emulation with SSE

2019-02-09 Thread H.J. Lu
Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  For pr82483-1.c and
pr82483-2.c, "-mssse3 -mno-mmx" no longer ICEs in 64-bit mode since MMX
intrinsics can be emulated with SSE.

gcc/

PR target/89021
* config/i386/i386-builtin.def: Enable MMX intrinsics with
SSE/SSE2/SSSE3.
* config/i386/i386.c (bdesc_tm): Likewise.
(ix86_init_mmx_sse_builtins): Likewise.
(ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
intrinsics in 64-bit mode without MMX.
* config/i386/mmintrin.h: Don't require MMX in 64-bit mode.

gcc/testsuite/

PR target/89021
* gcc.target/i386/pr82483-1.c: Error only on ia32.
* gcc.target/i386/pr82483-2.c: Likewise.
---
 gcc/config/i386/i386-builtin.def  | 126 +++---
 gcc/config/i386/i386.c|  45 +---
 gcc/config/i386/mmintrin.h|  10 +-
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 5 files changed, 107 insertions(+), 78 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 88005f4687f..10a9d631f29 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw", 
IX86_BUILTIN_FNSTSW, UNKN
 BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX, 
UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms", 
IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
+BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms, 
"__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
 
 /* 3DNow! */
 BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms, "__builtin_ia32_femms", 
IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi", 
IX86_BUILTIN_RORQI, UNKNO
 BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI, 
UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
 
 /* MMX */
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", 
IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", 
IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", 
IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", 
IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", 
IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", 
IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3, 
"__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3, 
"__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3, 
"__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3, 
"__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3, 
"__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3, 
"__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) 
V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv4hi3, 
"__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", 
IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_smulv4hi3_highpart, 
"__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) 
V4HI_FTYPE_V4HI_V4HI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", 
IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_andnotv2si3, 
"__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", 
IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", 
IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
-
-BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FO

[PATCH 23/43] i386: Emulate MMX mmx_uavgv4hi3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_uavgv4hi3 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/mmx.md (*mmx_uavgv4hi3): Add SSE emulation.
---
 gcc/config/i386/mmx.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 17776c66d90..e345b2b8875 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1732,23 +1732,27 @@
   "ix86_fixup_binary_operands_no_copy (PLUS, V4HImode, operands);")
 
 (define_insn "*mmx_uavgv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (plus:V4SI
(zero_extend:V4SI
- (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+ (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
(zero_extend:V4SI
- (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+ (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
  (const_vector:V4SI [(const_int 1) (const_int 1)
  (const_int 1) (const_int 1)]))
(const_int 1]
   "(TARGET_SSE || TARGET_3DNOW_A)
&& ix86_binary_operator_ok (PLUS, V4HImode, operands)"
-  "pavgw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxshft")
-   (set_attr "mode" "DI")])
+  "@
+   pavgw\t{%2, %0|%0, %2}
+   pavgw\t{%2, %0|%0, %2}
+   vpavgw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxshft,sseiadd,sseiadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "mmx_psadbw"
   [(set (match_operand:V1DI 0 "register_operand" "=y")
-- 
2.20.1



[PATCH 35/43] i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE

2019-02-09 Thread H.J. Lu
PR target/89021
* config/i386/mmx.md (MMXMODE:mov): Also allow
TARGET_MMX_WITH_SSE.
(MMXMODE:*mov_internal): Likewise.
(MMXMODE:movmisalign): Likewise.
---
 gcc/config/i386/mmx.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f5e96ebe3f3..283661f7887 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -75,7 +75,7 @@
 (define_expand "mov"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
@@ -86,7 +86,7 @@
 "=r ,o ,r,r ,m ,?!y,!y,?!y,m  ,r  ,?!y,v,v,v,m,r,v,!y,*x")
(match_operand:MMXMODE 1 "nonimm_or_0_operand"
 "rCo,rC,C,rm,rC,C  ,!y,m  ,?!y,?!y,r  ,C,v,m,v,v,r,*x,!y"))]
-  "TARGET_MMX
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -237,7 +237,7 @@
 (define_expand "movmisalign"
   [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
(match_operand:MMXMODE 1 "nonimmediate_operand"))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_move (mode, operands);
   DONE;
-- 
2.20.1



[PATCH 42/43] i386: Implement V2SF <-> V2SI conversions with SSE

2019-02-09 Thread H.J. Lu
In 64-bit mode, implement V2SF <-> V2SI conversions with SSE.  Only SSE
register source operand is allowed.
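
At the intrinsics level the conversion is just the 128-bit cvtdq2ps (or
cvttps2dq) on the zero-extended value, keeping the low 64 bits of the
result.  A rough sketch of the int-to-float direction (the typedefs and
helper name are illustrative; assumes 64-bit mode and SSE2):

  #include <immintrin.h>

  typedef int   v2si __attribute__ ((vector_size (8)));
  typedef float v2sf __attribute__ ((vector_size (8)));

  static inline v2sf
  cvt_v2si_to_v2sf (v2si a)
  {
    long long bits;
    __builtin_memcpy (&bits, &a, 8);
    __m128 f = _mm_cvtepi32_ps (_mm_cvtsi64_si128 (bits)); /* cvtdq2ps */
    v2sf r;
    __builtin_memcpy (&r, &f, 8);   /* upper two lanes (0.0f) dropped */
    return r;
  }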

gcc/

PR target/89028
* config/i386/sse.md (floatv2siv2sf2): New.
(fix_truncv2sfv2si2): Likewise.

gcc/testsuite/

PR target/89028
* gcc.target/i386/pr89028-8.c: New test.
* gcc.target/i386/pr89028-9.c: Likewise.
---
 gcc/config/i386/sse.md| 31 +++
 gcc/testsuite/gcc.target/i386/pr89028-8.c | 12 +
 gcc/testsuite/gcc.target/i386/pr89028-9.c | 12 +
 3 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-9.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index e8e25759c57..fe63239f53f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4897,6 +4897,17 @@
(set_attr "prefix" "maybe_vex")
(set_attr "mode" "")])
 
+(define_insn "floatv2siv2sf2"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yy")
+   (float:V2SF
+ (match_operand:V2SI 1 "register_operand" "Yx,Yy")))]
+  "TARGET_MMX_WITH_SSE"
+  "%vcvtdq2ps\t{%1, %0|%0, %1}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "V4SF")])
+
 (define_insn "ufloat2"
   [(set (match_operand:VF1_AVX512VL 0 "register_operand" "=v")
(unsigned_float:VF1_AVX512VL
@@ -5056,6 +5067,26 @@
(set_attr "prefix" "")
(set_attr "mode" "TI")])
 
+(define_insn "fix_truncv2sfv2si2"
+  [(set (match_operand:V2SI 0 "register_operand" "=Yy")
+   (fix:V2SI (match_operand:V2SF 1 "register_operand" "Yy")))]
+  "TARGET_MMX_WITH_SSE"
+  "%vcvttps2dq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set (attr "prefix_rep")
+ (if_then_else
+   (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "1")))
+   (set (attr "prefix_data16")
+ (if_then_else
+   (match_test "TARGET_AVX")
+ (const_string "*")
+ (const_string "0")))
+   (set_attr "prefix_data16" "0")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "mode" "TI")])
+
 (define_expand "fixuns_trunc2"
   [(match_operand: 0 "register_operand")
(match_operand:VF1 1 "register_operand")]
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-8.c 
b/gcc/testsuite/gcc.target/i386/pr89028-8.c
new file mode 100644
index 000..35cdf1ed332
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-8.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cvttps2dq" 1 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2si
+foo1 ( __v2sf x)
+{
+  return __builtin_convertvector (x, __v2si);
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-9.c 
b/gcc/testsuite/gcc.target/i386/pr89028-9.c
new file mode 100644
index 000..17242c0402d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-9.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -msse2 -mno-mmx" } */
+/* { dg-final { scan-assembler-times "cvtdq2ps" 1 } } */
+
+typedef int __v2si __attribute__ ((__vector_size__ (8)));
+typedef float __v2sf __attribute__ ((__vector_size__ (8)));
+
+__v2sf
+foo1 ( __v2si x)
+{
+  return __builtin_convertvector (x, __v2sf);
+}
-- 
2.20.1



[PATCH 41/43] i386: Implement V2SF add/sub/mul with SSE

2019-02-09 Thread H.J. Lu
In 64-bit mode, implement V2SF add/sub/mul with SSE.  Only SSE register
source operand is allowed.
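
The V2SF arithmetic follows the same pattern as the integer cases: do the
128-bit addps/subps/mulps on values whose upper two lanes are zero and keep
the low 64 bits.  A sketch of the add case (helper name invented; assumes
64-bit mode and SSE):

  #include <immintrin.h>

  typedef float v2sf __attribute__ ((vector_size (8)));

  static inline v2sf
  addv2sf_via_sse (v2sf a, v2sf b)
  {
    __m128 x = _mm_setzero_ps (), y = _mm_setzero_ps ();
    __builtin_memcpy (&x, &a, 8);
    __builtin_memcpy (&y, &b, 8);
    __m128 s = _mm_add_ps (x, y);   /* upper lanes just add 0.0f */
    v2sf r;
    __builtin_memcpy (&r, &s, 8);
    return r;
  }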

gcc/

PR target/89028
* config/i386/i386.md (comm): Handle mult.
* config/i386/mmx.md (plusminusmult): New.
(plusminusmult_insn): Likewise.
(plusminusmult_mnemonic): Likewise.
(plusminusmult_type): Likewise.
(mmx_addv2sf3): Add "&& !TARGET_MMX_WITH_SSE".
(*mmx_addv2sf3): Likewise.
(mmx_subv2sf3): Likewise.
(mmx_subrv2sf3): Likewise.
(*mmx_subv2sf3): Likewise.
(mmx_mulv2sf3): Likewise.
(*mmx_mulv2sf3): Likewise.
(v2sf3): New.
(*sse_v2sf3): Likewise.

gcc/testsuite/

PR target/89028
* gcc.target/i386/pr89028-2.c: New test.
* gcc.target/i386/pr89028-3.c: Likewise.
* gcc.target/i386/pr89028-4.c: Likewise.
* gcc.target/i386/pr89028-5.c: Likewise.
* gcc.target/i386/pr89028-6.c: Likewise.
* gcc.target/i386/pr89028-7.c: Likewise.
---
 gcc/config/i386/i386.md   |  3 +-
 gcc/config/i386/mmx.md| 56 ---
 gcc/testsuite/gcc.target/i386/pr89028-2.c | 11 +
 gcc/testsuite/gcc.target/i386/pr89028-3.c | 14 ++
 gcc/testsuite/gcc.target/i386/pr89028-4.c | 14 ++
 gcc/testsuite/gcc.target/i386/pr89028-5.c | 11 +
 gcc/testsuite/gcc.target/i386/pr89028-6.c | 14 ++
 gcc/testsuite/gcc.target/i386/pr89028-7.c | 14 ++
 8 files changed, 129 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-7.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 72685107fc0..cda973c0fbf 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -873,7 +873,8 @@
 
 ;; Mark commutative operators as such in constraints.
 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
-   (minus "") (ss_minus "") (us_minus "")])
+   (minus "") (ss_minus "") (us_minus "")
+   (mult "%")])
 
 ;; Mapping of max and min
 (define_code_iterator maxmin [smax smin umax umin])
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 31db0e8b0c7..b2af9ace2c3 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -63,6 +63,20 @@
 ;; Instruction suffix for truncations with saturation.
 (define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
 
+(define_code_iterator plusminusmult [plus minus mult])
+
+;; Base name for define_insn
+(define_code_attr plusminusmult_insn
+  [(plus "add") (minus "sub") (mult "mul")])
+
+;; Base name for insn mnemonic.
+(define_code_attr plusminusmult_mnemonic
+  [(plus "add") (minus "sub") (mult "mul")])
+
+;; Insn type name for insn mnemonic.
+(define_code_attr plusminusmult_type
+  [(plus "add") (minus "add") (mult "mul")])
+
 ;
 ;;
 ;; Move patterns
@@ -277,14 +291,16 @@
(plus:V2SF
  (match_operand:V2SF 1 "nonimmediate_operand")
  (match_operand:V2SF 2 "nonimmediate_operand")))]
-  "TARGET_3DNOW"
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (PLUS, V2SFmode, operands);")
 
 (define_insn "*mmx_addv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y")
(plus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "%0")
   (match_operand:V2SF 2 "nonimmediate_operand" "ym")))]
-  "TARGET_3DNOW && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
+  "TARGET_3DNOW
+   && !TARGET_MMX_WITH_SSE
+   && ix86_binary_operator_ok (PLUS, V2SFmode, operands)"
   "pfadd\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
(set_attr "prefix_extra" "1")
@@ -294,19 +310,21 @@
   [(set (match_operand:V2SF 0 "register_operand")
 (minus:V2SF (match_operand:V2SF 1 "register_operand")
(match_operand:V2SF 2 "nonimmediate_operand")))]
-  "TARGET_3DNOW")
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
 
 (define_expand "mmx_subrv2sf3"
   [(set (match_operand:V2SF 0 "register_operand")
 (minus:V2SF (match_operand:V2SF 2 "register_operand")
(match_operand:V2SF 1 "nonimmediate_operand")))]
-  "TARGET_3DNOW")
+  "TARGET_3DNOW && !TARGET_MMX_WITH_SSE")
 
 (define_insn "*mmx_subv2sf3"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
 (minus:V2SF (match_operand:V2SF 1 "nonimmediate_operand" "0,ym")
(match_operand:V2SF 2 "nonimmediate_operand" "ym,0")))]
-  "TARGET_3DNOW && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "TARGET_3DNOW
+   && !TARGET_MMX_WITH_SSE
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
   

[PATCH 30/43] i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ssse3_pmulhrswv4hi3 with SSE.  Only SSE register source
operand is allowed.

PR target/89021
* config/i386/sse.md (*ssse3_pmulhrswv4hi3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b0ded2008f1..5f00179aa95 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15774,25 +15774,29 @@
(set_attr "mode" "")])
 
 (define_insn "*ssse3_pmulhrswv4hi3"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(truncate:V4HI
  (lshiftrt:V4SI
(plus:V4SI
  (lshiftrt:V4SI
(mult:V4SI
  (sign_extend:V4SI
-   (match_operand:V4HI 1 "nonimmediate_operand" "%0"))
+   (match_operand:V4HI 1 "nonimmediate_operand" "%0,0,Yy"))
  (sign_extend:V4SI
-   (match_operand:V4HI 2 "nonimmediate_operand" "ym")))
+   (match_operand:V4HI 2 "nonimmediate_operand" "ym,Yx,Yy")))
(const_int 14))
  (match_operand:V4HI 3 "const1_operand"))
(const_int 1]
   "TARGET_SSSE3 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "pmulhrsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseimul")
+  "@
+   pmulhrsw\t{%2, %0|%0, %2}
+   pmulhrsw\t{%2, %0|%0, %2}
+   vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseimul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_pshufb3"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,x,v")
-- 
2.20.1



[PATCH 33/43] i386: Emulate MMX ssse3_palignrdi with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX version of palignrq with SSE version by concatenating 2
64-bit MMX operands into a single 128-bit SSE operand, followed by
SSE psrldq.  Only SSE register source operand is allowed.
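
In other words, palignrq with immediate n yields the 64-bit window starting
at byte n of the 16-byte value dest:src.  A C sketch with a fixed n = 3 (the
helper name and the constant are illustrative; assumes 64-bit mode and
SSE2):

  #include <immintrin.h>

  static inline long long
  palignr3_via_sse (long long dest, long long src)
  {
    __m128i t = _mm_set_epi64x (dest, src);  /* src in bits 0:63 */
    t = _mm_srli_si128 (t, 3);               /* psrldq by 3 bytes */
    return _mm_cvtsi128_si64 (t);
  }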

PR target/89021
* config/i386/sse.md (ssse3_palignrdi): Changed to
define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 53 +++---
 1 file changed, 44 insertions(+), 9 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 47a97540d82..92c12319d16 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15975,23 +15975,58 @@
(set_attr "prefix" "orig,vex,evex")
(set_attr "mode" "")])
 
-(define_insn "ssse3_palignrdi"
-  [(set (match_operand:DI 0 "register_operand" "=y")
-   (unspec:DI [(match_operand:DI 1 "register_operand" "0")
-   (match_operand:DI 2 "nonimmediate_operand" "ym")
-   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n")]
+(define_insn_and_split "ssse3_palignrdi"
+  [(set (match_operand:DI 0 "register_operand" "=y,Yx,Yy")
+   (unspec:DI [(match_operand:DI 1 "register_operand" "0,0,Yy")
+   (match_operand:DI 2 "nonimmediate_operand" "ym,Yx,Yy")
+   (match_operand:SI 3 "const_0_to_255_mul_8_operand" "n,n,n")]
   UNSPEC_PALIGNR))]
   "TARGET_SSSE3"
 {
-  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
-  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+  if (TARGET_MMX_WITH_SSE)
+return "#";
+  else
+{
+  operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
+  return "palignr\t{%3, %2, %0|%0, %2, %3}";
+}
 }
-  [(set_attr "type" "sseishft")
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX palignrdi with SSE psrldq.  */
+  rtx op0 = gen_rtx_REG (V2DImode, REGNO (operands[0]));
+  rtx insn;
+  if (TARGET_AVX)
+insn = gen_vec_concatv2di (op0, operands[2], operands[1]);
+  else
+{
+  /* NB: SSE can only concatenate OP0 and OP1 to OP0.  */
+  insn = gen_vec_concatv2di (op0, operands[1], operands[2]);
+  emit_insn (insn);
+  /* Swap bits 0:63 with bits 64:127.  */
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2),
+ GEN_INT (3),
+ GEN_INT (0),
+ GEN_INT (1)));
+  rtx op1 = gen_rtx_REG (V4SImode, REGNO (op0));
+  rtx op2 = gen_rtx_VEC_SELECT (V4SImode, op1, mask);
+  insn = gen_rtx_SET (op1, op2);
+}
+  emit_insn (insn);
+  op0 = gen_rtx_REG (V1TImode, REGNO (op0));
+  insn = gen_sse2_lshrv1ti3 (op0, op0, operands[3]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseishft")
(set_attr "atom_unit" "sishuf")
(set_attr "prefix_extra" "1")
(set_attr "length_immediate" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 ;; Mode iterator to handle singularity w/ absence of V2DI and V4DI
 ;; modes for abs instruction on pre AVX-512 targets.
-- 
2.20.1



[PATCH 29/43] i386: Emulate MMX ssse3_pmaddubsw with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ssse3_pmaddubsw with SSE.  Only SSE register source operand
is allowed.

PR target/89021
* config/i386/sse.md (ssse3_pmaddubsw): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 029f33a7000..b0ded2008f1 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15666,17 +15666,17 @@
(set_attr "mode" "TI")])
 
 (define_insn "ssse3_pmaddubsw"
-  [(set (match_operand:V4HI 0 "register_operand" "=y")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yx,Yy")
(ss_plus:V4HI
  (mult:V4HI
(zero_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 1 "register_operand" "0")
+   (match_operand:V8QI 1 "register_operand" "0,0,Yy")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)])))
(sign_extend:V4HI
  (vec_select:V4QI
-   (match_operand:V8QI 2 "nonimmediate_operand" "ym")
+   (match_operand:V8QI 2 "nonimmediate_operand" "ym,Yx,Yy")
(parallel [(const_int 0) (const_int 2)
   (const_int 4) (const_int 6)]
  (mult:V4HI
@@ -15689,12 +15689,16 @@
(parallel [(const_int 1) (const_int 3)
   (const_int 5) (const_int 7)]))]
   "TARGET_SSSE3"
-  "pmaddubsw\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   pmaddubsw\t{%2, %0|%0, %2}
+   pmaddubsw\t{%2, %0|%0, %2}
+   vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "simul")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_mode_iterator PMULHRSW
   [V4HI V8HI (V16HI "TARGET_AVX2")])
-- 
2.20.1



[PATCH 36/43] i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE

2019-02-09 Thread H.J. Lu
PR target/89021
* config/i386/i386.c (ix86_expand_vector_init_duplicate): Set
mmx_ok to true if TARGET_MMX_WITH_SSE is true.
(ix86_expand_vector_init_one_nonzero): Likewise.
(ix86_expand_vector_init_one_var): Likewise.
(ix86_expand_vector_init_general): Likewise.
(ix86_expand_vector_init): Likewise.
(ix86_expand_vector_set): Likewise.
(ix86_expand_vector_extract): Likewise.
* config/i386/mmx.md (*vec_dupv2sf): Changed to
define_insn_and_split to support SSE emulation.
(vec_setv2sf): Also allow TARGET_MMX_WITH_SSE.
(vec_extractv2sf_1 splitter): Likewise.
(vec_extractv2sfsf): Likewise.
(vec_setv2si): Likewise.
(vec_extractv2si_1 splitter): Likewise.
(vec_extractv2sisi): Likewise.
(vec_setv4hi): Likewise.
(vec_extractv4hihi): Likewise.
(vec_setv8qi): Likewise.
(vec_extractv8qiqi): Likewise.
(*vec_extractv2sf_0): Don't allow TARGET_MMX_WITH_SSE.
(*vec_extractv2sf_1): Likewise.
(*vec_extractv2si_0): Likewise.
(*vec_extractv2si_1): Likewise.
(*vec_extractv2sf_0_sse): New.
(*vec_extractv2sf_1_sse): Likewise.
(*vec_extractv2si_0_sse): Likewise.
(*vec_extractv2si_1_sse): Likewise.
---
 gcc/config/i386/i386.c |   8 +++
 gcc/config/i386/mmx.md | 133 +
 2 files changed, 117 insertions(+), 24 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 6e67ac346dd..3770bb882d4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42364,6 +42364,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, 
machine_mode mode,
 {
   bool ok;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
@@ -42523,6 +42524,7 @@ ix86_expand_vector_init_one_nonzero (bool mmx_ok, 
machine_mode mode,
   bool use_vector_set = false;
   rtx (*gen_vec_set_0) (rtx, rtx, rtx) = NULL;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DImode:
@@ -42716,6 +42718,7 @@ ix86_expand_vector_init_one_var (bool mmx_ok, 
machine_mode mode,
   XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
   const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2DFmode:
@@ -43101,6 +43104,7 @@ ix86_expand_vector_init_general (bool mmx_ok, 
machine_mode mode,
   machine_mode quarter_mode = VOIDmode;
   int n, i;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -43300,6 +43304,8 @@ ix86_expand_vector_init (bool mmx_ok, rtx target, rtx 
vals)
   int i;
   rtx x;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
+
   /* Handle first initialization from vector elts.  */
   if (n_elts != XVECLEN (vals, 0))
 {
@@ -43399,6 +43405,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx 
val, int elt)
   machine_mode mmode = VOIDmode;
   rtx (*gen_blendm) (rtx, rtx, rtx, rtx);
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SFmode:
@@ -43754,6 +43761,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, 
rtx vec, int elt)
   bool use_vec_extr = false;
   rtx tmp;
 
+  mmx_ok |= TARGET_MMX_WITH_SSE;
   switch (mode)
 {
 case E_V2SImode:
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 283661f7887..31db0e8b0c7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -595,14 +595,27 @@
(set_attr "prefix_extra" "1")
(set_attr "mode" "V2SF")])
 
-(define_insn "*vec_dupv2sf"
-  [(set (match_operand:V2SF 0 "register_operand" "=y")
+(define_insn_and_split "*vec_dupv2sf"
+  [(set (match_operand:V2SF 0 "register_operand" "=y,Yx,Yy")
(vec_duplicate:V2SF
- (match_operand:SF 1 "register_operand" "0")))]
-  "TARGET_MMX"
-  "punpckldq\t%0, %0"
-  [(set_attr "type" "mmxcvt")
-   (set_attr "mode" "DI")])
+ (match_operand:SF 1 "register_operand" "0,0,Yy")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   punpckldq\t%0, %0
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Emulate MMX vec_dupv2sf with SSE vec_dupv4sf.  */
+  rtx op0 = gen_rtx_REG (V4SFmode, REGNO (operands[0]));
+  rtx insn = gen_vec_dupv4sf (op0, operands[1]);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxcvt,ssemov,ssemov")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "*mmx_concatv2sf"
   [(set (match_operand:V2SF 0 "register_operand" "=y,y")
@@ -620,7 +633,7 @@
   [(match_operand:V2SF 0 "register_operand")
(match_operand:SF 1 "register_operand")
(match_operand 2 "const_int_operand")]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (false, operands[0], operands[1],
  INTVAL (operands[2]));
@@ -634,7 +647,20 @@
(vec_select:SF
  (match_operand:V2SF 1 "nonimmediate_

[PATCH 32/43] i386: Emulate MMX ssse3_psign3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ssse3_psign3 with SSE.  Only SSE register source operand
is allowed.
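
Not part of the patch, just a scalar sketch of the (assumed) psignw
semantics for reference; because each output lane depends only on the
same input lane, running the 128-bit psignw/vpsignw on xmm registers
produces the emulated 64-bit result in the low four lanes:

/* Illustrative model only, for the W (16-bit) element size.  */
static void
psignw_model (short dst[4], const short a[4], const short b[4])
{
  for (int i = 0; i < 4; i++)
    dst[i] = b[i] < 0 ? -a[i] : (b[i] == 0 ? 0 : a[i]);
}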

PR target/89021
* config/i386/sse.md (ssse3_psign3): Add SSE emulation.
---
 gcc/config/i386/sse.md | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a32d67f811a..47a97540d82 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15906,17 +15906,21 @@
(set_attr "mode" "")])
 
 (define_insn "ssse3_psign3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yx,Yy")
(unspec:MMXMODEI
- [(match_operand:MMXMODEI 1 "register_operand" "0")
-  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")]
+ [(match_operand:MMXMODEI 1 "register_operand" "0,0,Yy")
+  (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,Yx,Yy")]
  UNSPEC_PSIGN))]
   "TARGET_SSSE3"
-  "psign\t{%2, %0|%0, %2}";
-  [(set_attr "type" "sselog1")
+  "@
+   psign\t{%2, %0|%0, %2}
+   psign\t{%2, %0|%0, %2}
+   vpsign\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "_palignr_mask"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=v")
-- 
2.20.1



[PATCH 31/43] i386: Emulate MMX pshufb with SSE version

2019-02-09 Thread H.J. Lu
Emulate MMX version of pshufb with SSE version by masking out the bit 3
of the shuffle control byte.  Only SSE register source operand is allowed.
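
Not part of the patch, but a short scalar sketch of why clearing bit 3
is enough (assuming the usual pshufb semantics): MMX pshufb indexes 8
source bytes with ctl & 7, SSE pshufb indexes 16 bytes with ctl & 15,
and bit 7 selects zeroing in both.  With bit 3 cleared, ctl & 15 equals
ctl & 7, so the low 8 lanes of the 128-bit result match the 64-bit one:

/* Illustrative model only; nbytes is 8 for MMX, 16 for SSE.  */
static unsigned char
pshufb_lane (const unsigned char *src, unsigned char ctl, int nbytes)
{
  return (ctl & 0x80) ? 0 : src[ctl & (nbytes - 1)];
}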

PR target/89021
* config/i386/sse.md (ssse3_pshufbv8qi3): Renamed to ...
(ssse3_pshufbv8qi3_mmx): This.
(ssse3_pshufbv8qi3): New.
(ssse3_pshufbv8qi3_sse): Likewise.
---
 gcc/config/i386/sse.md | 63 --
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5f00179aa95..a32d67f811a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15817,18 +15817,77 @@
(set_attr "btver2_decode" "vector")
(set_attr "mode" "")])
 
-(define_insn "ssse3_pshufbv8qi3"
+(define_expand "ssse3_pshufbv8qi3"
+  [(set (match_operand:V8QI 0 "register_operand")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand")
+ (match_operand:V8QI 2 "nonimmediate_operand")]
+UNSPEC_PSHUFB))]
+  "TARGET_SSSE3"
+{
+  if (TARGET_MMX_WITH_SSE)
+{
+  /* Emulate MMX version of pshufb with SSE version by masking
+out the bit 3 of the shuffle control byte.  */
+  rtvec par = gen_rtvec (4, GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7),
+GEN_INT (0xf7f7f7f7));
+  rtx vec_const = gen_rtx_CONST_VECTOR (V4SImode, par);
+  vec_const = force_const_mem (V4SImode, vec_const);
+  rtx op3 = gen_reg_rtx (V4SImode);
+  rtx op4 = gen_reg_rtx (V4SImode);
+  rtx insn = gen_rtx_SET (op4, vec_const);
+  emit_insn (insn);
+  rtx op2 = force_reg (V8QImode, operands[2]);
+  insn = gen_ssse3_pshufbv8qi3_sse (operands[0], operands[1],
+   op2, op3, op4);
+  emit_insn (insn);
+  DONE;
+}
+})
+
+(define_insn "ssse3_pshufbv8qi3_mmx"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
(unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0")
  (match_operand:V8QI 2 "nonimmediate_operand" "ym")]
 UNSPEC_PSHUFB))]
-  "TARGET_SSSE3"
+  "TARGET_SSSE3 && !TARGET_MMX_WITH_SSE"
   "pshufb\t{%2, %0|%0, %2}";
   [(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
(set_attr "mode" "DI")])
 
+(define_insn_and_split "ssse3_pshufbv8qi3_sse"
+  [(set (match_operand:V8QI 0 "register_operand" "=Yx,Yy")
+   (unspec:V8QI [(match_operand:V8QI 1 "register_operand" "0,Yy")
+ (match_operand:V8QI 2 "register_operand" "Yx,Yy")]
+UNSPEC_PSHUFB))
+   (set (match_operand:V4SI 3 "register_operand" "=Yx,Yy")
+   (unspec:V4SI [(match_operand:V4SI 4 "register_operand" "3,3")]
+UNSPEC_PSHUFB))]
+  "TARGET_SSSE3 && TARGET_MMX_WITH_SSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+{
+  /* Mask out the bit 3 of the shuffle control byte.  */
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  rtx op3 = operands[3];
+  rtx insn = gen_andv4si3 (op3, op3, op2);
+  emit_insn (insn);
+  /* Generate SSE version of pshufb.  */
+  rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]));
+  op3 = gen_rtx_REG (V16QImode, REGNO (op3));
+  insn = gen_ssse3_pshufbv16qi3 (op0, op1, op3);
+  emit_insn (insn);
+  DONE;
+}
+  [(set_attr "mmx_isa" "x64_noavx,x64_avx")
+   (set_attr "type" "sselog1")
+   (set_attr "mode" "TI,TI")])
+
 (define_insn "_psign3"
   [(set (match_operand:VI124_AVX2 0 "register_operand" "=x,x")
(unspec:VI124_AVX2
-- 
2.20.1



[PATCH 40/43] i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE

2019-02-09 Thread H.J. Lu
In 64-bit mode, we support the 8-byte vectorizer with SSE.  Also xfail x86-64
targets for gcc.dg/tree-ssa/pr84512.c.

gcc/

PR target/89028
* config/i386/i386.c (ix86_autovectorize_vector_sizes): Enable
8-byte vectorizer for TARGET_MMX_WITH_SSE.

gcc/testsuite/

PR target/89028
* gcc.dg/tree-ssa/pr84512.c: Also xfail x86-64 targets.
* gcc.target/i386/pr89028-1.c: New test.
---
 gcc/config/i386/i386.c|  2 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c   |  2 +-
 gcc/testsuite/gcc.target/i386/pr89028-1.c | 10 ++
 3 files changed, 13 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-1.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e45284ce1a2..9cf96471e8f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -50213,6 +50213,8 @@ ix86_autovectorize_vector_sizes (vector_sizes *sizes)
   sizes->safe_push (32);
   sizes->safe_push (16);
 }
+  if (TARGET_MMX_WITH_SSE)
+sizes->safe_push (8);
 }
 
 /* Implemenation of targetm.vectorize.get_mask_mode.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
index 3975757d844..8f8529ba8cf 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr84512.c
@@ -13,4 +13,4 @@ int foo()
 }
 
 /* Listed targets xfailed due to PR84958.  */
-/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { 
alpha*-*-* amdgcn*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } } } } */
+/* { dg-final { scan-tree-dump "return 285;" "optimized" { xfail { { { 
alpha*-*-* amdgcn*-*-* nvptx*-*-* } || { sparc*-*-* && lp64 } } || { { i?86-*-* 
x86_64-*-* } && { ! ia32 } } } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr89028-1.c 
b/gcc/testsuite/gcc.target/i386/pr89028-1.c
new file mode 100644
index 000..d2ebb7f844d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr89028-1.c
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mavx2 -O3" } */
+/* { dg-final { scan-assembler "vpaddb\[ \\t\]+\[^\n\]*%xmm\[0-9\]" } } */
+
+void
+foo (char* restrict r, char* restrict a)
+{
+  for (int i = 0; i < 8; i++)
+r[i] += a[i];
+}
-- 
2.20.1



[PATCH 39/43] i386: Also enable SSSE3 __m64 tests in 64-bit mode

2019-02-09 Thread H.J. Lu
Since we now emulate MMX intrinsics with SSE in 64-bit mode without
3DNOW, we can enable SSSE3 __m64 tests even when AVX is enabled.

PR target/89021
* gcc.target/i386/ssse3-pabsb.c: Also enable __m64 check in
64-bit mode without 3DNOW.
* gcc.target/i386/ssse3-pabsd.c: Likewise.
* gcc.target/i386/ssse3-pabsw.c: Likewise.
* gcc.target/i386/ssse3-palignr.c: Likewise.
* gcc.target/i386/ssse3-phaddd.c: Likewise.
* gcc.target/i386/ssse3-phaddsw.c: Likewise.
* gcc.target/i386/ssse3-phaddw.c: Likewise.
* gcc.target/i386/ssse3-phsubd.c: Likewise.
* gcc.target/i386/ssse3-phsubsw.c: Likewise.
* gcc.target/i386/ssse3-phsubw.c: Likewise.
* gcc.target/i386/ssse3-pmaddubsw.c: Likewise.
* gcc.target/i386/ssse3-pmulhrsw.c: Likewise.
* gcc.target/i386/ssse3-pshufb.c: Likewise.
* gcc.target/i386/ssse3-psignb.c: Likewise.
* gcc.target/i386/ssse3-psignd.c: Likewise.
* gcc.target/i386/ssse3-psignw.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/ssse3-pabsb.c | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pabsd.c | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pabsw.c | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-palignr.c   | 6 +++---
 gcc/testsuite/gcc.target/i386/ssse3-phaddd.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phaddsw.c   | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phaddw.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubd.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubsw.c   | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-phsubw.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pmaddubsw.c | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pmulhrsw.c  | 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-pshufb.c| 6 +++---
 gcc/testsuite/gcc.target/i386/ssse3-psignb.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-psignd.c| 4 ++--
 gcc/testsuite/gcc.target/i386/ssse3-psignw.c| 4 ++--
 16 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c 
b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
index 7caa1b6c3a6..68d81b4a068 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsb.c
@@ -15,7 +15,7 @@
 #include "ssse3-vals.h"
 #include 
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsb (int *i1, int *r)
@@ -63,7 +63,7 @@ TEST (void)
   /* Manually compute the result */
   compute_correct_result(&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
   /* Run the 64-bit tests */
   ssse3_test_pabsb (&vals[i + 0], &r[0]);
   ssse3_test_pabsb (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c 
b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
index 3a73cf01170..9eb1aedc838 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsd.c
@@ -16,7 +16,7 @@
 
 #include 
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsd (int *i1, int *r)
@@ -62,7 +62,7 @@ TEST (void)
   /* Manually compute the result */
   compute_correct_result(&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
   /* Run the 64-bit tests */
   ssse3_test_pabsd (&vals[i + 0], &r[0]);
   ssse3_test_pabsd (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c 
b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
index 67e4721b8e6..36e99a46a1a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-pabsw.c
@@ -16,7 +16,7 @@
 
 #include 
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_pabsw (int *i1, int *r)
@@ -64,7 +64,7 @@ TEST (void)
   /* Manually compute the result */
   compute_correct_result (&vals[i + 0], ck);
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
   /* Run the 64-bit tests */
   ssse3_test_pabsw (&vals[i + 0], &r[0]);
   ssse3_test_pabsw (&vals[i + 2], &r[2]);
diff --git a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c 
b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
index dbee9bee4aa..c46e5d40f9a 100644
--- a/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
+++ b/gcc/testsuite/gcc.target/i386/ssse3-palignr.c
@@ -17,7 +17,7 @@
 #include 
 #include 
 
-#ifndef __AVX__
+#if !defined __AVX__ || (defined __x86_64__ && !defined __3dNOW__)
 /* Test the 64-bit form */
 static void
 ssse3_test_palignr (int *i1, int *i2, unsigned int imm, int *r)
@@ -214,7 +214,7 @@ compute_correct_result_128 (int *i1, int *i2, unsigned int 
imm, int *r)
   bout[i] = buf[imm + i];
 }
 
-#ifndef _

[PATCH 43/43] i386: Implement V2SF comparisons with SSE

2019-02-09 Thread H.J. Lu
In 64-bit mode, implement V2SF comparisons with SSE.  Only SSE register
source operand is allowed.
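
The new tests are not reproduced below; as an illustration only, source
along these lines (hypothetical, not one of the pr89028-1x.c files)
would exercise the V2SF compare path in 64-bit mode once the 8-byte
vectorizer is enabled:

/* Hypothetical example: a 2-element float select that may now be
   vectorized using a V2SF comparison (cmpps/vcmpps on xmm).  */
void
foo (float *restrict r, float *restrict a, float *restrict b,
     float *restrict c)
{
  for (int i = 0; i < 2; i++)
    r[i] = a[i] > b[i] ? c[i] : 0.0f;
}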

gcc/

PR target/89028
* config/i386/sse.md (V_128_64): New mode iterator.
(VF_128_64): Likewise.
(sseintvecmode): Add V2SF.
(sseintvecmodelower): Likewise.
(*sse_maskcmpv2sf3_comm): New.
(*sse_maskcmpv2sf3): Likewise.
(vcond): Renamed to ...
(vcond): This.

gcc/testsuite/

PR target/89028
* gcc.target/i386/pr89028-10.c: New test.
* gcc.target/i386/pr89028-11.c: Likewise.
* gcc.target/i386/pr89028-12.c: Likewise.
* gcc.target/i386/pr89028-13.c: Likewise.
---
 gcc/config/i386/sse.md | 61 ++
 gcc/testsuite/gcc.target/i386/pr89028-10.c | 39 ++
 gcc/testsuite/gcc.target/i386/pr89028-11.c | 39 ++
 gcc/testsuite/gcc.target/i386/pr89028-12.c | 39 ++
 gcc/testsuite/gcc.target/i386/pr89028-13.c | 39 ++
 5 files changed, 208 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89028-13.c

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index fe63239f53f..90097b5aa83 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -246,6 +246,12 @@
 (define_mode_iterator V_128
   [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")])
 
+;; All 128bit and 64bit vector modes
+(define_mode_iterator V_128_64
+  [V16QI V8HI V4SI V2DI V4SF (V2DF "TARGET_SSE2")
+   (V8QI "TARGET_MMX_WITH_SSE") (V4HI "TARGET_MMX_WITH_SSE")
+   (V2SI "TARGET_MMX_WITH_SSE") (V2SF "TARGET_MMX_WITH_SSE")])
+
 ;; All 256bit vector modes
 (define_mode_iterator V_256
   [V32QI V16HI V8SI V4DI V8SF V4DF])
@@ -302,6 +308,10 @@
 (define_mode_iterator VF_128
   [V4SF (V2DF "TARGET_SSE2")])
 
+;; All 128bit and 64bit vector float modes
+(define_mode_iterator VF_128_64
+  [V4SF (V2DF "TARGET_SSE2") (V2SF "TARGET_MMX_WITH_SSE")])
+
 ;; All 256bit vector float modes
 (define_mode_iterator VF_256
   [V8SF V4DF])
@@ -734,6 +744,7 @@
   [(V16SF "V16SI") (V8DF  "V8DI")
(V8SF  "V8SI")  (V4DF  "V4DI")
(V4SF  "V4SI")  (V2DF  "V2DI")
+   (V2SF  "V2SI")
(V16SI "V16SI") (V8DI  "V8DI")
(V8SI  "V8SI")  (V4DI  "V4DI")
(V4SI  "V4SI")  (V2DI  "V2DI")
@@ -749,6 +760,7 @@
   [(V16SF "v16si") (V8DF "v8di")
(V8SF "v8si") (V4DF "v4di")
(V4SF "v4si") (V2DF "v2di")
+   (V2SF "v2si")
(V8SI "v8si") (V4DI "v4di")
(V4SI "v4si") (V2DI "v2di")
(V16HI "v16hi") (V8HI "v8hi")
@@ -2766,6 +2778,37 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "")])
 
+(define_insn "*sse_maskcmpv2sf3_comm"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yx")
+   (match_operator:V2SF 3 "sse_comparison_operator"
+ [(match_operand:V2SF 1 "register_operand" "%0,Yx")
+  (match_operand:V2SF 2 "register_operand" "Yx,Yx")]))]
+  "TARGET_MMX_WITH_SSE
+   && GET_RTX_CLASS (GET_CODE (operands[3])) == RTX_COMM_COMPARE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "SF")])
+
+(define_insn "*sse_maskcmpv2sf3"
+  [(set (match_operand:V2SF 0 "register_operand" "=Yx,Yx")
+   (match_operator:V2SF 3 "sse_comparison_operator"
+ [(match_operand:V2SF 1 "register_operand" "0,Yx")
+  (match_operand:V2SF 2 "register_operand" "Yx,Yx")]))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   cmp%D3ps\t{%2, %0|%0, %2}
+   vcmp%D3ps\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "SF")])
+
 (define_mode_attr cmp_imm_predicate
   [(V16SF "const_0_to_31_operand")  (V8DF "const_0_to_31_operand")
(V16SI "const_0_to_7_operand")   (V8DI "const_0_to_7_operand")
@@ -3089,17 +3132,17 @@
   DONE;
 })
 
-(define_expand "vcond"
-  [(set (match_operand:V_128 0 "register_operand")
-   (if_then_else:V_128
+(define_expand "vcond"
+  [(set (match_operand:V_128_64 0 "register_operand")
+   (if_then_else:V_128_64
  (match_operator 3 ""
-   [(match_operand:VF_128 4 "vector_operand")
-(match_operand:VF_128 5 "vector_operand")])
- (match_operand:V_128 1 "general_operand")
- (match_operand:V_128 2 "general_operand")))]
+   [(match_operand:VF_128_64 4 "vector_operand")
+(match_operand:VF_128_64 5 "vector_operand")])
+ (match_operand:V_128_64 1 "general_operand")
+ (match_operand:V_128_64 2 "general_operand")))]
   "TARGET_SSE
-   && (GET_MODE_NUNITS (mode)
-   == GET_MODE_NUNITS (mode))"
+   && (GET_MODE_NUNITS (mode)
+   == GET_MODE_NUNITS (mo

[PATCH 28/43] i386: Emulate MMX ssse3_phdv2si3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ssse3_phdv2si3 with SSE by moving bits
64:95 to bits 32:63 in SSE register.  Only SSE register source operand
is allowed.
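
Not part of the patch, just a scalar sketch of the lane bookkeeping
(using the add case; the subtract form is analogous): with the two V2SI
inputs in the low halves of xmm registers, the 128-bit phaddd produces
the two wanted sums in elements 0 and 2, so element 2 (bits 64:95) must
be moved into element 1 (bits 32:63), which appears to be the fix-up
ix86_move_vector_high_sse_to_mmx performs in the split below:

/* Illustrative model only.  */
static void
phaddd_v2si_model (int r[2], const int a[2], const int b[2])
{
  int t[4] = { a[0] + a[1], 0, b[0] + b[1], 0 };  /* 128-bit phaddd.  */
  r[0] = t[0];
  r[1] = t[2];   /* Move bits 64:95 down to bits 32:63.  */
}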

PR target/89021
* config/i386/sse.md (ssse3_phdv2si3):
Changed to define_insn_and_split to support SSE emulation.
---
 gcc/config/i386/sse.md | 30 +++---
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 625e1c4cfd9..029f33a7000 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15480,26 +15480,42 @@
(set_attr "prefix" "orig,vex")
(set_attr "mode" "TI")])
 
-(define_insn "ssse3_phdv2si3"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
+(define_insn_and_split "ssse3_phdv2si3"
+  [(set (match_operand:V2SI 0 "register_operand" "=y,Yx,Yy")
(vec_concat:V2SI
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 1 "register_operand" "0")
+ (match_operand:V2SI 1 "register_operand" "0,0,Yy")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 1) (parallel [(const_int 1)])))
  (plusminus:SI
(vec_select:SI
- (match_operand:V2SI 2 "nonimmediate_operand" "ym")
+ (match_operand:V2SI 2 "nonimmediate_operand" "ym,Yx,Yy")
  (parallel [(const_int 0)]))
(vec_select:SI (match_dup 2) (parallel [(const_int 1)])]
   "TARGET_SSSE3"
-  "phd\t{%2, %0|%0, %2}"
-  [(set_attr "type" "sseiadd")
+  "@
+   phd\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed && TARGET_MMX_WITH_SSE"
+  [(const_int 0)]
+{
+  /* Generate SSE version of the operation.  */
+  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
+  rtx op1 = gen_rtx_REG (V4SImode, REGNO (operands[1]));
+  rtx op2 = gen_rtx_REG (V4SImode, REGNO (operands[2]));
+  rtx insn = gen_ssse3_phdv4si3 (op0, op1, op2);
+  emit_insn (insn);
+  ix86_move_vector_high_sse_to_mmx (op0);
+  DONE;
+}
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "sseiadd")
(set_attr "atom_unit" "complex")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_insn "avx2_pmaddubsw256"
   [(set (match_operand:V16HI 0 "register_operand" "=x,v")
-- 
2.20.1



[PATCH 38/43] i386: Add tests for MMX intrinsic emulations with SSE

2019-02-09 Thread H.J. Lu
Test MMX intrinsics with -msse2 -mno-mmx in 64-bit mode.
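
The new files are not reproduced here; for reference, a hypothetical
standalone test in the same spirit (the real tests share a harness in
mmx-vals.h, whose contents are not shown) might look like:

/* Hypothetical example, not one of the files listed below.  */
/* { dg-do run { target { ! ia32 } } } */
/* { dg-options "-msse2 -mno-mmx" } */

#include <mmintrin.h>

__attribute__((noinline, noclone)) static __m64
do_paddb (__m64 a, __m64 b)
{
  return _mm_add_pi8 (a, b);
}

int
main (void)
{
  union u { __m64 v; signed char c[8]; };
  union u a = { .c = { 1, 2, 3, 4, 5, 6, 7, 8 } };
  union u b = { .c = { 8, 7, 6, 5, 4, 3, 2, 1 } };
  union u r;
  int i;

  r.v = do_paddb (a.v, b.v);
  for (i = 0; i < 8; i++)
    if (r.c[i] != (signed char) (a.c[i] + b.c[i]))
      __builtin_abort ();
  return 0;
}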

PR target/89021
* gcc.target/i386/mmx-vals.h: New file.
* gcc.target/i386/sse2-mmx-2.c: Likewise.
* gcc.target/i386/sse2-mmx-3.c: Likewise.
* gcc.target/i386/sse2-mmx-4.c: Likewise.
* gcc.target/i386/sse2-mmx-5.c: Likewise.
* gcc.target/i386/sse2-mmx-6.c: Likewise.
* gcc.target/i386/sse2-mmx-7.c: Likewise.
* gcc.target/i386/sse2-mmx-8.c: Likewise.
* gcc.target/i386/sse2-mmx-9.c: Likewise.
* gcc.target/i386/sse2-mmx-10.c: Likewise.
* gcc.target/i386/sse2-mmx-11.c: Likewise.
* gcc.target/i386/sse2-mmx-12.c: Likewise.
* gcc.target/i386/sse2-mmx-13.c: Likewise.
* gcc.target/i386/sse2-mmx-14.c: Likewise.
* gcc.target/i386/sse2-mmx-15.c: Likewise.
* gcc.target/i386/sse2-mmx-16.c: Likewise.
* gcc.target/i386/sse2-mmx-17.c: Likewise.
* gcc.target/i386/sse2-mmx-18.c: Likewise.
* gcc.target/i386/sse2-mmx-19.c: Likewise.
* gcc.target/i386/sse2-mmx-20.c: Likewise.
* gcc.target/i386/sse2-mmx-21.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtpi2ps.c: Likewise.
* gcc.target/i386/sse2-mmx-cvtps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-cvttps2pi.c: Likewise.
* gcc.target/i386/sse2-mmx-maskmovq.c: Likewise.
* gcc.target/i386/sse2-mmx-packssdw.c: Likewise.
* gcc.target/i386/sse2-mmx-packsswb.c: Likewise.
* gcc.target/i386/sse2-mmx-packuswb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddd.c: Likewise.
* gcc.target/i386/sse2-mmx-paddq.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddsw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusb.c: Likewise.
* gcc.target/i386/sse2-mmx-paddusw.c: Likewise.
* gcc.target/i386/sse2-mmx-paddw.c: Likewise.
* gcc.target/i386/sse2-mmx-pand.c: Likewise.
* gcc.target/i386/sse2-mmx-pandn.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgb.c: Likewise.
* gcc.target/i386/sse2-mmx-pavgw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpeqw.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtb.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtd.c: Likewise.
* gcc.target/i386/sse2-mmx-pcmpgtw.c: Likewise.
* gcc.target/i386/sse2-mmx-pextrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pinsrw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaddwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmaxub.c: Likewise.
* gcc.target/i386/sse2-mmx-pminsw.c: Likewise.
* gcc.target/i386/sse2-mmx-pminub.c: Likewise.
* gcc.target/i386/sse2-mmx-pmovmskb.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhuw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmulhw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmullw.c: Likewise.
* gcc.target/i386/sse2-mmx-pmuludq.c: Likewise.
* gcc.target/i386/sse2-mmx-por.c: Likewise.
* gcc.target/i386/sse2-mmx-psadbw.c: Likewise.
* gcc.target/i386/sse2-mmx-pshufw.c: Likewise.
* gcc.target/i386/sse2-mmx-pslld.c: Likewise.
* gcc.target/i386/sse2-mmx-pslldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllq.c: Likewise.
* gcc.target/i386/sse2-mmx-psllqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psllw.c: Likewise.
* gcc.target/i386/sse2-mmx-psllwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrad.c: Likewise.
* gcc.target/i386/sse2-mmx-psradi.c: Likewise.
* gcc.target/i386/sse2-mmx-psraw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrawi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrld.c: Likewise.
* gcc.target/i386/sse2-mmx-psrldi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlq.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlqi.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlw.c: Likewise.
* gcc.target/i386/sse2-mmx-psrlwi.c: Likewise.
* gcc.target/i386/sse2-mmx-psubb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubd.c: Likewise.
* gcc.target/i386/sse2-mmx-psubq.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusb.c: Likewise.
* gcc.target/i386/sse2-mmx-psubusw.c: Likewise.
* gcc.target/i386/sse2-mmx-psubw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhdq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckhwd.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklbw.c: Likewise.
* gcc.target/i386/sse2-mmx-punpckldq.c: Likewise.
* gcc.target/i386/sse2-mmx-punpcklwd.c: Likewise.
* gcc.target/i386/sse2-mmx-pxor.c: Likewise.
---
 gcc/testsuite/gcc.target/i386/mmx-v

Re: [powerpc] RFA: patch changing expected code generation for test vsx-simode2.c

2019-02-09 Thread Segher Boessenkool
Hi Vlad,

On Fri, Feb 08, 2019 at 02:18:40PM -0500, Vladimir Makarov wrote:
> Recently I committed a patch solving
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88560
> 
> The patch resulted in test vsx-simode2.c failure.  Here is the 
> difference in generated code:
> 
> @@ -13,9 +13,8 @@ foo:
>  .LFB0:
>     .cfi_startproc
>     std 3,-16(1)
> -   ori 2,2,0
> -   lwz 9,-12(1)
> -   mtvsrwz 32,9
> +   addi 9,1,-12
> +   lxsiwzx 32,0,9
> 
> The new version is one insn less.  So I propose the following patch 
> changing the expected code generation.
> 
> Is it ok to commit it?

This is not okay.  The test is supposed to test that we get a direct
move instruction instead of going via memory.  But, trunk does the
std+lwz as you see; this is because IRA decides this pseudo needs to
go to memory:

r125: preferred NO_REGS, alternative NO_REGS, allocno NO_REGS

  a1(r125,l0) costs: BASE_REGS:14004,14004 GENERAL_REGS:14004,14004 
LINK_REGS:24010,24010 CTR_REGS:24010,24010 LINK_OR_CTR_REGS:24010,24010 
SPEC_OR_GEN_REGS:24010,24010 MEM:12000,12000

Is there something wrong in our tuning?


For reference, 7 and 8 do just

mtvsrwz 32,3
#APP
 # 10 "vsx-simode2.c" 1
xxlor 32,32,32  # v, v constraints
 # 0 "" 2
#NO_APP
mfvsrwz 3,32
blr

which is the expected code.  The test really should check there is no
memory used, or that there are no extra insns other than the 4 expected.
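
(Untested sketch, only to illustrate the "no memory" variant; the exact
mnemonics to reject are an assumption, not something from this thread:)

/* Possible extra directives for vsx-simode2.c, so a regression back to
   going via the stack is caught.  */
/* { dg-final { scan-assembler-not {\mstd\M} } } */
/* { dg-final { scan-assembler-not {\mlwz\M} } } */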

Your patch seems to be fine btw, this breakage was really there already,
just not detected by the testcase.


Segher


[PATCH 34/43] i386: Emulate MMX abs2 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX abs2 with SSE.  Only SSE register source operand is
allowed.

PR target/89021
* config/i386/sse.md (abs2): Add SSE emulation.
---
 gcc/config/i386/sse.md | 13 -
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 92c12319d16..e8e25759c57 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -16090,16 +16090,19 @@
 })
 
 (define_insn "abs2"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,Yy")
(abs:MMXMODEI
- (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
+ (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym,Yy")))]
   "TARGET_SSSE3"
-  "pabs\t{%1, %0|%0, %1}";
-  [(set_attr "type" "sselog1")
+  "@
+   pabs\t{%1, %0|%0, %1}
+   %vpabs\t{%1, %0|%0, %1}"
+  [(set_attr "mmx_isa" "native,x64")
+   (set_attr "type" "sselog1")
(set_attr "prefix_rep" "0")
(set_attr "prefix_extra" "1")
(set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
-   (set_attr "mode" "DI")])
+   (set_attr "mode" "DI,TI")])
 
 ;
 ;;
-- 
2.20.1



Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 01:22:30PM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 04:11:43AM -0800, H.J. Lu wrote:
> > I believe all usages of
> > 
> > (ior (match_operand 0 "ext_sse_reg_operand")
> >   (match_operand 1 "ext_sse_reg_operand"))
> > 
> > should be checked.  I am not sure if they should be there at all.
> 
> E.g. in i386.md all the other spots look fine, because {DI,SI,DF,SF}mode
> is allowed in ext sse regs even with -mavx512f.  And sse.md doesn't use this
> at all.  What I'm wondering is if we need the sse.md (*mov_internal)
> code I've cited earlier, doing bootstrap/regtest now with gcc_unreachable in
> there (and in *mov{o,x}i_internal* for MODE_XI too) too see if it ever
> triggers.

The following didn't ICE on anything, which is not a proof, but given that
hard_regno_mode_ok should return false for ext_sse_reg_operand regs for
avx512f && !avx512vl, it matches my expectations.  On the other hand, it was
a normal defaults bootstrap; I don't have a knl, which might be best for
testing -mavx512f -mno-avx512vl on everything.
So perhaps we can also nuke the large if from mov_internal.

--- gcc/config/i386/i386.md.jj  2019-02-09 12:35:57.971475641 +0100
+++ gcc/config/i386/i386.md 2019-02-09 12:37:40.776802962 +0100
@@ -1905,6 +1905,7 @@ (define_insn "*movoi_internal_avx"
   return standard_sse_constant_opcode (insn, operands);
 
 case TYPE_SSEMOV:
+  gcc_assert (get_attr_mode (insn) != MODE_XI);
   if (misaligned_operand (operands[0], OImode)
  || misaligned_operand (operands[1], OImode))
{
@@ -1970,6 +1971,7 @@ (define_insn "*movti_internal"
 case TYPE_SSEMOV:
   /* TDmode values are passed as TImode on the stack.  Moving them
 to stack may result in unaligned memory access.  */
+  gcc_assert (get_attr_mode (insn) != MODE_XI);
   if (misaligned_operand (operands[0], TImode)
  || misaligned_operand (operands[1], TImode))
{
--- gcc/config/i386/sse.md.jj   2019-01-28 21:57:39.301110220 +0100
+++ gcc/config/i386/sse.md  2019-02-09 12:36:45.863696416 +0100
@@ -989,6 +989,7 @@ (define_insn "mov_internal"
  && (EXT_REX_SSE_REG_P (operands[0])
  || EXT_REX_SSE_REG_P (operands[1])))
{
+ gcc_unreachable ();
  if (memory_operand (operands[0], mode))
{
  if ( == 32)

Jakub


Re: [PATCH 00/43] V2: Emulate MMX intrinsics with SSE

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> On x86-64, since __m64 is returned and passed in XMM registers, we can
> emulate MMX intrinsics with SSE instructions. To support it, we added
>
>  #define TARGET_MMX_WITH_SSE \
>   (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>
> SSE emulation is disabled for 3DNOW since 3DNOW patterns haven't been
> updated with SSE emulation.
>
> ;; Define instruction set of MMX instructions
> (define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string
> "base"))
>
>  (eq_attr "mmx_isa" "native")
>(symbol_ref "!TARGET_MMX_WITH_SSE")
>  (eq_attr "mmx_isa" "x64")
>(symbol_ref "TARGET_MMX_WITH_SSE")
>  (eq_attr "mmx_isa" "x64_avx")
>(symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
>  (eq_attr "mmx_isa" "x64_noavx")
>(symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")
>
> (define_register_constraint "Yx" "TARGET_MMX_WITH_SSE ? SSE_REGS : NO_REGS"
>  "@internal Any SSE register if MMX is disabled in 64-bit mode.")
>
> (define_register_constraint "Yy"
>  "TARGET_MMX_WITH_SSE ? (TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ?
> SSE_REGS : NO_REGS) : NO_REGS"
>  "@internal Any EVEX encodable SSE register for AVX512VL target, otherwise
> any SSE register if MMX is disabled in 64-bit mode.")

I don't think we need the above two new constraints; with mmx_isa
attribute, we can simply use x and Yv constraints. We need to use
mmx_isa to correctly switch between register sets, so we are sure that
TARGET_MMX_WITH_SSE is true when mmx_isa is !native.

Uros.

> We added SSE emulation to MMX patterns and disabled MMX alternatives with
> TARGET_MMX_WITH_SSE.
>
> Most MMX instructions have equivalent SSE versions, and the results of
> some SSE versions need to be reshuffled into the right order for MMX.
> There are a couple of tricky cases:
>
> 1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
> maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
> mask operand and handle unmapped bits 64:127 at memory address by
> adjusting source and mask operands together with memory address.
>
> 2. MMX movntq is emulated with SSE2 DImode movnti, which is available
> in 64-bit mode.
>
> 3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
> SSE emulation must clear the bit 4 in the shuffle control mask.
>
> 4. To emulate MMX cvtpi2p with SSE2 cvtdq2ps, we must properly preserve
> the upper 64 bits of destination XMM register.
>
> Tests are also added to check each SSE emulation of MMX intrinsics.
>
> With SSE emulation in 64-bit mode, 8-byte vectorizer is enabled with SSE2.
>
> There are no regressions on i686 and x86-64.  For x86-64, GCC is also
> tested with
>
> --with-arch=native --with-cpu=native
>
> on AVX2 and AVX512F machines.
>
> H.J. Lu (43):
>   i386: Allow 64-bit vector modes in SSE registers
>   i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
>   i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
>   i386: Emulate MMX plusminus/sat_plusminus with SSE
>   i386: Emulate MMX mulv4hi3 with SSE
>   i386: Emulate MMX smulv4hi3_highpart with SSE
>   i386: Emulate MMX mmx_pmaddwd with SSE
>   i386: Emulate MMX ashr3/3 with SSE
>   i386: Emulate MMX 3 with SSE
>   i386: Emulate MMX mmx_andnot3 with SSE
>   i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
>   i386: Emulate MMX vec_dupv2si with SSE
>   i386: Emulate MMX pshufw with SSE
>   i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
>   i386: Emulate MMX sse_cvtpi2ps with SSE
>   i386: Emulate MMX mmx_pextrw with SSE
>   i386: Emulate MMX mmx_pinsrw with SSE
>   i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
>   i386: Emulate MMX mmx_pmovmskb with SSE
>   i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
>   i386: Emulate MMX maskmovq with SSE2 maskmovdqu
>   i386: Emulate MMX mmx_uavgv8qi3 with SSE
>   i386: Emulate MMX mmx_uavgv4hi3 with SSE
>   i386: Emulate MMX mmx_psadbw with SSE
>   i386: Emulate MMX movntq with SSE2 movntidi
>   i386: Emulate MMX umulv1siv1di3 with SSE2
>   i386: Emulate MMX ssse3_phwv4hi3 with SSE
>   i386: Emulate MMX ssse3_phdv2si3 with SSE
>   i386: Emulate MMX ssse3_pmaddubsw with SSE
>   i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
>   i386: Emulate MMX pshufb with SSE version
>   i386: Emulate MMX ssse3_psign3 with SSE
>   i386: Emulate MMX ssse3_palignrdi with SSE
>   i386: Emulate MMX abs2 with SSE
>   i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
>   i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
>   i386: Allow MMX intrinsic emulation with SSE
>   i386: Add tests for MMX intrinsic emulations with SSE
>   i386: Also enable SSSE3 __m64 tests in 64-bit mode
>   i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
>   i386: Implement V2SF add/sub/mul with SSE
>   i386: Implement V2SF <-> V2SI conversions with SSE
>   i386: Implement V2SF comparisons with SSE
>
>  gcc/config/i386/constraints.md|  10 +
>  gcc/config/i386/i386-builtin.def  |

Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> 3DNOW.  We can use SSE2 to support 64-bit vectors.
>
>   PR target/89021
>   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
>   modes for TARGET_MMX_WITH_SSE.
>   (SSE_REG_MODE_P): Likewise.
> ---
>  gcc/config/i386/i386.h | 20 ++--
>  1 file changed, 18 insertions(+), 2 deletions(-)
>
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 83b025e0cf5..c1df3ec3326 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -585,6 +585,11 @@ extern unsigned char
> ix86_arch_features[X86_ARCH_LAST];
>
>  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
>
> +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> +#define TARGET_MMX_WITH_SSE \
> +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> +
>  extern unsigned char x86_prefetch_sse;
>  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>
> @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
> || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> || (MODE) == TFmode || (MODE) == V1TImode)
>
> +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> +   nor SImode.  */

This is strange, since we already allow all MMX modes in SSE
registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
return:

return ((TARGET_AVX
 && VALID_AVX256_REG_OR_OI_MODE (mode))
|| VALID_SSE_REG_MODE (mode)
|| VALID_SSE2_REG_MODE (mode)
|| VALID_MMX_REG_MODE (mode)
|| VALID_MMX_REG_MODE_3DNOW (mode));

I'd expect that changed VALID_SSE2_REG_MODE affects only
ix86_vector_mode_supported_p when MMX is disabled and perhaps
ix86_set_reg_reg_cost cost function.

Are there any concrete issues when allowing all MMX (including 3DNOW?)
modes in VALID_SSE2_REG_MODE?

Uros.

>  #define VALID_SSE2_REG_MODE(MODE)\
>((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode   \
> -   || (MODE) == V2DImode || (MODE) == DFmode)
> +   || (MODE) == V2DImode || (MODE) == DFmode \
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode  
> \
> +|| (MODE) == V4HImode\
> +|| (MODE) == V2SImode\
> +|| (MODE) == V2SFmode)))
>
>  #define VALID_SSE_REG_MODE(MODE) \
>((MODE) == V1TImode || (MODE) == TImode\
> @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> const char **argv);
> || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
> || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode
> \
> || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode   
> \
> -   || (MODE) == V16SFmode)
> +   || (MODE) == V16SFmode\
> +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode  
> \
> +|| (MODE) == V4HImode\
> +|| (MODE) == V2SImode\
> +|| (MODE) == V2SFmode)))
>
>  #define X87_FLOAT_MODE_P(MODE)   \
>(TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> XFmode))
> --
> 2.20.1
>
>


Re: [PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> Emulate MMX plusminus/sat_plusminus with SSE.  Only SSE register source
> operand is allowed.
>
> 2019-02-08  H.J. Lu  
>   Uros Bizjak  
>
>   PR target/89021
>   * config/i386/mmx.md (MMXMODEI8): Require TARGET_SSE2 for V1DI.
>   (3): New.
>   (*mmx_3): Changed to define_insn_and_split
>   to support SSE emulation.
>   (*mmx_3): Likewise.
>   (mmx_3): Also allow TARGET_MMX_WITH_SSE.
> ---
>  gcc/config/i386/mmx.md | 51 +-
>  1 file changed, 35 insertions(+), 16 deletions(-)
>
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index 1d5ed83e7b2..01a71aa128b 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -45,7 +45,7 @@
>
>  ;; 8 byte integral modes handled by MMX (and by extension, SSE)
>  (define_mode_iterator MMXMODEI [V8QI V4HI V2SI])
> -(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI V1DI])
> +(define_mode_iterator MMXMODEI8 [V8QI V4HI V2SI (V1DI "TARGET_SSE2")])
>
>  ;; All 8-byte vector modes handled by MMX
>  (define_mode_iterator MMXMODE [V8QI V4HI V2SI V1DI V2SF])
> @@ -698,34 +698,53 @@
>"TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)"
>"ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
> +(define_expand "3"
> +  [(set (match_operand:MMXMODEI 0 "register_operand")
> + (plusminus:MMXMODEI
> +   (match_operand:MMXMODEI 1 "nonimmediate_operand")
> +   (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
> +  "TARGET_MMX_WITH_SSE"
> +  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
> +
>  (define_insn "*mmx_3"
> -  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,Yx,Yy")
>  (plusminus:MMXMODEI8
> -   (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
> -   (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
> -  "(TARGET_MMX || (TARGET_SSE2 && mode == V1DImode))
> +   (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0,0,Yy")
> +   (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX
> +|| TARGET_MMX_WITH_SSE
> +|| (TARGET_SSE2 && mode == V1DImode))

You don't need V1DImode bypass. This was wrong before the patch and
would break for -msse2 -mno-mmx, since the pattern uses MMX registers.

On a related note, all SSE2 mmx patterns (also in sse.md) should
depend on TARGET_MMX, since they currently use MMX registers. Before
your patch serie, this didn't trigger problems since 8-byte vector
modes were rarely used, but with a new autovectorizer opportunities,
some of these problems can and will trigger. Also note that we
currently enable MMX for SSE2 builtins to mitigate this problem.

Uros.

> && ix86_binary_operator_ok (, mode, operands)"
> -  "p\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +  "@
> +   p\t{%2, %0|%0, %2}
> +   p\t{%2, %0|%0, %2}
> +   vp\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sseadd,sseadd")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_3"
>[(set (match_operand:MMXMODE12 0 "register_operand")
>   (sat_plusminus:MMXMODE12
> (match_operand:MMXMODE12 1 "nonimmediate_operand")
> (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
> -  "TARGET_MMX"
> +  "TARGET_MMX || TARGET_MMX_WITH_SSE"
>"ix86_fixup_binary_operands_no_copy (, mode, operands);")
>
>  (define_insn "*mmx_3"
> -  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
> +  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,Yx,Yy")
>  (sat_plusminus:MMXMODE12
> -   (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
> -   (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
> -  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
> -  "p\t{%2, %0|%0, %2}"
> -  [(set_attr "type" "mmxadd")
> -   (set_attr "mode" "DI")])
> +   (match_operand:MMXMODE12 1 "nonimmediate_operand" "0,0,Yy")
> +   (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,Yx,Yy")))]
> +  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
> +   && ix86_binary_operator_ok (, mode, operands)"
> +  "@
> +   p\t{%2, %0|%0, %2}
> +   p\t{%2, %0|%0, %2}
> +   vp\t{%2, %1, %0|%0, %1, %2}"
> +  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> +   (set_attr "type" "mmxadd,sseadd,sseadd")
> +   (set_attr "mode" "DI,TI,TI")])
>
>  (define_expand "mmx_mulv4hi3"
>[(set (match_operand:V4HI 0 "register_operand")
> --
> 2.20.1
>
>


Re: [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
>
>   PR target/89021
>   * config/i386/mmx.md (sse_cvtps2pi): Add SSE emulation.
>   (sse_cvttps2pi): Likewise.
> ---
>  gcc/config/i386/sse.md | 26 --
>  1 file changed, 16 insertions(+), 10 deletions(-)
>
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index 7d2c0367911..4321c5c46db 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -4668,26 +4668,32 @@
> (set_attr "mode" "V4SF")])
>
>  (define_insn "sse_cvtps2pi"
> -  [(set (match_operand:V2SI 0 "register_operand" "=y")
> +  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
>   (vec_select:V2SI
> -   (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm")]
> +   (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm")]
>  UNSPEC_FIX_NOTRUNC)
> (parallel [(const_int 0) (const_int 1)])))]
>"TARGET_SSE"

Patterns that use MMX registers should depend on
(TARGET_MMX || TARGET_MMX_WITH_SSE).

Since the above pattern depends on MMX registers, the condition should read:

(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE.

to disable the pattern for -msse -mno-mmx on 32bit target.

All patterns that use MMX registers (especially those in sse.md)
should be reviewed for the above change.

Uros.

> -  "cvtps2pi\t{%1, %0|%0, %q1}"
> -  [(set_attr "type" "ssecvt")
> -   (set_attr "unit" "mmx")
> +  "@
> +   cvtps2pi\t{%1, %0|%0, %q1}
> +   %vcvtps2dq\t{%1, %0|%0, %1}"
> +  [(set_attr "mmx_isa" "native,x64")
> +   (set_attr "type" "ssecvt")
> +   (set_attr "unit" "mmx,*")
> (set_attr "mode" "DI")])
>
>  (define_insn "sse_cvttps2pi"
> -  [(set (match_operand:V2SI 0 "register_operand" "=y")
> +  [(set (match_operand:V2SI 0 "register_operand" "=y,Yy")
>   (vec_select:V2SI
> -   (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm"))
> +   (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YyBm"))
> (parallel [(const_int 0) (const_int 1)])))]
>"TARGET_SSE"
> -  "cvttps2pi\t{%1, %0|%0, %q1}"
> -  [(set_attr "type" "ssecvt")
> -   (set_attr "unit" "mmx")
> +  "@
> +   cvttps2pi\t{%1, %0|%0, %q1}
> +   %vcvttps2dq\t{%1, %0|%0, %1}"
> +  [(set_attr "mmx_isa" "native,x64")
> +   (set_attr "type" "ssecvt")
> +   (set_attr "unit" "mmx,*")
> (set_attr "prefix_rep" "0")
> (set_attr "mode" "SF")])
>
> --
> 2.20.1
>
>


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak  wrote:
>
> On 2/9/19, H.J. Lu  wrote:
> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >
> >   PR target/89021
> >   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >   modes for TARGET_MMX_WITH_SSE.
> >   (SSE_REG_MODE_P): Likewise.
> > ---
> >  gcc/config/i386/i386.h | 20 ++--
> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> > index 83b025e0cf5..c1df3ec3326 100644
> > --- a/gcc/config/i386/i386.h
> > +++ b/gcc/config/i386/i386.h
> > @@ -585,6 +585,11 @@ extern unsigned char
> > ix86_arch_features[X86_ARCH_LAST];
> >
> >  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
> >
> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
> > +#define TARGET_MMX_WITH_SSE \
> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> > +
> >  extern unsigned char x86_prefetch_sse;
> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >
> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> > || (MODE) == TFmode || (MODE) == V1TImode)
> >
> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> > +   nor SImode.  */
>
> This is strange, since we already allow all MMX modes in SSE
> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> return:
>
> return ((TARGET_AVX
>  && VALID_AVX256_REG_OR_OI_MODE (mode))
> || VALID_SSE_REG_MODE (mode)
> || VALID_SSE2_REG_MODE (mode)
> || VALID_MMX_REG_MODE (mode)
> || VALID_MMX_REG_MODE_3DNOW (mode));
>
> I'd expect that changed VALID_SSE2_REG_MODE affects only
> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> ix86_set_reg_reg_cost cost function.
>
> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> modes in VALID_SSE2_REG_MODE?

The problem is with DImode and SImode.  All other vector modes, including
V2SF, are OK.  With DImode and SImode, I got the following regressions:

FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
cannot be created; target is discardable"
FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
FAIL: go test misc/cgo/testcarchive

 gcc.dg/pr39323-3.c  is due to

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261

and

/* Decide whether a variable of mode MODE should be 128 bit aligned.  */
#define ALIGN_MODE_128(MODE) \
 ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
places.  The i386 backend may not be prepared to deal with these modes in
SSE_REG_MODE_P nor VALID_SSE2_REG_MODE.

> Uros.
>
> >  #define VALID_SSE2_REG_MODE(MODE)\
> >((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode   \
> > -   || (MODE) == V2DImode || (MODE) == DFmode)
> > +   || (MODE) == V2DImode || (MODE) == DFmode \
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode
> >   \
> > +|| (MODE) == V4HImode\
> > +|| (MODE) == V2SImode\
> > +|| (MODE) == V2SFmode)))
> >
> >  #define VALID_SSE_REG_MODE(MODE) \
> >((MODE) == V1TImode || (MODE) == TImode\
> > @@ -1188,7 +1200,11 @@ extern const char *host_detect_local_cpu (int argc,
> > const char **argv);
> > || (MODE) == V4DImode || (MODE) == V8SFmode || (MODE) == V4DFmode \
> > || (MODE) == V2TImode || (MODE) == V8DImode || (MODE) == V64QImode  
> >   \
> > || (MODE) == V16SImode || (MODE) == V32HImode || (MODE) == V8DFmode 
> >   \
> > -   || (MODE) == V16SFmode)
> > +   || (MODE) == V16SFmode\
> > +   || (TARGET_MMX_WITH_SSE && ((MODE) == V1DImode || (MODE) == V8QImode
> >   \
> > +|| (MODE) == V4HImode\
> > +|| (MODE) == V2SImode\
> > +|| (MODE) == V2SFmode)))
> >
> >  #define X87_FLOAT_MODE_P(MODE)   \
> >(TARGET_80387 && ((MODE) == SFmode || (MODE) == DFmode || (MODE) ==
> > XFmode))
> > --
> > 2.20.1
> >
> >



-- 
H.J.


Re: [PATCH 37/43] i386: Allow MMX intrinsic emulation with SSE

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> Allow MMX intrinsic emulation with SSE/SSE2/SSSE3.  For pr82483-1.c and
> pr82483-2.c, "-mssse3 -mno-mmx" no longer ICEs in 64-bit mode since MMX
> intrinsics can be emulated with SSE.
>
> gcc/
>
>   PR target/89021
>   * config/i386/i386-builtin.def: Enable MMX intrinsics with
>   SSE/SSE2/SSSE3.
>   * config/i386/i386.c (bdesc_tm): Likewise.
>   (ix86_init_mmx_sse_builtins): Likewise.
>   (ix86_expand_builtin): Allow SSE/SSE2/SSSE3 to emulate MMX
>   intrinsics in 64-bit mode without MMX.
>   * config/i386/mmintrin.h: Don't require MMX in 64-bit mode.
>
> gcc/testsuite/
>
>   PR target/89021
>   * gcc.target/i386/pr82483-1.c: Error only on ia32.
>   * gcc.target/i386/pr82483-2.c: Likewise.
> ---
>  gcc/config/i386/i386-builtin.def  | 126 +++---
>  gcc/config/i386/i386.c|  45 +---
>  gcc/config/i386/mmintrin.h|  10 +-
>  gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
>  gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
>  5 files changed, 107 insertions(+), 78 deletions(-)

Please note we have following gems in i386.c, ix86_option_override_internal:

4168   /* Enable by default the SSE and MMX builtins.  Do allow the user to
4169  explicitly disable any of these.  In particular, disabling SSE and
4170  MMX for kernel code is extremely useful.  */
4171   if (!ix86_arch_specified)
4172   opts->x_ix86_isa_flags
4173 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE |
OPTION_MASK_ISA_MMX
4174  | TARGET_SUBTARGET64_ISA_DEFAULT)
4175 & ~opts->x_ix86_isa_flags_explicit);


4219   /* Turn on MMX builtins for -msse.  */
4220   if (TARGET_SSE_P (opts->x_ix86_isa_flags))
4221 opts->x_ix86_isa_flags
4222   |= OPTION_MASK_ISA_MMX & ~opts->x_ix86_isa_flags_explicit;

These should probably involve TARGET_MMX_WITH_SSE now. At least we
don't need to silently enable MMX anymore.

Uros.

> diff --git a/gcc/config/i386/i386-builtin.def
> b/gcc/config/i386/i386-builtin.def
> index 88005f4687f..10a9d631f29 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -100,7 +100,7 @@ BDESC (0, 0, CODE_FOR_fnstsw, "__builtin_ia32_fnstsw",
> IX86_BUILTIN_FNSTSW, UNKN
>  BDESC (0, 0, CODE_FOR_fnclex, "__builtin_ia32_fnclex", IX86_BUILTIN_FNCLEX,
> UNKNOWN, (int) VOID_FTYPE_VOID)
>
>  /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_emms, "__builtin_ia32_emms",
> IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> +BDESC (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE2, 0, CODE_FOR_mmx_emms,
> "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
>
>  /* 3DNow! */
>  BDESC (OPTION_MASK_ISA_3DNOW, 0, CODE_FOR_mmx_femms,
> "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID)
> @@ -442,68 +442,68 @@ BDESC (0, 0, CODE_FOR_rotrqi3, "__builtin_ia32_rorqi",
> IX86_BUILTIN_RORQI, UNKNO
>  BDESC (0, 0, CODE_FOR_rotrhi3, "__builtin_ia32_rorhi", IX86_BUILTIN_RORHI,
> UNKNOWN, (int) UINT16_FTYPE_UINT16_INT)
>
>  /* MMX */
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv8qi3,
> "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv4hi3,
> "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_addv2si3,
> "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv8qi3,
> "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv4hi3,
> "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_subv2si3,
> "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int)
> V2SI_FTYPE_V2SI_V2SI)
> -
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv8qi3,
> "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ssaddv4hi3,
> "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv8qi3,
> "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_sssubv4hi3,
> "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv8qi3,
> "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int)
> V8QI_FTYPE_V8QI_V8QI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_usaddv4hi3,
> "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int)
> V4HI_FTYPE_V4HI_V4HI)
> -BDESC (OPTION_MASK_ISA_MMX, 0, CODE_FOR_mmx_ussubv8qi3,
> "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int)
> V8QI_FTYPE_V8Q

Re: [PATCH] Fix PR89247

2019-02-09 Thread H.J. Lu
On Fri, Feb 8, 2019 at 4:49 AM Richard Biener  wrote:
>
>
> The following fixes LOOP_VECTORIZED IFNs made useless by CFG cleanup
> after if-conversion by re-verifying the mentioned loops still exist.
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Richard.
>
> 2019-02-08  Richard Biener  
>
> PR tree-optimization/89247
> * tree-if-conv.c: Include tree-cfgcleanup.h.
> (version_loop_for_if_conversion): Record LOOP_VECTORIZED call.
> (tree_if_conversion): Pass through predicate vector.
> (pass_if_conversion::execute): Do CFG cleanup and SSA update
> inline, see if any if-converted loops we refrece in
> LOOP_VECTORIZED calls vanished and fixup.
> * tree-if-conv.h (tree_if_conversion): Adjust prototype.
>

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89268

-- 
H.J.


New Finnish PO file for 'gcc' (version 9.1-b20190203)

2019-02-09 Thread Translation Project Robot
Hello, gentle maintainer.

This is a message from the Translation Project robot.

A revised PO file for textual domain 'gcc' has been submitted
by the Finnish team of translators.  The file is available at:

https://translationproject.org/latest/gcc/fi.po

(This file, 'gcc-9.1-b20190203.fi.po', has just now been sent to you in
a separate email.)

All other PO files for your package are available in:

https://translationproject.org/latest/gcc/

Please consider including all of these in your next release, whether
official or a pretest.

Whenever you have a new distribution with a new version number ready,
containing a newer POT file, please send the URL of that distribution
tarball to the address below.  The tarball may be just a pretest or a
snapshot, it does not even have to compile.  It is just used by the
translators when they need some extra translation context.

The following HTML page has been updated:

https://translationproject.org/domain/gcc.html

If any question arises, please contact the translation coordinator.

Thank you for all your work,

The Translation Project robot, in the
name of your translation coordinator.




Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak  wrote:
>>
>> On 2/9/19, H.J. Lu  wrote:
>> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >
>> >   PR target/89021
>> >   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
>> >   modes for TARGET_MMX_WITH_SSE.
>> >   (SSE_REG_MODE_P): Likewise.
>> > ---
>> >  gcc/config/i386/i386.h | 20 ++--
>> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >
>> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> > index 83b025e0cf5..c1df3ec3326 100644
>> > --- a/gcc/config/i386/i386.h
>> > +++ b/gcc/config/i386/i386.h
>> > @@ -585,6 +585,11 @@ extern unsigned char
>> > ix86_arch_features[X86_ARCH_LAST];
>> >
>> >  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
>> >
>> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
>> > */
>> > +#define TARGET_MMX_WITH_SSE \
>> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> > +
>> >  extern unsigned char x86_prefetch_sse;
>> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >
>> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> > argc,
>> > const char **argv);
>> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
>> > || (MODE) == TFmode || (MODE) == V1TImode)
>> >
>> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
>> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
>> > +   nor SImode.  */
>>
>> This is strange, since we already allow all MMX modes in SSE
>> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> return:
>>
>> return ((TARGET_AVX
>>  && VALID_AVX256_REG_OR_OI_MODE (mode))
>> || VALID_SSE_REG_MODE (mode)
>> || VALID_SSE2_REG_MODE (mode)
>> || VALID_MMX_REG_MODE (mode)
>> || VALID_MMX_REG_MODE_3DNOW (mode));
>>
>> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> ix86_set_reg_reg_cost cost function.
>>
>> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> modes in VALID_SSE2_REG_MODE?
>
> The problem is with DImode and SImode.  All other vector modes,  including
> V2SF is OK.  With DImode and SImode, I got following regressions:
>
> FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> cannot be created; target is discardable"
> FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> FAIL: go test misc/cgo/testcarchive
>
>  gcc.dg/pr39323-3.c  is due to
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>
> and
>
> /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> #define ALIGN_MODE_128(MODE) \
>  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))

Hm, this is a bit worrying, we don't want to introduce ABI
incompatibilities w.r.t. alignment. We still need to be ABI compatible
for MMX values and emit unaligned loads/stores when necessary.

> SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
> nor VALID_SSE2_REG_MODE.

I think we have to review the usage of these two changed defines to
prevent any ABI issues or other hidden issues.

Uros.


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak  wrote:
>
> On 2/9/19, H.J. Lu  wrote:
> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak  wrote:
> >>
> >> On 2/9/19, H.J. Lu  wrote:
> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >
> >> >   PR target/89021
> >> >   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit vector
> >> >   modes for TARGET_MMX_WITH_SSE.
> >> >   (SSE_REG_MODE_P): Likewise.
> >> > ---
> >> >  gcc/config/i386/i386.h | 20 ++--
> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >
> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> > --- a/gcc/config/i386/i386.h
> >> > +++ b/gcc/config/i386/i386.h
> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >
> >> >  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
> >> >
> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.
> >> > */
> >> > +#define TARGET_MMX_WITH_SSE \
> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> > +
> >> >  extern unsigned char x86_prefetch_sse;
> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >
> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> > argc,
> >> > const char **argv);
> >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode \
> >> > || (MODE) == TFmode || (MODE) == V1TImode)
> >> >
> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
> >> > +   want to include 8-byte vector modes, like V2SFmode, but not DImode
> >> > +   nor SImode.  */
> >>
> >> This is strange, since we already allow all MMX modes in SSE
> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> return:
> >>
> >> return ((TARGET_AVX
> >>  && VALID_AVX256_REG_OR_OI_MODE (mode))
> >> || VALID_SSE_REG_MODE (mode)
> >> || VALID_SSE2_REG_MODE (mode)
> >> || VALID_MMX_REG_MODE (mode)
> >> || VALID_MMX_REG_MODE_3DNOW (mode));
> >>
> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> ix86_set_reg_reg_cost cost function.
> >>
> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> modes in VALID_SSE2_REG_MODE?
> >
> > The problem is with DImode and SImode.  All other vector modes,  including
> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >
> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> > cannot be created; target is discardable"
> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[ \t]*\n
> > FAIL: go test misc/cgo/testcarchive
> >
> >  gcc.dg/pr39323-3.c  is due to
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >
> > and
> >
> > /* Decide whether a variable of mode MODE should be 128 bit aligned.  */
> > #define ALIGN_MODE_128(MODE) \
> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>
> Hm, this is a bit worrying, we don't want to introduce ABI
> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> for MMX values and emit unaligned loads/stores when necessary.

We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
And I don't think we should put DI and SI in them.

> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
> > places.   i386 backend may not be prepared to deal them in SSE_REG_MODE_P
> > nor VALID_SSE2_REG_MODE.
>
> I think we have to review the usage of these two changed defines to
> prevent any ABI issues or other hidden issues.
>

Absolutely.

-- 
H.J.


Re: [PATCH][libbacktrace] Declare external backtrace fns noinline

2019-02-09 Thread Tom de Vries
On 08-02-19 18:25, Thomas Schwinge wrote:
> Hi Tom!
> 
> On Fri, 8 Feb 2019 10:41:47 +0100, Tom de Vries  wrote:
>> The backtrace functions backtrace_full, backtrace_print and backtrace_simple
>> walk the call stack, but make sure to skip the first entry, in order to skip
>> over the functions themselves, and start the backtrace at the caller of the
>> functions.
>>
>> When compiling with -flto, the functions may be inlined, causing them to skip
>> over the caller instead.
> 
> So, when recently working on the OpenACC Profiling Interface
> implementation in libgomp, where I'm using libbacktrace to figure out the
> caller of certain libgomp functions, I recently wondered about the very
> same issue, that we reliably have to skip a few initial frames.
> 
> So, "noinline" is how to do that reliably...  ;-/ That might be
> non-obvious for the casual reader, so they might not understand...
> 
>> Fix this by declaring the functions with __attribute__((noinline)).
> 
> ... this alone.
> 
> I'd suggest to have a common "#define LIBBACKTRACE_NOINLINE [...]" (or
> similar), together with the explanatory comment given above, and use that
> at the respective definition (or declaration?) sites.  Can that go into
> the public libbacktrace "*.h" file, so that it can also be used
> elsewhere, as described above?
> 
> If you agree, want me to prepare a patch?

Hi Thomas,

I suppose adding an explanatory comment at the places where I added
"__attribute__((noinline))" could be an improvement.

But to me, this is just an implementation detail of the library, and I
would avoid changing the public header file.

Thanks,
- Tom
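
For readers skimming the thread, a minimal sketch of the wrapper Thomas
describes could look roughly like the following; the LIBBACKTRACE_NOINLINE
name and its placement are purely illustrative, nothing like it exists in
the tree as of this discussion:

#include <backtrace.h>

#ifndef LIBBACKTRACE_NOINLINE
# ifdef __GNUC__
   /* Keep -flto (or any IPA inlining) from inlining the public entry
      points, so that skipping one frame still lands on the caller.  */
#  define LIBBACKTRACE_NOINLINE __attribute__ ((noinline))
# else
#  define LIBBACKTRACE_NOINLINE
# endif
#endif

/* Then applied to the entry points, e.g.:  */
LIBBACKTRACE_NOINLINE int backtrace_full (struct backtrace_state *state,
                                          int skip,
                                          backtrace_full_callback callback,
                                          backtrace_error_callback error_callback,
                                          void *data);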


Re: [PATCH] ARM: fix -masm-syntax-unified (PR88648)

2019-02-09 Thread Stefan Agner
Hi Kyrill,

On 10.01.2019 12:38, Kyrill  Tkachov wrote:
> Hi Stefan,
> 
> On 08/01/19 09:33, Kyrill Tkachov wrote:
>> Hi Stefan,
>>
>> On 01/01/19 23:34, Stefan Agner wrote:
>> > This allows to use unified asm syntax when compiling for the
>> > ARM instruction. This matches documentation and seems what the
>> > initial patch was intended doing when the flag got added.
>> > ---
>> >  gcc/config/arm/arm.c | 3 ++-
>> >  1 file changed, 2 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
>> > index 3419b6bd0f8..67b2b199f3f 100644
>> > --- a/gcc/config/arm/arm.c
>> > +++ b/gcc/config/arm/arm.c
>> > @@ -3095,7 +3095,8 @@ arm_option_override_internal (struct gcc_options 
>> > *opts,
>> >
>> >/* Thumb2 inline assembly code should always use unified syntax.
>> >   This will apply to ARM and Thumb1 eventually.  */
>> > -  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
>> > +  if (TARGET_THUMB2_P (opts->x_target_flags))
>> > +opts->x_inline_asm_unified = true;
>>
>> This looks right to me and is the logic we had in GCC 5.
>> How has this patch been tested?
>>
>> Can you please provide a ChangeLog entry for this patch[1].
>>
> 
> I've bootstrapped and tested this, together with your testsuite patch
> on arm-none-linux-gnueabihf
> and committed both with r267804 with the following ChangeLog entries:
> 
> 2019-01-10  Stefan Agner  
> 
> PR target/88648
> * config/arm/arm.c (arm_option_override_internal): Force
> opts->x_inline_asm_unified to true only if TARGET_THUMB2_P.
> 
> 2019-01-10  Stefan Agner  
> 
> PR target/88648
> * gcc.target/arm/pr88648-asm-syntax-unified.c: Add test to
> check if -masm-syntax-unified gets applied properly.
> 
> Thank you for the patch. If you plan to contribute more patches in the
> future I suggest you
> sort out the copyright assignment paperwork.
> 
> I believe this fix needs to be backported to the branches.
> I'll do so after a few days of testing on trunk.

Thanks for applying the patch! As far as I can see it did not make it
into the branch yet, do you think it can get backported there too?

--
Stefan

> 
> Thanks again,
> Kyrill
> 
>> Thanks,
>> Kyrill
>>
>> [1] https://gcc.gnu.org/contribute.html
>>
>> >
>> >  #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
>> >SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
>> > --
>> > 2.20.1
>> >
>>


Re: [PR fortran/89077, patch, part 2] - ICE using * as len specifier for character parameter

2019-02-09 Thread Harald Anlauf
Committed to trunk as rev. 268726, after adding a comment that a check
for negative substring length is already present.  The updated version
is attached.

Thanks for the review.  Will not backport unless requested.

Harald

On 02/08/19 21:36, Harald Anlauf wrote:
> The attached patch attempts a substring length simplification
> so that more complex expressions are handled in initialization
> expressions.  Thanks to Thomas König for the suggestion.
> 
> Regtested on x86_64-pc-linux-gnu.
> 
> (The PR still has other wrong-code issue to be addressed separately.)
> 
> OK for trunk?  And for backports to 8/7?
> 
> Thanks,
> Harald
> 
> 
> 2019-02-08  Harald Anlauf  
> 
>   PR fortran/89077
>   * resolve.c (gfc_resolve_substring_charlen): Check substring
>   length for constantness prior to general calculation of length.
> 
> 2019-02-08  Harald Anlauf  
> 
>   PR fortran/89077
>   * gfortran.dg/substr_simplify.f90: New test.
> 


-- 
Harald Anlauf
Dieburger Str. 17
60386 Frankfurt
Tel.: (069) 4014 8318
Index: gcc/fortran/resolve.c
===
--- gcc/fortran/resolve.c   (revision 268725)
+++ gcc/fortran/resolve.c   (working copy)
@@ -4965,6 +4965,7 @@
   gfc_ref *char_ref;
   gfc_expr *start, *end;
   gfc_typespec *ts = NULL;
+  mpz_t diff;
 
   for (char_ref = e->ref; char_ref; char_ref = char_ref->next)
 {
@@ -5016,12 +5017,26 @@
   return;
 }
 
-  /* Length = (end - start + 1).  */
-  e->ts.u.cl->length = gfc_subtract (end, start);
-  e->ts.u.cl->length = gfc_add (e->ts.u.cl->length,
-   gfc_get_int_expr (gfc_charlen_int_kind,
- NULL, 1));
+  /* Length = (end - start + 1).
+ Check first whether it has a constant length.  */
+  if (gfc_dep_difference (end, start, &diff))
+{
+  gfc_expr *len = gfc_get_constant_expr (BT_INTEGER, gfc_charlen_int_kind,
+&e->where);
 
+  mpz_add_ui (len->value.integer, diff, 1);
+  mpz_clear (diff);
+  e->ts.u.cl->length = len;
+  /* The check for length < 0 is handled below */
+}
+  else
+{
+  e->ts.u.cl->length = gfc_subtract (end, start);
+  e->ts.u.cl->length = gfc_add (e->ts.u.cl->length,
+   gfc_get_int_expr (gfc_charlen_int_kind,
+ NULL, 1));
+}
+
   /* F2008, 6.4.1:  Both the starting point and the ending point shall
  be within the range 1, 2, ..., n unless the starting point exceeds
  the ending point, in which case the substring has length zero.  */


Fix overflow issues with new --params

2019-02-09 Thread Jan Hubicka
Hi,
as demonstrated in the PR, using very large values for
uninlined-function-insns and friends leads to overflow.  It is easiest
to handle this by an artificial upper bound on the parameter.

I can imagine that for some architectures, like GCN, function calls can
be considered expensive, but not more than 1M of instructions :)

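To make the failure mode concrete: the size/time estimates are kept in plain
ints, so a huge --param value plus even a small body wraps around.  A rough
stand-alone illustration (not the inliner's actual code):

#include <limits.h>
#include <stdio.h>

int
main (void)
{
  /* Hypothetical values: a --param pushed near INT_MAX and a modest
     function body size, both held in int like the cost estimates are.  */
  int uninlined_function_insns = INT_MAX - 10;
  int body_size = 50;

  if (uninlined_function_insns > INT_MAX - body_size)
    printf ("adding them would overflow int; capping the param avoids it\n");
  else
    printf ("total size estimate: %d\n", uninlined_function_insns + body_size);
  return 0;
}
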
Bootstrapped/regtested x86_64-linux and committed.
PR ipa/88755
* params.def (uninlined-function-insns, uninlined-function-time,
uninlined-thunk-insns, uninlined-thunk-time): Add artificial upper
bound so we don't get overflows.
Index: params.def
===
--- params.def  (revision 268722)
+++ params.def  (working copy)
@@ -96,23 +96,23 @@ DEFPARAM (PARAM_UNINLINED_FUNCTION_INSNS
  "uninlined-function-insns",
  "Instruction accounted for function prologue, epilogue and other"
  " overhead.",
- 2, 0, 0)
+ 2, 0, 100)
 
 DEFPARAM (PARAM_UNINLINED_FUNCTION_TIME,
  "uninlined-function-time",
  "Time accounted for function prologue, epilogue and other"
  " overhead.",
- 0, 0, 0)
+ 0, 0, 100)
 
 DEFPARAM (PARAM_UNINLINED_FUNCTION_THUNK_INSNS,
  "uninlined-thunk-insns",
  "Instruction accounted for function thunk overhead.",
- 2, 0, 0)
+ 2, 0, 100)
 
 DEFPARAM (PARAM_UNINLINED_FUNCTION_THUNK_TIME,
  "uninlined-thunk-time",
  "Time accounted for function thunk overhead.",
- 2, 0, 0)
+ 2, 0, 100)
 
 DEFPARAM (PARAM_MAX_INLINE_INSNS_RECURSIVE,
  "max-inline-insns-recursive",


Fix odr ICE on Ada LTO

2019-02-09 Thread Jan Hubicka
Hi,
this patch fixes ICE in free_lang_data compiling lto8.adb.
The fix is a bit symptomatic because type_with_linkage_p should return
false for Ada types. Perhaps adding an explicit flag to DECL_NAME would
make sense, but it can wait for next stage1.

The fix works because at this stage of free_lang_data all mangled names
must be computed, and thus it is cheaper to test the presence of
DECL_ASSEMBLER_NAME anyway.

Bootstrapped/regtested x86_64-linux, committed.
PR lto/87957
* tree.c (fld_simplified_type_name): Use DECL_ASSEMBLER_NAME_SET_P
instead of type_with_linkage.
Index: tree.c
===
--- tree.c  (revision 268722)
+++ tree.c  (working copy)
@@ -5152,7 +5152,8 @@ fld_simplified_type_name (tree type)
   /* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
  TYPE_DECL if the type doesn't have linkage.
  this must match fld_  */
-  if (type != TYPE_MAIN_VARIANT (type) || ! type_with_linkage_p (type))
+  if (type != TYPE_MAIN_VARIANT (type)
+  || !DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (type)))
 return DECL_NAME (TYPE_NAME (type));
   return TYPE_NAME (type);
 }


Re: [PATCH, RFC] Avoid the -D option which is not available install-sh

2019-02-09 Thread Bernd Edlinger
On 1/31/19 12:38 PM, Bernd Edlinger wrote:
> Hi,
> 
> I have an issue with the installation of gcc when configured with 
> --enable-languages=all
> on an arm-target where install-sh is used, and make install fails at 
> libphobos as follows:
> 
>   if test -f $file; then \
> /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D $file 
> /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file
>  ; \
>   else \
> /home/ed/gnu/gcc-9-20190127-0/install-sh -c -m 644 -D 
> ../../../../gcc-9-20190127-0/libphobos/libdruntime/$file \
>   
> /home/ed/gnu/arm-linux-gnueabihf/lib/gcc/armv7l-unknown-linux-gnueabihf/9.0.1/include/d/$file
>  ; \
>   fi ; \
> done
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> /home/ed/gnu/gcc-9-20190127-0/install-sh: invalid option: -D
> ...
> 
> I have fixed the installation with the attached patch, but when I regenerate 
> the automake
> files using automake-1.15.1 and autoconf-2.69, I have an issue that apparently
> the configure.ac must be out of sync, and the the generated files are missing
> the option --runstatedir no matter what I do.  At least on the source 
> 
> RFC, because I am not sure what the --runstatedir option is, and if it is 
> intentional to remove,
> and forgotten to re-generate, or if was intended to add, and forgotten to 
> check in the
> configure.ac.
> 

Ahem, sorry, I realized that the patch did not work when coreutils' install
is used instead of install-sh *and* the subdirectories below include/d do
not yet exist.

So I have to extract and create the directory part before installing $file.

Attached is the new version of the libphobos install patch.


Bootstrapped and reg-tested / installed on x86_64-pc-linux-gnu and 
arm-linux-gnueabihf
with all languages.

Is it OK for trunk?


Thanks
Bernd.
2019-01-31  Bernd Edlinger  

	* src/Makefile.am: Avoid the -D option which is not available
	with the install-sh fallback.  Use $(MKDIR_P) instead.
	* libdruntime/Makefile.am: Likewise.
	* configure: Regenerated.
	* Makefile.in: Regenerated.
	* src/Makefile.in: Regenerated.
	* libdruntime/Makefile.in: Regenerated.
	* testsuite/Makefile.in: Regenerated.

Index: libphobos/Makefile.in
===
--- libphobos/Makefile.in	(revision 268614)
+++ libphobos/Makefile.in	(working copy)
@@ -15,7 +15,7 @@
 @SET_MAKE@
 
 # Makefile for the toplevel directory of the D Standard library.
-# Copyright (C) 2006-2018 Free Software Foundation, Inc.
+# Copyright (C) 2006-2019 Free Software Foundation, Inc.
 #
 # GCC is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
@@ -319,7 +319,6 @@ phobos_compiler_shared_flag = @phobos_compiler_sha
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
-runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
Index: libphobos/configure
===
--- libphobos/configure	(revision 268614)
+++ libphobos/configure	(working copy)
@@ -782,7 +782,6 @@ infodir
 docdir
 oldincludedir
 includedir
-runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -868,7 +867,6 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
-runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
@@ -1121,15 +1119,6 @@ do
   | -silent | --silent | --silen | --sile | --sil)
 silent=yes ;;
 
-  -runstatedir | --runstatedir | --runstatedi | --runstated \
-  | --runstate | --runstat | --runsta | --runst | --runs \
-  | --run | --ru | --r)
-ac_prev=runstatedir ;;
-  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
-  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
-  | --run=* | --ru=* | --r=*)
-runstatedir=$ac_optarg ;;
-
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
 ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1267,7 +1256,7 @@ fi
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir runstatedir
+		libdir localedir mandir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1420,7 +1409,6 @@ Fine tuning of the installation directories:
   --sysconfdir=DIRread-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIRmodifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR modifiable single-machine data [PREFIX/var]
-  --runstatedir=DIR   modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIR

Re: [testsuite, ada] Don't XPASS gnat.dg/lto19.adb

2019-02-09 Thread Eric Botcazou
> Between 20181106 (r265849) and 20181107 (r265879), gnat.dg/lto19.adb
> started to XPASS everywhere:
> 
> XPASS: gnat.dg/lto19.adb (test for excess errors)
> 
> Fixed as follows, tested on i386-pc-solaris2.11 and sparc-sun-solaris2.11.

Jan just fixed the lto8 failure (thanks!) so you can go ahead with the patch.

-- 
Eric Botcazou


Re: [PATCH, RFC] Avoid the -D option which is not available install-sh

2019-02-09 Thread Jakub Jelinek
On Sat, Feb 09, 2019 at 06:11:00PM +, Bernd Edlinger wrote:
> --- libphobos/libdruntime/Makefile.am (revision 268614)
> +++ libphobos/libdruntime/Makefile.am (working copy)
> @@ -140,10 +140,12 @@ clean-local:
>  # Handles generated files as well
>  install-data-local:
>   for file in $(ALL_DRUNTIME_INSTALL_DSOURCES); do \
> +   $(MKDIR_P) `echo $(DESTDIR)$(gdc_include_dir)/$$file \
> +   | sed -e 's:/[^/]*$$::'` ; \

Perhaps better `dirname $(DESTDIR)$(gdc_include_dir)/$$file` ?

Jakub


Re: [PATCH, RFC] Avoid the -D option which is not available install-sh

2019-02-09 Thread Bernd Edlinger
On 2/9/19 7:18 PM, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 06:11:00PM +, Bernd Edlinger wrote:
>> --- libphobos/libdruntime/Makefile.am(revision 268614)
>> +++ libphobos/libdruntime/Makefile.am(working copy)
>> @@ -140,10 +140,12 @@ clean-local:
>>  # Handles generated files as well
>>  install-data-local:
>>  for file in $(ALL_DRUNTIME_INSTALL_DSOURCES); do \
>> +  $(MKDIR_P) `echo $(DESTDIR)$(gdc_include_dir)/$$file \
>> +  | sed -e 's:/[^/]*$$::'` ; \
> 
> Perhaps better `dirname $(DESTDIR)$(gdc_include_dir)/$$file` ?
> 

Ah, yes, good point.

Consider it changed.


Thanks
Bernd.


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
> On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak  wrote:
>>
>> On 2/9/19, H.J. Lu  wrote:
>> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak  wrote:
>> >>
>> >> On 2/9/19, H.J. Lu  wrote:
>> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
>> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
>> >> >
>> >> >   PR target/89021
>> >> >   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
>> >> >   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
>> >> > vector
>> >> >   modes for TARGET_MMX_WITH_SSE.
>> >> >   (SSE_REG_MODE_P): Likewise.
>> >> > ---
>> >> >  gcc/config/i386/i386.h | 20 ++--
>> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
>> >> >
>> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
>> >> > index 83b025e0cf5..c1df3ec3326 100644
>> >> > --- a/gcc/config/i386/i386.h
>> >> > +++ b/gcc/config/i386/i386.h
>> >> > @@ -585,6 +585,11 @@ extern unsigned char
>> >> > ix86_arch_features[X86_ARCH_LAST];
>> >> >
>> >> >  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
>> >> >
>> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
>> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
>> >> > emulation.
>> >> > */
>> >> > +#define TARGET_MMX_WITH_SSE \
>> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
>> >> > +
>> >> >  extern unsigned char x86_prefetch_sse;
>> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
>> >> >
>> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
>> >> > argc,
>> >> > const char **argv);
>> >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
>> >> > \
>> >> > || (MODE) == TFmode || (MODE) == V1TImode)
>> >> >
>> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
>> >> > we
>> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
>> >> > DImode
>> >> > +   nor SImode.  */
>> >>
>> >> This is strange, since we already allow all MMX modes in SSE
>> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
>> >> return:
>> >>
>> >> return ((TARGET_AVX
>> >>  && VALID_AVX256_REG_OR_OI_MODE (mode))
>> >> || VALID_SSE_REG_MODE (mode)
>> >> || VALID_SSE2_REG_MODE (mode)
>> >> || VALID_MMX_REG_MODE (mode)
>> >> || VALID_MMX_REG_MODE_3DNOW (mode));
>> >>
>> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
>> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
>> >> ix86_set_reg_reg_cost cost function.
>> >>
>> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
>> >> modes in VALID_SSE2_REG_MODE?
>> >
>> > The problem is with DImode and SImode.  All other vector modes,
>> > including
>> > V2SF is OK.  With DImode and SImode, I got following regressions:
>> >
>> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
>> > cannot be created; target is discardable"
>> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
>> > \t]*\n
>> > FAIL: go test misc/cgo/testcarchive
>> >
>> >  gcc.dg/pr39323-3.c  is due to
>> >
>> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
>> >
>> > and
>> >
>> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
>> > */
>> > #define ALIGN_MODE_128(MODE) \
>> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
>>
>> Hm, this is a bit worrying, we don't want to introduce ABI
>> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> for MMX values and emit unaligned loads/stores when necessary.
>
> We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> And I don't think we should put DI and SI in them.

Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
are and amend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
VALID_MMX_REG_MODE (...))? This is much more fine-grained compared to
a big-hammer approach of changing widely used defines like
SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
while the mentioned ALIGN_MODE_128 would be wrong if SSE_REG_MODE_P were
changed.

Uros.


>
>> > SSE_REG_MODE_P and VALID_SSE2_REG_MODE are used in many different
>> > places.   i386 backend may not be prepared to deal them in
>> > SSE_REG_MODE_P
>> > nor VALID_SSE2_REG_MODE.
>>
>> I think we have to review the usage of these two changed defines to
>> prevent any ABI issues or other hidden issues.
>>
>
> Absolutely.
>
> --
> H.J.
>


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 10:27 AM Uros Bizjak  wrote:
>
> On 2/9/19, H.J. Lu  wrote:
> > On Sat, Feb 9, 2019 at 7:03 AM Uros Bizjak  wrote:
> >>
> >> On 2/9/19, H.J. Lu  wrote:
> >> > On Sat, Feb 9, 2019 at 6:09 AM Uros Bizjak  wrote:
> >> >>
> >> >> On 2/9/19, H.J. Lu  wrote:
> >> >> > In 64-bit mode, SSE2 can be used to emulate MMX instructions without
> >> >> > 3DNOW.  We can use SSE2 to support 64-bit vectors.
> >> >> >
> >> >> >   PR target/89021
> >> >> >   * config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
> >> >> >   * config/i386/i386.h (VALID_SSE2_REG_MODE): Allow 64-bit
> >> >> > vector
> >> >> >   modes for TARGET_MMX_WITH_SSE.
> >> >> >   (SSE_REG_MODE_P): Likewise.
> >> >> > ---
> >> >> >  gcc/config/i386/i386.h | 20 ++--
> >> >> >  1 file changed, 18 insertions(+), 2 deletions(-)
> >> >> >
> >> >> > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> >> >> > index 83b025e0cf5..c1df3ec3326 100644
> >> >> > --- a/gcc/config/i386/i386.h
> >> >> > +++ b/gcc/config/i386/i386.h
> >> >> > @@ -585,6 +585,11 @@ extern unsigned char
> >> >> > ix86_arch_features[X86_ARCH_LAST];
> >> >> >
> >> >> >  #define TARGET_FISTTP(TARGET_SSE3 && TARGET_80387)
> >> >> >
> >> >> > +/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
> >> >> > +   FIXME: All 3DNOW patterns needs to be updated with SSE
> >> >> > emulation.
> >> >> > */
> >> >> > +#define TARGET_MMX_WITH_SSE \
> >> >> > +  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
> >> >> > +
> >> >> >  extern unsigned char x86_prefetch_sse;
> >> >> >  #define TARGET_PREFETCH_SSE  x86_prefetch_sse
> >> >> >
> >> >> > @@ -1143,9 +1148,16 @@ extern const char *host_detect_local_cpu (int
> >> >> > argc,
> >> >> > const char **argv);
> >> >> > || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode
> >> >> > \
> >> >> > || (MODE) == TFmode || (MODE) == V1TImode)
> >> >> >
> >> >> > +/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since
> >> >> > we
> >> >> > +   want to include 8-byte vector modes, like V2SFmode, but not
> >> >> > DImode
> >> >> > +   nor SImode.  */
> >> >>
> >> >> This is strange, since we already allow all MMX modes in SSE
> >> >> registers. Please see ix86_hard_regno_mode_ok, where for SSE_REG_P, we
> >> >> return:
> >> >>
> >> >> return ((TARGET_AVX
> >> >>  && VALID_AVX256_REG_OR_OI_MODE (mode))
> >> >> || VALID_SSE_REG_MODE (mode)
> >> >> || VALID_SSE2_REG_MODE (mode)
> >> >> || VALID_MMX_REG_MODE (mode)
> >> >> || VALID_MMX_REG_MODE_3DNOW (mode));
> >> >>
> >> >> I'd expect that changed VALID_SSE2_REG_MODE affects only
> >> >> ix86_vector_mode_supported_p when MMX is disabled and perhaps
> >> >> ix86_set_reg_reg_cost cost function.
> >> >>
> >> >> Are there any concrete issues when allowing all MMX (including 3DNOW?)
> >> >> modes in VALID_SSE2_REG_MODE?
> >> >
> >> > The problem is with DImode and SImode.  All other vector modes,
> >> > including
> >> > V2SF is OK.  With DImode and SImode, I got following regressions:
> >> >
> >> > FAIL: gcc.dg/ipa/pr77653.c scan-ipa-dump icf "Not unifying; alias
> >> > cannot be created; target is discardable"
> >> > FAIL: gcc.dg/pr39323-3.c scan-assembler .align[ \t]+(268435456|28)[
> >> > \t]*\n
> >> > FAIL: go test misc/cgo/testcarchive
> >> >
> >> >  gcc.dg/pr39323-3.c  is due to
> >> >
> >> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89261
> >> >
> >> > and
> >> >
> >> > /* Decide whether a variable of mode MODE should be 128 bit aligned.
> >> > */
> >> > #define ALIGN_MODE_128(MODE) \
> >> >  ((MODE) == XFmode || SSE_REG_MODE_P (MODE))
> >>
> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> for MMX values and emit unaligned loads/stores when necessary.
> >
> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> > And I don't think we should put DI and SI in them.
>
> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to

Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
We only want 8-byte vector modes here.

> a big-hammer approach of changing wide-used defines like
> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
> changed.

I will give it a try.

-- 
H.J.


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread Uros Bizjak
On 2/9/19, H.J. Lu  wrote:
>> >> Hm, this is a bit worrying, we don't want to introduce ABI
>> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
>> >> for MMX values and emit unaligned loads/stores when necessary.
>> >
>> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
>> > And I don't think we should put DI and SI in them.
>>
>> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
>> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
>> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
>
> Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> We only want 8-byte vector modes here.

Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
the important part is in the addition of (TARGET_MMX_WITH_SSE &&
some_modes). Surely, we don't want to align SImode to 128 bits in
ALIGN_MODE_128.

Uros.

>> a big-hammer approach of changing wide-used defines like
>> SSE_REG_MODE_P and VALID_SSE2_REG_MODE. As an example,
>> ix86_hard_regno_mode_ok already includes all MMX modes for SSE_REG_P,
>> while mentioned ALIGN_MODE_128 would be wrong when SSE_REG_MODE_P is
>> changed.
>
> I will give it a try.
>
> --
> H.J.
>


Re: [PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 10:41 AM Uros Bizjak  wrote:
>
> On 2/9/19, H.J. Lu  wrote:
> >> >> Hm, this is a bit worrying, we don't want to introduce ABI
> >> >> incompatibilites w.r.t. alignment. We still need to be ABI compatible
> >> >> for MMX values and emit unaligned loads/stores when necessary.
> >> >
> >> > We need to audit all usages of SSE_REG_MODE_P and VALID_SSE2_REG_MODE.
> >> > And I don't think we should put DI and SI in them.
> >>
> >> Perhaps we should leave SSE_REG_MODE_P and VALID_SSE2_REG_MODE as they
> >> are and ammend usage sites with e.g. (TARGET_MMX_WITH_SSE &&
> >> VALID_MMX_REG_MODE (...))? This is much more fine-grained comparing to
> >
> > Not VALID_MMX_REG_MODE since it includes SI/DI, but not V2SF.
> > We only want 8-byte vector modes here.
>
> Well, I'm not forcing VALID_MMX_REG_MODE here, it is just an example;
> the important part is in the addition of (TARGET_MMX_WITH_SSE &&
> some_modes). Surely, we don't want to align SImode to 128 bits in
> ALIGN_MODE_128.
>

I am testing this.

-- 
H.J.
From 1a3a4c4d2e133d99c6671788a8475efe39804dbb Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Thu, 24 Jan 2019 08:27:41 -0800
Subject: [PATCH] i386: Allow 64-bit vector modes in SSE registers

In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

	PR target/89021
	* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
	VALID_MMX_WITH_SSE_REG_MODE.
	(ix86_vector_mode_supported_p): Likewise.
	* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
	(TARGET_MMX_WITH_SSE_P): Likewise.
	(VALID_MMX_WITH_SSE_REG_MODE): Likewise.
---
 gcc/config/i386/i386.c |  3 +++
 gcc/config/i386/i386.h | 14 ++
 2 files changed, 17 insertions(+)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..ba02c26c8b2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,6 +40235,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
 	  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 	  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 	  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+	  || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
 	  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
 	units = GET_MODE_SIZE (mode);
 }
@@ -44057,6 +44058,8 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 return true;
+  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
+return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..f75fd426293 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_16BIT	TARGET_CODE16
 #define TARGET_16BIT_P(x)	TARGET_CODE16_P(x)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
@@ -1143,6 +1150,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode	\
|| (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include 8-byte vector modes, like V2SFmode, but not DImode
+   nor SImode.  */
+#define VALID_MMX_WITH_SSE_REG_MODE(MODE)\
+  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode	\
+   || (MODE) == V2SImode || (MODE) == V2SFmode)
+
 #define VALID_SSE2_REG_MODE(MODE)	\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
|| (MODE) == V2DImode || (MODE) == DFmode)
-- 
2.20.1
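
As background for readers who do not follow the i386 port closely, the
emulation idea itself can also be seen at the source level.  A tiny
illustration (not part of the patch series) of doing an 8-byte vector
operation in an SSE register instead of an MMX one:

#include <emmintrin.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  uint8_t a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  uint8_t b[8] = { 10, 20, 30, 40, 50, 60, 70, 80 };
  uint8_t r[8];

  /* Load the 8-byte vectors into the low half of XMM registers, add
     there, and store the low 8 bytes back: no MMX registers (and no
     EMMS) involved.  */
  __m128i va = _mm_loadl_epi64 ((const __m128i *) a);
  __m128i vb = _mm_loadl_epi64 ((const __m128i *) b);
  __m128i vr = _mm_add_epi8 (va, vb);
  _mm_storel_epi64 ((__m128i *) r, vr);

  for (int i = 0; i < 8; i++)
    printf ("%d ", r[i]);
  printf ("\n");
  return 0;
}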



Re: [PATCH][libbacktrace] Declare external backtrace fns noinline

2019-02-09 Thread Ian Lance Taylor
On Fri, Feb 8, 2019 at 9:26 AM Thomas Schwinge  wrote:
>
> On Fri, 8 Feb 2019 10:41:47 +0100, Tom de Vries  wrote:
> > The backtrace functions backtrace_full, backtrace_print and backtrace_simple
> > walk the call stack, but make sure to skip the first entry, in order to skip
> > over the functions themselves, and start the backtrace at the caller of the
> > functions.
> >
> > When compiling with -flto, the functions may be inlined, causing them to 
> > skip
> > over the caller instead.
>
> So, when recently working on the OpenACC Profiling Interface
> implementation in libgomp, where I'm using libbacktrace to figure out the
> caller of certain libgomp functions, I recently wondered about the very
> same issue, that we reliably have to skip a few initial frames.
>
> So, "noinline" is how to do that reliably...  ;-/ That might be
> non-obvious for the casual reader, so they might not understand...
>
> > Fix this by declaring the functions with __attribute__((noinline)).
>
> ... this alone.
>
> I'd suggest to have a common "#define LIBBACKTRACE_NOINLINE [...]" (or
> similar), together with the explanatory comment given above, and use that
> at the respective definition (or declaration?) sites.  Can that go into
> the public libbacktrace "*.h" file, so that it can also be used
> elsewhere, as described above?
>
> If you agree, want me to prepare a patch?

I think that at least for backtrace_full and backtrace_print we are
arguably looking at the SKIP parameter in the wrong place.  We
shouldn't look at it in unwind before calling backtrace_pcinfo.  We
should count the inlined functions found by backtrace_pcinfo against
the SKIP parameter.

Ian
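
A rough sketch of that idea, doing the skipping in a wrapper callback where
inlined entries are visible, rather than in the unwinder.  The helper names
are made up for illustration; this is not the proposed libbacktrace change:

#include <backtrace.h>
#include <stdint.h>

struct skip_state
{
  int to_skip;                 /* entries (including inlined ones) to skip */
  backtrace_full_callback user_cb;
  void *user_data;
};

static int
skip_full_callback (void *data, uintptr_t pc, const char *filename,
                    int lineno, const char *function)
{
  struct skip_state *s = (struct skip_state *) data;
  if (s->to_skip > 0)
    {
      /* backtrace_pcinfo reports inlined frames as separate entries,
         so they count against SKIP here.  */
      --s->to_skip;
      return 0;                /* keep walking */
    }
  return s->user_cb (s->user_data, pc, filename, lineno, function);
}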


Re: [poweprc] RFA: patch changing expected code generation for test vsx-simode2.c

2019-02-09 Thread Vladimir Makarov



On 2019-02-09 8:28 a.m., Segher Boessenkool wrote:

> Hi Vlad,
>
> On Fri, Feb 08, 2019 at 02:18:40PM -0500, Vladimir Makarov wrote:
>> Recently I committed a patch solving
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88560
>>
>> The patch resulted in test vsx-simode2.c failure.  Here is the
>> difference in generated code:
>>
>> @@ -13,9 +13,8 @@ foo:
>>  .LFB0:
>>     .cfi_startproc
>>     std 3,-16(1)
>> -   ori 2,2,0
>> -   lwz 9,-12(1)
>> -   mtvsrwz 32,9
>> +   addi 9,1,-12
>> +   lxsiwzx 32,0,9
>>
>> The new version is one insn less.  So I propose the following patch
>> changing the expected code generation.
>>
>> Is it ok to commit it?
>
> This is not okay.  The test is supposed to test that we get a direct
> move instruction instead of going via memory.  But, trunk does the
> std+lwz as you see; this is because IRA decides this pseudo needs to
> go to memory:
>
>  r125: preferred NO_REGS, alternative NO_REGS, allocno NO_REGS
>
>    a1(r125,l0) costs: BASE_REGS:14004,14004 GENERAL_REGS:14004,14004
> LINK_REGS:24010,24010 CTR_REGS:24010,24010 LINK_OR_CTR_REGS:24010,24010
> SPEC_OR_GEN_REGS:24010,24010 MEM:12000,12000

Thank you for informing me what we expect from the test.

Apparently, the test did not catch what it was supposed to catch.

Although the new generated code is better than the old one (2 insns vs. 3
insns, one insn being a load in both cases), I see there is no point in
this patch.  Simply, the test did not fail before even though the code
was bad.  Now the test fails as it should.

> Is there something wrong in our tuning?

I have no idea.  It needs more investigation.

> For reference, 7 and 8 do just
>
>  mtvsrwz 32,3
> #APP
>   # 10 "vsx-simode2.c" 1
>  xxlor 32,32,32  # v, v constraints
>   # 0 "" 2
> #NO_APP
>  mfvsrwz 3,32
>  blr
>
> which is the expected code.  The test really should check there is no
> memory used, or that there are no extra insns other than the 4 expected.
>
> Your patch seems to be fine btw, this breakage was really there already,
> just not detected by the testcase.

Yes, the patch is fine in the sense that the code is a bit better.  But
still, the generated code is bad and the test started to fail.  I don't
think we need to change the test.  The original test now reminds us to
fix the bad code generation.




Re: [PATCH][libbacktrace] Add btest_lto

2019-02-09 Thread Ian Lance Taylor via gcc-patches
On Fri, Feb 8, 2019 at 1:42 AM Tom de Vries  wrote:
>
> Add libbacktrace test-case using -flto.
>
> OK for trunk?
>
> Thanks,
> - Tom
>
> [libbacktrace] Add btest_lto
>
> 2019-02-08  Tom de Vries  
>
> * Makefile.am (BUILDTESTS): Add btest_lto.
> * Makefile.in: Regenerate.
> * btest.c (test1, f2, f3, test3, f22, f23): Declare with
> __attribute__((noclone)).

This seems more or less fine, but are there any platforms where -flto
doesn't work?  The docs say "Link-time optimization does not work well
with generation of debugging information on systems other than those
using a combination of ELF and DWARF."  For example, does this test
pass on Windows?

Ian


Fix localization of comdats

2019-02-09 Thread Jan Hubicka
Hi,
this patch fixes undefined symbols when a COMDAT group contains some
symbols with LDPR_PREVAILING_DEF_IRONLY_EXP and others with
LDPR_PREVAILING_DEF.  In this case we cannot optimize out the symbol
defined with LDPR_PREVAILING_DEF because it will be used by the linker
for second-stage linking.

We can still break up the comdat and privatize the
LDPR_PREVAILING_DEF_IRONLY_EXP symbols, knowing that their address is not
taken and duplication of these symbols within shared libraries is safe.

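For context, a tiny C++ case that produces the kind of comdat group involved
(illustrative only, not the failing input):

// An inline (vague-linkage) constructor is emitted as two symbols, the
// complete- and base-object variants, which GCC typically places in a
// single comdat group; with LTO the linker plugin may resolve members of
// that group differently (IRONLY_EXP vs. plain PREVAILING_DEF), which is
// the mix the patch handles.
struct S
{
  S () {}
};

S
make_s ()
{
  return S ();
}
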
Bootstrapped/regtested x86_64-linux. Committed.

Honza

* ipa-visibility.c (localize_node): Also do not localize
LDPR_PREVAILING_DEF_IRONLY_EXP.
Index: ipa-visibility.c
===
--- ipa-visibility.c(revision 268722)
+++ ipa-visibility.c(working copy)
@@ -539,7 +539,8 @@ localize_node (bool whole_program, symta
  symbols.  In this case we can privatize all hidden symbol but we need
  to keep non-hidden exported.  */
   if (node->same_comdat_group
-  && node->resolution == LDPR_PREVAILING_DEF_IRONLY)
+  && (node->resolution == LDPR_PREVAILING_DEF_IRONLY
+ || node->resolution == LDPR_PREVAILING_DEF_IRONLY_EXP))
 {
   symtab_node *next;
   for (next = node->same_comdat_group;


Re: [poweprc] RFA: patch changing expected code generation for test vsx-simode2.c

2019-02-09 Thread Segher Boessenkool
On Sat, Feb 09, 2019 at 04:13:57PM -0500, Vladimir Makarov wrote:
> 
> On 2019-02-09 8:28 a.m., Segher Boessenkool wrote:
> >Hi Vlad,
> >
> >On Fri, Feb 08, 2019 at 02:18:40PM -0500, Vladimir Makarov wrote:
> >>Recently I committed a patch solving
> >>
> >>https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88560
> >>
> >>The patch resulted in test vsx-simode2.c failure.  Here is the
> >>difference in generated code:
> >>
> >>@@ -13,9 +13,8 @@ foo:
> >>  .LFB0:
> >>     .cfi_startproc
> >>     std 3,-16(1)
> >>-   ori 2,2,0
> >>-   lwz 9,-12(1)
> >>-   mtvsrwz 32,9
> >>+   addi 9,1,-12
> >>+   lxsiwzx 32,0,9
> >>
> >>The new version is one insn less.  So I propose the following patch
> >>changing the expected code generation.
> >>
> >>Is it ok to commit it?
> >This is not okay.  The test is supposed to test that we get a direct
> >move instruction instead of going via memory.  But, trunk does the
> >std+lwz as you see; this is because IRA decides this pseudo needs to
> >go to memory:
> >
> > r125: preferred NO_REGS, alternative NO_REGS, allocno NO_REGS
> >
> >   a1(r125,l0) costs: BASE_REGS:14004,14004 GENERAL_REGS:14004,14004 
> >   LINK_REGS:24010,24010 CTR_REGS:24010,24010 LINK_OR_CTR_REGS:24010,24010 
> >   SPEC_OR_GEN_REGS:24010,24010 MEM:12000,12000
> 
> Thank you for informing me what we expect from the test.
> 
> Apparently, the test did not catch what was supposed to be catched.

Yes, exactly.

> Although the new generated code is better than the old one (2 insns vs 3 
> insns, one insn is a load in the both cases), I see there is no sense 
> for this patch.  Simply, the test did not fail before even if the code 
> was bad.  Now the test fails as it should be.
> 
> >Is there something wrong in our tuning?
> >
> I have no idea.  It needs more investigation.

Where do the above costs come from?  Regs 14k, mem 12k.

> >For reference, 7 and 8 do just
> >
> > mtvsrwz 32,3
> >#APP
> >  # 10 "vsx-simode2.c" 1
> > xxlor 32,32,32  # v, v constraints
> >  # 0 "" 2
> >#NO_APP
> > mfvsrwz 3,32
> > blr
> >
> >which is the expected code.  The test really should check there is no
> >memory used, or that there are no extra insns other than the 4 expected.
> >
> >Your patch seems to be fine btw, this breakage was really there already,
> >just not detected by the testcase.
> >
> Yes, the patch is fine in a sense that the code is a bit better.

If we decided to assign memory to this pseudo, it now uses better code
for that.  (Not really fewer insns though, the ori 2,2,0 went missing,
and that is still required for good performance on Power8 at least).

> But 
> still the generated code is bad and the test started to fail. I don't 
> think we need to change the test.  The original test now reminds us to 
> fix the bad code generation.

Yeah.  And I'll improve the test a bit so it would have failed earlier.
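
Roughly the kind of tightening meant here (a sketch with guessed patterns and
counts, not the actual vsx-simode2.c update):

/* { dg-final { scan-assembler-not {\mstd\M} } } */
/* { dg-final { scan-assembler-not {\mlwz\M} } } */
/* { dg-final { scan-assembler-times {\mmtvsrwz\M} 1 } } */
/* { dg-final { scan-assembler-times {\mmfvsrwz\M} 1 } } */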

It didn't fail before because combine used to usurp the RA job, doing some
kind of greedy register allocation, increasing the lifetime of argument
registers.

I opened PR89271 (and put you on cc:).


Segher


Do not use TYPE_NEEDS_CONSTRUCTING in may_be_aliased

2019-02-09 Thread Jan Hubicka
Hi,
this patch drops the test for TYPE_NEEDS_CONSTRUCTING in tree.h and instead
sets TREE_READONLY to 0 for external vars of such types. For vars
declared locally we drop TREE_READONLY while expanding the constructor.
Note that I have tried to drop TREE_READONLY always (not only for
DECL_EXTERNAL) and it breaks a testcase where the constructor is constexpr.
So perhaps this is unnecessarily conservative for external vars having a
constexpr ctor, and perhaps it is better done by the frontend.

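A small stand-alone illustration (made-up names, not the PR testcase) of why
TREE_READONLY cannot be trusted for such an external declaration:

// tu1.cc: dynamic initialization here may read `cfg' before (or while)
// cfg's own constructor runs in tu2.cc, so the optimizer must not treat
// the external, still-to-be-constructed object as a known readonly value.
struct Config
{
  int level;
  Config () : level (42) {}   // the type needs constructing
};

extern const Config cfg;      // defined and dynamically constructed elsewhere
int snapshot = cfg.level;     // initialization-order hazard: may observe 0

// tu2.cc would simply contain:
//   const Config cfg;
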
Curiously enough, this does not fix the actual testcase in PR88677.

Bootstrapped/regtested x86_64-linux, makes sense?

PR lto/88777
* ipa-visibility.c (function_and_variable_visibility): Drop
TREE_READONLY flag for variables where type needs constructing.
* tree.h (may_be_aliased): Do not test TYPE_NEEDS_CONSTRUCTING.
Index: ipa-visibility.c
===
--- ipa-visibility.c(revision 268722)
+++ ipa-visibility.c(working copy)
@@ -810,6 +811,13 @@ function_and_variable_visibility (bool w
  || ! (ADDR_SPACE_GENERIC_P
(TYPE_ADDR_SPACE (TREE_TYPE (vnode->decl))
DECL_COMMON (vnode->decl) = 0;
+  /* For symbols declared locally we clear TREE_READONLY when emitting
+the construtor (if one is needed).  For external declarations we can
+not safely assume that the type is readonly because we may be called
+during its construction.  */
+  if (TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (vnode->decl))
+ && DECL_EXTERNAL (vnode->decl))
+   TREE_READONLY (vnode->decl) = 0;
   if (vnode->weakref)
optimize_weakref (vnode);
 }
Index: tree.h
===
--- tree.h  (revision 268722)
+++ tree.h  (working copy)
@@ -5371,8 +5371,7 @@ may_be_aliased (const_tree var)
  || DECL_EXTERNAL (var)
  || TREE_ADDRESSABLE (var))
  && !((TREE_STATIC (var) || TREE_PUBLIC (var) || DECL_EXTERNAL (var))
-  && ((TREE_READONLY (var)
-   && !TYPE_NEEDS_CONSTRUCTING (TREE_TYPE (var)))
+  && (TREE_READONLY (var)
   || (TREE_CODE (var) == VAR_DECL
   && DECL_NONALIASED (var);
 }


Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-09 Thread Ian Lance Taylor
On Fri, Feb 8, 2019 at 3:07 PM Matthias Klose  wrote:
>
> On 07.02.19 06:04, Ian Lance Taylor wrote:
> > On Thu, Jan 31, 2019 at 7:40 AM Svante Signell  
> > wrote:
> >>
> >> As advised by the Debian gcc maintainer Matthias Klose and golang
> >> developer Ian Lance Taylor I'm re-submitting the patches for
> >> the port of gccgo to GNU/Hurd again. Now GOOS value is changed from gnu
> >> to hurd as requested.
> >>
> >> The 12 patches are:
> >> src_libgo_build.diff
> >> src_libgo_runtime.diff
> >> src_libgo_go_crypto.diff
> >> src_libgo_go_internal.diff
> >> src_libgo_go_net.diff
> >> src_libgo_go_os.diff
> >> src_libgo_go_runtime.diff
> >> src_libgo_go_syscall.diff
> >> src_libgo_go_test.diff
> >>
> >> src_libgo_testsuite_gotest.diff
> >> add-hurd-to-libgo-headers.diff
> >> add-hurd-to-libgo-test-headers.diff
> >
> > Thanks.  I've committed versions of all of these patches other than
> > src_libgo_testsuite_gotest.diff.  I omitted that one because as far as
> > I can tell it won't work.  While the original code may not run on the
> > Hurd, the modified version won't work.
> >
> > I made various changes, and I'm sure I broke some things.  Take a look
> > at GCC trunk and see how it seems.
>
> libtool: compile:  /<>/build/./gcc/gccgo
> -B/<>/build/./gcc/ -B/usr/i686-gnu/bin/ -B/usr/i6
> 86-gnu/lib/ -isystem /usr/i686-gnu/include -isystem /usr/i686-gnu/sys-include
> -isystem /<>/build/sys-in
> clude -fchecking=1 -minline-all-stringops -O2 -g -I . -c -fgo-pkgpath=syscall
> -fPIC -o .libs/syscall.o
> gccgo: fatal error: no input files
> compilation terminated.
> Makefile:2844: recipe for target 'syscall.lo' failed
> make[6]: *** [syscall.lo] Error 1
> make[6]: Leaving directory '/<>/build/i686-gnu/libgo'
> Makefile:2242: recipe for target 'all-recursive' failed
> make[5]: *** [all-recursive] Error 1
> make[5]: Leaving directory '/<>/build/i686-gnu/libgo'
> Makefile:1167: recipe for target 'all' failed
> make[4]: *** [all] Error 2
> make[4]: Leaving directory '/<>/build/i686-gnu/libgo'
> Makefile:20078: recipe for target 'all-target-libgo' failed
> make[3]: *** [all-target-libgo] Error 2
> make[3]: Leaving directory '/<>/build'
> Makefile:24129: recipe for target 'bootstrap' failed
> make[2]: *** [bootstrap] Error 2

What are the lines before that in the log?  For some reason libtool is
being invoked with no source files.  The lines before the failing line
should show an invocation of match.sh that determines the source
files.

Ian


Re: [PATCH][libbacktrace] Declare external backtrace fns noinline

2019-02-09 Thread Tom de Vries
On 09-02-19 22:07, Ian Lance Taylor wrote:
> On Fri, Feb 8, 2019 at 9:26 AM Thomas Schwinge  
> wrote:
>>
>> On Fri, 8 Feb 2019 10:41:47 +0100, Tom de Vries  wrote:
>>> The backtrace functions backtrace_full, backtrace_print and backtrace_simple
>>> walk the call stack, but make sure to skip the first entry, in order to skip
>>> over the functions themselves, and start the backtrace at the caller of the
>>> functions.
>>>
>>> When compiling with -flto, the functions may be inlined, causing them to 
>>> skip
>>> over the caller instead.
>>
>> So, when recently working on the OpenACC Profiling Interface
>> implementation in libgomp, where I'm using libbacktrace to figure out the
>> caller of certain libgomp functions, I recently wondered about the very
>> same issue, that we reliably have to skip a few initial frames.
>>
>> So, "noinline" is how to do that reliably...  ;-/ That might be
>> non-obvious for the casual reader, so they might not understand...
>>
>>> Fix this by declaring the functions with __attribute__((noinline)).
>>
>> ... this alone.
>>
>> I'd suggest to have a common "#define LIBBACKTRACE_NOINLINE [...]" (or
>> similar), together with the explanatory comment given above, and use that
>> at the respective definition (or declaration?) sites.  Can that go into
>> the public libbacktrace "*.h" file, so that it can also be used
>> elsewhere, as described above?
>>
>> If you agree, want me to prepare a patch?
> 
> I think that at least for backtrace_full and backtrace_print we are
> arguably looking at the SKIP parameter in the wrong place.  We
> shouldn't look at it in unwind before calling backtrace_pcinfo.  We
> should count the inlined functions found by backtrace_pcinfo against
> the SKIP parameter.

That change makes sense to me. It would stabilise the cut-off point
independent of whether inlining happens or not.

Though the documentation of both functions lists SKIP as "SKIP is the
number of frames to skip", and inlined functions do not have their own
frame, so AFAIU changing things in the way described above would require
changing this documentation as well.

Btw, I think we'd still need the noinline attributes, in order to make
libbacktrace behave the same with and without debug info.

Thanks,
- Tom


Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-09 Thread Svante Signell
On Sat, 2019-02-09 at 14:40 -0800, Ian Lance Taylor wrote:
> On Fri, Feb 8, 2019 at 3:07 PM Matthias Klose  wrote:
> > On 07.02.19 06:04, Ian Lance Taylor wrote:

> What are the lines before that in the log?  For some reason libtool is
> being invoke with no source files.  The lines before the failing line
> should show an invocation of match.sh that determines the source
> files.

Thanks for your job upstreaming the patches!

I've found some problems. The current problem is with the mksysinfo.sh patch,
but there are also some other things missing. New patches will be submitted
tomorrow.

Thanks!



Re: [PATCH][libbacktrace] Add btest_lto

2019-02-09 Thread Tom de Vries
On 09-02-19 22:49, Ian Lance Taylor wrote:
> On Fri, Feb 8, 2019 at 1:42 AM Tom de Vries  wrote:
>>
>> Add libbacktrace test-case using -flto.
>>
>> OK for trunk?
>>
>> Thanks,
>> - Tom
>>
>> [libbacktrace] Add btest_lto
>>
>> 2019-02-08  Tom de Vries  
>>
>> * Makefile.am (BUILDTESTS): Add btest_lto.
>> * Makefile.in: Regenerate.
>> * btest.c (test1, f2, f3, test3, f22, f23): Declare with
>> __attribute__((noclone)).
> 
> This seems more or less fine, but are there any platforms where -flto
> doesn't work?  The docs say "Link-time optimization does not work well
> with generation of debugging information on systems other than those
> using a combination of ELF and DWARF." 

So, we could require HAVE_ELF for this testcase.

> For example, does this test
> pass on Windows?

I don't know, I don't have a windows setup.

Thanks,
- Tom


Re: [PATCH][libbacktrace] Add btest_lto

2019-02-09 Thread Ian Lance Taylor
On Sat, Feb 9, 2019 at 2:59 PM Tom de Vries  wrote:
>
> On 09-02-19 22:49, Ian Lance Taylor wrote:
> > On Fri, Feb 8, 2019 at 1:42 AM Tom de Vries  wrote:
> >>
> >> Add libbacktrace test-case using -flto.
> >>
> >> OK for trunk?
> >>
> >> Thanks,
> >> - Tom
> >>
> >> [libbacktrace] Add btest_lto
> >>
> >> 2019-02-08  Tom de Vries  
> >>
> >> * Makefile.am (BUILDTESTS): Add btest_lto.
> >> * Makefile.in: Regenerate.
> >> * btest.c (test1, f2, f3, test3, f22, f23): Declare with
> >> __attribute__((noclone)).
> >
> > This seems more or less fine, but are there any platforms where -flto
> > doesn't work?  The docs say "Link-time optimization does not work well
> > with generation of debugging information on systems other than those
> > using a combination of ELF and DWARF."
>
> So, we could require HAVE_ELF for this testcase.

Works for me.  OK with that change.  Thanks.

Ian


[PATCH] Fix recent tree-if-conv ICE (PR tree-optimization/89268)

2019-02-09 Thread Jakub Jelinek
Hi!

When tree_if_conversion is called from within the vectorizer (for masked
epilogues), preds is NULL, so we shouldn't be pushing anything there,
the caller doesn't care anyway.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-02-09  Jakub Jelinek  

PR tree-optimization/89268
* tree-if-conv.c (version_loop_for_if_conversion): Push to preds only
if preds is non-NULL.

* gcc.dg/vect/pr89268.c: New test.

--- gcc/tree-if-conv.c.jj   2019-02-08 20:00:40.774827920 +0100
+++ gcc/tree-if-conv.c  2019-02-09 17:35:36.995782510 +0100
@@ -2760,7 +2760,8 @@ version_loop_for_if_conversion (struct l
   new_loop->force_vectorize = false;
   gsi = gsi_last_bb (cond_bb);
   gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
-  preds->safe_push (g);
+  if (preds)
+preds->safe_push (g);
   gsi_insert_before (&gsi, g, GSI_SAME_STMT);
   update_ssa (TODO_update_ssa);
   return new_loop;
--- gcc/testsuite/gcc.dg/vect/pr89268.c.jj  2019-02-09 17:53:15.099481437 
+0100
+++ gcc/testsuite/gcc.dg/vect/pr89268.c 2019-02-09 17:53:50.900896069 +0100
@@ -0,0 +1,7 @@
+/* PR tree-optimization/89268 */
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_condition } */
+/* { dg-additional-options "-fno-trapping-math --param vect-epilogues-nomask=1" } */
+/* { dg-additional-options "-mavx512ifma -mtune=intel" { target x86_64-*-* i?86-*-* } } */
+
+#include "pr79887.c"

Jakub


Re: Fix odr ICE on Ada LTO

2019-02-09 Thread H.J. Lu
On Sat, Feb 9, 2019 at 10:10 AM Jan Hubicka  wrote:
>
> Hi,
> this patch fixes an ICE in free_lang_data compiling lto8.adb.
> The fix is a bit symptomatic because type_with_linkage_p should return
> false for Ada types.  Perhaps adding an explicit flag to DECL_NAME would
> make sense, but it can wait for next stage1.
>
> The fix works because at this stage of free_lang_data all mangled names
> must be computed, and thus it is cheaper to test the presence of
> DECL_ASSEMBLER_NAME anyway.
>
> Bootstrapped/regtested x86_64-linux, committed.
> PR lto/87957
> * tree.c (fld_simplified_type_name): Use DECL_ASSEMBLER_NAME_SET_P
> instead of type_with_linkage.
> Index: tree.c
> ===
> --- tree.c  (revision 268722)
> +++ tree.c  (working copy)
> @@ -5152,7 +5152,8 @@ fld_simplified_type_name (tree type)
>/* Drop TYPE_DECLs in TYPE_NAME in favor of the identifier in the
>   TYPE_DECL if the type doesn't have linkage.
>   this must match fld_  */
> -  if (type != TYPE_MAIN_VARIANT (type) || ! type_with_linkage_p (type))
> +  if (type != TYPE_MAIN_VARIANT (type)
> +  || !DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (type)))
>  return DECL_NAME (TYPE_NAME (type));
>return TYPE_NAME (type);
>  }

This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89272

-- 
H.J.


[PATCH 02/43] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-09 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in the SSE register.  Only an SSE
register source operand is allowed.
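
As a rough intrinsics-level sketch of the idea (not part of the patch; the
helper name is hypothetical, and both 64-bit inputs are assumed to live in
the low halves of XMM registers):

#include <emmintrin.h>

static __m128i
packsswb_mmx_with_sse (__m128i a, __m128i b)
{
  /* SSE2 packsswb: dword 0 is the pack of the low half of a, dword 2 is
     the pack of the low half of b; dwords 1 and 3 are packed garbage.  */
  __m128i t = _mm_packs_epi16 (a, b);
  /* Move bits 64:95 (dword 2) to bits 32:63 (dword 1), so the 64-bit
     MMX result ends up in the low half of the register.  */
  return _mm_shuffle_epi32 (t, _MM_SHUFFLE (0, 0, 2, 0));
}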

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
---
 gcc/config/i386/i386-protos.h |  3 ++
 gcc/config/i386/i386.c| 54 
 gcc/config/i386/i386.md   | 12 +++
 gcc/config/i386/mmx.md| 67 +++
 4 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2d600173917..bb96a420a85 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ba02c26c8b2..2af7f891350 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+   nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_smode, REGNO (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+   op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4a32144a71a..72685107fc0 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string 
"base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 (eq_attr "isa" "x64_sse2")
@@ -830,6 +833,15 @@
 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+(eq_attr "mmx_isa" "native")
+  (symbol_ref

[PATCH 01/43] i386: Allow 64-bit vector modes in SSE registers

2019-02-09 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support 64-bit vectors.

PR target/89021
* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
VALID_MMX_WITH_SSE_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
(TARGET_MMX_WITH_SSE_P): Likewise.
(VALID_MMX_WITH_SSE_REG_MODE): Likewise.
---
 gcc/config/i386/i386.c |  3 +++
 gcc/config/i386/i386.h | 14 ++
 2 files changed, 17 insertions(+)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..ba02c26c8b2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,6 +40235,7 @@ ix86_set_reg_reg_cost (machine_mode mode)
  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
+ || (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
  || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
 }
@@ -44057,6 +44058,8 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
 return true;
+  if (TARGET_MMX_WITH_SSE && VALID_MMX_WITH_SSE_REG_MODE (mode))
+return true;
   if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..3ae0900caa0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_16BIT   TARGET_CODE16
 #define TARGET_16BIT_P(x)  TARGET_CODE16_P(x)
 
+/* In 64-bit mode, SSE2 can be used to emulate MMX instructions.
+   FIXME: All 3DNOW patterns needs to be updated with SSE emulation.  */
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x) && !TARGET_3DNOW_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
@@ -1143,6 +1150,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode   \
|| (MODE) == TFmode || (MODE) == V1TImode)
 
+/* NB: Don't use VALID_MMX_REG_MODE with TARGET_MMX_WITH_SSE since we
+   want to include only 8-byte vector modes, like V2SFmode, but not
+   DImode nor SImode.  */
+#define VALID_MMX_WITH_SSE_REG_MODE(MODE)  \
+  ((MODE) == V1DImode || (MODE) == V8QImode || (MODE) == V4HImode  \
+   || (MODE) == V2SImode || (MODE) == V2SFmode)
+
 #define VALID_SSE2_REG_MODE(MODE)  \
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
|| (MODE) == V2DImode || (MODE) == DFmode)
-- 
2.20.1



[PATCH 03/43] i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX

2019-02-09 Thread H.J. Lu
Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX.  For MMX punpckhXX,
move bits 64:127 to bits 0:63 in the SSE register.  Only an SSE register
source operand is allowed.
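
As a rough intrinsics-level sketch of the punpckhbw case (not part of the
patch; the helper name is hypothetical, and both 64-bit inputs are assumed
to live in the low halves of XMM registers):

#include <emmintrin.h>

static __m128i
punpckhbw_mmx_with_sse (__m128i a, __m128i b)
{
  /* SSE punpcklbw interleaves the 8 low bytes of each operand; the
     bytes MMX punpckhbw would produce end up in bits 64:127.  */
  __m128i t = _mm_unpacklo_epi8 (a, b);
  /* Move bits 64:127 down to bits 0:63.  */
  return _mm_shuffle_epi32 (t, _MM_SHUFFLE (0, 0, 3, 2));
}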

PR target/89021
* config/i386/i386-protos.h (ix86_split_mmx_punpck): New
prototype.
* config/i386/i386.c (ix86_split_mmx_punpck): New function.
* config/i386/mmx.m (mmx_punpckhbw): Changed to
define_insn_and_split to support SSE emulation.
(mmx_punpcklbw): Likewise.
(mmx_punpckhwd): Likewise.
(mmx_punpcklwd): Likewise.
(mmx_punpckhdq): Likewise.
(mmx_punpckldq): Likewise.
---
 gcc/config/i386/i386-protos.h |   1 +
 gcc/config/i386/i386.c|  77 +++
 gcc/config/i386/mmx.md| 138 ++
 3 files changed, 168 insertions(+), 48 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index bb96a420a85..dc7fc38d8e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -202,6 +202,7 @@ extern rtx ix86_split_stack_guard (void);
 
 extern void ix86_move_vector_high_sse_to_mmx (rtx);
 extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+extern void ix86_split_mmx_punpck (rtx[], bool);
 
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2af7f891350..cf7a71bcc02 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -20009,6 +20009,83 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   ix86_move_vector_high_sse_to_mmx (op0);
 }
 
+/* Split MMX punpcklXX/punpckhXX with SSE punpcklXX.  */
+
+void
+ix86_split_mmx_punpck (rtx operands[], bool high_p)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+  machine_mode mode = GET_MODE (op0);
+  rtx mask;
+  /* The corresponding SSE mode.  */
+  machine_mode sse_mode, double_sse_mode;
+
+  switch (mode)
+{
+case E_V8QImode:
+  sse_mode = V16QImode;
+  double_sse_mode = V32QImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (16,
+ GEN_INT (0), GEN_INT (16),
+ GEN_INT (1), GEN_INT (17),
+ GEN_INT (2), GEN_INT (18),
+ GEN_INT (3), GEN_INT (19),
+ GEN_INT (4), GEN_INT (20),
+ GEN_INT (5), GEN_INT (21),
+ GEN_INT (6), GEN_INT (22),
+ GEN_INT (7), GEN_INT (23)));
+  break;
+
+case E_V4HImode:
+  sse_mode = V8HImode;
+  double_sse_mode = V16HImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (8,
+ GEN_INT (0), GEN_INT (8),
+ GEN_INT (1), GEN_INT (9),
+ GEN_INT (2), GEN_INT (10),
+ GEN_INT (3), GEN_INT (11)));
+  break;
+
+case E_V2SImode:
+  sse_mode = V4SImode;
+  double_sse_mode = V8SImode;
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4,
+ GEN_INT (0), GEN_INT (4),
+ GEN_INT (1), GEN_INT (5)));
+  break;
+
+default:
+  gcc_unreachable ();
+}
+
+  /* Generate SSE punpcklXX.  */
+  rtx dest = gen_rtx_REG (sse_mode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_mode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_mode, REGNO (op2));
+
+  op1 = gen_rtx_VEC_CONCAT (double_sse_mode, op1, op2);
+  op2 = gen_rtx_VEC_SELECT (sse_mode, op1, mask);
+  rtx insn = gen_rtx_SET (dest, op2);
+  emit_insn (insn);
+
+  if (high_p)
+{
+  /* Move bits 64:127 to bits 0:63.  */
+  mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (2), GEN_INT (3),
+ GEN_INT (0), GEN_INT (0)));
+  dest = gen_rtx_REG (V4SImode, REGNO (dest));
+  op1 = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  insn = gen_rtx_SET (dest, op1);
+  emit_insn (insn);
+}
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 10096f7cab7..ff9c5dc8507 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1089,87 +1089,129 @@
(set_attr "type" "mmxshft,sselog,sselog")
(set_attr "mode" "DI,TI,TI")])
 
-(define_insn "mmx_punpckhbw"
-  [(set (match_operand:V8QI 0 "register_operand" "=y")
+(define_insn_and_split "mmx_punpckhbw"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yv")
(vec_select:V8QI
 

[PATCH 00/43] V3: Emulate MMX intrinsics with SSE

2019-02-09 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE \
  (TARGET_64BIT && TARGET_SSE2 && !TARGET_3DNOW)

SSE emulation is disabled for 3DNOW since 3DNOW patterns haven't been
updated with SSE emulation.

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string 
"base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most MMX instructions have equivalent SSE versions, and the results of some
SSE versions need to be reshuffled into the right order for MMX.  There are
a couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handling the unmapped bits 64:127 at the memory address
by adjusting the source and mask operands together with the memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is only available
in 64-bit mode (see the sketch after these cases).

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear bit 4 in each byte of the shuffle control mask.

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of the destination XMM register.
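
As a minimal sketch of case 2 (not part of the patches; the helper name is
hypothetical, and _mm_stream_si64 is only declared in 64-bit mode):

#include <emmintrin.h>

static void
movntq_mmx_with_sse (long long *dst, long long src)
{
  /* A 64-bit non-temporal store, like MMX movntq, without touching
     MMX state.  */
  _mm_stream_si64 (dst, src);
}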

Tests are also added to check each SSE emulation of MMX intrinsics.

With SSE emulation in 64-bit mode, the 8-byte vectorizer is enabled with SSE2.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (43):
  i386: Allow 64-bit vector modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr3/3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Add tests for MMX intrinsic emulations with SSE
  i386: Also enable SSSE3 __m64 tests in 64-bit mode
  i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE
  i386: Implement V2SF add/sub/mul with SSE
  i386: Implement V2SF <-> V2SI conversions with SSE
  i386: Implement V2SF comparisons with SSE

 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 206 +++-
 gcc/config/i386/i386.h|  14 +
 gcc/config/i386/i386.md   |  15 +-
 gcc/config/i386/mmintrin.h|  10 +-
 gcc/config/i386/mmx.md| 962 +-
 gcc/config/i386/sse.md| 460 +++--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c   |   2 +-
 gcc/testsuite/gcc.target/i386/mmx-vals.h  |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr89028-1.c |  10 +
 gcc/testsuite/gcc.target/i386/pr89028-10.c|  39 +
 gcc/testsuite/gcc.target/i386/pr89028-11.c|  39 +
 gcc/testsuite/gcc.target/i386/pr89028-12.c|  39 +
 gcc/testsuite/gcc.target/i38

[PATCH 04/43] i386: Emulate MMX plusminus/sat_plusminus with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX plusminus/sat_plusminus with SSE.  Only an SSE register source
operand is allowed.
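
Element-wise operations like these need no reshuffle.  A minimal sketch of
the signed saturating add (hypothetical helper; inputs assumed in the low
halves of XMM registers):

#include <emmintrin.h>

static __m128i
paddsb_mmx_with_sse (__m128i a, __m128i b)
{
  /* SSE2 paddsb adds all 16 bytes with signed saturation; the low
     8 bytes are exactly the MMX paddsb result, the rest is don't-care.  */
  return _mm_adds_epi8 (a, b);
}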

PR target/89021
* config/i386/mmx.md (3): New.
(*mmx_3): Changed to define_insn_and_split
to support SSE emulation.
(*mmx_3): Likewise.
(mmx_3): Also allow TARGET_MMX_WITH_SSE.
---
 gcc/config/i386/mmx.md | 49 +-
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ff9c5dc8507..32920343fcf 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -698,34 +698,53 @@
   "TARGET_MMX || (TARGET_SSE2 && mode == V1DImode)"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
+(define_expand "3"
+  [(set (match_operand:MMXMODEI 0 "register_operand")
+   (plusminus:MMXMODEI
+ (match_operand:MMXMODEI 1 "nonimmediate_operand")
+ (match_operand:MMXMODEI 2 "nonimmediate_operand")))]
+  "TARGET_MMX_WITH_SSE"
+  "ix86_fixup_binary_operands_no_copy (, mode, operands);")
+
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI8 0 "register_operand" "=y,x,Yv")
 (plusminus:MMXMODEI8
- (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym")))]
-  "(TARGET_MMX || (TARGET_SSE2 && mode == V1DImode))
+ (match_operand:MMXMODEI8 1 "nonimmediate_operand" "0,0,Yv")
+ (match_operand:MMXMODEI8 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "(TARGET_MMX
+|| TARGET_MMX_WITH_SSE
+|| (TARGET_SSE2 && mode == V1DImode))
&& ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODE12 0 "register_operand")
(sat_plusminus:MMXMODE12
  (match_operand:MMXMODE12 1 "nonimmediate_operand")
  (match_operand:MMXMODE12 2 "nonimmediate_operand")))]
-  "TARGET_MMX"
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "ix86_fixup_binary_operands_no_copy (, mode, operands);")
 
 (define_insn "*mmx_3"
-  [(set (match_operand:MMXMODE12 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODE12 0 "register_operand" "=y,x,Yv")
 (sat_plusminus:MMXMODE12
- (match_operand:MMXMODE12 1 "nonimmediate_operand" "0")
- (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX && ix86_binary_operator_ok (, mode, operands)"
-  "p\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (match_operand:MMXMODE12 1 "nonimmediate_operand" "0,0,Yv")
+ (match_operand:MMXMODE12 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "(TARGET_MMX || TARGET_MMX_WITH_SSE)
+   && ix86_binary_operator_ok (, mode, operands)"
+  "@
+   p\t{%2, %0|%0, %2}
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sseadd,sseadd")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_mulv4hi3"
   [(set (match_operand:V4HI 0 "register_operand")
-- 
2.20.1



[PATCH 10/43] i386: Emulate MMX mmx_andnot3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX mmx_andnot3 with SSE.  Only an SSE register source operand
is allowed.

PR target/89021
* config/i386/mmx.md (mmx_andnot3): Also allow
TARGET_MMX_WITH_SSE.  Add SSE support.
---
 gcc/config/i386/mmx.md | 18 +++---
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index b7cbe2155b6..8945ece2a03 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -1093,14 +1093,18 @@
 ;
 
 (define_insn "mmx_andnot3"
-  [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
+  [(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,Yv")
(and:MMXMODEI
- (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0"))
- (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym")))]
-  "TARGET_MMX"
-  "pandn\t{%2, %0|%0, %2}"
-  [(set_attr "type" "mmxadd")
-   (set_attr "mode" "DI")])
+ (not:MMXMODEI (match_operand:MMXMODEI 1 "register_operand" "0,0,Yv"))
+ (match_operand:MMXMODEI 2 "nonimmediate_operand" "ym,x,Yv")))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   pandn\t{%2, %0|%0, %2}
+   pandn\t{%2, %0|%0, %2}
+   vpandn\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
+   (set_attr "type" "mmxadd,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
 
 (define_expand "mmx_3"
   [(set (match_operand:MMXMODEI 0 "register_operand")
-- 
2.20.1



[PATCH 08/43] i386: Emulate MMX ashr3/3 with SSE

2019-02-09 Thread H.J. Lu
Emulate MMX ashr3/3 with SSE.  Only an SSE register
source operand is allowed.
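
As with other element-wise operations, no reshuffle is needed.  A minimal
sketch of the psraw case (hypothetical helper; input assumed in the low
half of an XMM register):

#include <emmintrin.h>

static __m128i
psraw_mmx_with_sse (__m128i a)
{
  /* SSE2 psraw shifts every 16-bit element arithmetically; the low
     64 bits hold the MMX result, the upper bits are don't-care.  */
  return _mm_srai_epi16 (a, 3);
}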

PR target/89021
* config/i386/mmx.md (mmx_ashr3): Disallow with
TARGET_MMX_WITH_SSE.
(mmx_3): Likewise.
(ashr3): New.
(3): Likewise.
---
 gcc/config/i386/mmx.md | 38 --
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 2024c75fa78..9e07bf31f81 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -995,7 +995,7 @@
 (ashiftrt:MMXMODE24
  (match_operand:MMXMODE24 1 "register_operand" "0")
  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "psra\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
(set (attr "length_immediate")
@@ -1009,7 +1009,7 @@
 (any_lshift:MMXMODE248
  (match_operand:MMXMODE248 1 "register_operand" "0")
  (match_operand:DI 2 "nonmemory_operand" "yN")))]
-  "TARGET_MMX"
+  "TARGET_MMX && !TARGET_MMX_WITH_SSE"
   "p\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxshft")
(set (attr "length_immediate")
@@ -1018,6 +1018,40 @@
(const_string "0")))
(set_attr "mode" "DI")])
 
+(define_insn "ashr3"
+  [(set (match_operand:MMXMODE24 0 "register_operand" "=x,Yv")
+(ashiftrt:MMXMODE24
+ (match_operand:MMXMODE24 1 "register_operand" "0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YvN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   psra\t{%2, %0|%0, %2}
+   vpsra\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+   (const_string "1")
+   (const_string "0")))
+   (set_attr "mode" "TI")])
+
+(define_insn "3"
+  [(set (match_operand:MMXMODE248 0 "register_operand" "=x,Yv")
+(any_lshift:MMXMODE248
+ (match_operand:MMXMODE248 1 "register_operand" "0,Yv")
+ (match_operand:DI 2 "nonmemory_operand" "xN,YvN")))]
+  "TARGET_MMX_WITH_SSE"
+  "@
+   p\t{%2, %0|%0, %2}
+   vp\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sseishft,sseishft")
+   (set (attr "length_immediate")
+ (if_then_else (match_operand 2 "const_int_operand")
+   (const_string "1")
+   (const_string "0")))
+   (set_attr "mode" "TI")])
+
 ;
 ;;
 ;; Parallel integral comparisons
-- 
2.20.1


