Attached patch adds SSE alternatives to sse2_cvtpi2pd, sse2_cvtpd2pi
and sse2_cvttpd2pi to avoid MMX registers when e.g. _mm_cvtepi32_pd
intrinsics is used. Without the patch, the testcase compiles to (-O2
-mavx):

_Z7prepareii:
        vmovd   %edi, %xmm1
        vpinsrd $1, %esi, %xmm1, %xmm0
        movdq2q %xmm0, %mm0
        cvtpi2pd        %mm0, %xmm0
        vhaddpd %xmm0, %xmm0, %xmm0
        ret

while patched gcc generates:

        vmovd   %edi, %xmm1
        vpinsrd $1, %esi, %xmm1, %xmm0
        vcvtdq2pd       %xmm0, %xmm0
        vhaddpd %xmm0, %xmm0, %xmm0
        ret

The later avoids transition of FPU to MMX mode.

2019-01-23  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/88998
    * config/i386/sse.md (sse2_cvtpi2pd): Add SSE alternatives.
    Disparage MMX alternative.
    (sse2_cvtpd2pi): Ditto.
    (sse2_cvttpd2pi): Ditto.

testsuite/ChangeLog:

2019-01-23  Uroš Bizjak  <ubiz...@gmail.com>

    PR target/88998
    * g++.target/i386/pr88998.c: New test.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline, will be backported to release branches.

Uros.
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md  (revision 268188)
+++ config/i386/sse.md  (working copy)
@@ -4997,37 +4997,49 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (define_insn "sse2_cvtpi2pd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
-       (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
+  [(set (match_operand:V2DF 0 "register_operand" "=v,x")
+       (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "vBm,?!y")))]
   "TARGET_SSE2"
-  "cvtpi2pd\t{%1, %0|%0, %1}"
+  "@
+   %vcvtdq2pd\t{%1, %0|%0, %1}
+   cvtpi2pd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx,*")
-   (set_attr "prefix_data16" "1,*")
+   (set_attr "unit" "*,mmx")
+   (set_attr "prefix_data16" "*,1")
+   (set_attr "prefix" "maybe_vex,*")
    (set_attr "mode" "V2DF")])
 
 (define_insn "sse2_cvtpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
+  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
+       (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")]
                     UNSPEC_FIX_NOTRUNC))]
   "TARGET_SSE2"
-  "cvtpd2pi\t{%1, %0|%0, %1}"
+  "@
+   * return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" : \"cvtpd2dq\t{%1, 
%0|%0, %1}\";
+   cvtpd2pi\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+   (set_attr "unit" "*,mmx")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "athlon_decode" "vector")
    (set_attr "bdver1_decode" "double")
-   (set_attr "btver2_decode" "direct")
-   (set_attr "prefix_data16" "1")
-   (set_attr "mode" "DI")])
+   (set_attr "prefix_data16" "*,1")
+   (set_attr "prefix" "maybe_vex,*")
+   (set_attr "mode" "TI")])
 
 (define_insn "sse2_cvttpd2pi"
-  [(set (match_operand:V2SI 0 "register_operand" "=y")
-       (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "xm")))]
+  [(set (match_operand:V2SI 0 "register_operand" "=v,?!y")
+       (fix:V2SI (match_operand:V2DF 1 "nonimmediate_operand" "vBm,xm")))]
   "TARGET_SSE2"
-  "cvttpd2pi\t{%1, %0|%0, %1}"
+  "@
+   * return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" : 
\"cvttpd2dq\t{%1, %0|%0, %1}\";
+   cvttpd2pi\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "unit" "mmx")
+   (set_attr "unit" "*,mmx")
+   (set_attr "amdfam10_decode" "double")
+   (set_attr "athlon_decode" "vector")
    (set_attr "bdver1_decode" "double")
-   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_data16" "*,1")
+   (set_attr "prefix" "maybe_vex,*")
    (set_attr "mode" "TI")])
 
 (define_insn "sse2_cvtsi2sd"
Index: testsuite/g++.target/i386/pr88998.C
===================================================================
--- testsuite/g++.target/i386/pr88998.C (nonexistent)
+++ testsuite/g++.target/i386/pr88998.C (working copy)
@@ -0,0 +1,31 @@
+// PR target/88998
+// { dg-do run { target sse2_runtime } }
+// { dg-options "-O2 -msse2 -mfpmath=387" }
+// { dg-require-effective-target c++11 }
+
+#include <cassert>
+#include <unordered_map>
+#include <x86intrin.h>
+
+double
+__attribute__((noinline))
+prepare (int a, int b)
+{
+  __m128i is = _mm_setr_epi32 (a, b, 0, 0);
+  __m128d ds = _mm_cvtepi32_pd (is);
+  return ds[0] + ds[1];
+}
+
+int
+main (int, char **)
+{
+  double d = prepare (1, 2);
+
+  std::unordered_map < int, int >m;
+  m.insert ({0, 0});
+  m.insert ({1, 1});
+  assert (m.load_factor () <= m.max_load_factor ());
+
+  assert (d == 3);
+  return 0;
+}

Reply via email to