> -----Original Message----- > From: cfe-commits [mailto:cfe-commits-boun...@lists.llvm.org] On Behalf Of > Simon Pilgrim via cfe-commits > Sent: Wednesday, July 20, 2016 3:18 AM > To: cfe-commits@lists.llvm.org > Subject: r276102 - [X86][SSE] Reimplement SSE fp2si conversion intrinsics > instead of using generic IR > > Author: rksimon > Date: Wed Jul 20 05:18:01 2016 > New Revision: 276102 > > URL: http://llvm.org/viewvc/llvm-project?rev=276102&view=rev > Log: > [X86][SSE] Reimplement SSE fp2si conversion intrinsics instead of using > generic IR > > D20859 and D20860 attempted to replace the SSE (V)CVTTPS2DQ and VCVTTPD2DQ > truncating conversions with generic IR instead. > > It turns out that the behaviour of these intrinsics is different enough > from generic IR that this will cause problems, INF/NAN/out of range values > are guaranteed to result in a 0x80000000 value - which plays havoc with > constant folding which converts them to either zero or UNDEF. This is also > an issue with the scalar implementations (which were already generic IR > and what I was trying to match).
Are the problems enough that this should be merged to the 3.9 release branch? --paulr > > This patch changes both scalar and packed versions back to using x86- > specific builtins. > > It also deals with the other scalar conversion cases that are runtime > rounding mode dependent and can have similar issues with constant folding. > > Differential Revision: https://reviews.llvm.org/D22105 > > Modified: > cfe/trunk/include/clang/Basic/BuiltinsX86.def > cfe/trunk/lib/Headers/avxintrin.h > cfe/trunk/lib/Headers/emmintrin.h > cfe/trunk/lib/Headers/xmmintrin.h > cfe/trunk/test/CodeGen/avx-builtins.c > cfe/trunk/test/CodeGen/builtins-x86.c > cfe/trunk/test/CodeGen/sse-builtins.c > cfe/trunk/test/CodeGen/sse2-builtins.c > > Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def > URL: http://llvm.org/viewvc/llvm- > project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=276102&r1=276101 > &r2=276102&view=diff > ========================================================================== > ==== > --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original) > +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Wed Jul 20 05:18:01 2016 > @@ -303,7 +303,9 @@ TARGET_BUILTIN(__builtin_ia32_pabsd128, > TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "", "sse") > TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "", "sse") > TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "", "sse") > +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "", "sse") > TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "", "sse") > +TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "", "sse") > TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "", "sse") > TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "", "sse") > TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "", "sse") > @@ -328,8 +330,12 @@ TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, > TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "", "sse2") > +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "", "sse2") > +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "", "sse2") > +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "", "sse2") > +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_lfence, "v", "", "sse2") > TARGET_BUILTIN(__builtin_ia32_mfence, "v", "", "sse2") > @@ -455,7 +461,9 @@ TARGET_BUILTIN(__builtin_ia32_cmpss, "V4 > TARGET_BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "", "avx") > TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "", "avx") > TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "", "avx") > +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "", "avx") > TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "", "avx") > +TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "", "avx") > TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "", "avx") > TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "", "avx") > TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "", "avx") > > Modified: cfe/trunk/lib/Headers/avxintrin.h > URL: http://llvm.org/viewvc/llvm- > project/cfe/trunk/lib/Headers/avxintrin.h?rev=276102&r1=276101&r2=276102&v > iew=diff > ========================================================================== > ==== > --- cfe/trunk/lib/Headers/avxintrin.h (original) > +++ cfe/trunk/lib/Headers/avxintrin.h Wed Jul 20 05:18:01 2016 > @@ -2117,7 +2117,7 @@ _mm256_cvtps_pd(__m128 __a) > static __inline __m128i __DEFAULT_FN_ATTRS > _mm256_cvttpd_epi32(__m256d __a) > { > - return (__m128i)__builtin_convertvector((__v4df) __a, __v4si); > + return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a); > } > > static __inline __m128i __DEFAULT_FN_ATTRS > @@ -2129,7 +2129,7 @@ _mm256_cvtpd_epi32(__m256d __a) > static __inline __m256i __DEFAULT_FN_ATTRS > _mm256_cvttps_epi32(__m256 __a) > { > - return (__m256i)__builtin_convertvector((__v8sf) __a, __v8si); > + return (__m256i)__builtin_ia32_cvttps2dq256((__v8sf) __a); > } > > static __inline double __DEFAULT_FN_ATTRS > > Modified: cfe/trunk/lib/Headers/emmintrin.h > URL: http://llvm.org/viewvc/llvm- > project/cfe/trunk/lib/Headers/emmintrin.h?rev=276102&r1=276101&r2=276102&v > iew=diff > ========================================================================== > ==== > --- cfe/trunk/lib/Headers/emmintrin.h (original) > +++ cfe/trunk/lib/Headers/emmintrin.h Wed Jul 20 05:18:01 2016 > @@ -417,8 +417,7 @@ _mm_cvtsd_si32(__m128d __a) > static __inline__ __m128 __DEFAULT_FN_ATTRS > _mm_cvtsd_ss(__m128 __a, __m128d __b) > { > - __a[0] = __b[0]; > - return __a; > + return (__m128)__builtin_ia32_cvtsd2ss((__v4sf)__a, (__v2df)__b); > } > > static __inline__ __m128d __DEFAULT_FN_ATTRS > @@ -444,7 +443,7 @@ _mm_cvttpd_epi32(__m128d __a) > static __inline__ int __DEFAULT_FN_ATTRS > _mm_cvttsd_si32(__m128d __a) > { > - return __a[0]; > + return __builtin_ia32_cvttsd2si((__v2df)__a); > } > > static __inline__ __m64 __DEFAULT_FN_ATTRS > @@ -1707,7 +1706,7 @@ _mm_cvtsd_si64(__m128d __a) > static __inline__ long long __DEFAULT_FN_ATTRS > _mm_cvttsd_si64(__m128d __a) > { > - return __a[0]; > + return __builtin_ia32_cvttsd2si64((__v2df)__a); > } > #endif > > @@ -1755,7 +1754,7 @@ _mm_cvtps_epi32(__m128 __a) > static __inline__ __m128i __DEFAULT_FN_ATTRS > _mm_cvttps_epi32(__m128 __a) > { > - return (__m128i)__builtin_convertvector((__v4sf)__a, __v4si); > + return (__m128i)__builtin_ia32_cvttps2dq((__v4sf)__a); > } > > /// \brief Returns a vector of [4 x i32] where the lowest element is the > input > > Modified: cfe/trunk/lib/Headers/xmmintrin.h > URL: http://llvm.org/viewvc/llvm- > project/cfe/trunk/lib/Headers/xmmintrin.h?rev=276102&r1=276101&r2=276102&v > iew=diff > ========================================================================== > ==== > --- cfe/trunk/lib/Headers/xmmintrin.h (original) > +++ cfe/trunk/lib/Headers/xmmintrin.h Wed Jul 20 05:18:01 2016 > @@ -1350,7 +1350,7 @@ _mm_cvt_ps2pi(__m128 __a) > static __inline__ int __DEFAULT_FN_ATTRS > _mm_cvttss_si32(__m128 __a) > { > - return __a[0]; > + return __builtin_ia32_cvttss2si((__v4sf)__a); > } > > /// \brief Converts a float value contained in the lower 32 bits of a > vector of > @@ -1386,7 +1386,7 @@ _mm_cvtt_ss2si(__m128 __a) > static __inline__ long long __DEFAULT_FN_ATTRS > _mm_cvttss_si64(__m128 __a) > { > - return __a[0]; > + return __builtin_ia32_cvttss2si64((__v4sf)__a); > } > > /// \brief Converts two low-order float values in a 128-bit vector of > > Modified: cfe/trunk/test/CodeGen/avx-builtins.c > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx- > builtins.c?rev=276102&r1=276101&r2=276102&view=diff > ========================================================================== > ==== > --- cfe/trunk/test/CodeGen/avx-builtins.c (original) > +++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Jul 20 05:18:01 2016 > @@ -286,13 +286,13 @@ __m256d test_mm256_cvtps_pd(__m128 A) { > > __m128i test_mm256_cvttpd_epi32(__m256d A) { > // CHECK-LABEL: test_mm256_cvttpd_epi32 > - // CHECK: fptosi <4 x double> %{{.*}} to <4 x i32> > + // CHECK: call <4 x i32> @llvm.x86.avx.cvtt.pd2dq.256(<4 x double> > %{{.*}}) > return _mm256_cvttpd_epi32(A); > } > > __m256i test_mm256_cvttps_epi32(__m256 A) { > // CHECK-LABEL: test_mm256_cvttps_epi32 > - // CHECK: fptosi <8 x float> %{{.*}} to <8 x i32> > + // CHECK: call <8 x i32> @llvm.x86.avx.cvtt.ps2dq.256(<8 x float> > %{{.*}}) > return _mm256_cvttps_epi32(A); > } > > > Modified: cfe/trunk/test/CodeGen/builtins-x86.c > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins- > x86.c?rev=276102&r1=276101&r2=276102&view=diff > ========================================================================== > ==== > --- cfe/trunk/test/CodeGen/builtins-x86.c (original) > +++ cfe/trunk/test/CodeGen/builtins-x86.c Wed Jul 20 05:18:01 2016 > @@ -287,12 +287,14 @@ void f0() { > tmp_V4f = __builtin_ia32_cvtpi2ps(tmp_V4f, tmp_V2i); > tmp_V2i = __builtin_ia32_cvtps2pi(tmp_V4f); > tmp_i = __builtin_ia32_cvtss2si(tmp_V4f); > + tmp_i = __builtin_ia32_cvttss2si(tmp_V4f); > > tmp_i = __builtin_ia32_rdtsc(); > tmp_i = __builtin_ia32_rdtscp(&tmp_Ui); > tmp_LLi = __builtin_ia32_rdpmc(tmp_i); > #ifdef USE_64 > tmp_LLi = __builtin_ia32_cvtss2si64(tmp_V4f); > + tmp_LLi = __builtin_ia32_cvttss2si64(tmp_V4f); > #endif > tmp_V2i = __builtin_ia32_cvttps2pi(tmp_V4f); > (void) __builtin_ia32_maskmovq(tmp_V8c, tmp_V8c, tmp_cp); > @@ -328,10 +330,14 @@ void f0() { > tmp_V2i = __builtin_ia32_cvttpd2pi(tmp_V2d); > tmp_V2d = __builtin_ia32_cvtpi2pd(tmp_V2i); > tmp_i = __builtin_ia32_cvtsd2si(tmp_V2d); > + tmp_i = __builtin_ia32_cvttsd2si(tmp_V2d); > + tmp_V4f = __builtin_ia32_cvtsd2ss(tmp_V4f, tmp_V2d); > #ifdef USE_64 > tmp_LLi = __builtin_ia32_cvtsd2si64(tmp_V2d); > + tmp_LLi = __builtin_ia32_cvttsd2si64(tmp_V2d); > #endif > tmp_V4i = __builtin_ia32_cvtps2dq(tmp_V4f); > + tmp_V4i = __builtin_ia32_cvttps2dq(tmp_V4f); > (void) __builtin_ia32_clflush(tmp_vCp); > (void) __builtin_ia32_lfence(); > (void) __builtin_ia32_mfence(); > @@ -410,7 +416,9 @@ void f0() { > tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i); > tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d); > tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f); > + tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d); > tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d); > + tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f); > tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7); > tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7); > tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7); > > Modified: cfe/trunk/test/CodeGen/sse-builtins.c > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse- > builtins.c?rev=276102&r1=276101&r2=276102&view=diff > ========================================================================== > ==== > --- cfe/trunk/test/CodeGen/sse-builtins.c (original) > +++ cfe/trunk/test/CodeGen/sse-builtins.c Wed Jul 20 05:18:01 2016 > @@ -295,22 +295,19 @@ long long test_mm_cvtss_si64(__m128 A) { > > int test_mm_cvtt_ss2si(__m128 A) { > // CHECK-LABEL: test_mm_cvtt_ss2si > - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 > - // CHECK: fptosi float %{{.*}} to i32 > + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) > return _mm_cvtt_ss2si(A); > } > > int test_mm_cvttss_si32(__m128 A) { > // CHECK-LABEL: test_mm_cvttss_si32 > - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 > - // CHECK: fptosi float %{{.*}} to i32 > + // CHECK: call i32 @llvm.x86.sse.cvttss2si(<4 x float> %{{.*}}) > return _mm_cvttss_si32(A); > } > > long long test_mm_cvttss_si64(__m128 A) { > // CHECK-LABEL: test_mm_cvttss_si64 > - // CHECK: extractelement <4 x float> %{{.*}}, i32 0 > - // CHECK: fptosi float %{{.*}} to i64 > + // CHECK: call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %{{.*}}) > return _mm_cvttss_si64(A); > } > > > Modified: cfe/trunk/test/CodeGen/sse2-builtins.c > URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2- > builtins.c?rev=276102&r1=276101&r2=276102&view=diff > ========================================================================== > ==== > --- cfe/trunk/test/CodeGen/sse2-builtins.c (original) > +++ cfe/trunk/test/CodeGen/sse2-builtins.c Wed Jul 20 05:18:01 2016 > @@ -507,7 +507,7 @@ long long test_mm_cvtsd_si64(__m128d A) > > __m128 test_mm_cvtsd_ss(__m128 A, __m128d B) { > // CHECK-LABEL: test_mm_cvtsd_ss > - // CHECK: fptrunc double %{{.*}} to float > + // CHECK: call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %{{.*}}, > <2 x double> %{{.*}}) > return _mm_cvtsd_ss(A, B); > } > > @@ -569,21 +569,19 @@ __m128i test_mm_cvttpd_epi32(__m128d A) > > __m128i test_mm_cvttps_epi32(__m128 A) { > // CHECK-LABEL: test_mm_cvttps_epi32 > - // CHECK: fptosi <4 x float> %{{.*}} to <4 x i32> > + // CHECK: call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %{{.*}}) > return _mm_cvttps_epi32(A); > } > > int test_mm_cvttsd_si32(__m128d A) { > // CHECK-LABEL: test_mm_cvttsd_si32 > - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 > - // CHECK: fptosi double %{{.*}} to i32 > + // CHECK: call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %{{.*}}) > return _mm_cvttsd_si32(A); > } > > long long test_mm_cvttsd_si64(__m128d A) { > // CHECK-LABEL: test_mm_cvttsd_si64 > - // CHECK: extractelement <2 x double> %{{.*}}, i32 0 > - // CHECK: fptosi double %{{.*}} to i64 > + // CHECK: call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %{{.*}}) > return _mm_cvttsd_si64(A); > } > > > > _______________________________________________ > cfe-commits mailing list > cfe-commits@lists.llvm.org > http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits