craig.topper created this revision.
craig.topper added a reviewer: RKSimon.
Herald added a subscriber: cfe-commits.

The 512-bit cvt(u)qq2ps, cvt(u)qq2pd, and cvt(u)dq2ps intrinsics can all take 
an explicit rounding mode argument. If the rounding mode is CUR_DIRECTION we'd 
like to emit a plain sitofp/uitofp instruction followed by a select, as we 
already do for the 256-bit intrinsics.
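
For example, a sketch of the masked CUR_DIRECTION case (the value names here 
are placeholders):

  __m512d r = _mm512_mask_cvt_roundepi64_pd(w, u, a, _MM_FROUND_CUR_DIRECTION);
  // desired IR:
  //   %conv = sitofp <8 x i64> %a to <8 x double>
  //   %res  = select <8 x i1> %u, <8 x double> %conv, <8 x double> %w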

For cvt(u)qq2pd and cvt(u)dq2ps we already do this when the form of the 
intrinsic that doesn't take a rounding mode argument is used: the header 
implements it with __builtin_convertvector plus a select builtin. But if the 
explicit rounding mode form of the intrinsic is used with CUR_DIRECTION, we 
don't. We shouldn't have this inconsistency.
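
That existing header pattern looks roughly like this (simplified from 
avx512dqintrin.h; the function attribute macros are elided):

  static __inline__ __m512d
  _mm512_cvtepi64_pd(__m512i __A) {
    // Lowers to a plain sitofp.
    return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
  }

  static __inline__ __m512d
  _mm512_mask_cvtepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
    // The masking is expressed with the 512-bit select builtin.
    return (__m512d)__builtin_ia32_selectpd_512(
        (__mmask8)__U, (__v8df)_mm512_cvtepi64_pd(__A), (__v8df)__W);
  }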

For cvt(u)qq2ps nothing is done at all: the result is only 256 bits wide, and 
the 256-bit select builtin isn't available without avx512vl. So we need custom 
codegen for this case.
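
That is, the header form we'd want would look roughly like the following, but 
__builtin_ia32_selectps_256 requires avx512vl:

  static __inline__ __m256
  _mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
    // Not usable here: the result is only 256 bits wide, and the
    // 256-bit select builtin is only available with avx512vl.
    return (__m256)__builtin_ia32_selectps_256(
        (__mmask8)__U, (__v8sf)_mm512_cvtepi64_ps(__A), (__v8sf)__W);
  }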

Even when the rounding mode isn't CUR_DIRECTION we should still emit the 
masking as a select in IR, for consistency. This also removes another scalar 
integer mask argument from our intrinsics.

To accomplish all of these goals I've taken a slightly unusual approach. I've 
added two new X86-specific intrinsics for sitofp/uitofp with rounding. These 
intrinsics are overloaded on both the input and the output type, so we only 
need 2 of them instead of 6. This avoids needing a switch in CGBuiltin.cpp to 
map each builtin to its own intrinsic; we only have to check signed vs. 
unsigned. I believe other targets also use overloaded intrinsics like this.
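
The six concrete instantiations then fall out of the intrinsic name mangling:

  llvm.x86.sitofp.round.v16f32.v16i32    llvm.x86.uitofp.round.v16f32.v16i32
  llvm.x86.sitofp.round.v8f64.v8i64      llvm.x86.uitofp.round.v8f64.v8i64
  llvm.x86.sitofp.round.v8f32.v8i64      llvm.x86.uitofp.round.v8f32.v8i64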

So if the rounding mode is CUR_DIRECTION we emit a plain sitofp/uitofp 
instruction; otherwise we call one of the new intrinsics. After that we emit a 
select instruction if the builtin is masked.
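
For a non-CUR_DIRECTION rounding mode the masked case then looks roughly like 
this (the i32 8 is _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC):

  %conv = call <8 x double> @llvm.x86.sitofp.round.v8f64.v8i64(<8 x i64> %a, i32 8)
  %res  = select <8 x i1> %u, <8 x double> %conv, <8 x double> %w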


Repository:
  rC Clang

https://reviews.llvm.org/D56998

Files:
  lib/CodeGen/CGBuiltin.cpp
  test/CodeGen/avx512dq-builtins.c
  test/CodeGen/avx512f-builtins.c

Index: test/CodeGen/avx512f-builtins.c
===================================================================
--- test/CodeGen/avx512f-builtins.c
+++ test/CodeGen/avx512f-builtins.c
@@ -5022,42 +5022,46 @@
 __m512 test_mm512_cvt_roundepi32_ps( __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_cvt_roundepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v16f32.v16i32
   return _mm512_cvt_roundepi32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512 test_mm512_mask_cvt_roundepi32_ps(__m512 __W, __mmask16 __U, __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v16f32.v16i32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_cvt_roundepi32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512 test_mm512_maskz_cvt_roundepi32_ps(__mmask16 __U, __m512i __A)
 {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtdq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v16f32.v16i32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_cvt_roundepi32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512 test_mm512_cvt_roundepu32_ps(__m512i __A)
 {
   // CHECK-LABEL: @test_mm512_cvt_roundepu32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v16f32.v16i32
   return _mm512_cvt_roundepu32_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512 test_mm512_mask_cvt_roundepu32_ps(__m512 __W, __mmask16 __U,__m512i __A)
 {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepu32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v16f32.v16i32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_cvt_roundepu32_ps(__W,__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
 __m512 test_mm512_maskz_cvt_roundepu32_ps(__mmask16 __U,__m512i __A)
 {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu32_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtudq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v16f32.v16i32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_maskz_cvt_roundepu32_ps(__U,__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 }
 
Index: test/CodeGen/avx512dq-builtins.c
===================================================================
--- test/CodeGen/avx512dq-builtins.c
+++ test/CodeGen/avx512dq-builtins.c
@@ -613,55 +613,61 @@
 
 __m512d test_mm512_cvt_roundepi64_pd(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvt_roundepi64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  // CHECK: @llvm.x86.sitofp.round.v8f64.v8i64
   return _mm512_cvt_roundepi64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m512d test_mm512_mask_cvt_roundepi64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  // CHECK: @llvm.x86.sitofp.round.v8f64.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_cvt_roundepi64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m512d test_mm512_maskz_cvt_roundepi64_pd(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2pd.512
+  // CHECK: @llvm.x86.sitofp.round.v8f64.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_cvt_roundepi64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_cvtepi64_ps(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvtepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
   return _mm512_cvtepi64_ps(__A); 
 }
 
 __m256 test_mm512_mask_cvtepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvtepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_cvtepi64_ps(__W, __U, __A); 
 }
 
 __m256 test_mm512_maskz_cvtepi64_ps(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvtepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: sitofp <8 x i64> %{{.*}} to <8 x float>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_cvtepi64_ps(__U, __A); 
 }
 
 __m256 test_mm512_cvt_roundepi64_ps(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvt_roundepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v8f32.v8i64
   return _mm512_cvt_roundepi64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_mask_cvt_roundepi64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v8f32.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_cvt_roundepi64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_maskz_cvt_roundepi64_ps(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepi64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtqq2ps.512
+  // CHECK: @llvm.x86.sitofp.round.v8f32.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_cvt_roundepi64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
@@ -831,55 +837,61 @@
 
 __m512d test_mm512_cvt_roundepu64_pd(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvt_roundepu64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  // CHECK: @llvm.x86.uitofp.round.v8f64.v8i64
   return _mm512_cvt_roundepu64_pd(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m512d test_mm512_mask_cvt_roundepu64_pd(__m512d __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  // CHECK: @llvm.x86.uitofp.round.v8f64.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_cvt_roundepu64_pd(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m512d test_mm512_maskz_cvt_roundepu64_pd(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_pd
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2pd.512
+  // CHECK: @llvm.x86.uitofp.round.v8f64.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_maskz_cvt_roundepu64_pd(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_cvtepu64_ps(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvtepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
   return _mm512_cvtepu64_ps(__A); 
 }
 
 __m256 test_mm512_mask_cvtepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvtepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_cvtepu64_ps(__W, __U, __A); 
 }
 
 __m256 test_mm512_maskz_cvtepu64_ps(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvtepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: uitofp <8 x i64> %{{.*}} to <8 x float>
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_cvtepu64_ps(__U, __A); 
 }
 
 __m256 test_mm512_cvt_roundepu64_ps(__m512i __A) {
   // CHECK-LABEL: @test_mm512_cvt_roundepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v8f32.v8i64
   return _mm512_cvt_roundepu64_ps(__A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_mask_cvt_roundepu64_ps(__m256 __W, __mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_mask_cvt_roundepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v8f32.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_mask_cvt_roundepu64_ps(__W, __U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
 __m256 test_mm512_maskz_cvt_roundepu64_ps(__mmask8 __U, __m512i __A) {
   // CHECK-LABEL: @test_mm512_maskz_cvt_roundepu64_ps
-  // CHECK: @llvm.x86.avx512.mask.cvtuqq2ps.512
+  // CHECK: @llvm.x86.uitofp.round.v8f32.v8i64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
   return _mm512_maskz_cvt_roundepu64_ps(__U, __A, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC); 
 }
 
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -9328,6 +9328,26 @@
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
+static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
+                                    ArrayRef<Value *> Ops, bool IsSigned) {
+  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
+  llvm::Type *Ty = Ops[1]->getType();
+
+  Value *Res;
+  // A rounding mode of 4 is _MM_FROUND_CUR_DIRECTION, i.e. no explicit rounding.
+  if (Rnd != 4) {
+    Intrinsic::ID IID = IsSigned ? Intrinsic::x86_sitofp_round
+                                 : Intrinsic::x86_uitofp_round;
+    Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
+    Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
+  } else {
+    Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
+                   : CGF.Builder.CreateUIToFP(Ops[0], Ty);
+  }
+
+  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
+}
+
 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
 
   llvm::Type *Ty = Ops[0]->getType();
@@ -9989,6 +10008,15 @@
   case X86::BI__builtin_ia32_cvtq2mask512:
     return EmitX86ConvertToMask(*this, Ops[0]);
 
+  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
+    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
+  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
+  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
+    return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
+
   case X86::BI__builtin_ia32_vfmaddss3:
   case X86::BI__builtin_ia32_vfmaddsd3:
   case X86::BI__builtin_ia32_vfmaddss3_mask: