mike.dvoretsky updated this revision to Diff 140972.
mike.dvoretsky edited the summary of this revision.
mike.dvoretsky added a comment.

On the suggestion of @craig.topper, all lowering has been moved to CGBuiltin.cpp
with no new builtins added. Instead, the existing builtins are lowered to the
generic ceil and floor intrinsics when their immediate values correspond to
those operations. https://reviews.llvm.org/D45203 is now required to enable the
transformations.
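
For illustration only (not part of this diff), a minimal sketch of the immediate
mapping the new lowering keys on; the helper function names below are
hypothetical, and the rounding-control macros are the standard _MM_FROUND_*
values from smmintrin.h (compile with -msse4.1):

  #include <smmintrin.h>

  // Immediate 1 (_MM_FROUND_FLOOR) is a plain floor, so the builtin is
  // lowered to the generic @llvm.floor.v4f32 intrinsic.
  __m128 lowered_to_generic(__m128 x) {
    return _mm_round_ps(x, _MM_FROUND_FLOOR);
  }

  // Immediate 0 (_MM_FROUND_NINT, round to nearest) is neither floor nor
  // ceil, so the target-specific @llvm.x86.sse41.round.ps call is kept.
  __m128 kept_as_x86_intrinsic(__m128 x) {
    return _mm_round_ps(x, _MM_FROUND_NINT);
  }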


https://reviews.llvm.org/D45202

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/avx-builtins.c
  clang/test/CodeGen/avx512f-builtins.c
  clang/test/CodeGen/sse41-builtins.c

Index: clang/test/CodeGen/sse41-builtins.c
===================================================================
--- clang/test/CodeGen/sse41-builtins.c
+++ clang/test/CodeGen/sse41-builtins.c
@@ -44,25 +44,29 @@
 
 __m128d test_mm_ceil_pd(__m128d x) {
   // CHECK-LABEL: test_mm_ceil_pd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v2f64
+  // CHECK-NOT: select
   return _mm_ceil_pd(x);
 }
 
 __m128 test_mm_ceil_ps(__m128 x) {
   // CHECK-LABEL: test_mm_ceil_ps
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f32
+  // CHECK-NOT: select
   return _mm_ceil_ps(x);
 }
 
 __m128d test_mm_ceil_sd(__m128d x, __m128d y) {
   // CHECK-LABEL: test_mm_ceil_sd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v2f64
+  // CHECK: select
   return _mm_ceil_sd(x, y);
 }
 
 __m128 test_mm_ceil_ss(__m128 x, __m128 y) {
   // CHECK-LABEL: test_mm_ceil_ss
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f32
+  // CHECK: select
   return _mm_ceil_ss(x, y);
 }
 
@@ -196,25 +200,29 @@
 
 __m128d test_mm_floor_pd(__m128d x) {
   // CHECK-LABEL: test_mm_floor_pd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v2f64
+  // CHECK-NOT: select
   return _mm_floor_pd(x);
 }
 
 __m128 test_mm_floor_ps(__m128 x) {
   // CHECK-LABEL: test_mm_floor_ps
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f32
+  // CHECK-NOT: select
   return _mm_floor_ps(x);
 }
 
 __m128d test_mm_floor_sd(__m128d x, __m128d y) {
   // CHECK-LABEL: test_mm_floor_sd
-  // CHECK: call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v2f64
+  // CHECK: select
   return _mm_floor_sd(x, y);
 }
 
 __m128 test_mm_floor_ss(__m128 x, __m128 y) {
   // CHECK-LABEL: test_mm_floor_ss
-  // CHECK: call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f32
+  // CHECK: select
   return _mm_floor_ss(x, y);
 }
 
Index: clang/test/CodeGen/avx512f-builtins.c
===================================================================
--- clang/test/CodeGen/avx512f-builtins.c
+++ clang/test/CodeGen/avx512f-builtins.c
@@ -7485,46 +7485,98 @@
   return _mm512_min_round_ps(__A,__B,_MM_FROUND_CUR_DIRECTION);
 }
 
+__m512 test_mm512_floor_ps(__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_floor_ps
+  // CHECK: @llvm.floor.v16f32
+  // CHECK-NOT: select
+  return _mm512_floor_ps(__A);
+}
+
+__m512d test_mm512_floor_pd(__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_floor_pd
+  // CHECK: @llvm.floor.v8f64
+  // CHECK-NOT: select
+  return _mm512_floor_pd(__A);
+}
+
 __m512 test_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_floor_ps 
-  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
+  // CHECK-LABEL: @test_mm512_mask_floor_ps
+  // CHECK: @llvm.floor.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_floor_ps (__W,__U,__A);
 }
 
 __m512d test_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_floor_pd 
-  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
+  // CHECK-LABEL: @test_mm512_mask_floor_pd
+  // CHECK: @llvm.floor.v8f64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_floor_pd (__W,__U,__A);
 }
 
+__m512 test_mm512_ceil_ps(__m512 __A)
+{
+  // CHECK-LABEL: @test_mm512_ceil_ps
+  // CHECK: @llvm.ceil.v16f32
+  // CHECK-NOT: select
+  return _mm512_ceil_ps(__A);
+}
+
+__m512d test_mm512_ceil_pd(__m512d __A)
+{
+  // CHECK-LABEL: @test_mm512_ceil_pd
+  // CHECK: @llvm.ceil.v8f64
+  // CHECK-NOT: select
+  return _mm512_ceil_pd(__A);
+}
+
 __m512 test_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_ceil_ps 
-  // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
+  // CHECK-LABEL: @test_mm512_mask_ceil_ps
+  // CHECK: @llvm.ceil.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_ceil_ps (__W,__U,__A);
 }
 
 __m512d test_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
-  // CHECK-LABEL: @test_mm512_mask_ceil_pd 
-  // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
+  // CHECK-LABEL: @test_mm512_mask_ceil_pd
+  // CHECK: @llvm.ceil.v8f64
+  // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
   return _mm512_mask_ceil_pd (__W,__U,__A);
 }
 
 __m512 test_mm512_mask_roundscale_ps(__m512 __W, __mmask16 __U, __m512 __A) 
 {
   // CHECK-LABEL: @test_mm512_mask_roundscale_ps
   // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
+  return _mm512_mask_roundscale_ps(__W,__U,__A, 3);
+}
+
+__m512 test_mm512_mask_roundscale_floor_ps(__m512 __W, __mmask16 __U, __m512 __A) 
+{
+  // CHECK-LABEL: @test_mm512_mask_roundscale_floor_ps
+  // CHECK: @llvm.floor.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
   return _mm512_mask_roundscale_ps(__W,__U,__A, 1);
 }
 
+__m512 test_mm512_mask_roundscale_ceil_ps(__m512 __W, __mmask16 __U, __m512 __A) 
+{
+  // CHECK-LABEL: @test_mm512_mask_roundscale_ceil_ps
+  // CHECK: @llvm.ceil.v16f32
+  // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
+  return _mm512_mask_roundscale_ps(__W,__U,__A, 2);
+}
+
 __m512 test_mm512_maskz_roundscale_ps(__mmask16 __U, __m512 __A) 
 {
   // CHECK-LABEL: @test_mm512_maskz_roundscale_ps
   // CHECK: @llvm.x86.avx512.mask.rndscale.ps.512
-  return _mm512_maskz_roundscale_ps(__U,__A, 1);
+  return _mm512_maskz_roundscale_ps(__U,__A, 3);
 }
 
 __m512 test_mm512_mask_roundscale_round_ps(__m512 __A,__mmask16 __U,__m512 __C)
@@ -7552,14 +7604,14 @@
 {
   // CHECK-LABEL: @test_mm512_mask_roundscale_pd
   // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
-  return _mm512_mask_roundscale_pd(__W,__U,__A, 1);
+  return _mm512_mask_roundscale_pd(__W,__U,__A, 3);
 }
 
 __m512d test_mm512_maskz_roundscale_pd(__mmask8 __U, __m512d __A) 
 {
   // CHECK-LABEL: @test_mm512_maskz_roundscale_pd
   // CHECK: @llvm.x86.avx512.mask.rndscale.pd.512
-  return _mm512_maskz_roundscale_pd(__U,__A, 1);
+  return _mm512_maskz_roundscale_pd(__U,__A, 3);
 }
 
 __m512d test_mm512_mask_roundscale_round_pd(__m512d __A,__mmask8 __U,__m512d __C)
Index: clang/test/CodeGen/avx-builtins.c
===================================================================
--- clang/test/CodeGen/avx-builtins.c
+++ clang/test/CodeGen/avx-builtins.c
@@ -202,13 +202,15 @@
 
 __m256d test_mm256_ceil_pd(__m256d x) {
   // CHECK-LABEL: test_mm256_ceil_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v4f64
+  // CHECK-NOT: select
   return _mm256_ceil_pd(x);
 }
 
 __m256 test_mm_ceil_ps(__m256 x) {
   // CHECK-LABEL: test_mm_ceil_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 2)
+  // CHECK: @llvm.ceil.v8f32
+  // CHECK-NOT: select
   return _mm256_ceil_ps(x);
 }
 
@@ -364,13 +366,15 @@
 
 __m256d test_mm256_floor_pd(__m256d x) {
   // CHECK-LABEL: test_mm256_floor_pd
-  // CHECK: call <4 x double> @llvm.x86.avx.round.pd.256(<4 x double> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v4f64
+  // CHECK-NOT: select
   return _mm256_floor_pd(x);
 }
 
 __m256 test_mm_floor_ps(__m256 x) {
   // CHECK-LABEL: test_mm_floor_ps
-  // CHECK: call <8 x float> @llvm.x86.avx.round.ps.256(<8 x float> %{{.*}}, i32 1)
+  // CHECK: @llvm.floor.v8f32
+  // CHECK-NOT: select
   return _mm256_floor_ps(x);
 }
 
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -8238,6 +8238,95 @@
   return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
 }
 
+static Value *EmitX86Round(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
+                           unsigned BuiltinID) {
+  int RoundControl;
+  if (BuiltinID == clang::X86::BI__builtin_ia32_roundss ||
+      BuiltinID == clang::X86::BI__builtin_ia32_roundsd)
+    RoundControl = cast<ConstantInt>(Ops[2])->getSExtValue();
+  else
+    RoundControl = cast<ConstantInt>(Ops[1])->getSExtValue();
+
+  int SAE;
+  if (BuiltinID == clang::X86::BI__builtin_ia32_rndscaleps_mask ||
+      BuiltinID == clang::X86::BI__builtin_ia32_rndscalepd_mask)
+    SAE = cast<ConstantInt>(Ops[4])->getSExtValue();
+  else
+    SAE = 4;
+
+  if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/)) {
+    Intrinsic::ID ID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case clang::X86::BI__builtin_ia32_roundps:
+      ID = Intrinsic::x86_sse41_round_ps;
+      break;
+    case clang::X86::BI__builtin_ia32_roundss:
+      ID = Intrinsic::x86_sse41_round_ss;
+      break;
+    case clang::X86::BI__builtin_ia32_roundsd:
+      ID = Intrinsic::x86_sse41_round_sd;
+      break;
+    case clang::X86::BI__builtin_ia32_roundpd:
+      ID = Intrinsic::x86_sse41_round_pd;
+      break;
+    case clang::X86::BI__builtin_ia32_roundpd256:
+      ID = Intrinsic::x86_avx_round_pd_256;
+      break;
+    case clang::X86::BI__builtin_ia32_roundps256:
+      ID = Intrinsic::x86_avx_round_ps_256;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscaleps_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_ps_512;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscalepd_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_pd_512;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscalepd_128_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_pd_128;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscalepd_256_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_pd_256;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscaleps_128_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_ps_128;
+      break;
+    case clang::X86::BI__builtin_ia32_rndscaleps_256_mask:
+      ID = Intrinsic::x86_avx512_mask_rndscale_ps_256;
+      break;
+    }
+    llvm::Function *F = CGF.CGM.getIntrinsic(ID);
+    return CGF.Builder.CreateCall(F, Ops);
+  }
+
+  Value *Src, *Dst, *Mask;
+  if (BuiltinID == clang::X86::BI__builtin_ia32_roundss ||
+      BuiltinID == clang::X86::BI__builtin_ia32_roundsd) {
+    Src = Ops[1];
+    Dst = Ops[0];
+    Mask = llvm::ConstantInt::get(CGF.Builder.getInt32Ty(), 1);
+  } else {
+    Src = Ops[0];
+    if (BuiltinID == clang::X86::BI__builtin_ia32_rndscaleps_mask ||
+        BuiltinID == clang::X86::BI__builtin_ia32_rndscalepd_mask ||
+        BuiltinID == clang::X86::BI__builtin_ia32_rndscalepd_128_mask ||
+        BuiltinID == clang::X86::BI__builtin_ia32_rndscalepd_256_mask ||
+        BuiltinID == clang::X86::BI__builtin_ia32_rndscaleps_128_mask ||
+        BuiltinID == clang::X86::BI__builtin_ia32_rndscaleps_256_mask) {
+      Dst = Ops[2];
+      Mask = Ops[3];
+    } else {
+      Dst = Src;
+      Mask = llvm::ConstantInt::getAllOnesValue(CGF.Builder.getInt32Ty());
+    }
+  }
+
+  Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
+  Value *F = CGF.CGM.getIntrinsic(ID, Src->getType());
+  Value *Res = CGF.Builder.CreateCall(F, {Src});
+  return EmitX86Select(CGF, Mask, Res, Dst);
+}
+
 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
 
   llvm::Type *Ty = Ops[0]->getType();
@@ -8887,6 +8976,20 @@
     return Builder.CreateBitCast(Res, Ops[0]->getType());
   }
 
+  case X86::BI__builtin_ia32_roundps:
+  case X86::BI__builtin_ia32_roundss:
+  case X86::BI__builtin_ia32_roundsd:
+  case X86::BI__builtin_ia32_roundpd:
+  case X86::BI__builtin_ia32_roundpd256:
+  case X86::BI__builtin_ia32_roundps256:
+  case X86::BI__builtin_ia32_rndscaleps_mask:
+  case X86::BI__builtin_ia32_rndscalepd_mask:
+  case X86::BI__builtin_ia32_rndscalepd_128_mask:
+  case X86::BI__builtin_ia32_rndscalepd_256_mask:
+  case X86::BI__builtin_ia32_rndscaleps_128_mask:
+  case X86::BI__builtin_ia32_rndscaleps_256_mask:
+    return EmitX86Round(*this, Ops, BuiltinID);
+
   case X86::BI__builtin_ia32_vplzcntd_128_mask:
   case X86::BI__builtin_ia32_vplzcntd_256_mask:
   case X86::BI__builtin_ia32_vplzcntd_512_mask: