amyk updated this revision to Diff 276844.
amyk added a comment.
Address review comments
- update comments
- pull out common code
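For reference, the new wrappers are used like this (a minimal sketch
mirroring the tests below; the last argument must be a compile-time
constant, and vuia/uia/vsia/sia are the file-scope test globals):

  // Replace element 1 of a vector unsigned int with a scalar value.
  vector unsigned int r1 = vec_replace_elt(vuia, uia, 1);
  // Replace the 4 bytes starting at byte offset 6; the result comes
  // back as vector unsigned char.
  vector unsigned char r2 = vec_replace_unaligned(vsia, sia, 6);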
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D83500/new/
https://reviews.llvm.org/D83500
Files:
clang/include/clang/Basic/BuiltinsPPC.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/altivec.h
clang/test/CodeGen/builtins-ppc-p10vector.c
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -24,10 +24,14 @@
vector unsigned __int128 vui128a, vui128b, vui128c;
vector float vfa, vfb;
vector double vda, vdb;
+signed int sia;
unsigned int uia, uib;
unsigned char uca;
unsigned short usa;
+signed long long slla;
unsigned long long ulla;
+float fa;
+double da;
vector unsigned long long test_vpdepd(void) {
// CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -581,3 +585,123 @@
// CHECK: ret <4 x float>
return vec_splati_ins(vfa, 0, 1.0f);
}
+
+vector signed int test_vec_replace_elt_si(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+ // CHECK-BE-NEXT: ret <4 x i32>
+ // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+ // CHECK-BE-NEXT: ret <4 x i32>
+ // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+ // CHECK-NEXT: ret <4 x i32>
+ return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+ // CHECK-BE: bitcast float %{{.+}} to i32
+ // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+ // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+ // CHECK-BE-NEXT: ret <4 x float>
+ // CHECK: bitcast float %{{.+}} to i32
+ // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+ // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+ // CHECK-NEXT: ret <4 x float>
+ return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+ // CHECK-BE-NEXT: ret <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+ // CHECK-BE-NEXT: ret <2 x i64>
+ // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+ // CHECK-NEXT: ret <2 x i64>
+ return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+ // CHECK-BE: bitcast double %{{.+}} to i64
+ // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+ // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+ // CHECK-BE-NEXT: ret <2 x double>
+ // CHECK: bitcast double %{{.+}} to i64
+ // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+ // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+ // CHECK-NEXT: ret <2 x double>
+ return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+ // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+ // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+ // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+ // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+ // CHECK-BE: bitcast float %{{.+}} to i32
+ // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+ // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: bitcast float %{{.+}} to i32
+ // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+ // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+ // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+ // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vslla, slla, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+ // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+ // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+ // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+ // CHECK-BE: bitcast double %{{.+}} to i64
+ // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+ // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-BE-NEXT: ret <16 x i8>
+ // CHECK: bitcast double %{{.+}} to i64
+ // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+ // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+ // CHECK-NEXT: ret <16 x i8>
+ return vec_replace_unaligned(vda, da, 8);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17095,6 +17095,14 @@
return __builtin_vsx_xxblendvd(__a, __b, __c);
}
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
/* vec_splati */
#define vec_splati(__a) \
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14268,6 +14268,110 @@
Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
return Builder.CreateCall(F, {X, Undef});
}
+  case PPC::BI__builtin_altivec_vec_replace_elt: {
+    // The third argument to vec_replace_elt must be a compile-time constant
+    // and is emitted as either the vinsw or the vinsd intrinsic.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    Value *Call;
+    if (Ops[1]->getType()->getPrimitiveSizeInBits() == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      ConstArg *= 4;
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
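+      // The element index was scaled to a byte offset above (index * 4);
+      // e.g., element 1 becomes byte offset 4 on big endian and
+      // 12 - 4 = 8 on little endian, as the tests above check.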
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+        Call = Builder.CreateCall(F, Ops);
+        return Builder.CreateBitCast(Call, ResultType);
+      }
+    } else if (Ops[1]->getType()->getPrimitiveSizeInBits() == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      ConstArg *= 8;
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
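+      // e.g., element 0 maps to byte offset 0 on big endian and
+      // 8 - 0 = 8 on little endian (see test_vec_replace_elt_sll).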
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+        Call = Builder.CreateCall(F, Ops);
+        return Builder.CreateBitCast(Call,
+                                     llvm::FixedVectorType::get(DoubleTy, 2));
+      }
+    }
+    Call = Builder.CreateCall(F, Ops);
+    return Call;
+  }
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // The third argument to vec_replace_unaligned must be a compile-time
+    // constant and is emitted as either the vinsw or the vinsd intrinsic.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Function *F;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    Value *Call;
+    if (Ops[1]->getType()->getPrimitiveSizeInBits() == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
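+      // Unlike vec_replace_elt, the third argument is already a byte
+      // offset, so it is not scaled before the endianness adjustment;
+      // e.g., offset 8 stays 8 on big endian and becomes 12 - 8 = 4 on
+      // little endian (see test_vec_replace_unaligned_ui).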
+      // Fix the constant if we are on little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+      }
+    } else if (Ops[1]->getType()->getPrimitiveSizeInBits() == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant if we are on little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
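+      // The index is again a byte offset; e.g., offset 7 stays 7 on big
+      // endian and becomes 8 - 7 = 1 on little endian
+      // (see test_vec_replace_unaligned_ull).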
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+      }
+    }
+    // Emit the call to vinsw/vinsd and bitcast the result to vector char.
+    Call = Builder.CreateCall(F, Ops);
+    Call = Builder.CreateBitCast(Call, llvm::FixedVectorType::get(Int8Ty, 16));
+    return Call;
+  }
case PPC::BI__builtin_altivec_vpopcntb:
case PPC::BI__builtin_altivec_vpopcnth:
case PPC::BI__builtin_altivec_vpopcntw:
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -335,6 +335,9 @@
BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsULLiV8Us", "")
BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiULLiV4Ui", "")
BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiULLiV4Ui", "")
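+// Note: the "t" attribute means these two builtins use custom type
+// checking, so the prototypes below are not enforced as written.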
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
// VSX built-ins.