amyk created this revision.
amyk added reviewers: PowerPC, power-llvm-team, nemanjai, lei.
amyk added projects: LLVM, PowerPC, clang.
Herald added a subscriber: shchenz.

This patch implements custom codegen for the `vec_replace_elt` and 
`vec_replace_unaligned` builtins.

These builtins map to the `@llvm.ppc.altivec.vinsw` and
`@llvm.ppc.altivec.vinsd` intrinsics, depending on the width of the second
argument. The main motivation for doing custom codegen for these builtins is
that they have float and double overloads. Converting the float to an integer
would normally be done via `fptoui` in the IR, but a `bitcast` is preferable
here because the builtin should insert the value's bit pattern unchanged
rather than its integer conversion.

The original patch that implemented the front end
(https://reviews.llvm.org/D82359) did this by adding unions to altivec.h,
whereas this patch uses custom codegen to perform the float conversion with a
`bitcast` instead.
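
For reference, a minimal usage sketch of the float overload (an illustration,
not part of the patch; assumes a Power10 target, e.g. `clang -mcpu=pwr10`,
and a hypothetical function name):

  #include <altivec.h>

  vector float replace_elt_f(vector float v, float f) {
    // With this patch, the bits of f are inserted into element 2 via a
    // bitcast and @llvm.ppc.altivec.vinsw, rather than an fptoui conversion.
    return vec_replace_elt(v, f, 2);
  }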

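The codegen below converts the element index passed to vec_replace_elt into
the byte offset expected by vinsw/vinsd, adjusting for endianness. A rough
sketch of that mapping (a hypothetical helper, not code from the patch):

  /* EltBytes is 4 for vinsw (words) and 8 for vinsd (doublewords). */
  static int ReplaceEltByteOffset(int Idx, int EltBytes, int IsLittleEndian) {
    int BEOffset = Idx * EltBytes;         /* e.g. i32 element 1 -> 4 on BE */
    return IsLittleEndian ? (16 - EltBytes) - BEOffset /* 12 - 4 = 8 on LE */
                          : BEOffset;
  }

For vec_replace_unaligned the third argument is already a byte offset, so on
little endian it simply becomes 12 - Offset for vinsw or 8 - Offset for vinsd.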

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83500

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/altivec.h
  clang/test/CodeGen/builtins-ppc-p10vector.c

Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -24,10 +24,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
+signed int sia;
 unsigned int uia, uib;
 unsigned char uca;
 unsigned short usa;
+signed long long slla;
 unsigned long long ulla;
+float fa;
+double da;
 
 vector unsigned long long test_vpdepd(void) {
   // CHECK: @llvm.ppc.altivec.vpdepd(<2 x i64>
@@ -581,3 +585,123 @@
   // CHECK: ret <4 x float>
   return vec_splati_ins(vfa, 0, 1.0f);
 }
+
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK: bitcast float %{{.+}} to i32
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK: bitcast double %{{.+}} to i64
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: bitcast float %{{.+}} to i32
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK: bitcast double %{{.+}} to i64
+  // CHECK-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17095,6 +17095,14 @@
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
 /* vec_splati */
 
 #define vec_splati(__a)                                                        \
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14268,6 +14268,101 @@
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
     return Builder.CreateCall(F, {X, Undef});
   }
+  case PPC::BI__builtin_altivec_vec_replace_elt: {
+    // The third argument to vec_replace_elt is converted to a byte offset
+    // for the vinsw or vinsd intrinsic. It must be a compile-time constant.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    Value *Call = nullptr;
+    if (Ops[1]->getType()->getPrimitiveSizeInBits() == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - (ConstArg * 4);
+      else
+        ConstArg = ConstArg * 4;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] =
+          Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+        Call = Builder.CreateCall(F, Ops);
+        Call = Builder.CreateBitCast(Call, ResultType);
+      } else
+        Call = Builder.CreateCall(F, Ops);
+    } else if (Ops[1]->getType()->getPrimitiveSizeInBits() == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant according to endianness.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - (ConstArg * 8);
+      else
+        ConstArg = ConstArg * 8;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] =
+          Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+        Call = Builder.CreateCall(F, Ops);
+        Call =
+          Builder.CreateBitCast(Call, llvm::FixedVectorType::get(DoubleTy, 2));
+      } else
+        Call = Builder.CreateCall(F, Ops);
+    }
+    return Call;
+  }
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // The third argument to vec_replace_unaligned is the byte offset passed
+    // to the vinsw or vinsd intrinsic. It must be a compile-time constant.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Function *F;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    Value *Call;
+    if (Ops[1]->getType()->getPrimitiveSizeInBits() == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      // Fix the constant if the target is little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (Ops[1]->getType()->isFloatTy()) {
+        Ops[0] =
+          Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+      }
+    } else if (Ops[1]->getType()->getPrimitiveSizeInBits() == 64) {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant if the target is little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (Ops[1]->getType()->isDoubleTy()) {
+        Ops[0] =
+          Builder.CreateBitCast(Ops[0], llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+      }
+    }
+    // Emit the vinsw/vinsd call, and bitcast the result to a vector of char.
+    Call = Builder.CreateCall(F, Ops);
+    Call = Builder.CreateBitCast(Call, llvm::FixedVectorType::get(Int8Ty, 16));
+    return Call;
+  }
   case PPC::BI__builtin_altivec_vpopcntb:
   case PPC::BI__builtin_altivec_vpopcnth:
   case PPC::BI__builtin_altivec_vpopcntw:
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -335,6 +335,8 @@
 BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsULLiV8Us", "")
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiULLiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiULLiV4Ui", "")
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiULLiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiULLiIi", "t")
 
 // VSX built-ins.
 