[PATCH] D83500: [PowerPC][Power10] Implement custom codegen for the vec_replace_elt and vec_replace_unaligned builtins.

Amy Kwan via Phabricator via cfe-commits Fri, 21 Aug 2020 18:42:56 -0700

amyk updated this revision to Diff 287142.
amyk added a comment.

Address review comments:


- Further consolidate the custom codegen of the two builtins
- Add SemaChecking for if the third argument is a constant, if the third 
argument is in range and if the second argument is the same type as the element 
type of the first argument
- Add extra test to test the semantic checks that were added


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D83500/new/

https://reviews.llvm.org/D83500

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Headers/altivec.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-p10vector.c
  clang/test/CodeGen/builtins-ppc-vec-ins-error.c

Index: clang/test/CodeGen/builtins-ppc-vec-ins-error.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/builtins-ppc-vec-ins-error.c
@@ -0,0 +1,81 @@
+// REQUIRES: powerpc-registered-target
+
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64le-unknown-unknown -fsyntax-only %s -verify
+// RUN: %clang_cc1 -target-feature +vsx -target-cpu pwr10 \
+// RUN:   -triple powerpc64-unknown-unknown -fsyntax-only %s -verify
+
+#include <altivec.h>
+
+vector signed int vsia;
+vector unsigned int vuia;
+vector signed long long vslla;
+vector unsigned long long vulla;
+vector float vfa;
+vector double vda;
+signed int sia;
+unsigned int uia;
+signed long long slla;
+unsigned long long ulla;
+float fa;
+double da;
+
+vector signed int test_vec_replace_elt_si(void) {
+  return vec_replace_elt(vsia, sia, 13); // expected-error {{argument value 13 is outside the valid range [0, 12]}}
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  return vec_replace_elt(vuia, sia, 1); // expected-error {{arguments are of different types ('unsigned int' vs 'int')}}
+}
+
+vector float test_vec_replace_elt_f(void) {
+  return vec_replace_elt(vfa, fa, 20); // expected-error {{argument value 20 is outside the valid range [0, 12]}}
+}
+
+vector float test_vec_replace_elt_f_2(void) {
+  return vec_replace_elt(vfa, da, 0); // expected-error {{arguments are of different types ('float' vs 'double')}}
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  return vec_replace_elt(vslla, slla, 9); // expected-error {{argument value 9 is outside the valid range [0, 8]}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  return vec_replace_elt(vulla, vda, 0); // expected-error {{arguments are of different types ('unsigned long long' vs '__vector double' (vector of 2 'double' values))}}
+}
+
+vector unsigned long long test_vec_replace_elt_ull_2(void) {
+  return vec_replace_elt(vulla, vulla, vsia); // expected-error {{argument to '__builtin_altivec_vec_replace_elt' must be a constant integer}}
+}
+
+vector double test_vec_replace_elt_d(void) {
+  return vec_replace_elt(vda, da, 33); // expected-error {{argument value 33 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  return vec_replace_unaligned(vsia, da, 6); // expected-error {{arguments are of different types ('int' vs 'double')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  return vec_replace_unaligned(vuia, uia, 14); // expected-error {{argument value 14 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  return vec_replace_unaligned(vfa, fa, 19); // expected-error {{argument value 19 is outside the valid range [0, 12]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  return vec_replace_unaligned(vslla, fa, 0); // expected-error {{arguments are of different types ('long long' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  return vec_replace_unaligned(vulla, ulla, 12); // expected-error {{argument value 12 is outside the valid range [0, 8]}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  return vec_replace_unaligned(vda, fa, 8); // expected-error {{arguments are of different types ('double' vs 'float')}}
+}
+
+vector unsigned char test_vec_replace_unaligned_d_2(void) {
+  return vec_replace_unaligned(vda, vda, da); // expected-error {{argument to '__builtin_altivec_vec_replace_unaligned' must be a constant integer}}
+}
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -20,10 +20,14 @@
 vector unsigned __int128 vui128a, vui128b, vui128c;
 vector float vfa, vfb;
 vector double vda, vdb;
+signed int sia;
 unsigned int uia, uib;
 unsigned char uca;
 unsigned short usa;
+signed long long slla;
 unsigned long long ulla;
+float fa;
+double da;
 
 vector signed long long test_vec_mul_sll(void) {
   // CHECK: mul <2 x i64>
@@ -863,3 +867,123 @@
   // CHECK-NEXT: ret i32
   return vec_test_lsbb_all_zeros(vuca);
 }
+
+vector signed int test_vec_replace_elt_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vsia, sia, 0);
+}
+
+vector unsigned int test_vec_replace_elt_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-BE-NEXT: ret <4 x i32>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <4 x i32>
+  return vec_replace_elt(vuia, uia, 1);
+}
+
+vector float test_vec_replace_elt_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-BE-NEXT: ret <4 x float>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <4 x float>
+  // CHECK-LE-NEXT: ret <4 x float>
+  return vec_replace_elt(vfa, fa, 2);
+}
+
+vector signed long long test_vec_replace_elt_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vslla, slla, 0);
+}
+
+vector unsigned long long test_vec_replace_elt_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-BE-NEXT: ret <2 x i64>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-LE-NEXT: ret <2 x i64>
+  return vec_replace_elt(vulla, ulla, 0);
+}
+
+vector double test_vec_replace_elt_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-BE-NEXT: ret <2 x double>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <2 x double>
+  // CHECK-LE-NEXT: ret <2 x double>
+  return vec_replace_elt(vda, da, 1);
+}
+
+vector unsigned char test_vec_replace_unaligned_si(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 6
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vsia, sia, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ui(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 4
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vuia, uia, 8);
+}
+
+vector unsigned char test_vec_replace_unaligned_f(void) {
+  // CHECK-BE: bitcast float %{{.+}} to i32
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 12
+  // CHECK-BE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast float %{{.+}} to i32
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsw(<4 x i32> %{{.+}}, i32 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <4 x i32> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vfa, fa, 12);
+}
+
+vector unsigned char test_vec_replace_unaligned_sll(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 6
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 2
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vslla, slla, 6);
+}
+
+vector unsigned char test_vec_replace_unaligned_ull(void) {
+  // CHECK-BE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 7
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 1
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vulla, ulla, 7);
+}
+
+vector unsigned char test_vec_replace_unaligned_d(void) {
+  // CHECK-BE: bitcast double %{{.+}} to i64
+  // CHECK-BE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 8
+  // CHECK-BE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-BE-NEXT: ret <16 x i8>
+  // CHECK-LE: bitcast double %{{.+}} to i64
+  // CHECK-LE-NEXT: @llvm.ppc.altivec.vinsd(<2 x i64> %{{.+}}, i64 %{{.+}}, i32 0
+  // CHECK-LE-NEXT: bitcast <2 x i64> %{{.*}} to <16 x i8>
+  // CHECK-LE-NEXT: ret <16 x i8>
+  return vec_replace_unaligned(vda, da, 8);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3190,6 +3190,23 @@
     return false;
   };
 
+  // Check that the builtins for vins[w|d] (__builtin_altivec_vec_replace_elt,
+  // __builtin_altivec_vec_replace_unaligned) have the second scalar argument as
+  // the same type as the element type of the first vector argument.
+  auto SemaVecInsertCheck = [&](CallExpr *TheCall) -> bool {
+    Expr *VectorArg = TheCall->getArg(0);
+    Expr *ValueArg = TheCall->getArg(1);
+    QualType VectorArgTy = VectorArg->getType();
+    auto VectorEltTy = VectorArgTy->castAs<VectorType>()->getElementType();
+    QualType ValueArgTy = ValueArg->getType();
+    if (!Context.hasSameType(VectorEltTy, ValueArgTy))
+      return Diag(TheCall->getBeginLoc(),
+                  diag::err_typecheck_call_different_arg_types)
+        << TheCall->getSourceRange() << VectorEltTy << ValueArgTy
+        << VectorArg->getSourceRange() << ValueArg->getSourceRange();
+    return false;
+  };
+
   switch (BuiltinID) {
   default: return false;
   case PPC::BI__builtin_altivec_crypto_vshasigmaw:
@@ -3222,6 +3239,16 @@
     return SemaVSXCheck(TheCall);
   case PPC::BI__builtin_altivec_vgnb:
      return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    QualType VecInsArgTy = TheCall->getArg(1)->getType();
+    if (Context.getIntWidth(VecInsArgTy) == 32) // Check the range for vinsw.
+      return SemaBuiltinConstantArgRange(TheCall, 2, 0, 12) ||
+        SemaVecInsertCheck(TheCall);
+    else // Check the range for vinsd.
+      return SemaBuiltinConstantArgRange(TheCall, 2, 0, 8) ||
+        SemaVecInsertCheck(TheCall);
+  }
   case PPC::BI__builtin_vsx_xxeval:
      return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
   case PPC::BI__builtin_altivec_vsldbi:
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -17283,6 +17283,14 @@
   return __builtin_vsx_xxblendvd(__a, __b, __c);
 }
 
+/* vec_replace_elt */
+
+#define vec_replace_elt __builtin_altivec_vec_replace_elt
+
+/* vec_replace_unaligned */
+
+#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned
+
 /* vec_splati */
 
 #define vec_splati(__a)                                                        \
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -14250,6 +14250,64 @@
     Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
     return Builder.CreateCall(F, {X, Undef});
   }
+  case PPC::BI__builtin_altivec_vec_replace_elt:
+  case PPC::BI__builtin_altivec_vec_replace_unaligned: {
+    // The third argument of vec_replace_elt and vec_replace_unaligned must
+    // be a compile time constant and will be emitted either to the vinsw
+    // or vinsd instruction.
+    ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
+    assert(ArgCI &&
+           "Third Arg to vinsw/vinsd intrinsic must be a constant integer!");
+    llvm::Type *ResultType = ConvertType(E->getType());
+    llvm::Function *F = nullptr;
+    Value *Call = nullptr;
+    int64_t ConstArg = ArgCI->getSExtValue();
+    unsigned ArgWidth = Ops[1]->getType()->getPrimitiveSizeInBits();
+    assert((ArgWidth == 32 || ArgWidth == 64) && "Invalid argument width");
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt)
+      ConstArg *= ArgWidth / 8;
+    if (ArgWidth == 32) {
+      // When the second argument is 32 bits, it can either be an integer or
+      // a float. The vinsw intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsw);
+      // Fix the constant according to endianess.
+      if (getTarget().isLittleEndian())
+        ConstArg = 12 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a float.
+      if (!Ops[1]->getType()->isIntegerTy(32)) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int32Ty, 4));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int32Ty);
+      }
+    } else {
+      // When the second argument is 64 bits, it can either be a long long or
+      // a double. The vinsd intrinsic is used in this case.
+      F = CGM.getIntrinsic(Intrinsic::ppc_altivec_vinsd);
+      // Fix the constant if we are on little endian.
+      if (getTarget().isLittleEndian())
+        ConstArg = 8 - ConstArg;
+      Ops[2] = ConstantInt::getSigned(Int32Ty, ConstArg);
+      // Perform additional handling if the second argument is a double.
+      if (!Ops[1]->getType()->isIntegerTy(64)) {
+        Ops[0] = Builder.CreateBitCast(Ops[0],
+                                       llvm::FixedVectorType::get(Int64Ty, 2));
+        Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
+      }
+    }
+    // Emit the call to vinsw or vinsd.
+    Call = Builder.CreateCall(F, Ops);
+    // Depending on the builtin, bitcast to the approriate resultant type.
+    if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+        !Ops[1]->getType()->isIntegerTy())
+      return Builder.CreateBitCast(Call, ResultType);
+    else if (BuiltinID == PPC::BI__builtin_altivec_vec_replace_elt &&
+             Ops[1]->getType()->isIntegerTy())
+      return Call;
+    else
+      return Builder.CreateBitCast(Call,
+                                   llvm::FixedVectorType::get(Int8Ty, 16));
+  }
   case PPC::BI__builtin_altivec_vpopcntb:
   case PPC::BI__builtin_altivec_vpopcnth:
   case PPC::BI__builtin_altivec_vpopcntw:
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -342,6 +342,8 @@
 BUILTIN(__builtin_altivec_vinshvrx, "V8UsV8UsUiV8Us", "")
 BUILTIN(__builtin_altivec_vinswvlx, "V4UiV4UiUiV4Ui", "")
 BUILTIN(__builtin_altivec_vinswvrx, "V4UiV4UiUiV4Ui", "")
+BUILTIN(__builtin_altivec_vec_replace_elt, "V4UiV4UiUiIi", "t")
+BUILTIN(__builtin_altivec_vec_replace_unaligned, "V4UiV4UiUiIi", "t")
 
 // P10 Vector Extract built-ins.
 BUILTIN(__builtin_altivec_vextdubvlx, "V2ULLiV16UcV16UcUi", "")

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D83500: [PowerPC][Power10] Implement custom codegen for the vec_replace_elt and vec_replace_unaligned builtins.

Reply via email to