biplmish updated this revision to Diff 274670.
biplmish added a comment.

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D81836/new/

https://reviews.llvm.org/D81836

Files:
  clang/include/clang/Basic/BuiltinsPPC.def
  clang/lib/Headers/altivec.h
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/builtins-ppc-p10vector.c
  llvm/include/llvm/IR/IntrinsicsPowerPC.td
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-permute-ops.ll

Index: llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/PowerPC/p10-permute-ops.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -mcpu=pwr10 \
+; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
+
+define <16 x i8> @testXXPERMX(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: testXXPERMX:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xxpermx v2, v2, v3, v4, 1
+; CHECK-NEXT:    blr
+entry:
+  %0 = tail call <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1)
+  ret <16 x i8> %0
+}
+declare <16 x i8> @llvm.ppc.vsx.xxpermx(<16 x i8>, <16 x i8>, <16 x i8>, i32 immarg)
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -733,6 +733,14 @@
                        IIC_VecGeneral, []>;
 }
 
+let Predicates = [PrefixInstrs] in { // Select XXPERMX for int_ppc_vsx_xxpermx.
+  def : Pat<(v16i8
+             (int_ppc_vsx_xxpermx v16i8:$A, v16i8:$B, v16i8:$C, timm:$D)),
+            (COPY_TO_REGCLASS
+              (XXPERMX (COPY_TO_REGCLASS $A, VSRC), (COPY_TO_REGCLASS $B, VSRC),
+                       (COPY_TO_REGCLASS $C, VSRC), $D), VSRC)>;
+}
+
 let Predicates = [IsISA3_1] in {
   def VSLDBI : VNForm_VTAB5_SD3<22, 0, (outs vrrc:$VRT),
                                 (ins vrrc:$VRA, vrrc:$VRB, u3imm:$SH),
Index: llvm/include/llvm/IR/IntrinsicsPowerPC.td
===================================================================
--- llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -1007,6 +1007,12 @@
 def int_ppc_vsx_xxgenpcvdm :
       PowerPC_VSX_Intrinsic<"xxgenpcvdm", [llvm_v2i64_ty],
                             [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>;
+
+// P10 VSX Vector Permute Extended; operand 3 must be an immediate (ImmArg).
+def int_ppc_vsx_xxpermx : GCCBuiltin<"__builtin_vsx_xxpermx">,
+      Intrinsic<[llvm_v16i8_ty],
+                [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 }
 
 //===----------------------------------------------------------------------===//
Index: clang/test/CodeGen/builtins-ppc-p10vector.c
===================================================================
--- clang/test/CodeGen/builtins-ppc-p10vector.c
+++ clang/test/CodeGen/builtins-ppc-p10vector.c
@@ -5,12 +5,17 @@
 
 #include <altivec.h>
 
-vector signed char vsca;
+vector signed char vsca, vscb;
 vector unsigned char vuca, vucb, vucc;
+vector signed short vssa, vssb;
 vector unsigned short vusa, vusb, vusc;
+vector signed int vsia, vsib;
 vector unsigned int vuia, vuib, vuic;
+vector signed long long vslla, vsllb;
 vector unsigned long long vulla, vullb, vullc;
 vector unsigned __int128 vui128a, vui128b, vui128c;
+vector float vfa, vfb;
+vector double vda, vdb;
 unsigned int uia;
 
 vector unsigned long long test_vpdepd(void) {
@@ -146,3 +151,71 @@
   // CHECK-NEXT: ret <2 x i64>
   return vec_cnttzm(vulla, vullb);
 }
+
+vector signed char test_vec_permx_sc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 0)
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vsca, vscb, vucc, 0);
+}
+
+vector unsigned char test_vec_permx_uc(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 1)
+  // CHECK-NEXT: ret <16 x i8>
+  return vec_permx(vuca, vucb, vucc, 1);
+}
+
+vector signed short test_vec_permx_ss(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 2)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vssa, vssb, vucc, 2);
+}
+
+vector unsigned short test_vec_permx_us(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 3)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <8 x i16>
+  // CHECK-NEXT: ret <8 x i16>
+  return vec_permx(vusa, vusb, vucc, 3);
+}
+
+vector signed int test_vec_permx_si(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 4)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vsia, vsib, vucc, 4);
+}
+
+vector unsigned int test_vec_permx_ui(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 5)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x i32>
+  // CHECK-NEXT: ret <4 x i32>
+  return vec_permx(vuia, vuib, vucc, 5);
+}
+
+vector signed long long test_vec_permx_sll(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 6)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vslla, vsllb, vucc, 6);
+}
+
+vector unsigned long long test_vec_permx_ull(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 7)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x i64>
+  // CHECK-NEXT: ret <2 x i64>
+  return vec_permx(vulla, vullb, vucc, 7);
+}
+
+vector float test_vec_permx_f(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 0)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <4 x float>
+  // CHECK-NEXT: ret <4 x float>
+  return vec_permx(vfa, vfb, vucc, 0);
+}
+
+vector double test_vec_permx_d(void) {
+  // CHECK: @llvm.ppc.vsx.xxpermx(<16 x i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}, i32 1)
+  // CHECK-NEXT: bitcast <16 x i8> %{{.*}} to <2 x double>
+  // CHECK-NEXT: ret <2 x double>
+  return vec_permx(vda, vdb, vucc, 1);
+}
Index: clang/lib/Sema/SemaChecking.cpp
===================================================================
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -3128,6 +3128,8 @@
      return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7);
   case PPC::BI__builtin_vsx_xxeval:
      return SemaBuiltinConstantArgRange(TheCall, 3, 0, 255);
+  case PPC::BI__builtin_vsx_xxpermx:
+     return SemaBuiltinConstantArgRange(TheCall, 3, 0, 7);
   }
   return SemaBuiltinConstantArgRange(TheCall, i, l, u);
 }
Index: clang/lib/Headers/altivec.h
===================================================================
--- clang/lib/Headers/altivec.h
+++ clang/lib/Headers/altivec.h
@@ -16881,6 +16881,42 @@
   return __builtin_altivec_vctzdm(__a, __b);
 }
 
+#ifdef __VSX__
+
+/* vec_permx: __a and __b are cast to the builtin's vector unsigned char. */
+
+#define vec_permx(__a, __b, __c, __d)                                          \
+  _Generic((__a), vector signed char                                           \
+           : (vector signed char)__builtin_vsx_xxpermx(                        \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector unsigned char                                              \
+           : (vector unsigned char)__builtin_vsx_xxpermx(                      \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector signed short                                               \
+           : (vector signed short)__builtin_vsx_xxpermx(                       \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector unsigned short                                             \
+           : (vector unsigned short)__builtin_vsx_xxpermx(                     \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector signed int                                                 \
+           : (vector signed int)__builtin_vsx_xxpermx(                         \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector unsigned int                                               \
+           : (vector unsigned int)__builtin_vsx_xxpermx(                       \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector signed long long                                           \
+           : (vector signed long long)__builtin_vsx_xxpermx(                   \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector unsigned long long                                         \
+           : (vector unsigned long long)__builtin_vsx_xxpermx(                 \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector float                                                      \
+           : (vector float)__builtin_vsx_xxpermx(                              \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)), \
+             vector double                                                     \
+           : (vector double)__builtin_vsx_xxpermx(                             \
+      (vector unsigned char)(__a), (vector unsigned char)(__b), (__c), (__d)))
+#endif /* __VSX__ */
 #endif /* __POWER10_VECTOR__ */
 
 #undef __ATTRS_o_ai
Index: clang/include/clang/Basic/BuiltinsPPC.def
===================================================================
--- clang/include/clang/Basic/BuiltinsPPC.def
+++ clang/include/clang/Basic/BuiltinsPPC.def
@@ -447,6 +447,9 @@
 
 BUILTIN(__builtin_vsx_xxeval, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "")
 
+// P10 Vector Permute Extended built-in.
+BUILTIN(__builtin_vsx_xxpermx, "V16UcV16UcV16UcV16UcIi", "")
+
 // Float 128 built-ins
 BUILTIN(__builtin_sqrtf128_round_to_odd, "LLdLLd", "")
 BUILTIN(__builtin_addf128_round_to_odd, "LLdLLdLLd", "")
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to