Author: Nemanja Ivanovic Date: 2021-10-01T08:27:15-05:00 New Revision: 369d785574f5a22c086d0c40268a39a64bdd7217
URL: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217 DIFF: https://github.com/llvm/llvm-project/commit/369d785574f5a22c086d0c40268a39a64bdd7217.diff LOG: [PowerPC] Optimal sequence for doubleword vec_all_{eq|ne} on Power7 These builtins produce inefficient code for CPUs prior to Power8 due to vcmpequd being unavailable. The predicate forms can actually leverage the available vcmpequw along with xxlxor to produce a better sequence. Added: Modified: clang/lib/Headers/altivec.h clang/test/CodeGen/builtins-ppc-vsx.c Removed: ################################################################################ diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index 6a179d86d71f9..5da4fbf72ce97 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -14815,42 +14815,43 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool int __a, #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed long long __a, vector signed long long __b) { +#ifdef __POWER8_VECTOR__ return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, __b); +#else + // No vcmpequd on Power7 so we xor the two vectors and compare against zero as + // 32-bit elements. 
+ return vec_all_eq((vector signed int)vec_xor(__a, __b), (vector signed int)0); +#endif } static __inline__ int __ATTRS_o_ai vec_all_eq(vector long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, __a, (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a, vector unsigned long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, - (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, - (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, - (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector unsigned long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, - (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_all_eq(vector bool long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT, (vector long long)__a, - (vector long long)__b); + return vec_all_eq((vector signed long long)__a, (vector signed long long)__b); } #endif @@ -17038,43 +17039,43 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool int __a, #ifdef __VSX__ static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a, vector signed long long __b) { +#ifdef 
__POWER8_VECTOR__ return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, __b); +#else + // Take advantage of the optimized sequence for vec_all_eq when vcmpequd is + // not available. + return !vec_all_eq(__a, __b); +#endif } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a, vector unsigned long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, (vector long long)__a, - (vector long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p(__CR6_LT_REV, __a, - (vector signed long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p( - __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector signed long long __b) { - return __builtin_altivec_vcmpequd_p( - __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector unsigned long long __b) { - return __builtin_altivec_vcmpequd_p( - __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long long)__b); } static __inline__ int __ATTRS_o_ai vec_any_ne(vector bool long long __a, vector bool long long __b) { - return __builtin_altivec_vcmpequd_p( - __CR6_LT_REV, (vector signed long long)__a, (vector signed long long)__b); + return vec_any_ne((vector signed long long)__a, (vector signed long 
long)__b); } #endif diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index 0cbcdae504c91..784f3ca2219c9 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -2589,32 +2589,46 @@ void test_p8overloads_backwards_compat() { /* ----------------------- predicates --------------------------- */ /* vec_all_eq */ res_i = vec_all_eq(vsll, vsll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vsll, vbll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vull, vull); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vull, vbll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vbll, vsll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vbll, vull); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p res_i = vec_all_eq(vbll, vbll); - // CHECK: 
@llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p /* vec_all_ne */ res_i = vec_all_ne(vsll, vsll); @@ -2679,32 +2693,60 @@ void test_p8overloads_backwards_compat() { /* vec_any_ne */ res_i = vec_any_ne(vsll, vsll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vsll, vbll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vull, vull); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vull, vbll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vbll, vsll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vbll, vull); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: 
@llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 res_i = vec_any_ne(vbll, vbll); - // CHECK: @llvm.ppc.altivec.vcmpequd.p - // CHECK-LE: @llvm.ppc.altivec.vcmpequd.p + // CHECK: xor <2 x i64> + // CHECK: @llvm.ppc.altivec.vcmpequw.p + // CHECK: xor i1 + // CHECK-LE: xor <2 x i64> + // CHECK-LE: @llvm.ppc.altivec.vcmpequw.p + // CHECK-LE: xor i1 /* vec_all_ge */ res_i = vec_all_ge(vsll, vsll); _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits