Author: Nemanja Ivanovic
Date: 2021-01-25T12:23:33-06:00
New Revision: 1150bfa6bb099f9a85a140f66fde7b7f7aa54e60
URL: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60
DIFF: https://github.com/llvm/llvm-project/commit/1150bfa6bb099f9a85a140f66fde7b7f7aa54e60.diff

LOG: [PowerPC] Add missing negate for VPERMXOR on little endian subtargets

This intrinsic is supposed to have the permute control vector complemented on
little endian systems (as the ABI specifies and GCC implements). With the
current code gen, the result vector is byte-reversed.

Differential revision: https://reviews.llvm.org/D95004

Added:


Modified:
    llvm/lib/Target/PowerPC/PPCInstrAltivec.td
    llvm/lib/Target/PowerPC/PPCInstrVSX.td
    llvm/test/CodeGen/PowerPC/crypto_bifs.ll

Removed:


################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index c029ecb63e72..1a34aa09315b 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -1327,8 +1327,8 @@ def VPMSUMW : VX1_Int_Ty<1160, "vpmsumw", int_ppc_altivec_crypto_vpmsumw,
                          v4i32>;
 def VPMSUMD : VX1_Int_Ty<1224, "vpmsumd", int_ppc_altivec_crypto_vpmsumd,
                          v2i64>;
-def VPERMXOR : VA1a_Int_Ty<45, "vpermxor",
-                           int_ppc_altivec_crypto_vpermxor, v16i8>;
+def VPERMXOR : VAForm_1<45, (outs vrrc:$VD), (ins vrrc:$VA, vrrc:$VB, vrrc:$VC),
+                        "vpermxor $VD, $VA, $VB, $VC", IIC_VecFP, []>;
 
 // Vector doubleword integer pack and unpack.
 let hasSideEffects = 1 in {

diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 2e45d731c953..db6e00c71b89 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2408,6 +2408,8 @@ def MrgWords {
 // arbitrarily chosen to be Big, Little.
 //
 // Predicate combinations available:
+// [HasVSX, IsLittleEndian, HasP8Altivec] Altivec patterns using VSX instr.
+// [HasVSX, IsBigEndian, HasP8Altivec] Altivec patterns using VSX instr.
 // [HasVSX]
 // [HasVSX, IsBigEndian]
 // [HasVSX, IsLittleEndian]
@@ -2436,6 +2438,18 @@ def MrgWords {
 // [HasVSX, IsISA3_0, HasDirectMove, IsBigEndian, IsPPC64]
 // [HasVSX, IsISA3_0, HasDirectMove, IsLittleEndian]
 
+// These Altivec patterns are here because we need a VSX instruction to match
+// the intrinsic (but only for little endian system).
+let Predicates = [HasVSX, IsLittleEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                    v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, (XXLNOR (COPY_TO_REGCLASS $c, VSRC),
+                                             (COPY_TO_REGCLASS $c, VSRC))))>;
+let Predicates = [HasVSX, IsBigEndian, HasP8Altivec] in
+  def : Pat<(v16i8 (int_ppc_altivec_crypto_vpermxor v16i8:$a,
+                                                    v16i8:$b, v16i8:$c)),
+            (v16i8 (VPERMXOR $a, $b, $c))>;
+
 let AddedComplexity = 400 in {
 // Valid for any VSX subtarget, regardless of endianness.
 let Predicates = [HasVSX] in {

diff --git a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
index b34482abfcd0..e38fe90ecc57 100644
--- a/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
+++ b/llvm/test/CodeGen/PowerPC/crypto_bifs.ll
@@ -1,7 +1,11 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr8 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN:   -mcpu=pwr7 -mattr=+crypto < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -mcpu=pwr9 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; FIXME: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s
 ; FIXME: The original intent was to add a check-next for the blr after every check.
 ; However, this currently fails since we don't eliminate stores of the unused
@@ -103,6 +107,7 @@ entry:
   %2 = load <16 x i8>, <16 x i8>* %c, align 16
   %3 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2)
   ret <16 x i8> %3
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -127,6 +132,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <8 x i16>
   ret <8 x i16> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -148,6 +154,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <4 x i32>
   ret <4 x i32> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }
 
@@ -169,6 +176,7 @@ entry:
   %6 = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %1, <16 x i8> %3, <16 x i8> %5)
   %7 = bitcast <16 x i8> %6 to <2 x i64>
   ret <2 x i64> %7
+; CHECK-LE: xxlnor
 ; CHECK: vpermxor 2,
 }


_______________________________________________
llvm-branch-commits mailing list
llvm-branch-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
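Editor's note: a minimal standalone reproducer, written in the style of the
crypto_bifs.ll test above, is sketched below for readers who want to observe
the change in isolation. The function name permxor_demo is made up for
illustration; only the intrinsic name and signature come from the patch. Run
through llc with -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8, the call
should now select an xxlnor (complementing the control vector %c) followed by
vpermxor, while a powerpc64 (big endian) triple should still emit vpermxor
directly.

  declare <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8>, <16 x i8>, <16 x i8>)

  define <16 x i8> @permxor_demo(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
  entry:
    ; Little endian: xxlnor of %c, then vpermxor.  Big endian: vpermxor only.
    %res = call <16 x i8> @llvm.ppc.altivec.crypto.vpermxor(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
    ret <16 x i8> %res
  }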