Conanap updated this revision to Diff 302886.
Conanap marked 5 inline comments as done.
Conanap added a comment.
Addressed some formatting problems and corrected the incorrect arguments for the
Hi case.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D90173/new/
https://reviews.llvm.org/D90173
Files:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
Index: llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -118,3 +118,25 @@
%vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
ret <4 x i32> %vecins1
}
+
+define dso_local <2 x double> @test_xxsplti32dx_8() {
+; CHECK-LABEL: test_xxsplti32dx_8
+; CHECK-LE: xxlxor vs34, vs34, vs34
+; CHECK-LE: xxsplti32dx vs34, 1, 1082660167
+; CHECK-BE: xxlxor vs34, vs34, vs34
+; CHECK-BE: xxsplti32dx vs34, 0, 1082660167
+; CHECK: blr
+entry:
+ ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1>
+}
+
+define dso_local <8 x i16> @test_xxsplti32dx_9() {
+; CHECK-LABEL: test_xxsplti32dx_9
+; CHECK-LE: xxlxor vs34, vs34, vs34
+; CHECK-LE: xxsplti32dx vs34, 1, 23855277
+; CHECK-BE: xxlxor vs34, vs34, vs34
+; CHECK-BE: xxsplti32dx vs34, 0, 19070977
+; CHECK: blr
+entry:
+ ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
+}
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,32 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN: --check-prefix=CHECK-NOPCREL
+; RUN: --check-prefixes=CHECK-NOPCREL-BE,CHECK-NOPCREL,CHECK-NO-COMMON
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
; RUN: -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN: FileCheck %s --check-prefixes=CHECK-NOPCREL-LE,CHECK-NOPCREL,CHECK-NO-COMMON
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
; RUN: -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN: FileCheck %s --check-prefixes=CHECK-NOPREFIX,CHECK-NO-COMMON
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
; RUN: -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
-; RUN: FileCheck %s
+; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-BE
define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleFail:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: plxv vs34, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1081435463
+; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 1081435463
+; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463
; CHECK-NOPCREL-NEXT: blr
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPREFIX: # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT: blr
+
entry:
ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
}
@@ -34,16 +43,25 @@
define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
; CHECK-LABEL: testFloatDenormToDouble:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: plxv vs34, .LCPI1_0@PCREL(0), 1
+; CHECK-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 940259579
+; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI1_0@toc@l
-; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 940259579
+; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 940259579
; CHECK-NOPCREL-NEXT: blr
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble:
+; CHECK-NOPREFIX: # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT: blr
+
entry:
ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
}
@@ -51,16 +69,25 @@
define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
; CHECK-LABEL: testDoubleToDoubleNaNFail:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: plxv vs34, .LCPI2_0@PCREL(0), 1
+; CHECK-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT: xxsplti32dx vs34, 1, -1
+; CHECK-BE-NEXT: xxsplti32dx vs34, 0, -1
; CHECK-NEXT: blr
;
; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPCREL-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPCREL-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT: xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, -1
+; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, -1
; CHECK-NOPCREL-NEXT: blr
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPREFIX: # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPREFIX-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPREFIX-NEXT: lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT: blr
+
entry:
ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
}
@@ -71,11 +98,11 @@
; CHECK-NEXT: plfd f1, .LCPI3_0@PCREL(0), 1
; CHECK-NEXT: blr
;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT: lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT: blr
+; CHECK-NO-COMMON-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NO-COMMON: # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NO-COMMON-NEXT: lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT: blr
entry:
ret double 3.423300e+02
@@ -87,11 +114,11 @@
; CHECK-NEXT: plfs f1, .LCPI4_0@PCREL(0), 1
; CHECK-NEXT: blr
;
-; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
-; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-NEXT: lfs f1, .LCPI4_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT: blr
+; CHECK-NO-COMMON-LABEL: testFloatDenormScalar:
+; CHECK-NO-COMMON: # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NO-COMMON-NEXT: lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT: blr
entry:
ret float 0x380B38FB80000000
@@ -103,11 +130,11 @@
; CHECK-NEXT: plfs f1, .LCPI5_0@PCREL(0), 1
; CHECK-NEXT: blr
;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL: # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT: addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT: lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT: blr
+; CHECK-NO-COMMON-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NO-COMMON: # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NO-COMMON-NEXT: lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT: blr
entry:
ret double 0x380B38FB80000000
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2512,6 +2512,9 @@
def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
(v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+ def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+ (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
}
let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9178,17 +9178,49 @@
bool BVNIsConstantSplat =
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+ bool LE = Subtarget.isLittleEndian();
// If it is a splat of a double, check if we can shrink it to a 32 bit
// non-denormal float which when converted back to double gives us the same
// double. This is to exploit the XXSPLTIDP instruction.
- if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
- (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
- convertToNonDenormSingle(APSplatBits)) {
- SDValue SplatNode = DAG.getNode(
- PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
- DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
- return DAG.getBitcast(Op.getValueType(), SplatNode);
+ // If we lose precision, we use XXSPLTI32DX.
+ if (BVNIsConstantSplat && (SplatBitSize == 64) &&
+ Subtarget.hasPrefixInstrs()) {
+ if(convertToNonDenormSingle(APSplatBits) &&
+ (Op->getValueType(0) == MVT::v2f64)) {
+ SDValue SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+ DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ } else { // we may lose precision, so we have to use XXSPLTI32DX.
+
+ uint32_t Hi = (uint32_t) ((APSplatBits.getZExtValue() &
+ 0xFFFFFFFF00000000LL) >> 32);
+ uint32_t Lo = (uint32_t) (APSplatBits.getZExtValue() &
+ 0xFFFFFFFF00000000LL);
+ SDValue SplatNode;
+
+ if (!Hi || !Lo)
+ // If either load is 0, then we should generate XXLXOR to set to 0.
+ SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+ if (Lo) {
+ SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, !Hi ? SDLoc(SplatNode) : dl,
+ MVT::v2i64, !Hi ? SplatNode : DAG.getUNDEF(MVT::v2i64),
+ DAG.getTargetConstant(LE ? 0 : 1, dl, MVT::i32),
+ DAG.getTargetConstant(Lo, dl, MVT::i32));
+ }
+ if (Hi) {
+ SplatNode = DAG.getNode(
+ PPCISD::XXSPLTI32DX, SDLoc(SplatNode),
+ MVT::v2i64, SplatNode,
+ DAG.getTargetConstant(LE ? 1 : 0, SplatNode, MVT::i32),
+ DAG.getTargetConstant(Hi, SplatNode, MVT::i32));
+ }
+
+ return DAG.getBitcast(Op.getValueType(), SplatNode);
+ }
}
if (!BVNIsConstantSplat || SplatBitSize > 32) {
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits