[PATCH] D90173: [PowerPC] Exploit splat instruction xxsplti32dx in Power10

Albion Fung via Phabricator via cfe-commits Wed, 04 Nov 2020 10:12:12 -0800

Conanap updated this revision to Diff 302886.
Conanap marked 5 inline comments as done.
Conanap added a comment.


Addressed some formatting problems and corrected the incorrect arguments for 
the Hi case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D90173/new/

https://reviews.llvm.org/D90173

Files:
  llvm/lib/Target/PowerPC/PPCISelLowering.cpp
  llvm/lib/Target/PowerPC/PPCInstrPrefix.td
  llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
  llvm/test/CodeGen/PowerPC/p10-splatImm32.ll

Index: llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm32.ll
@@ -118,3 +118,25 @@
   %vecins1 = shufflevector <4 x i32> <i32 -1414812757, i32 undef, i32 -1414812757, i32 undef>, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x i32> %vecins1
 }
+
+define dso_local <2 x double> @test_xxsplti32dx_8() {
+; CHECK-LABEL: test_xxsplti32dx_8
+; CHECK-LE: xxlxor vs34, vs34, vs34
+; CHECK-LE: xxsplti32dx vs34, 1, 1082660167
+; CHECK-BE: xxlxor vs34, vs34, vs34
+; CHECK-BE: xxsplti32dx vs34, 0, 1082660167
+; CHECK: blr
+entry:
+  ret <2 x double> <double 0x40881547AE147AE1, double 0x40881547AE147AE1>
+}
+
+define dso_local <8 x i16> @test_xxsplti32dx_9() {
+; CHECK-LABEL: test_xxsplti32dx_9
+; CHECK-LE: xxlxor vs34, vs34, vs34
+; CHECK-LE: xxsplti32dx vs34, 1, 23855277
+; CHECK-BE: xxlxor vs34, vs34, vs34
+; CHECK-BE: xxsplti32dx vs34, 0, 19070977
+; CHECK: blr
+entry:
+  ret <8 x i16> <i16 291, i16 undef, i16 undef, i16 364, i16 undef, i16 1, i16 173, i16 undef>
+}
Index: llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
+++ llvm/test/CodeGen/PowerPC/p10-splatImm-CPload-pcrel.ll
@@ -1,32 +1,41 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
-; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s
+; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -mcpu=pwr10 < %s | FileCheck %s \
-; RUN:     --check-prefix=CHECK-NOPCREL
+; RUN:     --check-prefixes=CHECK-NOPCREL-BE,CHECK-NOPCREL,CHECK-NO-COMMON
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-pcrelative-memops -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPCREL-LE,CHECK-NOPCREL,CHECK-NO-COMMON
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -O2 \
 ; RUN:     -mattr=-prefix-instrs -ppc-asm-full-reg-names -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s --check-prefix=CHECK-NOPCREL
+; RUN:     FileCheck %s --check-prefixes=CHECK-NOPREFIX,CHECK-NO-COMMON
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -O2 \
 ; RUN:     -ppc-asm-full-reg-names -target-abi=elfv2 -mcpu=pwr10 < %s | \
-; RUN:     FileCheck %s
+; RUN:     FileCheck %s --check-prefixes=CHECK,CHECK-BE
 
 define dso_local <2 x double> @testDoubleToDoubleFail() local_unnamed_addr {
 ; CHECK-LABEL: testDoubleToDoubleFail:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI0_0@PCREL(0), 1
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 1081435463
+; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 1081435463
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LABEL: testDoubleToDoubleFail:
 ; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI0_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT: xxsplti32dx vs34, 1, 1081435463
+; CHECK-NOPCREL-BE-NEXT: xxsplti32dx vs34, 0, 1081435463
 ; CHECK-NOPCREL-NEXT:    blr
 
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI0_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+
 entry:
   ret <2 x double> <double 3.423300e+02, double 3.423300e+02>
 }
@@ -34,16 +43,25 @@
 define dso_local <2 x double> @testFloatDenormToDouble() local_unnamed_addr {
 ; CHECK-LABEL: testFloatDenormToDouble:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI1_0@PCREL(0), 1
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT: xxsplti32dx vs34, 1, 940259579
+; CHECK-BE-NEXT: xxsplti32dx vs34, 0, 940259579
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LABEL: testFloatDenormToDouble:
 ; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI1_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:       xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, 940259579
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, 940259579
 ; CHECK-NOPCREL-NEXT:    blr
 
+; CHECK-NOPREFIX-LABEL: testFloatDenormToDouble:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI1_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+
 entry:
   ret <2 x double> <double 0x380B38FB80000000, double 0x380B38FB80000000>
 }
@@ -51,16 +69,25 @@
 define dso_local <2 x double> @testDoubleToDoubleNaNFail() local_unnamed_addr {
 ; CHECK-LABEL: testDoubleToDoubleNaNFail:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    plxv vs34, .LCPI2_0@PCREL(0), 1
+; CHECK-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-LE-NEXT:    xxsplti32dx vs34, 1, -1
+; CHECK-BE-NEXT:    xxsplti32dx vs34, 0, -1
 ; CHECK-NEXT:    blr
 ;
 ; CHECK-NOPCREL-LABEL: testDoubleToDoubleNaNFail:
 ; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
-; CHECK-NOPCREL-NEXT:    addi r3, r3, .LCPI2_0@toc@l
-; CHECK-NOPCREL-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPCREL-NEXT:    xxlxor vs34, vs34, vs34
+; CHECK-NOPCREL-LE-NEXT:    xxsplti32dx vs34, 1, -1
+; CHECK-NOPCREL-BE-NEXT:    xxsplti32dx vs34, 0, -1
 ; CHECK-NOPCREL-NEXT:    blr
 
+; CHECK-NOPREFIX-LABEL: testDoubleToDoubleNaNFail:
+; CHECK-NOPREFIX:       # %bb.0: # %entry
+; CHECK-NOPREFIX-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-NOPREFIX-NEXT:    addi r3, r3, .LCPI2_0@toc@l
+; CHECK-NOPREFIX-NEXT:    lxvx vs34, 0, r3
+; CHECK-NOPREFIX-NEXT:    blr
+
 entry:
   ret <2 x double> <double 0xFFFFFFFFFFFFFFF0, double 0xFFFFFFFFFFFFFFF0>
 }
@@ -71,11 +98,11 @@
 ; CHECK-NEXT:    plfd f1, .LCPI3_0@PCREL(0), 1
 ; CHECK-NEXT:    blr
 ;
-; CHECK-NOPCREL-LABEL: testDoubleNonRepresentableScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
+; CHECK-NO-COMMON-LABEL: testDoubleNonRepresentableScalar:
+; CHECK-NO-COMMON:       # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT:    addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-NO-COMMON-NEXT:    lfd f1, .LCPI3_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT:    blr
 
 entry:
   ret double 3.423300e+02
@@ -87,11 +114,11 @@
 ; CHECK-NEXT:    plfs f1, .LCPI4_0@PCREL(0), 1
 ; CHECK-NEXT:    blr
 ;
-; CHECK-NOPCREL-LABEL: testFloatDenormScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
+; CHECK-NO-COMMON-LABEL: testFloatDenormScalar:
+; CHECK-NO-COMMON:       # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-NO-COMMON-NEXT:    lfs f1, .LCPI4_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT:    blr
 
 entry:
   ret float 0x380B38FB80000000
@@ -103,11 +130,11 @@
 ; CHECK-NEXT:    plfs f1, .LCPI5_0@PCREL(0), 1
 ; CHECK-NEXT:    blr
 ;
-; CHECK-NOPCREL-LABEL: testFloatDenormToDoubleScalar:
-; CHECK-NOPCREL:       # %bb.0: # %entry
-; CHECK-NOPCREL-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
-; CHECK-NOPCREL-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
-; CHECK-NOPCREL-NEXT:    blr
+; CHECK-NO-COMMON-LABEL: testFloatDenormToDoubleScalar:
+; CHECK-NO-COMMON:       # %bb.0: # %entry
+; CHECK-NO-COMMON-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-NO-COMMON-NEXT:    lfs f1, .LCPI5_0@toc@l(r3)
+; CHECK-NO-COMMON-NEXT:    blr
 
 entry:
   ret double 0x380B38FB80000000
Index: llvm/lib/Target/PowerPC/PPCInstrPrefix.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -2512,6 +2512,9 @@
 
   def : Pat<(v1i128 (rotl v1i128:$vA, v1i128:$vB)),
             (v1i128 (VRLQ v1i128:$vA, v1i128:$vB))>;
+
+  def : Pat <(v2i64 (PPCxxsplti32dx v2i64:$XT, i32:$XI, i32:$IMM32)),
+             (v2i64 (XXSPLTI32DX v2i64:$XT, i32:$XI, i32:$IMM32))>;
 }
 
 let Predicates = [IsISA3_1, HasVSX] in {
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -9178,17 +9178,49 @@
   bool BVNIsConstantSplat =
       BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, 0, !Subtarget.isLittleEndian());
+  bool LE = Subtarget.isLittleEndian();
 
   // If it is a splat of a double, check if we can shrink it to a 32 bit
   // non-denormal float which when converted back to double gives us the same
   // double. This is to exploit the XXSPLTIDP instruction.
-  if (BVNIsConstantSplat && Subtarget.hasPrefixInstrs() &&
-      (SplatBitSize == 64) && (Op->getValueType(0) == MVT::v2f64) &&
-      convertToNonDenormSingle(APSplatBits)) {
-    SDValue SplatNode = DAG.getNode(
-        PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
-        DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
-    return DAG.getBitcast(Op.getValueType(), SplatNode);
+  // Otherwise, build the 64-bit splat with XXSPLTI32DX instead.
+  if (BVNIsConstantSplat  && (SplatBitSize == 64) &&
+      Subtarget.hasPrefixInstrs()) {
+    if(convertToNonDenormSingle(APSplatBits) &&
+       (Op->getValueType(0) == MVT::v2f64)) {
+      SDValue SplatNode = DAG.getNode(
+          PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
+          DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    } else { // we may lose precision, so we have to use XXSPLTI32DX.
+
+      uint32_t Hi = (uint32_t) ((APSplatBits.getZExtValue() &
+                                 0xFFFFFFFF00000000LL) >> 32);
+      uint32_t Lo = (uint32_t) (APSplatBits.getZExtValue() &
+                                 0xFFFFFFFF00000000LL);
+      SDValue SplatNode;
+
+      if (!Hi || !Lo)
+        // If either 32-bit half is 0, start from a zero vector (XXLXOR).
+        SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
+
+      if (Lo) {
+        SplatNode = DAG.getNode(
+            PPCISD::XXSPLTI32DX, !Hi ? SDLoc(SplatNode) : dl,
+            MVT::v2i64, !Hi ? SplatNode : DAG.getUNDEF(MVT::v2i64),
+            DAG.getTargetConstant(LE ? 0 : 1, dl, MVT::i32),
+            DAG.getTargetConstant(Lo, dl, MVT::i32));
+      }
+      if (Hi) {
+        SplatNode = DAG.getNode(
+            PPCISD::XXSPLTI32DX, SDLoc(SplatNode), 
+            MVT::v2i64, SplatNode,
+            DAG.getTargetConstant(LE ? 1 : 0, SplatNode, MVT::i32),
+            DAG.getTargetConstant(Hi, SplatNode, MVT::i32));
+      }
+
+      return DAG.getBitcast(Op.getValueType(), SplatNode);
+    }
   }
 
   if (!BVNIsConstantSplat || SplatBitSize > 32) {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D90173: [PowerPC] Exploit splat instruction xxsplti32dx in Power10

Reply via email to