NeHuang created this revision.
NeHuang added reviewers: nemanjai, stefanp, PowerPC.
NeHuang added a project: LLVM.
Herald added subscribers: shchenz, kbarton, hiraditya.
NeHuang requested review of this revision.

This patch adds a backend optimization to match XL behavior for the two
builtins `__tdw` and `__tw`: when the second input argument is an immediate
that fits in a signed 16-bit field, `tdi`/`twi` instructions are emitted
instead of `td`/`tw`.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D112285

Files:
  llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
  llvm/lib/Target/PowerPC/PPCInstr64Bit.td
  llvm/lib/Target/PowerPC/PPCInstrInfo.td
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll
  llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll

Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap.ll
@@ -127,6 +127,110 @@
   ret void
 }
 
+; tw -> twi
+define dso_local void @test__twi_boundary(i32 %a) {
+; CHECK-LABEL: test__twi_boundary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twi 3, 3, 32767
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 32767, i32 3)
+  ret void
+}
+
+define dso_local void @test__twi_boundary1(i32 %a) {
+; CHECK-LABEL: test__twi_boundary1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twi 3, 3, -32768
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 -32768, i32 3)
+  ret void
+}
+
+define dso_local void @test__tw_boundary2(i32 %a) {
+; CHECK-LABEL: test__tw_boundary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lis 4, 0
+; CHECK-NEXT:    ori 4, 4, 32768
+; CHECK-NEXT:    tw 3, 3, 4
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 32768, i32 3)
+  ret void
+}
+
+define dso_local void @test__tw_boundary3(i32 %a) {
+; CHECK-LABEL: test__tw_boundary3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lis 4, -1
+; CHECK-NEXT:    ori 4, 4, 32767
+; CHECK-NEXT:    tw 3, 3, 4
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 -32769, i32 3)
+  ret void
+}
+
+define dso_local void @test__twlgti(i32 %a) {
+; CHECK-LABEL: test__twlgti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twlgti 3, 0
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 0, i32 1)
+  ret void
+}
+
+define dso_local void @test__twllti(i32 %a) {
+; CHECK-LABEL: test__twllti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twllti 3, 1
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 1, i32 2)
+  ret void
+}
+
+define dso_local void @test__tweqi(i32 %a) {
+; CHECK-LABEL: test__tweqi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tweqi 3, 2
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 2, i32 4)
+  ret void
+}
+
+define dso_local void @test__twgti(i32 %a) {
+; CHECK-LABEL: test__twgti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twgti 3, 16
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 16, i32 8)
+  ret void
+}
+
+define dso_local void @test__twlti(i32 %a) {
+; CHECK-LABEL: test__twlti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twlti 3, 64
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 64, i32 16)
+  ret void
+}
+
+define dso_local void @test__twnei(i32 %a) {
+; CHECK-LABEL: test__twnei:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twnei 3, 256
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 256, i32 24)
+  ret void
+}
+
+define dso_local void @test__twui(i32 %a) {
+; CHECK-LABEL: test__twui:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    twui 3, 512
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tw(i32 %a, i32 512, i32 31)
+  ret void
+}
+
 ; trap
 declare void @llvm.ppc.trap(i32 %a)
 define dso_local void @test__trap(i32 %a) {
Index: llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll
+++ llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-trap-64bit-only.ll
@@ -124,6 +124,110 @@
   ret void
 }
 
+; tdw -> tdi
+define dso_local void @test__tdi_boundary(i64 %a) {
+; CHECK-LABEL: test__tdi_boundary:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdi 3, 3, 32767
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 32767, i32 3)
+  ret void
+}
+
+define dso_local void @test__tdi_boundary1(i64 %a) {
+; CHECK-LABEL: test__tdi_boundary1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdi 3, 3, -32768
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 -32768, i32 3)
+  ret void
+}
+
+define dso_local void @test__td_boundary2(i64 %a) {
+; CHECK-LABEL: test__td_boundary2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li 4, 0
+; CHECK-NEXT:    ori 4, 4, 32768
+; CHECK-NEXT:    td 3, 3, 4
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 32768, i32 3)
+  ret void
+}
+
+define dso_local void @test__td_boundary3(i64 %a) {
+; CHECK-LABEL: test__td_boundary3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lis 4, -1
+; CHECK-NEXT:    ori 4, 4, 32767
+; CHECK-NEXT:    td 3, 3, 4
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 -32769, i32 3)
+  ret void
+}
+
+define dso_local void @test__tdlgti(i64 %a) {
+; CHECK-LABEL: test__tdlgti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdlgti 3, 0
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 0, i32 1)
+  ret void
+}
+
+define dso_local void @test__tdllti(i64 %a) {
+; CHECK-LABEL: test__tdllti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdllti 3, 1
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 1, i32 2)
+  ret void
+}
+
+define dso_local void @test__tdeqi(i64 %a) {
+; CHECK-LABEL: test__tdeqi:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdeqi 3, 2
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 2, i32 4)
+  ret void
+}
+
+define dso_local void @test__tdgti(i64 %a) {
+; CHECK-LABEL: test__tdgti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdgti 3, 16
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 16, i32 8)
+  ret void
+}
+
+define dso_local void @test__tdlti(i64 %a) {
+; CHECK-LABEL: test__tdlti:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdlti 3, 64
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 64, i32 16)
+  ret void
+}
+
+define dso_local void @test__tdnei(i64 %a) {
+; CHECK-LABEL: test__tdnei:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdnei 3, 256
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 256, i32 24)
+  ret void
+}
+
+define dso_local void @test__tdui(i64 %a) {
+; CHECK-LABEL: test__tdui:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    tdui 3, 512
+; CHECK-NEXT:    blr
+  call void @llvm.ppc.tdw(i64 %a, i64 512, i32 31)
+  ret void
+}
+
 ; trapd
 declare void @llvm.ppc.trapd(i64 %a)
 define dso_local void @test__trapd(i64 %a) {
Index: llvm/lib/Target/PowerPC/PPCInstrInfo.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -5477,8 +5477,6 @@
           (STWCX gprc:$A, ForceXForm:$dst)>;
 def : Pat<(int_ppc_stbcx ForceXForm:$dst, gprc:$A),
           (STBCX gprc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tw gprc:$A, gprc:$B, i32:$IMM),
-          (TW $IMM, $A, $B)>;
 def : Pat<(int_ppc_trap gprc:$A),
           (TWI 24, $A, 0)>;
 
Index: llvm/lib/Target/PowerPC/PPCInstr64Bit.td
===================================================================
--- llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -1877,8 +1877,6 @@
 
 def : Pat<(int_ppc_stdcx ForceXForm:$dst, g8rc:$A),
           (STDCX g8rc:$A, ForceXForm:$dst)>;
-def : Pat<(int_ppc_tdw g8rc:$A, g8rc:$B, i32:$IMM),
-          (TD $IMM, $A, $B)>;
 
 // trapd
 def : Pat<(int_ppc_trapd g8rc:$A),
Index: llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -4997,6 +4997,30 @@
     }
     break;
 
+  case ISD::INTRINSIC_VOID: {
+    if (N->getConstantOperandVal(1) == Intrinsic::ppc_tdw ||
+        N->getConstantOperandVal(1) == Intrinsic::ppc_tw) {
+      int16_t SImm;
+      if (isIntS16Immediate(N->getOperand(3), SImm)) {
+        SDValue Ops[] = {N->getOperand(4), N->getOperand(2),
+                         getI32Imm(int(SImm) & 0xFFFF, dl)};
+        CurDAG->SelectNodeTo(N,
+                             N->getConstantOperandVal(1) == Intrinsic::ppc_tdw
+                                 ? PPC::TDI
+                                 : PPC::TWI,
+                             MVT::Other, Ops);
+      } else {
+        SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
+        CurDAG->SelectNodeTo(N,
+                             N->getConstantOperandVal(1) == Intrinsic::ppc_tdw
+                                 ? PPC::TD
+                                 : PPC::TW,
+                             MVT::Other, Ops);
+      }
+      return;
+    }
+    break;
+  }
   case ISD::INTRINSIC_WO_CHAIN: {
     // We emit the PPC::FSELS instruction here because of type conflicts with
     // the comparison operand. The FSELS instruction is defined to use an 8-byte
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to