https://github.com/ecnelises updated https://github.com/llvm/llvm-project/pull/76495
>From aaa11bc775b9aa3a0398ba2bbca4087e99f04243 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Thu, 28 Dec 2023 16:54:25 +0800 Subject: [PATCH 1/2] [PowerPC] Implement fence builtin --- clang/include/clang/Basic/BuiltinsPPC.def | 3 +++ clang/lib/Basic/Targets/PPC.cpp | 1 + llvm/include/llvm/IR/IntrinsicsPowerPC.td | 5 +++++ llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 7 ++++++- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 ++++ .../CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll | 11 +++++++++++ 6 files changed, 30 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index a35488ed3dfa56..829c60defe17c6 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -944,6 +944,9 @@ TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx") // Set the floating point rounding mode BUILTIN(__builtin_setrnd, "di", "") +// Barrier for instruction motion +BUILTIN(__builtin_ppc_fence, "v", "") + // Get content from current FPSCR BUILTIN(__builtin_readflm, "d", "") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 045c273f03c7a0..41935abfb65d3b 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -212,6 +212,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__darn_32", "__builtin_darn_32"); Builder.defineMacro("__darn_raw", "__builtin_darn_raw"); Builder.defineMacro("__dcbf", "__builtin_dcbf"); + Builder.defineMacro("__fence", "__builtin_ppc_fence"); Builder.defineMacro("__fmadd", "__builtin_fma"); Builder.defineMacro("__fmadds", "__builtin_fmaf"); Builder.defineMacro("__abs", "__builtin_abs"); diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 3ede2a3736bf30..6d1e8eb47405dd 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -29,6 +29,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". [IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; + // Emit pseudo instruction as fence of instruction motion + def int_ppc_fence : ClangBuiltin<"__builtin_ppc_fence">, + DefaultAttrsIntrinsic<[], [], + [IntrNoMerge, IntrHasSideEffects]>; + // Get content from current FPSCR register def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">, DefaultAttrsIntrinsic<[llvm_double_ty], [], diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index aaced58defe603..af55c6cf337120 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2155,11 +2155,16 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { + switch (MI.getOpcode()) { + default: break; // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion // across them, since some FP operations may change content of FPSCR. // TODO: Model FPSCR in PPC instruction definitions and remove the workaround - if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF) + case PPC::MFFS: + case PPC::MTFSF: + case PPC::FENCE: return true; + } return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b1601739fd4569..c0344dfbf3a728 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1328,6 +1328,9 @@ def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM), "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>; } +let isBarrier = 1, hasSideEffects = 1, Defs = [RM] in +def FENCE : PPCEmitTimePseudo<(outs), (ins), "#FENCE", []>; + let Defs = [LR] in def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; @@ -3187,6 +3190,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; +def : Pat<(int_ppc_fence), (FENCE)>; def : Pat<(int_ppc_readflm), (MFFS)>; def : Pat<(int_ppc_mffsl), (MFFSL)>; diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll index 2c9fd2034f887c..555de90c56c364 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll @@ -29,3 +29,14 @@ entry: ret void } declare void @llvm.ppc.iospace.sync() + +define dso_local void @test_builtin_ppc_fence() { +; CHECK-LABEL: test_builtin_ppc_fence: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #FENCE +; CHECK-NEXT: blr +entry: + call void @llvm.ppc.fence() + ret void +} +declare void @llvm.ppc.fence() >From 6f31c61891777cd80f339f9e6278762c054710c8 Mon Sep 17 00:00:00 2001 From: Qiu Chaofan <qiuco...@cn.ibm.com> Date: Thu, 28 Dec 2023 17:26:50 +0800 Subject: [PATCH 2/2] Add test --- .../PowerPC/builtins-ppc-xlcompat-sync.c | 24 ++++++++++ .../PowerPC/builtins-ppc-xlcompat-msync.ll | 11 ----- llvm/test/CodeGen/PowerPC/fence.ll | 45 +++++++++++++++++++ 3 files changed, 69 insertions(+), 11 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/fence.ll diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c index 9187bb855dac22..a5cc97161c56ac 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c @@ -194,6 +194,18 @@ void test_dcbz() { __dcbz(c); } +// CHECK-LABEL: @test_fence( +// CHECK: call void @llvm.ppc.fence() +// CHECK-NEXT: ret void +// +// CHECK-32-LABEL: @test_fence( +// CHECK-32: call void @llvm.ppc.fence() +// CHECK-32-NEXT: ret void +// +void test_fence() { + __fence(); +} + // CHECK-LABEL: @test_builtin_ppc_popcntb( // CHECK: [[TMP0:%.*]] = load i64, ptr @a, align 8 // CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]]) @@ -375,3 +387,15 @@ void test_builtin_ppc_dcbtst() { void test_builtin_ppc_dcbz() { __builtin_ppc_dcbz(c); } + +// CHECK-LABEL: @test_builtin_ppc_fence( +// CHECK: call void @llvm.ppc.fence() +// CHECK-NEXT: ret void +// +// CHECK-32-LABEL: @test_builtin_ppc_fence( +// CHECK-32: call void @llvm.ppc.fence() +// CHECK-32-NEXT: ret void +// +void test_builtin_ppc_fence() { + __builtin_ppc_fence(); +} diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll index 555de90c56c364..2c9fd2034f887c 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-msync.ll @@ -29,14 +29,3 @@ entry: ret void } declare void @llvm.ppc.iospace.sync() - -define dso_local void @test_builtin_ppc_fence() { -; CHECK-LABEL: test_builtin_ppc_fence: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: #FENCE -; CHECK-NEXT: blr -entry: - call void @llvm.ppc.fence() - ret void -} -declare void @llvm.ppc.fence() diff --git a/llvm/test/CodeGen/PowerPC/fence.ll b/llvm/test/CodeGen/PowerPC/fence.ll new file mode 100644 index 00000000000000..da14e8be0d4288 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fence.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \ +; RUN: 2>&1 | FileCheck %s --check-prefix=LOG + +define dso_local void @test_builtin_ppc_fence() { +; CHECK-LABEL: test_builtin_ppc_fence: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #FENCE +; CHECK-NEXT: blr +entry: + call void @llvm.ppc.fence() + ret void +} +declare void @llvm.ppc.fence() + +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +define double @motion(double %a, double %b, double %c, double %d) { +entry: + %0 = fdiv double %a, %b + %1 = fdiv double %b, %d + call void @llvm.ppc.fence() + %2 = fdiv double %c, %d + %3 = fdiv double %a, %c + call void @llvm.ppc.fence() + %4 = fadd double %0, %1 + %5 = fadd double %2, %3 + %6 = fsub double %4, %5 + ret double %6 +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits