llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-clang Author: Qiu Chaofan (ecnelises) <details> <summary>Changes</summary> This builtin will work as a barrier for instruction motion (scheduling, etc.) --- Full diff: https://github.com/llvm/llvm-project/pull/76495.diff 7 Files Affected: - (modified) clang/include/clang/Basic/BuiltinsPPC.def (+3) - (modified) clang/lib/Basic/Targets/PPC.cpp (+1) - (modified) clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c (+24) - (modified) llvm/include/llvm/IR/IntrinsicsPowerPC.td (+5) - (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.cpp (+6-1) - (modified) llvm/lib/Target/PowerPC/PPCInstrInfo.td (+4) - (added) llvm/test/CodeGen/PowerPC/fence.ll (+45) ``````````diff diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index a35488ed3dfa56..829c60defe17c6 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -944,6 +944,9 @@ TARGET_BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "", "vsx") // Set the floating point rounding mode BUILTIN(__builtin_setrnd, "di", "") +// Barrier for instruction motion +BUILTIN(__builtin_ppc_fence, "v", "") + // Get content from current FPSCR BUILTIN(__builtin_readflm, "d", "") diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 045c273f03c7a0..41935abfb65d3b 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -212,6 +212,7 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__darn_32", "__builtin_darn_32"); Builder.defineMacro("__darn_raw", "__builtin_darn_raw"); Builder.defineMacro("__dcbf", "__builtin_dcbf"); + Builder.defineMacro("__fence", "__builtin_ppc_fence"); Builder.defineMacro("__fmadd", "__builtin_fma"); Builder.defineMacro("__fmadds", "__builtin_fmaf"); Builder.defineMacro("__abs", "__builtin_abs"); diff --git 
a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c index 9187bb855dac22..a5cc97161c56ac 100644 --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-sync.c @@ -194,6 +194,18 @@ void test_dcbz() { __dcbz(c); } +// CHECK-LABEL: @test_fence( +// CHECK: call void @llvm.ppc.fence() +// CHECK-NEXT: ret void +// +// CHECK-32-LABEL: @test_fence( +// CHECK-32: call void @llvm.ppc.fence() +// CHECK-32-NEXT: ret void +// +void test_fence() { + __fence(); +} + // CHECK-LABEL: @test_builtin_ppc_popcntb( // CHECK: [[TMP0:%.*]] = load i64, ptr @a, align 8 // CHECK-NEXT: [[POPCNTB:%.*]] = call i64 @llvm.ppc.popcntb.i64.i64(i64 [[TMP0]]) @@ -375,3 +387,15 @@ void test_builtin_ppc_dcbtst() { void test_builtin_ppc_dcbz() { __builtin_ppc_dcbz(c); } + +// CHECK-LABEL: @test_builtin_ppc_fence( +// CHECK: call void @llvm.ppc.fence() +// CHECK-NEXT: ret void +// +// CHECK-32-LABEL: @test_builtin_ppc_fence( +// CHECK-32: call void @llvm.ppc.fence() +// CHECK-32-NEXT: ret void +// +void test_builtin_ppc_fence() { + __builtin_ppc_fence(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 3ede2a3736bf30..6d1e8eb47405dd 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -29,6 +29,11 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". 
[IntrArgMemOnly, NoCapture<ArgIndex<0>>, ImmArg<ArgIndex<1>>]>; def int_ppc_dcbzl : Intrinsic<[], [llvm_ptr_ty], []>; + // Emit pseudo instruction as fence of instruction motion + def int_ppc_fence : ClangBuiltin<"__builtin_ppc_fence">, + DefaultAttrsIntrinsic<[], [], + [IntrNoMerge, IntrHasSideEffects]>; + // Get content from current FPSCR register def int_ppc_readflm : ClangBuiltin<"__builtin_readflm">, DefaultAttrsIntrinsic<[llvm_double_ty], [], diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index aaced58defe603..af55c6cf337120 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2155,11 +2155,16 @@ bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const { bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const { + switch (MI.getOpcode()) { + default: break; // Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion // across them, since some FP operations may change content of FPSCR. 
// TODO: Model FPSCR in PPC instruction definitions and remove the workaround - if (MI.getOpcode() == PPC::MFFS || MI.getOpcode() == PPC::MTFSF) + case PPC::MFFS: + case PPC::MTFSF: + case PPC::FENCE: return true; + } return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index b1601739fd4569..c0344dfbf3a728 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1328,6 +1328,9 @@ def SETFLM : PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FLM), "#SETFLM", [(set f64:$FRT, (int_ppc_setflm f8rc:$FLM))]>; } +let isBarrier = 1, hasSideEffects = 1, Defs = [RM] in +def FENCE : PPCEmitTimePseudo<(outs), (ins), "#FENCE", []>; + let Defs = [LR] in def MovePCtoLR : PPCEmitTimePseudo<(outs), (ins), "#MovePCtoLR", []>, PPC970_Unit_BRU; @@ -3187,6 +3190,7 @@ def : Pat<(PPCtc_return (i32 texternalsym:$dst), imm:$imm), def : Pat<(PPCtc_return CTRRC:$dst, imm:$imm), (TCRETURNri CTRRC:$dst, imm:$imm)>; +def : Pat<(int_ppc_fence), (FENCE)>; def : Pat<(int_ppc_readflm), (MFFS)>; def : Pat<(int_ppc_mffsl), (MFFSL)>; diff --git a/llvm/test/CodeGen/PowerPC/fence.ll b/llvm/test/CodeGen/PowerPC/fence.ll new file mode 100644 index 00000000000000..da14e8be0d4288 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fence.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix \ +; RUN: -mcpu=pwr7 < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux \ +; RUN: -mcpu=pwr8 < %s | FileCheck %s +; RUN: llc < %s -mtriple powerpc64le-unknown-linux -debug-only=machine-scheduler \ +; RUN: 2>&1 | FileCheck %s --check-prefix=LOG + +define dso_local void @test_builtin_ppc_fence() { +; CHECK-LABEL: test_builtin_ppc_fence: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #FENCE +; CHECK-NEXT: blr +entry: + call void 
@llvm.ppc.fence() + ret void +} +declare void @llvm.ppc.fence() + +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +; LOG: ***** MI Scheduling ***** +; LOG-NEXT: motion:%bb.0 entry +; LOG: ExitSU: FENCE implicit-def dead $rm +define double @motion(double %a, double %b, double %c, double %d) { +entry: + %0 = fdiv double %a, %b + %1 = fdiv double %b, %d + call void @llvm.ppc.fence() + %2 = fdiv double %c, %d + %3 = fdiv double %a, %c + call void @llvm.ppc.fence() + %4 = fadd double %0, %1 + %5 = fadd double %2, %3 + %6 = fsub double %4, %5 + ret double %6 +} `````````` </details> https://github.com/llvm/llvm-project/pull/76495 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits