llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Sumanth Gundapaneni (sgundapa) <details> <summary>Changes</summary> Inspect a basic block and, if it is a single-basic-block loop with a small number of instructions, set the loop alignment to 32 bytes. This avoids a cache-line break in the first packet of the loop, which would otherwise cause a stall on every execution of the loop. --- Patch is 34.23 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/83379.diff 9 Files Affected: - (modified) clang/test/CodeGen/builtins-hexagon.c (+1-1) - (modified) llvm/lib/Target/Hexagon/CMakeLists.txt (+1) - (added) llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp (+216) - (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp (+8-1) - (modified) llvm/lib/Target/Hexagon/HexagonTargetMachine.h (+1) - (added) llvm/test/CodeGen/Hexagon/loop-balign.ll (+91) - (added) llvm/test/CodeGen/Hexagon/loop_align_count.ll (+115) - (added) llvm/test/CodeGen/Hexagon/loop_align_count.mir (+130) - (added) llvm/test/CodeGen/Hexagon/v6-haar-balign32.ll (+117) ``````````diff diff --git a/clang/test/CodeGen/builtins-hexagon.c b/clang/test/CodeGen/builtins-hexagon.c index 9a1b733da5cdb8..52073f27ae70f5 100644 --- a/clang/test/CodeGen/builtins-hexagon.c +++ b/clang/test/CodeGen/builtins-hexagon.c @@ -1,5 +1,5 @@ // REQUIRES: hexagon-registered-target -// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple hexagon-unknown-elf -target-cpu hexagonv65 -target-feature +hvxv65 -target-feature +hvx-length128b -emit-llvm %s -o - | FileCheck %s void test() { int v64 __attribute__((__vector_size__(64))); diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index a22a5c11e6ab3a..cdc062eee72b1e 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -43,6 +43,7 @@ add_llvm_target(HexagonCodeGen 
HexagonISelDAGToDAGHVX.cpp HexagonISelLowering.cpp HexagonISelLoweringHVX.cpp + HexagonLoopAlign.cpp HexagonLoopIdiomRecognition.cpp HexagonMachineFunctionInfo.cpp HexagonMachineScheduler.cpp diff --git a/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp new file mode 100644 index 00000000000000..c79b528ff2f3f9 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp @@ -0,0 +1,216 @@ +//===----- HexagonLoopAlign.cpp - Generate loop alignment directives -----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Inspect a basic block and if its single basic block loop with a small +// number of instructions, set the prefLoopAlignment to 32 bytes (5). +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "hexagon-loop-align" + +#include "HexagonTargetMachine.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/SchedulerRegistry.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +static cl::opt<bool> + DisableLoopAlign("disable-hexagon-loop-align", cl::Hidden, + cl::desc("Disable Hexagon loop alignment pass")); + +static cl::opt<uint32_t> HVXLoopAlignLimitUB( + "hexagon-hvx-loop-align-limit-ub", cl::Hidden, cl::init(16), + cl::desc("Set hexagon hvx loop upper bound align limit")); + +static cl::opt<uint32_t> TinyLoopAlignLimitUB( + "hexagon-tiny-loop-align-limit-ub", cl::Hidden, cl::init(16), + cl::desc("Set hexagon tiny-core loop upper bound align limit")); + +static cl::opt<uint32_t> + LoopAlignLimitUB("hexagon-loop-align-limit-ub", cl::Hidden, cl::init(8), + cl::desc("Set hexagon loop upper bound align limit")); + +static 
cl::opt<uint32_t> + LoopAlignLimitLB("hexagon-loop-align-limit-lb", cl::Hidden, cl::init(4), + cl::desc("Set hexagon loop lower bound align limit")); + +static cl::opt<uint32_t> + LoopBndlAlignLimit("hexagon-loop-bundle-align-limit", cl::Hidden, + cl::init(4), + cl::desc("Set hexagon loop align bundle limit")); + +static cl::opt<uint32_t> TinyLoopBndlAlignLimit( + "hexagon-tiny-loop-bundle-align-limit", cl::Hidden, cl::init(8), + cl::desc("Set hexagon tiny-core loop align bundle limit")); + +static cl::opt<uint32_t> + LoopEdgeThreshold("hexagon-loop-edge-threshold", cl::Hidden, cl::init(7500), + cl::desc("Set hexagon loop align edge theshold")); + +namespace llvm { +FunctionPass *createHexagonLoopAlign(); +void initializeHexagonLoopAlignPass(PassRegistry &); +} // namespace llvm + +namespace { + +class HexagonLoopAlign : public MachineFunctionPass { + const HexagonSubtarget *HST = nullptr; + const TargetMachine *HTM = nullptr; + const HexagonInstrInfo *HII = nullptr; + +public: + static char ID; + HexagonLoopAlign() : MachineFunctionPass(ID) { + initializeHexagonLoopAlignPass(*PassRegistry::getPassRegistry()); + } + bool shouldBalignLoop(MachineBasicBlock &BB, bool AboveThres); + bool isSingleLoop(MachineBasicBlock &MBB); + bool attemptToBalignSmallLoop(MachineFunction &MF, MachineBasicBlock &MBB); + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<MachineBranchProbabilityInfo>(); + AU.addRequired<MachineBlockFrequencyInfo>(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return "Hexagon LoopAlign pass"; } + bool runOnMachineFunction(MachineFunction &MF) override; +}; + +char HexagonLoopAlign::ID = 0; + +bool HexagonLoopAlign::shouldBalignLoop(MachineBasicBlock &BB, + bool AboveThres) { + bool isVec = false; + unsigned InstCnt = 0; + unsigned BndlCnt = 0; + + for (MachineBasicBlock::instr_iterator II = BB.instr_begin(), + IE = BB.instr_end(); + II != IE; ++II) { + + // End if the 
instruction is endloop. + if (HII->isEndLoopN(II->getOpcode())) + break; + // Count the number of bundles. + if (II->isBundle()) { + BndlCnt++; + continue; + } + // Skip over debug instructions. + if (II->isDebugInstr()) + continue; + // Check if there are any HVX instructions in loop. + isVec |= HII->isHVXVec(*II); + // Count the number of instructions. + InstCnt++; + } + + LLVM_DEBUG({ + dbgs() << "Bundle Count : " << BndlCnt << "\n"; + dbgs() << "Instruction Count : " << InstCnt << "\n"; + }); + + unsigned LimitUB = 0; + unsigned LimitBndl = LoopBndlAlignLimit; + // The conditions in the order of priority. + if (HST->isTinyCore()) { + LimitUB = TinyLoopAlignLimitUB; + LimitBndl = TinyLoopBndlAlignLimit; + } else if (isVec) + LimitUB = HVXLoopAlignLimitUB; + else if (AboveThres) + LimitUB = LoopAlignLimitUB; + + // if the upper bound is not set to a value, implies we didn't meet + // the criteria. + if (LimitUB == 0) + return false; + + return InstCnt >= LoopAlignLimitLB && InstCnt <= LimitUB && + BndlCnt <= LimitBndl; +} + +bool HexagonLoopAlign::isSingleLoop(MachineBasicBlock &MBB) { + int Succs = MBB.succ_size(); + return (MBB.isSuccessor(&MBB) && (Succs == 2)); +} + +bool HexagonLoopAlign::attemptToBalignSmallLoop(MachineFunction &MF, + MachineBasicBlock &MBB) { + if (!isSingleLoop(MBB)) + return false; + + const MachineBranchProbabilityInfo *MBPI = + &getAnalysis<MachineBranchProbabilityInfo>(); + const MachineBlockFrequencyInfo *MBFI = + &getAnalysis<MachineBlockFrequencyInfo>(); + + // Compute frequency of back edge, + BlockFrequency BlockFreq = MBFI->getBlockFreq(&MBB); + BranchProbability BrProb = MBPI->getEdgeProbability(&MBB, &MBB); + BlockFrequency EdgeFreq = BlockFreq * BrProb; + LLVM_DEBUG({ + dbgs() << "Loop Align Pass:\n"; + dbgs() << "\tedge with freq(" << EdgeFreq.getFrequency() << ")\n"; + }); + + bool AboveThres = EdgeFreq.getFrequency() > LoopEdgeThreshold; + if (shouldBalignLoop(MBB, AboveThres)) { + // We found a loop, change its alignment 
to be 32 (5). + MBB.setAlignment(llvm::Align(1 << 5)); + return true; + } + return false; +} + +// Inspect each basic block, and if its a single BB loop, see if it +// meets the criteria for increasing alignment to 32. + +bool HexagonLoopAlign::runOnMachineFunction(MachineFunction &MF) { + + HST = &MF.getSubtarget<HexagonSubtarget>(); + HII = HST->getInstrInfo(); + HTM = &MF.getTarget(); + + if (skipFunction(MF.getFunction())) + return false; + if (DisableLoopAlign) + return false; + + // This optimization is performed at + // i) -O2 and above, and when the loop has a HVX instruction. + // ii) -O3 + if (HST->useHVXOps()) { + if (HTM->getOptLevel() < CodeGenOptLevel::Default) + return false; + } else { + if (HTM->getOptLevel() < CodeGenOptLevel::Aggressive) + return false; + } + + bool Changed = false; + for (MachineFunction::iterator MBBi = MF.begin(), MBBe = MF.end(); + MBBi != MBBe; ++MBBi) { + MachineBasicBlock &MBB = *MBBi; + Changed |= attemptToBalignSmallLoop(MF, MBB); + } + return Changed; +} + +} // namespace + +INITIALIZE_PASS(HexagonLoopAlign, "hexagon-loop-align", + "Hexagon LoopAlign pass", false, false) + +//===----------------------------------------------------------------------===// +// Public Constructor Functions +//===----------------------------------------------------------------------===// + +FunctionPass *llvm::createHexagonLoopAlign() { return new HexagonLoopAlign(); } diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 7d77286339399d..3c346c334d6d30 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -164,6 +164,7 @@ namespace llvm { void initializeHexagonGenMuxPass(PassRegistry&); void initializeHexagonHardwareLoopsPass(PassRegistry&); void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); + void initializeHexagonLoopAlignPass(PassRegistry &); void 
initializeHexagonNewValueJumpPass(PassRegistry&); void initializeHexagonOptAddrModePass(PassRegistry&); void initializeHexagonPacketizerPass(PassRegistry&); @@ -194,6 +195,7 @@ namespace llvm { FunctionPass *createHexagonHardwareLoops(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOptLevel OptLevel); + FunctionPass *createHexagonLoopAlign(); FunctionPass *createHexagonLoopRescheduling(); FunctionPass *createHexagonNewValueJump(); FunctionPass *createHexagonOptAddrMode(); @@ -256,8 +258,10 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT, TT, CPU, FS, Options, getEffectiveRelocModel(RM), getEffectiveCodeModel(CM, CodeModel::Small), (HexagonNoOpt ? CodeGenOptLevel::None : OL)), - TLOF(std::make_unique<HexagonTargetObjectFile>()) { + TLOF(std::make_unique<HexagonTargetObjectFile>()), + Subtarget(Triple(TT), CPU, FS, *this) { initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry()); + initializeHexagonLoopAlignPass(*PassRegistry::getPassRegistry()); initializeHexagonTfrCleanupPass(*PassRegistry::getPassRegistry()); initAsmInfo(); } @@ -476,6 +480,9 @@ void HexagonPassConfig::addPreEmitPass() { // Packetization is mandatory: it handles gather/scatter at all opt levels. 
addPass(createHexagonPacketizer(NoOpt)); + if (!NoOpt) + addPass(createHexagonLoopAlign()); + if (EnableVectorPrint) addPass(createHexagonVectorPrint()); diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h index c5fed0cd65a814..34ff45b6acf345 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.h +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.h @@ -23,6 +23,7 @@ namespace llvm { class HexagonTargetMachine : public LLVMTargetMachine { std::unique_ptr<TargetLoweringObjectFile> TLOF; + HexagonSubtarget Subtarget; mutable StringMap<std::unique_ptr<HexagonSubtarget>> SubtargetMap; public: diff --git a/llvm/test/CodeGen/Hexagon/loop-balign.ll b/llvm/test/CodeGen/Hexagon/loop-balign.ll new file mode 100644 index 00000000000000..9d1f42a4b14b18 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/loop-balign.ll @@ -0,0 +1,91 @@ +; RUN: llc -march=hexagon -O3 < %s | FileCheck %s -check-prefix=BALIGN +; BALIGN: .p2align{{.*}}5 + +; The test for checking the alignment of 'for.body4.for.body4_crit_edge' basic block + +define dso_local void @foo(i32 %nCol, i32 %nRow, ptr nocapture %resMat) local_unnamed_addr { +entry: + %shl = shl i32 %nRow, 2 + %cmp36 = icmp sgt i32 %nRow, 0 + %0 = add i32 %nCol, -1 + %.inv = icmp slt i32 %0, 1 + %1 = select i1 %.inv, i32 1, i32 %nCol + br label %Outerloop + +Outerloop: ; preds = %for.end7, %entry + %r12.0 = phi i32 [ 0, %entry ], [ %inc8, %for.end7 ] + %r7_6.0 = phi i64 [ undef, %entry ], [ %r7_6.1.lcssa, %for.end7 ] + %r0i.0 = phi i32 [ undef, %entry ], [ %r0i.1.lcssa, %for.end7 ] + %r5.0 = phi ptr [ %resMat, %entry ], [ %r5.1.lcssa, %for.end7 ] + %r8.0 = phi i32 [ %shl, %entry ], [ %r8.1.lcssa, %for.end7 ] + br i1 %cmp36, label %for.body.lr.ph, label %for.end7 + +for.body.lr.ph: ; preds = %Outerloop + %cmp332 = icmp eq i32 %r12.0, 0 + %exitcond.peel = icmp eq i32 %r12.0, 1 + br label %for.body + +for.body: ; preds = %for.end, %for.body.lr.ph + %r8.141 = phi i32 [ %r8.0, 
%for.body.lr.ph ], [ %add, %for.end ] + %r5.140 = phi ptr [ %r5.0, %for.body.lr.ph ], [ %add.ptr, %for.end ] + %i.039 = phi i32 [ 0, %for.body.lr.ph ], [ %inc6, %for.end ] + %r0i.138 = phi i32 [ %r0i.0, %for.body.lr.ph ], [ %4, %for.end ] + %r7_6.137 = phi i64 [ %r7_6.0, %for.body.lr.ph ], [ %r7_6.2.lcssa, %for.end ] + %add = add nsw i32 %r8.141, %shl + br i1 %cmp332, label %for.end, label %for.body4.peel + +for.body4.peel: ; preds = %for.body + %r1i.0.in.peel = inttoptr i32 %r8.141 to ptr + %r1i.0.peel = load i32, ptr %r1i.0.in.peel, align 4 + %2 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.137, i32 %r1i.0.peel, i32 %r0i.138) + br i1 %exitcond.peel, label %for.end, label %for.body4.preheader.peel.newph + +for.body4.preheader.peel.newph: ; preds = %for.body4.peel + %r1i.0.in = inttoptr i32 %add to ptr + %r1i.0 = load i32, ptr %r1i.0.in, align 4 + br label %for.body4 + +for.body4: ; preds = %for.body4.for.body4_crit_edge, %for.body4.preheader.peel.newph + %inc.phi = phi i32 [ %inc.0, %for.body4.for.body4_crit_edge ], [ 2, %for.body4.preheader.peel.newph ] + %r7_6.233 = phi i64 [ %3, %for.body4.for.body4_crit_edge ], [ %2, %for.body4.preheader.peel.newph ] + %3 = tail call i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64 %r7_6.233, i32 %r1i.0, i32 %r0i.138) + %exitcond = icmp eq i32 %inc.phi, %r12.0 + br i1 %exitcond, label %for.end.loopexit, label %for.body4.for.body4_crit_edge + +for.body4.for.body4_crit_edge: ; preds = %for.body4 + %inc.0 = add nuw nsw i32 %inc.phi, 1 + br label %for.body4 + +for.end.loopexit: ; preds = %for.body4 + br label %for.end + +for.end: ; preds = %for.end.loopexit, %for.body4.peel, %for.body + %r7_6.2.lcssa = phi i64 [ %r7_6.137, %for.body ], [ %2, %for.body4.peel ], [ %3, %for.end.loopexit ] + %4 = tail call i32 @llvm.hexagon.S2.clbp(i64 %r7_6.2.lcssa) + store i32 %4, ptr %r5.140, align 4 + %add.ptr = getelementptr inbounds i8, ptr %r5.140, i32 undef + %inc6 = add nuw nsw i32 %i.039, 1 + %exitcond47 = icmp eq i32 %inc6, %nRow + br i1 
%exitcond47, label %for.end7.loopexit, label %for.body + +for.end7.loopexit: ; preds = %for.end + br label %for.end7 + +for.end7: ; preds = %for.end7.loopexit, %Outerloop + %r7_6.1.lcssa = phi i64 [ %r7_6.0, %Outerloop ], [ %r7_6.2.lcssa, %for.end7.loopexit ] + %r0i.1.lcssa = phi i32 [ %r0i.0, %Outerloop ], [ %4, %for.end7.loopexit ] + %r5.1.lcssa = phi ptr [ %r5.0, %Outerloop ], [ %add.ptr, %for.end7.loopexit ] + %r8.1.lcssa = phi i32 [ %r8.0, %Outerloop ], [ %add, %for.end7.loopexit ] + %inc8 = add nuw i32 %r12.0, 1 + %exitcond48 = icmp eq i32 %inc8, %1 + br i1 %exitcond48, label %if.end, label %Outerloop + +if.end: ; preds = %for.end7 + ret void +} + +; Function Attrs: nounwind readnone +declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32) + +; Function Attrs: nounwind readnone +declare i32 @llvm.hexagon.S2.clbp(i64) diff --git a/llvm/test/CodeGen/Hexagon/loop_align_count.ll b/llvm/test/CodeGen/Hexagon/loop_align_count.ll new file mode 100644 index 00000000000000..07d7e4a8d61176 --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/loop_align_count.ll @@ -0,0 +1,115 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv73 -O2 -mattr=+hvxv73,hvx-length64b \ +; RUN: -debug-only=hexagon-loop-align 2>&1 < %s | FileCheck %s +; Validate that there are 4 bundles in the loop. 
+ +; CHECK: Loop Align Pass: +; CHECK: Bundle Count : 4 +; CHECK: .p2align{{.*}}5 + +; Function Attrs: nounwind +define void @ham(ptr noalias nocapture readonly %arg, i32 %arg1, i32 %arg2, i32 %arg3, ptr noalias nocapture %arg4, i32 %arg5) #0 { +bb: + %ashr = ashr i32 %arg3, 2 + %ashr6 = ashr i32 %arg3, 1 + %add = add nsw i32 %ashr6, %ashr + %icmp = icmp sgt i32 %arg2, 0 + br i1 %icmp, label %bb7, label %bb61 + +bb7: ; preds = %bb + %sdiv = sdiv i32 %arg1, 64 + %icmp8 = icmp sgt i32 %arg1, 63 + br label %bb9 + +bb9: ; preds = %bb57, %bb7 + %phi = phi i32 [ 0, %bb7 ], [ %add58, %bb57 ] + %ashr10 = ashr exact i32 %phi, 1 + %mul = mul nsw i32 %ashr10, %arg3 + br i1 %icmp8, label %bb11, label %bb57 + +bb11: ; preds = %bb9 + %add12 = add nsw i32 %phi, 1 + %mul13 = mul nsw i32 %add12, %arg5 + %mul14 = mul nsw i32 %phi, %arg5 + %add15 = add i32 %add, %mul + %add16 = add i32 %mul, %ashr + %add17 = add i32 %mul, %ashr6 + %getelementptr = getelementptr inbounds i8, ptr %arg4, i32 %mul13 + %getelementptr18 = getelementptr inbounds i8, ptr %arg4, i32 %mul14 + %getelementptr19 = getelementptr inbounds i16, ptr %arg, i32 %add15 + %getelementptr20 = getelementptr inbounds i16, ptr %arg, i32 %add16 + %getelementptr21 = getelementptr inbounds i16, ptr %arg, i32 %add17 + %getelementptr22 = getelementptr inbounds i16, ptr %arg, i32 %mul + %bitcast = bitcast ptr %getelementptr to ptr + %bitcast23 = bitcast ptr %getelementptr18 to ptr + %bitcast24 = bitcast ptr %getelementptr19 to ptr + %bitcast25 = bitcast ptr %getelementptr20 to ptr + %bitcast26 = bitcast ptr %getelementptr21 to ptr + %bitcast27 = bitcast ptr %getelementptr22 to ptr + br label %bb28 + +bb28: ; preds = %bb28, %bb11 + %phi29 = phi i32 [ 0, %bb11 ], [ %add54, %bb28 ] + %phi30 = phi ptr [ %bitcast27, %bb11 ], [ %getelementptr36, %bb28 ] + %phi31 = phi ptr [ %bitcast26, %bb11 ], [ %getelementptr37, %bb28 ] + %phi32 = phi ptr [ %bitcast25, %bb11 ], [ %getelementptr39, %bb28 ] + %phi33 = phi ptr [ %bitcast24, %bb11 ], [ 
%getelementptr41, %bb28 ] + %phi34 = phi ptr [ %bitcast, %bb11 ], [ %getelementptr53, %bb28 ] + %phi35 = phi ptr [ %bitcast23, %bb11 ], [ %getelementptr52, %bb28 ] + %getelementptr36 = getelementptr inbounds <16 x i32>, ptr %phi30, i32 1 + %load = load <16 x i32>, ptr %phi30, align 64 + %getelementptr37 = getelementptr inbounds <16 x i32>, ptr %phi31, i32 1 + %load38 = load <16 x i32>, ptr %phi31, align 64 + %getelementptr39 = getelementptr inbounds <16 x i32>, ptr %phi32, i32 1 + %load40 = load <16 x i32>, ptr %phi32, align 64 + %getelementptr41 = getelementptr inbounds <16 x i32>, ptr %phi33, i32 1 + %load42 = load <16 x i32>, ptr %phi33, align 64 + %call = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load, <16 x i32> %load38) + %call43 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load, <16 x i32> %load38) + %call44 = tail call <16 x i32> @llvm.hexagon.V6.vaddh(<16 x i32> %load40, <16 x i32> %load42) + %call45 = tail call <16 x i32> @llvm.hexagon.V6.vsubh(<16 x i32> %load40, <16 x i32> %load42) + %call46 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call, <16 x i32> %call44) + %call47 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call, <16 x i32> %call44) + %call48 = tail call <16 x i32> @llvm.hexagon.V6.vavgh(<16 x i32> %call43, <16 x i32> %call45) + %call49 = tail call <16 x i32> @llvm.hexagon.V6.vnavgh(<16 x i32> %call43, <16 x i32> %call45) + %call50 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call47, <16 x i32> %call46) + %call51 = tail call <16 x i32> @llvm.hexagon.V6.vsathub(<16 x i32> %call49, <16 x i32> %call48) + %getelementptr52 = getelementptr inbounds <16 x i32>, ptr %phi35, i32 ... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/83379 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits