https://github.com/zhaoqi5 updated https://github.com/llvm/llvm-project/pull/122999
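For reference, the effect of this patch on a simple tls-le (local-exec TLS) access, distilled from the test updates in the patch below; here `g` stands in for any thread-local global, and the LA32 variants use add.w/addi.w instead of add.d/addi.d:

  Before:
    lu12i.w $a0, %le_hi20_r(g)
    add.d   $a0, $a0, $tp, %le_add_r(g)
    addi.d  $a0, $a0, %le_lo12_r(g)
    ld.b    $a0, $a0, 0

  After:
    lu12i.w $a0, %le_hi20_r(g)
    add.d   $a0, $a0, $tp, %le_add_r(g)
    ld.b    $a0, $a0, %le_lo12_r(g)

The %le_lo12_r addend is folded into the memory access itself, saving one instruction per accessed symbol.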
>From ac63a4f1c8e8d1b3831c83c5fab2a139a284dcc6 Mon Sep 17 00:00:00 2001
From: Qi Zhao <zhaoq...@loongson.cn>
Date: Tue, 14 Jan 2025 21:35:31 +0800
Subject: [PATCH] [LoongArch] Merge base and offset for tls-le code sequence

Adapt the merge base offset pass to optimize the tls-le code sequence.

---
 .../LoongArch/LoongArchMergeBaseOffset.cpp    | 165 ++++++++-
 .../LoongArch/machinelicm-address-pseudos.ll  |   6 +-
 .../LoongArch/merge-base-offset-tlsle.ll      | 318 +++++++-----------
 3 files changed, 265 insertions(+), 224 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
index 7f98f7718a538d..bef56e58bdc88d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchMergeBaseOffset.cpp
@@ -37,6 +37,8 @@ class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
   bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
                       MachineInstr *&Lo20, MachineInstr *&Hi12,
                       MachineInstr *&Last);
+  bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
+                      MachineInstr *&Lo12);
 
   bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                            MachineInstr *&Lo20, MachineInstr *&Hi12,
@@ -176,7 +178,80 @@ bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
   return true;
 }
 
-// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
+// Detect the pattern:
+//
+// (small/medium):
+//   lu12i.w vreg1, %le_hi20_r(s)
+//   add.w/d vreg2, vreg1, r2, %le_add_r(s)
+//   addi.w/d vreg3, vreg2, %le_lo12_r(s)
+//
+// The pattern is only accepted if:
+//    1) The first instruction has only one use, which is the PseudoAddTPRel.
+//       The second instruction has only one use, which is the ADDI. The
+//       second instruction's last register operand is the tp register.
+//    2) The address operands have the appropriate type, reflecting the
+//       lowering of a thread_local global address using the pattern.
+//    3) The offset value in the ThreadLocal Global Address is 0.
+bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
+                                                 MachineInstr *&Add,
+                                                 MachineInstr *&Lo12) {
+  if (Hi20.getOpcode() != LoongArch::LU12I_W)
+    return false;
+
+  auto isGlobalOrCPI = [](const MachineOperand &Op) {
+    return Op.isGlobal() || Op.isCPI();
+  };
+
+  const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
+  if (LoongArchII::getDirectFlags(Hi20Op1) != LoongArchII::MO_LE_HI_R ||
+      !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
+    return false;
+
+  Register HiDestReg = Hi20.getOperand(0).getReg();
+  if (!MRI->hasOneUse(HiDestReg))
+    return false;
+
+  Add = &*MRI->use_instr_begin(HiDestReg);
+  if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
+      (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
+    return false;
+
+  if (Add->getOperand(2).getReg() != LoongArch::R2)
+    return false;
+
+  const MachineOperand &AddOp3 = Add->getOperand(3);
+  if (LoongArchII::getDirectFlags(AddOp3) != LoongArchII::MO_LE_ADD_R ||
+      !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
+      AddOp3.getOffset() != 0)
+    return false;
+
+  Register AddDestReg = Add->getOperand(0).getReg();
+  if (!MRI->hasOneUse(AddDestReg))
+    return false;
+
+  Lo12 = &*MRI->use_instr_begin(AddDestReg);
+  if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
+      (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
+    return false;
+
+  const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
+  if (LoongArchII::getDirectFlags(Lo12Op2) != LoongArchII::MO_LE_LO_R ||
+      !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
+      Lo12Op2.getOffset() != 0)
+    return false;
+
+  if (Hi20Op1.isGlobal()) {
+    LLVM_DEBUG(dbgs() << "  Found lowered global address: "
+                      << *Hi20Op1.getGlobal() << "\n");
+  } else if (Hi20Op1.isCPI()) {
+    LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
+                      << "\n");
+  }
+
+  return true;
+}
+
+// Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
 // Delete the tail instruction and update all the uses to use the
 // output from Last.
 void LoongArchMergeBaseOffsetOpt::foldOffset(
@@ -190,31 +265,49 @@ void LoongArchMergeBaseOffsetOpt::foldOffset(
     Lo20->getOperand(2).setOffset(Offset);
     Hi12->getOperand(2).setOffset(Offset);
   }
+
+  // For tls-le, the offset of the second PseudoAddTPRel instr should also be
+  // updated.
+  MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+  if (Hi20.getOpcode() == LoongArch::LU12I_W)
+    Add->getOperand(3).setOffset(Offset);
+
   // Delete the tail instruction.
   MachineInstr *Def = Last ? Last : &Lo12;
   MRI->constrainRegClass(Def->getOperand(0).getReg(),
                          MRI->getRegClass(Tail.getOperand(0).getReg()));
   MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
   Tail.eraseFromParent();
+
   LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
-                    << "     " << Hi20 << "     " << Lo12;);
+                    << "     " << Hi20;);
+  if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+    LLVM_DEBUG(dbgs() << "     " << *Add;);
+  }
+  LLVM_DEBUG(dbgs() << "     " << Lo12;);
   if (Lo20 && Hi12) {
     LLVM_DEBUG(dbgs() << "     " << *Lo20 << "     " << *Hi12;);
   }
 }
 
 // Detect patterns for large offsets that are passed into an ADD instruction.
-// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
-// instructions and deletes TailAdd and the instructions that produced the
-// offset.
+// If the pattern is found, updates the offset in Hi20, (Add), Lo12,
+// (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
+// produced the offset.
 //
 // (The instructions marked with "!" are not necessarily present)
 //
 // Base address lowering is of the form:
-//   Hi20:  pcalau12i vreg1, %pc_hi20(s)
-//   +- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
-//   |  Lo20: lu32i.d vreg2, %pc64_lo20(s)    !
-//   +- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// 1) pcala:
+//      Hi20: pcalau12i vreg1, %pc_hi20(s)
+// +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
+// |    Lo20: lu32i.d vreg2, %pc64_lo20(s)    !
+// +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
+// |
+// | 2) tls-le:
+// |    Hi20: lu12i.w vreg1, %le_hi20_r(s)
+// |    Add:  add.w/d vreg1, vreg1, r2, %le_add_r(s)
+// +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
 // |
 // | The large offset can be one of the forms:
 // |
@@ -334,7 +427,8 @@ bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
 
   // Look for arithmetic instructions we can get an offset from.
   // We might be able to remove the arithmetic instructions by folding the
-  // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
+  // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
+  // LU12I_W+PseudoAddTPRel+ADDI.
   if (!MRI->hasOneUse(DestReg))
     return false;
 
@@ -454,6 +548,7 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
   // If all the uses are memory ops with the same offset, we can transform:
   //
   // 1. (small/medium):
+  // 1.1. pcala
   //   pcalau12i vreg1, %pc_hi20(s)
   //   addi.d vreg2, vreg1, %pc_lo12(s)
   //   ld.w vreg3, 8(vreg2)
@@ -463,6 +558,18 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
   //   pcalau12i vreg1, %pc_hi20(s+8)
   //   ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
   //
+  // 1.2. tls-le
+  //   lu12i.w vreg1, %le_hi20_r(s)
+  //   add.w/d vreg2, vreg1, r2, %le_add_r(s)
+  //   addi.w/d vreg3, vreg2, %le_lo12_r(s)
+  //   ld.w vreg4, 8(vreg3)
+  //
+  //   =>
+  //
+  //   lu12i.w vreg1, %le_hi20_r(s+8)
+  //   add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
+  //   ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
+  //
   // 2. (large):
   //   pcalau12i vreg1, %pc_hi20(s)
   //   addi.d vreg2, $zero, %pc_lo12(s)
@@ -598,7 +705,8 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
     return false;
 
   // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
-  // be removed from the code sequence.
+  // be removed from the pcala code sequence. The tls-le code sequence can
+  // still be relaxed after being optimized.
   //
   // For example:
   //   pcalau12i $a0, %pc_hi20(symbol)
@@ -614,15 +722,20 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
   // optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
   // carried by them.
   Hi20.getOperand(1).setOffset(NewOffset);
-  Hi20.getOperand(1).setTargetFlags(
-      LoongArchII::getDirectFlags(Hi20.getOperand(1)));
   MachineOperand &ImmOp = Lo12.getOperand(2);
   ImmOp.setOffset(NewOffset);
-  ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
   if (Lo20 && Hi12) {
     Lo20->getOperand(2).setOffset(NewOffset);
     Hi12->getOperand(2).setOffset(NewOffset);
   }
+  if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+    Hi20.getOperand(1).setTargetFlags(
+        LoongArchII::getDirectFlags(Hi20.getOperand(1)));
+    ImmOp.setTargetFlags(LoongArchII::getDirectFlags(ImmOp));
+  } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+    MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+    Add->getOperand(3).setOffset(NewOffset);
+  }
 
   // Update the immediate in the load/store instructions to add the offset.
   const LoongArchInstrInfo &TII = *ST->getInstrInfo();
@@ -673,7 +786,14 @@ bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
     return true;
   }
 
-  MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
+  if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+    MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+                        Hi20.getOperand(0).getReg());
+  } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+    MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
+    MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
+                        Add->getOperand(0).getReg());
+  }
   Lo12.eraseFromParent();
   return true;
 }
@@ -693,8 +813,19 @@ bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
       MachineInstr *Lo20 = nullptr;
       MachineInstr *Hi12 = nullptr;
       MachineInstr *Last = nullptr;
-      if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
-        continue;
+      if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
+        // Detect foldable pcala code sequence in small/medium/large code model.
+        if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
+          continue;
+      } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
+        MachineInstr *Add = nullptr;
+        // Detect foldable tls-le code sequence in small/medium code model.
+        if (!detectFoldable(Hi20, Add, Lo12))
+          continue;
+      }
+      // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
+      // reuse the existing hooks, and the last three parameters should always
+      // be nullptr.
       MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
       MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
     }
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index e0a93e3051bf88..92d079ab3a8d87 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -317,11 +317,10 @@ define void @test_la_tls_le(i32 signext %n) {
 ; LA32-NEXT:    move $a1, $zero
 ; LA32-NEXT:    lu12i.w $a2, %le_hi20_r(le)
 ; LA32-NEXT:    add.w $a2, $a2, $tp, %le_add_r(le)
-; LA32-NEXT:    addi.w $a2, $a2, %le_lo12_r(le)
 ; LA32-NEXT:    .p2align 4, , 16
 ; LA32-NEXT:  .LBB4_1: # %loop
 ; LA32-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA32-NEXT:    ld.w $zero, $a2, 0
+; LA32-NEXT:    ld.w $zero, $a2, %le_lo12_r(le)
 ; LA32-NEXT:    addi.w $a1, $a1, 1
 ; LA32-NEXT:    blt $a1, $a0, .LBB4_1
 ; LA32-NEXT:  # %bb.2: # %ret
@@ -332,11 +331,10 @@ define void @test_la_tls_le(i32 signext %n) {
 ; LA64-NEXT:    move $a1, $zero
 ; LA64-NEXT:    lu12i.w $a2, %le_hi20_r(le)
 ; LA64-NEXT:    add.d $a2, $a2, $tp, %le_add_r(le)
-; LA64-NEXT:    addi.d $a2, $a2, %le_lo12_r(le)
 ; LA64-NEXT:    .p2align 4, , 16
 ; LA64-NEXT:  .LBB4_1: # %loop
 ; LA64-NEXT:    # =>This Inner Loop Header: Depth=1
-; LA64-NEXT:    ld.w $zero, $a2, 0
+; LA64-NEXT:    ld.w $zero, $a2, %le_lo12_r(le)
 ; LA64-NEXT:    addi.w $a1, $a1, 1
 ; LA64-NEXT:    blt $a1, $a0, .LBB4_1
 ; LA64-NEXT:  # %bb.2: # %ret
diff --git a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
index 7e995d224ce1d2..9ed9a865ce55d4 100644
--- a/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
+++ b/llvm/test/CodeGen/LoongArch/merge-base-offset-tlsle.ll
@@ -11,16 +11,14 @@ define dso_local signext i8 @tlsle_load_s8() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT:    ld.b $a0, $a0, 0
+; LA32-NEXT:    ld.b $a0, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_s8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT:    ld.b $a0, $a0, 0
+; LA64-NEXT:    ld.b $a0, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -33,16 +31,14 @@ define dso_local zeroext i8 @tlsle_load_u8() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT:    ld.bu $a0, $a0, 0
+; LA32-NEXT:    ld.bu $a0, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_u8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT:    ld.bu $a0, $a0, 0
+; LA64-NEXT:    ld.bu $a0, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -55,18 +51,16 @@ define dso_local void @tlsle_store_i8() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    ori $a1, $zero, 1
-; LA32-NEXT:    st.b $a1, $a0, 0
+; LA32-NEXT:    st.b $a1, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_i8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    st.b $a1, $a0, 0
+; LA64-NEXT:    st.b $a1, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -81,16 +75,14 @@ define dso_local signext i16 @tlsle_load_s16() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT:    ld.h $a0, $a0, 0
+; LA32-NEXT:    ld.h $a0, $a0, %le_lo12_r(g_i16)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_s16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT:    ld.h $a0, $a0, 0
+; LA64-NEXT:    ld.h $a0, $a0, %le_lo12_r(g_i16)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -103,16 +95,14 @@ define dso_local zeroext i16 @tlsle_load_u16() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
-; LA32-NEXT:    ld.hu $a0, $a0, 0
+; LA32-NEXT:    ld.hu $a0, $a0, %le_lo12_r(g_i16)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_u16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
-; LA64-NEXT:    ld.hu $a0, $a0, 0
+; LA64-NEXT:    ld.hu $a0, $a0, %le_lo12_r(g_i16)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -125,18 +115,16 @@ define dso_local void @tlsle_store_i16() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i16)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i16)
 ; LA32-NEXT:    ori $a1, $zero, 1
-; LA32-NEXT:    st.h $a1, $a0, 0
+; LA32-NEXT:    st.h $a1, $a0, %le_lo12_r(g_i16)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_i16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i16)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i16)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i16)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    st.h $a1, $a0, 0
+; LA64-NEXT:    st.h $a1, $a0, %le_lo12_r(g_i16)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i16)
@@ -151,16 +139,14 @@ define dso_local signext i32 @tlsle_load_s32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, %le_lo12_r(g_i32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_s32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT:    ld.w $a0, $a0, 0
+; LA64-NEXT:    ld.w $a0, $a0, %le_lo12_r(g_i32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -173,16 +159,14 @@ define dso_local zeroext i32 @tlsle_load_u32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
-; LA32-NEXT:    ld.w $a0, $a0, 0
+; LA32-NEXT:    ld.w $a0, $a0, %le_lo12_r(g_i32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_u32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
-; LA64-NEXT:    ld.wu $a0, $a0, 0
+; LA64-NEXT:    ld.wu $a0, $a0, %le_lo12_r(g_i32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -195,18 +179,16 @@ define dso_local void @tlsle_store_i32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32)
 ; LA32-NEXT:    ori $a1, $zero, 1
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, %le_lo12_r(g_i32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_i32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    st.w $a1, $a0, %le_lo12_r(g_i32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32)
@@ -230,8 +212,7 @@ define dso_local i64 @tlsle_load_i64() nounwind {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i64)
-; LA64-NEXT:    ld.d $a0, $a0, 0
+; LA64-NEXT:    ld.d $a0, $a0, %le_lo12_r(g_i64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -254,9 +235,8 @@ define dso_local void @tlsle_store_i64() nounwind {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i64)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    st.d $a1, $a0, %le_lo12_r(g_i64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i64)
@@ -271,16 +251,14 @@ define dso_local float @tlsle_load_f32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f32)
-; LA32-NEXT:    fld.s $fa0, $a0, 0
+; LA32-NEXT:    fld.s $fa0, $a0, %le_lo12_r(g_f32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_f32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f32)
-; LA64-NEXT:    fld.s $fa0, $a0, 0
+; LA64-NEXT:    fld.s $fa0, $a0, %le_lo12_r(g_f32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -293,18 +271,16 @@ define dso_local void @tlsle_store_f32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f32)
 ; LA32-NEXT:    lu12i.w $a1, 260096
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, %le_lo12_r(g_f32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_f32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f32)
 ; LA64-NEXT:    lu12i.w $a1, 260096
-; LA64-NEXT:    st.w $a1, $a0, 0
+; LA64-NEXT:    st.w $a1, $a0, %le_lo12_r(g_f32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f32)
@@ -319,16 +295,14 @@ define dso_local double @tlsle_load_f64() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f64)
-; LA32-NEXT:    fld.d $fa0, $a0, 0
+; LA32-NEXT:    fld.d $fa0, $a0, %le_lo12_r(g_f64)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_f64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f64)
-; LA64-NEXT:    fld.d $fa0, $a0, 0
+; LA64-NEXT:    fld.d $fa0, $a0, %le_lo12_r(g_f64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -341,18 +315,16 @@ define dso_local void @tlsle_store_f64() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_f64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_f64)
 ; LA32-NEXT:    vldi $vr0, -912
-; LA32-NEXT:    fst.d $fa0, $a0, 0
+; LA32-NEXT:    fst.d $fa0, $a0, %le_lo12_r(g_f64)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_f64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_f64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_f64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_f64)
 ; LA64-NEXT:    lu52i.d $a1, $zero, 1023
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    st.d $a1, $a0, %le_lo12_r(g_f64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_f64)
@@ -380,11 +352,10 @@ define dso_local void @tlsle_store_multi() nounwind {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_m64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_m64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_m64)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    st.d $a1, $a0, %le_lo12_r(g_m64)
 ; LA64-NEXT:    ori $a1, $zero, 2
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    st.d $a1, $a0, %le_lo12_r(g_m64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_m64)
@@ -400,18 +371,16 @@ define dso_local void @tlsle_store_sf32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_sf32)
-; LA32-NEXT:    fld.s $fa0, $a0, 0
-; LA32-NEXT:    fst.s $fa0, $a0, 0
+; LA32-NEXT:    fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA32-NEXT:    fst.s $fa0, $a0, %le_lo12_r(g_sf32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_sf32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_sf32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_sf32)
-; LA64-NEXT:    fld.s $fa0, $a0, 0
-; LA64-NEXT:    fst.s $fa0, $a0, 0
+; LA64-NEXT:    fld.s $fa0, $a0, %le_lo12_r(g_sf32)
+; LA64-NEXT:    fst.s $fa0, $a0, %le_lo12_r(g_sf32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf32)
@@ -427,18 +396,16 @@ define dso_local void @tlsle_store_sf64() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf64)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_sf64)
-; LA32-NEXT:    fld.d $fa0, $a0, 0
-; LA32-NEXT:    fst.d $fa0, $a0, 0
+; LA32-NEXT:    fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA32-NEXT:    fst.d $fa0, $a0, %le_lo12_r(g_sf64)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_sf64:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_sf64)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_sf64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_sf64)
-; LA64-NEXT:    fld.d $fa0, $a0, 0
-; LA64-NEXT:    fst.d $fa0, $a0, 0
+; LA64-NEXT:    fld.d $fa0, $a0, %le_lo12_r(g_sf64)
+; LA64-NEXT:    fst.d $fa0, $a0, %le_lo12_r(g_sf64)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_sf64)
@@ -455,24 +422,20 @@ define dso_local void @tlsle_copy_i32x4() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_src)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA32-NEXT:    vld $vr0, $a0, 0
+; LA32-NEXT:    vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_copy_i32x4:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_src)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x4_src)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x4_src)
-; LA64-NEXT:    vld $vr0, $a0, 0
+; LA64-NEXT:    vld $vr0, $a0, %le_lo12_r(g_i32x4_src)
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x4_dst)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x4_dst)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x4_dst)
-; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    vst $vr0, $a0, %le_lo12_r(g_i32x4_dst)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x4_src)
@@ -490,24 +453,20 @@ define dso_local void @tlsle_copy_i32x8() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_src)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA32-NEXT:    xvld $xr0, $a0, 0
+; LA32-NEXT:    xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_copy_i32x8:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_src)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x8_src)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x8_src)
-; LA64-NEXT:    xvld $xr0, $a0, 0
+; LA64-NEXT:    xvld $xr0, $a0, %le_lo12_r(g_i32x8_src)
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i32x8_dst)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i32x8_dst)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i32x8_dst)
-; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    xvst $xr0, $a0, %le_lo12_r(g_i32x8_dst)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i32x8_src)
@@ -524,24 +483,20 @@ define dso_local void @tlsle_copy_i8_to_i8x16() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT:    vldrepl.b $vr0, $a0, 0
+; LA32-NEXT:    vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x16)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8x16)
-; LA32-NEXT:    vst $vr0, $a0, 0
+; LA32-NEXT:    vst $vr0, $a0, %le_lo12_r(g_i8x16)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_copy_i8_to_i8x16:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT:    vldrepl.b $vr0, $a0, 0
+; LA64-NEXT:    vldrepl.b $vr0, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x16)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8x16)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8x16)
-; LA64-NEXT:    vst $vr0, $a0, 0
+; LA64-NEXT:    vst $vr0, $a0, %le_lo12_r(g_i8x16)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -558,24 +513,20 @@ define dso_local void @tlsle_copy_i8_to_i8x32() nounwind {
 ; LA32:       # %bb.0: # %entry
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8)
-; LA32-NEXT:    xvldrepl.b $xr0, $a0, 0
+; LA32-NEXT:    xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
 ; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x32)
 ; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_i8x32)
-; LA32-NEXT:    xvst $xr0, $a0, 0
+; LA32-NEXT:    xvst $xr0, $a0, %le_lo12_r(g_i8x32)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_copy_i8_to_i8x32:
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8)
-; LA64-NEXT:    xvldrepl.b $xr0, $a0, 0
+; LA64-NEXT:    xvldrepl.b $xr0, $a0, %le_lo12_r(g_i8)
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_i8x32)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_i8x32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_i8x32)
-; LA64-NEXT:    xvst $xr0, $a0, 0
+; LA64-NEXT:    xvst $xr0, $a0, %le_lo12_r(g_i8x32)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_i8)
@@ -606,10 +557,9 @@ define dso_local void @tlsle_rmw() nounwind {
 ; LA64:       # %bb.0: # %entry
 ; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_rmw)
 ; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_rmw)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_rmw)
-; LA64-NEXT:    ld.d $a1, $a0, 0
+; LA64-NEXT:    ld.d $a1, $a0, %le_lo12_r(g_rmw)
 ; LA64-NEXT:    addi.d $a1, $a1, 1
-; LA64-NEXT:    st.d $a1, $a0, 0
+; LA64-NEXT:    st.d $a1, $a0, %le_lo12_r(g_rmw)
 ; LA64-NEXT:    ret
 entry:
   %0 = call ptr @llvm.threadlocal.address.p0(ptr @g_rmw)
@@ -624,22 +574,18 @@ entry:
 define dso_local void @tlsle_store_a32() nounwind {
 ; LA32-LABEL: tlsle_store_a32:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT:    lu12i.w $a1, 1
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a32+4096)
 ; LA32-NEXT:    ori $a1, $zero, 1
-; LA32-NEXT:    st.w $a1, $a0, 0
+; LA32-NEXT:    st.w $a1, $a0, %le_lo12_r(g_a32+4096)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_store_a32:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a32)
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32+4096)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a32+4096)
 ; LA64-NEXT:    ori $a1, $zero, 1
-; LA64-NEXT:    stptr.w $a1, $a0, 4096
+; LA64-NEXT:    st.w $a1, $a0, %le_lo12_r(g_a32+4096)
 ; LA64-NEXT:    ret
 entry:
   store i32 1, ptr getelementptr inbounds ([1 x i32], ptr @g_a32, i32 1024), align 4
@@ -681,29 +627,27 @@ entry:
 define dso_local void @tlsle_control_flow_with_mem_access() nounwind {
 ; LA32-LABEL: tlsle_control_flow_with_mem_access:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a32)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a32)
-; LA32-NEXT:    ld.w $a1, $a0, 4
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA32-NEXT:    ld.w $a1, $a0, %le_lo12_r(g_a32+4)
 ; LA32-NEXT:    ori $a2, $zero, 1
 ; LA32-NEXT:    blt $a1, $a2, .LBB25_2
 ; LA32-NEXT:  # %bb.1: # %if.then
 ; LA32-NEXT:    ori $a1, $zero, 10
-; LA32-NEXT:    st.w $a1, $a0, 4
+; LA32-NEXT:    st.w $a1, $a0, %le_lo12_r(g_a32+4)
 ; LA32-NEXT:  .LBB25_2: # %if.end
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_control_flow_with_mem_access:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a32)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a32)
-; LA64-NEXT:    ld.w $a1, $a0, 4
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a32+4)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a32+4)
+; LA64-NEXT:    ld.w $a1, $a0, %le_lo12_r(g_a32+4)
 ; LA64-NEXT:    ori $a2, $zero, 1
 ; LA64-NEXT:    blt $a1, $a2, .LBB25_2
 ; LA64-NEXT:  # %bb.1: # %if.then
 ; LA64-NEXT:    ori $a1, $zero, 10
-; LA64-NEXT:    st.w $a1, $a0, 4
+; LA64-NEXT:    st.w $a1, $a0, %le_lo12_r(g_a32+4)
 ; LA64-NEXT:  .LBB25_2: # %if.end
 ; LA64-NEXT:    ret
 entry:
@@ -724,18 +668,16 @@ if.end:
 define dso_local ptr @tlsle_load_addr_offset_1() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_1:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, 8
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+8)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_1:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, 8
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+8)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+8)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1)
@@ -744,20 +686,16 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_257() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_257:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, 2047
-; LA32-NEXT:    addi.w $a0, $a0, 9
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+2056)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_257:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, 2047
-; LA64-NEXT:    addi.d $a0, $a0, 9
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2056)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+2056)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+2056)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 257)
@@ -766,19 +704,16 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_1048576() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_1048576:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 2048
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+8388608)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_1048576:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    addu16i.d $a0, $a0, 128
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8388608)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+8388608)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+8388608)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048576)
@@ -787,21 +722,16 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_1048577() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_1048577:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 2048
-; LA32-NEXT:    ori $a1, $a1, 8
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+8388616)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_1048577:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    addu16i.d $a0, $a0, 128
-; LA64-NEXT:    addi.d $a0, $a0, 8
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+8388616)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+8388616)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+8388616)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 1048577)
@@ -810,20 +740,16 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_268432896() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_268432896:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 524283
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+2147463168)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_268432896:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    lu12i.w $a1, 524283
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2147463168)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463168)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+2147463168)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432896)
@@ -832,22 +758,16 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_268432897() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_268432897:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 524283
-; LA32-NEXT:    ori $a1, $a1, 8
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+2147463176)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_268432897:
 ; LA64:       # %bb.0: # %entry
-; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64)
-; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64)
-; LA64-NEXT:    lu12i.w $a1, 524283
-; LA64-NEXT:    ori $a1, $a1, 8
-; LA64-NEXT:    add.d $a0, $a0, $a1
+; LA64-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2147463176)
+; LA64-NEXT:    add.d $a0, $a0, $tp, %le_add_r(g_a64+2147463176)
+; LA64-NEXT:    addi.d $a0, $a0, %le_lo12_r(g_a64+2147463176)
 ; LA64-NEXT:    ret
 entry:
   ret ptr getelementptr inbounds ([1 x i64], ptr @g_a64, i64 268432897)
@@ -877,11 +797,9 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_248792680471040() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_248792680471040:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 502733
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+2059194368)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+2059194368)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+2059194368)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_248792680471040:
@@ -900,12 +818,9 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_9380351707272() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_9380351707272:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 279556
-; LA32-NEXT:    ori $a1, $a1, 1088
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+1145062464)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+1145062464)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+1145062464)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_9380351707272:
@@ -945,12 +860,9 @@ entry:
 define dso_local ptr @tlsle_load_addr_offset_614749556925924693() nounwind {
 ; LA32-LABEL: tlsle_load_addr_offset_614749556925924693:
 ; LA32:       # %bb.0: # %entry
-; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64)
-; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64)
-; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64)
-; LA32-NEXT:    lu12i.w $a1, 209666
-; LA32-NEXT:    ori $a1, $a1, 2728
-; LA32-NEXT:    add.w $a0, $a0, $a1
+; LA32-NEXT:    lu12i.w $a0, %le_hi20_r(g_a64+858794664)
+; LA32-NEXT:    add.w $a0, $a0, $tp, %le_add_r(g_a64+858794664)
+; LA32-NEXT:    addi.w $a0, $a0, %le_lo12_r(g_a64+858794664)
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: tlsle_load_addr_offset_614749556925924693:
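A note on the folded offsets shown above: when every use of the merged base shares the same constant offset, the pass folds that offset into the %le_hi20_r/%le_add_r/%le_lo12_r operands themselves. A worked example taken from tlsle_load_addr_offset_1048576 above (LA64; the element size is 8 bytes, so 1048576 * 8 = 8388608):

  Before:
    lu12i.w   $a0, %le_hi20_r(g_a64)
    add.d     $a0, $a0, $tp, %le_add_r(g_a64)
    addi.d    $a0, $a0, %le_lo12_r(g_a64)
    addu16i.d $a0, $a0, 128

  After:
    lu12i.w   $a0, %le_hi20_r(g_a64+8388608)
    add.d     $a0, $a0, $tp, %le_add_r(g_a64+8388608)
    addi.d    $a0, $a0, %le_lo12_r(g_a64+8388608)

The arithmetic that materialized the offset (here addu16i.d with 128 * 65536 = 8388608) is deleted, and the relocation symbol becomes s+offset.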