https://github.com/tangaac created https://github.com/llvm/llvm-project/pull/113255
This patch adds two clang options, -mlam-bh and -mno-lam-bh, which enable or disable the amswap[_db].{b/h} and amadd[_db].{b/h} instructions. The default is -mno-lam-bh. The options only take effect on LoongArch64. From 51c4019cf9cf6eed49e4bd24e04c350c0cef645c Mon Sep 17 00:00:00 2001 From: tangaac <tangya...@loongson.cn> Date: Fri, 11 Oct 2024 14:16:53 +0800 Subject: [PATCH] support amswap[_db].{b/h} and amadd[_db].{b/h} instructions for LoongArch. --- clang/include/clang/Driver/Options.td | 4 + clang/lib/Basic/Targets/LoongArch.cpp | 7 +- clang/lib/Basic/Targets/LoongArch.h | 2 + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 9 + clang/test/Driver/loongarch-march.c | 8 +- clang/test/Driver/loongarch-mlam-bh.c | 30 + clang/test/Preprocessor/init-loongarch.c | 23 +- .../TargetParser/LoongArchTargetParser.def | 3 +- .../llvm/TargetParser/LoongArchTargetParser.h | 3 + llvm/lib/Target/LoongArch/LoongArch.td | 9 +- .../LoongArch/LoongArchISelLowering.cpp | 6 + .../Target/LoongArch/LoongArchInstrInfo.td | 18 + .../TargetParser/LoongArchTargetParser.cpp | 4 +- .../ir-instruction/atomicrmw-lam-bh.ll | 1674 +++++++++++++++++ 14 files changed, 1786 insertions(+), 14 deletions(-) create mode 100644 clang/test/Driver/loongarch-mlam-bh.c create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 152c43d7908ff8..3cbab84b6bf986 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5394,6 +5394,10 @@ def mfrecipe : Flag<["-"], "mfrecipe">, Group<m_loongarch_Features_Group>, HelpText<"Enable frecipe.{s/d} and frsqrte.{s/d}">; def mno_frecipe : Flag<["-"], "mno-frecipe">, Group<m_loongarch_Features_Group>, HelpText<"Disable frecipe.{s/d} and frsqrte.{s/d}">; +def mlam_bh : Flag<["-"], "mlam-bh">, Group<m_loongarch_Features_Group>, + HelpText<"Enable amswap_[db].{b/h} and amadd_[db].{b/h}">; +def mno_lam_bh : Flag<["-"], "mno-lam-bh">, 
Group<m_loongarch_Features_Group>, + HelpText<"Disable amswap_[db].{b/h} and amadd_[db].{b/h}">; def mannotate_tablejump : Flag<["-"], "mannotate-tablejump">, Group<m_loongarch_Features_Group>, HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">; def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>, diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index cb3fd12c48ddb6..3f07dcc3b9c374 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -205,7 +205,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, // TODO: As more features of the V1.1 ISA are supported, a unified "v1.1" // arch feature set will be used to include all sub-features belonging to // the V1.1 ISA version. - if (HasFeatureFrecipe) + if (HasFeatureFrecipe || HasFeatureLAM_BH) Builder.defineMacro("__loongarch_arch", Twine('"') + "la64v1.1" + Twine('"')); else @@ -236,6 +236,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, if (HasFeatureFrecipe) Builder.defineMacro("__loongarch_frecipe", Twine(1)); + if (HasFeatureLAM_BH) + Builder.defineMacro("__loongarch_lam_bh", Twine(1)); + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -312,6 +315,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( HasUnalignedAccess = false; else if (Feature == "+frecipe") HasFeatureFrecipe = true; + else if (Feature == "+lam-bh") + HasFeatureLAM_BH = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index c668ca7eca047a..3585e9f7968b4b 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -30,6 +30,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool HasFeatureLSX; bool HasFeatureLASX; bool HasFeatureFrecipe; + 
bool HasFeatureLAM_BH; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) @@ -39,6 +40,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureLSX = false; HasFeatureLASX = false; HasFeatureFrecipe = false; + HasFeatureLAM_BH = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index 355253e4b3b07c..e69a5562137ccd 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -260,6 +260,15 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, else Features.push_back("-frecipe"); } + + // Select lam-bh feature determined by -m[no-]lam-bh. + if (const Arg *A = + Args.getLastArg(options::OPT_mlam_bh, options::OPT_mno_lam_bh)) { + if (A->getOption().matches(options::OPT_mlam_bh)) + Features.push_back("+lam-bh"); + else + Features.push_back("-lam-bh"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c index 2d5b315d962a1e..d4cd5b07ae905f 100644 --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -39,21 +39,21 @@ // CC1-LA64V1P1: "-target-cpu" "loongarch64" // CC1-LA64V1P1-NOT: "-target-feature" -// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" +// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" // CC1-LA64V1P1-NOT: "-target-feature" // CC1-LA64V1P1: "-target-abi" "lp64d" // CC1-LA664: "-target-cpu" "la664" // CC1-LA664-NOT: "-target-feature" -// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" 
"-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" +// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" // CC1-LA664-NOT: "-target-feature" // CC1-LA664: "-target-abi" "lp64d" // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" -// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lsx,+ual" -// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lasx,+lsx,+ual" +// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+frecipe,+lam-bh,+lsx,+ual" +// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+f,+frecipe,+lam-bh,+lasx,+lsx,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mlam-bh.c b/clang/test/Driver/loongarch-mlam-bh.c new file mode 100644 index 00000000000000..6f2901e594dfcc --- /dev/null +++ b/clang/test/Driver/loongarch-mlam-bh.c @@ -0,0 +1,30 @@ +/// Test -m[no]lam-bh options. 
+ +// RUN: %clang --target=loongarch64 -mlam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -mlam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-LAM-BH +// RUN: %clang --target=loongarch64 -mlam-bh -mno-lam-bh -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-LAM-BH + +// RUN: %clang --target=loongarch64 -mlam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-LAM-BH +// RUN: %clang --target=loongarch64 -mno-lam-bh -mlam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-LAM-BH +// RUN: %clang --target=loongarch64 -mlam-bh -mno-lam-bh -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-LAM-BH + + +// CC1-LAM-BH: "-target-feature" "+lam-bh" +// CC1-NO-LAM-BH: "-target-feature" "-lam-bh" + +// IR-LAM-BH: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+lam-bh{{(,.*)?}}" +// IR-NO-LAM-BH: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-lam-bh{{(,.*)?}}" + +int foo(void) { + return 42; +} \ No newline at end of file diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 771d56ffb1c1b9..85c42bea6c7ca5 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -798,7 +798,7 @@ // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 -/// Check __loongarch_arch{_tune/_frecipe}. +/// Check __loongarch_arch{_tune/_frecipe/_lam_bh}. 
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s @@ -823,26 +823,37 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \ +// 
RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe -Xclang -target-feature -Xclang -lam-bh | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=la664 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=la664 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines 
--check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH -DARCH=la664 -DTUNE=loongarch64 %s // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // FRECIPE: #define __loongarch_frecipe 1 +// LAM-BH: #define __loongarch_lam_bh 1 // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def index 101a48cbd53994..6cd2018b7b59cb 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -11,6 +11,7 @@ LOONGARCH_FEATURE("+lbt", FK_LBT) LOONGARCH_FEATURE("+lvz", FK_LVZ) LOONGARCH_FEATURE("+ual", FK_UAL) LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) +LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH) #undef LOONGARCH_FEATURE @@ -20,6 +21,6 @@ LOONGARCH_FEATURE("+frecipe", FK_FRECIPE) LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) LOONGARCH_ARCH("la464", AK_LA464, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) -LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE) +LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH) #undef LOONGARCH_ARCH diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index c0bb15a5163b12..1b90e1d9ade39b 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -49,6 +49,9 @@ enum FeatureKind : uint32_t { // Floating-point approximate reciprocal instructions are available. FK_FRECIPE = 1 << 9, + + // Atomic memory swap and add instructions for byte and half word are available. 
+ FK_LAM_BH = 1 << 10, }; struct FeatureInfo { diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index ddb27dc6404fa8..54ebf86666abf9 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -112,6 +112,12 @@ def FeatureFrecipe "Support frecipe.{s/d} and frsqrte.{s/d} instructions.">; def HasFrecipe : Predicate<"Subtarget->hasFrecipe()">; +// Atomic memory swap and add instructions for byte and half word +def FeatureLAM_BH + : SubtargetFeature<"lam-bh", "HasLAM_BH", "true", + "Support amswap[_db].{b/h} and amadd[_db].{b/h} instructions.">; +def HasLAM_BH : Predicate<"Subtarget->hasLAM_BH()">; + def TunePreferWInst : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", "Prefer instructions with W suffix">; @@ -151,7 +157,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, FeatureExtLASX, FeatureExtLVZ, FeatureExtLBT, - FeatureFrecipe]>; + FeatureFrecipe, + FeatureLAM_BH]>; //===----------------------------------------------------------------------===// // Define the LoongArch target. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 676d43ef22c47b..2d9e4e839650f0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -5735,6 +5735,12 @@ LoongArchTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { AI->getOperation() == AtomicRMWInst::USubSat) return AtomicExpansionKind::CmpXChg; + if (Subtarget.hasLAM_BH() && Subtarget.is64Bit() && + (AI->getOperation() == AtomicRMWInst::Xchg || + AI->getOperation() == AtomicRMWInst::Add)) { + return AtomicExpansionKind::None; + } + unsigned Size = AI->getType()->getPrimitiveSizeInBits(); if (Size == 8 || Size == 16) return AtomicExpansionKind::MaskedIntrinsic; diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index f97aace363cb7a..03d629e44f6d97 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -2088,8 +2088,26 @@ multiclass binary_atomic_op_wd<string inst, string op, string signed = ""> { (!cast<Instruction>(inst#"__DB_D"#signed) GPR:$rk, GPR:$rj)>; } +// Atomic operation for byte and half word +multiclass binary_atomic_op_bh<string inst, string op> { + def : Pat<(!cast<PatFrag>(op#"_i8_monotonic") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"_B") GPR:$rk, GPR:$rj)>; + def : Pat<(!cast<PatFrag>(op#"_i16_monotonic") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"_H") GPR:$rk, GPR:$rj)>; + + def : Pat<(!cast<PatFrag>(op#"_i8") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"__DB_B") GPR:$rk, GPR:$rj)>; + def : Pat<(!cast<PatFrag>(op#"_i16") GPR:$rj, GPR:$rk), + (!cast<Instruction>(inst#"__DB_H") GPR:$rk, GPR:$rj)>; +} + let Predicates = [IsLA64] in { +let Predicates = [ HasLAM_BH ] in { +defm : binary_atomic_op_bh<"AMSWAP", "atomic_swap">; +defm : binary_atomic_op_bh<"AMADD", "atomic_load_add">; +} + defm : 
binary_atomic_op_wd<"AMSWAP", "atomic_swap">; defm : binary_atomic_op_wd<"AMADD", "atomic_load_add">; defm : binary_atomic_op_wd<"AMAND", "atomic_load_and">; diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index 8e86d18de2ad9a..27e3b5683c5a6e 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -50,8 +50,10 @@ bool LoongArch::getArchFeatures(StringRef Arch, Features.push_back("+d"); Features.push_back("+lsx"); Features.push_back("+ual"); - if (Arch == "la64v1.1") + if (Arch == "la64v1.1") { Features.push_back("+frecipe"); + Features.push_back("+lam-bh"); + } return true; } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll new file mode 100644 index 00000000000000..e71c87daed74ea --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomicrmw-lam-bh.ll @@ -0,0 +1,1674 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --mattr=+d,-lam-bh < %s | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch32 --mattr=+d,+lam-bh < %s | FileCheck %s --check-prefix=LA32-LAM-BH +; RUN: llc --mtriple=loongarch64 --mattr=+d,-lam-bh < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 --mattr=+d,+lam-bh < %s | FileCheck %s --check-prefix=LA64-LAM-BH + + +define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 
+; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB0_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i8_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB0_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB0_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i8_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 
1, 0 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB1_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a1, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a2, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a2, $a2, $a1 +; LA32-LAM-BH-NEXT: nor $a2, $a2, $zero +; LA32-LAM-BH-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a3, $a0, 0 +; LA32-LAM-BH-NEXT: and $a4, $a3, $a2 +; LA32-LAM-BH-NEXT: sc.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a4, .LBB1_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a3, $a1 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a1 +; LA64-NEXT: nor $a2, $a2, $zero +; LA64-NEXT: amand_db.w $a3, $a2, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a1 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_0_i8_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.b $a1, $zero, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a1 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 0 acquire + ret i8 %1 +} + +define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a2, $zero, 255 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: or $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: 
beqz $a4, .LBB2_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a1, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a2, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a2, $a2, $a1 +; LA32-LAM-BH-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a3, $a0, 0 +; LA32-LAM-BH-NEXT: or $a4, $a3, $a2 +; LA32-LAM-BH-NEXT: sc.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a4, .LBB2_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a3, $a1 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a2, $zero, 255 +; LA64-NEXT: sll.w $a2, $a2, $a1 +; LA64-NEXT: amor_db.w $a3, $a2, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a1 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_minus_1_i8_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: addi.w $a2, $zero, -1 +; LA64-LAM-BH-NEXT: amswap_db.b $a1, $a2, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a1 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 -1 acquire + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB3_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; 
+; LA32-LAM-BH-LABEL: atomicrmw_xchg_i16_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB3_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB3_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i16_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: 
sll.w $a2, $a2, $a1 +; LA32-NEXT: nor $a2, $a2, $zero +; LA32-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: and $a4, $a3, $a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB4_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a1, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a2, 15 +; LA32-LAM-BH-NEXT: ori $a2, $a2, 4095 +; LA32-LAM-BH-NEXT: sll.w $a2, $a2, $a1 +; LA32-LAM-BH-NEXT: nor $a2, $a2, $zero +; LA32-LAM-BH-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a3, $a0, 0 +; LA32-LAM-BH-NEXT: and $a4, $a3, $a2 +; LA32-LAM-BH-NEXT: sc.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a4, .LBB4_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a3, $a1 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a1 +; LA64-NEXT: nor $a2, $a2, $zero +; LA64-NEXT: amand_db.w $a3, $a2, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a1 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_0_i16_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.h $a1, $zero, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a1 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 0 acquire + ret i16 %1 +} + +define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind { +; LA32-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a1, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a2, 15 +; LA32-NEXT: ori $a2, $a2, 4095 +; LA32-NEXT: sll.w $a2, $a2, $a1 +; LA32-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a3, $a0, 0 +; LA32-NEXT: or $a4, $a3, 
$a2 +; LA32-NEXT: sc.w $a4, $a0, 0 +; LA32-NEXT: beqz $a4, .LBB5_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a3, $a1 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a1, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a2, 15 +; LA32-LAM-BH-NEXT: ori $a2, $a2, 4095 +; LA32-LAM-BH-NEXT: sll.w $a2, $a2, $a1 +; LA32-LAM-BH-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a3, $a0, 0 +; LA32-LAM-BH-NEXT: or $a4, $a3, $a2 +; LA32-LAM-BH-NEXT: sc.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a4, .LBB5_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a3, $a1 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a1, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a2, 15 +; LA64-NEXT: ori $a2, $a2, 4095 +; LA64-NEXT: sll.w $a2, $a2, $a1 +; LA64-NEXT: amor_db.w $a3, $a2, $a0 +; LA64-NEXT: srl.w $a0, $a3, $a1 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_minus_1_i16_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: addi.w $a2, $zero, -1 +; LA64-LAM-BH-NEXT: amswap_db.h $a1, $a2, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a1 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 -1 acquire + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB6_1 
+; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i8_release: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB6_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB6_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i8_release: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w 
$a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB7_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i16_release: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB7_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB7_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB7_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i16_release: +; LA64-LAM-BH: # %bb.0: 
+; LA64-LAM-BH-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b release + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB8_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB8_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB8_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; 
LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB8_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i8_acq_rel: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB9_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB9_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; 
LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB9_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB9_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i16_acq_rel: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB10_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, 
$a2 +; LA32-LAM-BH-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB10_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB10_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB10_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i8_seq_cst: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB11_1 +; LA32-NEXT: # %bb.2: 
+; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB11_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB11_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB11_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i16_seq_cst: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: 
ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB12_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i8_monotonic: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB12_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB12_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i8_monotonic: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap.b $a2, $a1, 
$a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_xchg_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: addi.w $a5, $a1, 0 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB13_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_xchg_i16_monotonic: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: addi.w $a5, $a1, 0 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB13_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_xchg_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; 
LA64-NEXT: .LBB13_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: addi.w $a5, $a1, 0 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB13_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_xchg_i16_monotonic: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amswap.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw xchg ptr %a, i16 %b monotonic + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB14_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i8_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB14_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; 
LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB14_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB14_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i8_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b acquire + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acquire: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB15_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i16_acquire: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 
0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB15_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acquire: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB15_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i16_acquire: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b acquire + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz 
$a5, .LBB16_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i8_release: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB16_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_release: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB16_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i8_release: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b release + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_release: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; 
LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB17_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i16_release: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB17_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_release: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB17_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i16_release: +; 
LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b release + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB18_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i8_acq_rel: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB18_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w 
$a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB18_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i8_acq_rel: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b acq_rel + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_acq_rel: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB19_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i16_acq_rel: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB19_1 +; LA32-LAM-BH-NEXT: # 
%bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_acq_rel: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB19_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i16_acq_rel: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b acq_rel + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB20_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i8_seq_cst: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, 
$a1, $a2 +; LA32-LAM-BH-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB20_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB20_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i8_seq_cst: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.b $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b seq_cst + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_seq_cst: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB21_1 +; LA32-NEXT: # 
%bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i16_seq_cst: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB21_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_seq_cst: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB21_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i16_seq_cst: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd_db.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b seq_cst + ret i16 %1 +} + +define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i8_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: 
ori $a3, $zero, 255 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: andi $a1, $a1, 255 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB22_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i8_monotonic: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: ori $a3, $zero, 255 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: andi $a1, $a1, 255 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB22_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i8_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: ori $a3, $zero, 255 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: andi $a1, $a1, 255 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; LA64-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB22_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i8_monotonic: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd.b $a2, $a1, 
$a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i8 %b monotonic + ret i8 %1 +} + +define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind { +; LA32-LABEL: atomicrmw_add_i16_monotonic: +; LA32: # %bb.0: +; LA32-NEXT: slli.w $a2, $a0, 3 +; LA32-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-NEXT: lu12i.w $a3, 15 +; LA32-NEXT: ori $a3, $a3, 4095 +; LA32-NEXT: sll.w $a3, $a3, $a2 +; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-NEXT: sll.w $a1, $a1, $a2 +; LA32-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA32-NEXT: ll.w $a4, $a0, 0 +; LA32-NEXT: add.w $a5, $a4, $a1 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: and $a5, $a5, $a3 +; LA32-NEXT: xor $a5, $a4, $a5 +; LA32-NEXT: sc.w $a5, $a0, 0 +; LA32-NEXT: beqz $a5, .LBB23_1 +; LA32-NEXT: # %bb.2: +; LA32-NEXT: srl.w $a0, $a4, $a2 +; LA32-NEXT: ret +; +; LA32-LAM-BH-LABEL: atomicrmw_add_i16_monotonic: +; LA32-LAM-BH: # %bb.0: +; LA32-LAM-BH-NEXT: slli.w $a2, $a0, 3 +; LA32-LAM-BH-NEXT: bstrins.w $a0, $zero, 1, 0 +; LA32-LAM-BH-NEXT: lu12i.w $a3, 15 +; LA32-LAM-BH-NEXT: ori $a3, $a3, 4095 +; LA32-LAM-BH-NEXT: sll.w $a3, $a3, $a2 +; LA32-LAM-BH-NEXT: bstrpick.w $a1, $a1, 15, 0 +; LA32-LAM-BH-NEXT: sll.w $a1, $a1, $a2 +; LA32-LAM-BH-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA32-LAM-BH-NEXT: ll.w $a4, $a0, 0 +; LA32-LAM-BH-NEXT: add.w $a5, $a4, $a1 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: and $a5, $a5, $a3 +; LA32-LAM-BH-NEXT: xor $a5, $a4, $a5 +; LA32-LAM-BH-NEXT: sc.w $a5, $a0, 0 +; LA32-LAM-BH-NEXT: beqz $a5, .LBB23_1 +; LA32-LAM-BH-NEXT: # %bb.2: +; LA32-LAM-BH-NEXT: srl.w $a0, $a4, $a2 +; LA32-LAM-BH-NEXT: ret +; +; LA64-LABEL: atomicrmw_add_i16_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: slli.d $a2, $a0, 3 +; LA64-NEXT: bstrins.d $a0, $zero, 1, 0 +; LA64-NEXT: lu12i.w $a3, 15 +; LA64-NEXT: ori $a3, $a3, 4095 +; LA64-NEXT: sll.w $a3, $a3, $a2 +; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 +; LA64-NEXT: sll.w $a1, $a1, $a2 +; 
LA64-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1 +; LA64-NEXT: ll.w $a4, $a0, 0 +; LA64-NEXT: add.w $a5, $a4, $a1 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: and $a5, $a5, $a3 +; LA64-NEXT: xor $a5, $a4, $a5 +; LA64-NEXT: sc.w $a5, $a0, 0 +; LA64-NEXT: beqz $a5, .LBB23_1 +; LA64-NEXT: # %bb.2: +; LA64-NEXT: srl.w $a0, $a4, $a2 +; LA64-NEXT: ret +; +; LA64-LAM-BH-LABEL: atomicrmw_add_i16_monotonic: +; LA64-LAM-BH: # %bb.0: +; LA64-LAM-BH-NEXT: amadd.h $a2, $a1, $a0 +; LA64-LAM-BH-NEXT: move $a0, $a2 +; LA64-LAM-BH-NEXT: ret +; + %1 = atomicrmw add ptr %a, i16 %b monotonic + ret i16 %1 +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits