https://github.com/tangaac updated https://github.com/llvm/llvm-project/pull/116771
>From ee422d26ad2695d34b0bf471f6d4fa2c3bef8ca8 Mon Sep 17 00:00:00 2001 From: tangaac <tangya...@loongson.cn> Date: Tue, 19 Nov 2024 17:43:31 +0800 Subject: [PATCH 1/3] [LoongArch] Support sc.q instruction for 128bit cmpxchg operation --- clang/include/clang/Driver/Options.td | 4 + clang/lib/Basic/Targets/LoongArch.cpp | 7 +- clang/lib/Basic/Targets/LoongArch.h | 2 + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 8 + clang/test/Driver/loongarch-march.c | 8 +- clang/test/Driver/loongarch-mscq.c | 30 ++ clang/test/Preprocessor/init-loongarch.c | 31 +- .../TargetParser/LoongArchTargetParser.def | 3 +- .../llvm/TargetParser/LoongArchTargetParser.h | 3 + llvm/lib/Target/LoongArch/LoongArch.td | 9 +- .../LoongArchExpandAtomicPseudoInsts.cpp | 107 +++++++ .../LoongArch/LoongArchISelLowering.cpp | 46 +++ .../Target/LoongArch/LoongArchInstrInfo.td | 14 + .../TargetParser/LoongArchTargetParser.cpp | 1 + .../ir-instruction/atomic-cmpxchg-128.ll | 287 ++++++++++++++++++ 15 files changed, 542 insertions(+), 18 deletions(-) create mode 100644 clang/test/Driver/loongarch-mscq.c create mode 100644 llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 808f089914c9bb..96cef360bd5251 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5425,6 +5425,10 @@ def mdiv32 : Flag<["-"], "mdiv32">, Group<m_loongarch_Features_Group>, HelpText<"Use div.w[u] and mod.w[u] instructions with input not sign-extended.">; def mno_div32 : Flag<["-"], "mno-div32">, Group<m_loongarch_Features_Group>, HelpText<"Do not use div.w[u] and mod.w[u] instructions with input not sign-extended.">; +def mscq : Flag<["-"], "mscq">, Group<m_loongarch_Features_Group>, + HelpText<"Enable sc.q instruction.">; +def mno_scq : Flag<["-"], "mno-scq">, Group<m_loongarch_Features_Group>, + HelpText<"Disable sc.q instruction.">; def mannotate_tablejump : Flag<["-"], 
"mannotate-tablejump">, Group<m_loongarch_Features_Group>, HelpText<"Enable annotate table jump instruction to correlate it with the jump table.">; def mno_annotate_tablejump : Flag<["-"], "mno-annotate-tablejump">, Group<m_loongarch_Features_Group>, diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp index d36186aa9c2fbf..bb0d0b68cfcb0a 100644 --- a/clang/lib/Basic/Targets/LoongArch.cpp +++ b/clang/lib/Basic/Targets/LoongArch.cpp @@ -206,7 +206,7 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, // arch feature set will be used to include all sub-features belonging to // the V1.1 ISA version. if (HasFeatureFrecipe && HasFeatureLAM_BH && HasFeatureLAMCAS && - HasFeatureLD_SEQ_SA && HasFeatureDiv32) + HasFeatureLD_SEQ_SA && HasFeatureDiv32 && HasFeatureSCQ) Builder.defineMacro("__loongarch_arch", Twine('"') + "la64v1.1" + Twine('"')); else @@ -249,6 +249,9 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, if (HasFeatureDiv32) Builder.defineMacro("__loongarch_div32", Twine(1)); + if (HasFeatureSCQ) + Builder.defineMacro("__loongarch_scq", Twine(1)); + StringRef ABI = getABI(); if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") Builder.defineMacro("__loongarch_lp64"); @@ -333,6 +336,8 @@ bool LoongArchTargetInfo::handleTargetFeatures( HasFeatureLD_SEQ_SA = true; else if (Feature == "+div32") HasFeatureDiv32 = true; + else if (Feature == "+scq") + HasFeatureSCQ = true; } return true; } diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index abaa05aa42d438..5c34c84ff8d3e8 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -34,6 +34,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { bool HasFeatureLAMCAS; bool HasFeatureLD_SEQ_SA; bool HasFeatureDiv32; + bool HasFeatureSCQ; public: LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) @@ -47,6 +48,7 @@ class 
LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { HasFeatureLAMCAS = false; HasFeatureLD_SEQ_SA = false; HasFeatureDiv32 = false; + HasFeatureSCQ = false; LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp index bbd9397aa2378a..4dd07f25bab0fb 100644 --- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp @@ -301,6 +301,14 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, else Features.push_back("-div32"); } + + // Select scq feature determined by -m[no-]scq. + if (const Arg *A = Args.getLastArg(options::OPT_mscq, options::OPT_mno_scq)) { + if (A->getOption().matches(options::OPT_mscq)) + Features.push_back("+scq"); + else + Features.push_back("-scq"); + } } std::string loongarch::postProcessTargetCPUString(const std::string &CPU, diff --git a/clang/test/Driver/loongarch-march.c b/clang/test/Driver/loongarch-march.c index cfcfa852efea58..b52cdb330716ff 100644 --- a/clang/test/Driver/loongarch-march.c +++ b/clang/test/Driver/loongarch-march.c @@ -39,21 +39,21 @@ // CC1-LA64V1P1: "-target-cpu" "loongarch64" // CC1-LA64V1P1-NOT: "-target-feature" -// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" +// CC1-LA64V1P1: "-target-feature" "+64bit" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" "-target-feature" "+scq" // CC1-LA64V1P1-NOT: "-target-feature" // CC1-LA64V1P1: "-target-abi" "lp64d" // CC1-LA664: "-target-cpu" "la664" // CC1-LA664-NOT: "-target-feature" -// 
CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" +// CC1-LA664: "-target-feature" "+64bit" "-target-feature" "+f" "-target-feature" "+d" "-target-feature" "+lsx" "-target-feature" "+lasx" "-target-feature" "+ual" "-target-feature" "+frecipe" "-target-feature" "+lam-bh" "-target-feature" "+lamcas" "-target-feature" "+ld-seq-sa" "-target-feature" "+div32" "-target-feature" "+scq" // CC1-LA664-NOT: "-target-feature" // CC1-LA664: "-target-abi" "lp64d" // IR-LOONGARCH64: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+f,+ual" // IR-LA464: attributes #[[#]] ={{.*}}"target-cpu"="la464" {{.*}}"target-features"="+64bit,+d,+f,+lasx,+lsx,+ual" // IR-LA64V1P0: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+lsx,+ual" -// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+lamcas,+ld-seq-sa,+lsx,+ual" -// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lamcas,+lasx,+ld-seq-sa,+lsx,+ual" +// IR-LA64V1P1: attributes #[[#]] ={{.*}}"target-cpu"="loongarch64" {{.*}}"target-features"="+64bit,+d,+div32,+frecipe,+lam-bh,+lamcas,+ld-seq-sa,+lsx,+scq,+ual" +// IR-LA664: attributes #[[#]] ={{.*}}"target-cpu"="la664" {{.*}}"target-features"="+64bit,+d,+div32,+f,+frecipe,+lam-bh,+lamcas,+lasx,+ld-seq-sa,+lsx,+scq,+ual" int foo(void) { return 3; diff --git a/clang/test/Driver/loongarch-mscq.c b/clang/test/Driver/loongarch-mscq.c new file mode 100644 index 00000000000000..cd798ba5d8ff2b --- /dev/null +++ b/clang/test/Driver/loongarch-mscq.c @@ -0,0 +1,30 @@ +/// Test -m[no]scq options. 
+ +// RUN: %clang --target=loongarch64 -mscq -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-SCQ +// RUN: %clang --target=loongarch64 -mno-scq -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-SCQ +// RUN: %clang --target=loongarch64 -mno-scq -mscq -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-SCQ +// RUN: %clang --target=loongarch64 -mscq -mno-scq -fsyntax-only %s -### 2>&1 | \ +// RUN: FileCheck %s --check-prefix=CC1-NO-SCQ + +// RUN: %clang --target=loongarch64 -mscq -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-SCQ +// RUN: %clang --target=loongarch64 -mno-scq -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-SCQ +// RUN: %clang --target=loongarch64 -mno-scq -mscq -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-SCQ +// RUN: %clang --target=loongarch64 -mscq -mno-scq -S -emit-llvm %s -o - | \ +// RUN: FileCheck %s --check-prefix=IR-NO-SCQ + + +// CC1-SCQ: "-target-feature" "+scq" +// CC1-NO-SCQ: "-target-feature" "-scq" + +// IR-SCQ: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}+scq{{(,.*)?}}" +// IR-NO-SCQ: attributes #[[#]] ={{.*}}"target-features"="{{(.*,)?}}-scq{{(,.*)?}}" + +int foo(void) { + return 42; +} \ No newline at end of file diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c index 19458a2b14f40c..f6fd603dc39c0b 100644 --- a/clang/test/Preprocessor/init-loongarch.c +++ b/clang/test/Preprocessor/init-loongarch.c @@ -798,7 +798,7 @@ // LA64-FPU0-LP64S-NOT: #define __loongarch_single_float // LA64-FPU0-LP64S: #define __loongarch_soft_float 1 -/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_lamcas/_ld_seq_sa/_div32}. +/// Check __loongarch_arch{_tune/_frecipe/_lam_bh/_lamcas/_ld_seq_sa/_div32/_scq}. 
// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s @@ -823,11 +823,11 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.1 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -frecipe | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lsx | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +frecipe | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +frecipe | \ @@ -835,7 +835,7 @@ // RUN: %clang --target=loongarch64 
-x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lam-bh | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lam-bh | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lam-bh | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAM-BH -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lam-bh | \ @@ -843,7 +843,7 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +lamcas | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -lamcas | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LD-SEQ-SA,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lamcas | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LAMCAS -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +lamcas | \ @@ -851,7 
+851,7 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +ld-seq-sa | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -ld-seq-sa | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,DIV32,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +ld-seq-sa | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,LD-SEQ-SA -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +ld-seq-sa | \ @@ -859,21 +859,29 @@ // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +div32 | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -div32| \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +div32 | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=loongarch64 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx 
-Xclang -target-feature -Xclang +div32 | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s -// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +lamcas -Xclang -target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 | \ +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +scq | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.1 -Xclang -target-feature -Xclang -scq | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +scq | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=loongarch64 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -Xclang -target-feature -Xclang +lsx -Xclang -target-feature -Xclang +scq | \ +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,SCQ -DARCH=la64v1.0 -DTUNE=loongarch64 %s +// RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la64v1.0 -Xclang -target-feature -Xclang +frecipe -Xclang -target-feature -Xclang +lam-bh -Xclang -target-feature -Xclang +lamcas -Xclang -target-feature -Xclang +ld-seq-sa -Xclang -target-feature -Xclang +div32 -Xclang -target-feature -Xclang +scq | \ // RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE -DARCH=la64v1.1 -DTUNE=loongarch64 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 
-DARCH=la664 -DTUNE=la664 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la664 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=la64v1.0 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=loongarch64 -mtune=la664 | \ // RUN: FileCheck --match-full-lines --check-prefix=ARCH-TUNE -DARCH=loongarch64 -DTUNE=la664 %s // RUN: %clang --target=loongarch64 -x c -E -dM %s -o - -march=la664 -mtune=loongarch64 | \ -// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32 -DARCH=la664 -DTUNE=loongarch64 %s +// RUN: FileCheck --match-full-lines --check-prefixes=ARCH-TUNE,FRECIPE,LAM-BH,LAMCAS,LD-SEQ-SA,DIV32,SCQ -DARCH=la664 -DTUNE=loongarch64 %s // ARCH-TUNE: #define __loongarch_arch "[[ARCH]]" // DIV32: #define __loongarch_div32 1 @@ -881,6 +889,7 @@ // LAM-BH: #define __loongarch_lam_bh 1 // LAMCAS: #define __loongarch_lamcas 1 // LD-SEQ-SA: #define __loongarch_ld_seq_sa 1 +// SCQ: #define __loongarch_scq 1 // ARCH-TUNE: #define __loongarch_tune "[[TUNE]]" // RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def index 6731a2c975cd54..1bcf65b37f201e 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.def +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.def @@ -15,6 +15,7 @@ LOONGARCH_FEATURE("+lam-bh", FK_LAM_BH) LOONGARCH_FEATURE("+lamcas", FK_LAMCAS) LOONGARCH_FEATURE("+ld-seq-sa", FK_LD_SEQ_SA) LOONGARCH_FEATURE("+div32", FK_DIV32) +LOONGARCH_FEATURE("+scq", FK_SCQ) #undef LOONGARCH_FEATURE @@ -24,6 +25,6 @@ LOONGARCH_FEATURE("+div32", FK_DIV32) LOONGARCH_ARCH("loongarch64", AK_LOONGARCH64, FK_64BIT | FK_FP32 | FK_FP64 | FK_UAL) LOONGARCH_ARCH("la464", AK_LA464, 
FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL) -LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LAMCAS | FK_LD_SEQ_SA | FK_DIV32) +LOONGARCH_ARCH("la664", AK_LA664, FK_64BIT | FK_FP32 | FK_FP64 | FK_LSX | FK_LASX | FK_UAL | FK_FRECIPE | FK_LAM_BH | FK_LAMCAS | FK_LD_SEQ_SA | FK_DIV32 | FK_SCQ) #undef LOONGARCH_ARCH diff --git a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h index 52cd51f43ad640..e08e7bc182e112 100644 --- a/llvm/include/llvm/TargetParser/LoongArchTargetParser.h +++ b/llvm/include/llvm/TargetParser/LoongArchTargetParser.h @@ -63,6 +63,9 @@ enum FeatureKind : uint32_t { // Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended. FK_DIV32 = 1 << 13, + + // sc.q is available. + FK_SCQ = 1 << 14, }; struct FeatureInfo { diff --git a/llvm/lib/Target/LoongArch/LoongArch.td b/llvm/lib/Target/LoongArch/LoongArch.td index 596c8c90c0a1f6..5fd52babfc6ec3 100644 --- a/llvm/lib/Target/LoongArch/LoongArch.td +++ b/llvm/lib/Target/LoongArch/LoongArch.td @@ -135,6 +135,12 @@ def FeatureDiv32 "Assume div.w[u] and mod.w[u] can handle inputs that are not sign-extended">; def HasDiv32 : Predicate<"Subtarget->hasDiv32()">; +// Support SC.Q instruction +def FeatureSCQ + : SubtargetFeature<"scq", "HasSCQ", "true", + "Support sc.q instruction">; +def HasSCQ : Predicate<"Subtarget->hasSCQ()">; + def TunePreferWInst : SubtargetFeature<"prefer-w-inst", "PreferWInst", "true", "Prefer instructions with W suffix">; @@ -180,7 +186,8 @@ def : ProcessorModel<"la664", NoSchedModel, [Feature64Bit, FeatureLAM_BH, FeatureLAMCAS, FeatureLD_SEQ_SA, - FeatureDiv32]>; + FeatureDiv32, + FeatureSCQ]>; //===----------------------------------------------------------------------===// // Define the LoongArch target. 
diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp index 35f84425cb0eba..4a5475bafd475d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -58,6 +58,9 @@ class LoongArchExpandAtomicPseudo : public MachineFunctionPass { bool expandAtomicCmpXchg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked, int Width, MachineBasicBlock::iterator &NextMBBI); + bool expandAtomicCmpXchg128(MachineBasicBlock &MBB, + MachineBasicBlock::iterator, + MachineBasicBlock::iterator &NextMBBI); }; char LoongArchExpandAtomicPseudo::ID = 0; @@ -131,6 +134,9 @@ bool LoongArchExpandAtomicPseudo::expandMI( return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI); case LoongArch::PseudoCmpXchg64: return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI); + case LoongArch::PseudoCmpXchg128: + case LoongArch::PseudoCmpXchg128Acquire: + return expandAtomicCmpXchg128(MBB, MBBI, NextMBBI); case LoongArch::PseudoMaskedCmpXchg32: return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI); case LoongArch::PseudoMaskedAtomicLoadMax32: @@ -604,6 +610,107 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg( return true; } +bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + MachineBasicBlock::iterator &NextMBBI) { + MachineInstr &MI = *MBBI; + DebugLoc DL = MI.getDebugLoc(); + MachineFunction *MF = MBB.getParent(); + auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); + + // Insert new MBBs + MF->insert(++MBB.getIterator(), LoopHeadMBB); + MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB); + 
MF->insert(++LoopTailMBB->getIterator(), TailMBB); + MF->insert(++TailMBB->getIterator(), DoneMBB); + + // Set up successors and transfer remaining instructions to DoneMBB. + LoopHeadMBB->addSuccessor(LoopTailMBB); + LoopHeadMBB->addSuccessor(TailMBB); + LoopTailMBB->addSuccessor(DoneMBB); + LoopTailMBB->addSuccessor(LoopHeadMBB); + TailMBB->addSuccessor(DoneMBB); + DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end()); + DoneMBB->transferSuccessors(&MBB); + MBB.addSuccessor(LoopHeadMBB); + + Register DestLoReg = MI.getOperand(0).getReg(); + Register DestHiReg = MI.getOperand(1).getReg(); + Register ScratchReg = MI.getOperand(2).getReg(); + Register AddrReg = MI.getOperand(3).getReg(); + Register CmpValLoReg = MI.getOperand(4).getReg(); + Register CmpValHiReg = MI.getOperand(5).getReg(); + Register NewValLoReg = MI.getOperand(6).getReg(); + Register NewValHiReg = MI.getOperand(7).getReg(); + + // .loophead: + // ll.d res_lo, (addr) + // ld.d res_hi, (addr), 8 + // bne dest_lo, cmpval_lo, tail + // bne dest_hi, cmpval_hi, tail + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_D), DestLoReg) + .addReg(AddrReg) + .addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LD_D), DestHiReg) + .addReg(AddrReg) + .addImm(8); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE)) + .addReg(DestLoReg) + .addReg(CmpValLoReg) + .addMBB(TailMBB); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE)) + .addReg(DestHiReg) + .addReg(CmpValHiReg) + .addMBB(TailMBB); + // .looptail: + // move scratch, newval_lo + // sc.q scratch, newval_hi, (addr) + // beqz scratch, loophead + // b done + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg) + .addReg(NewValLoReg) + .addReg(LoongArch::R0); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_Q), ScratchReg) + .addReg(ScratchReg) + .addReg(NewValHiReg) + .addReg(AddrReg); + BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ)) + .addReg(ScratchReg) + .addMBB(LoopHeadMBB); + BuildMI(LoopTailMBB, DL, 
TII->get(LoongArch::B)).addMBB(DoneMBB); + int hint; + + switch (MI.getOpcode()) { + case LoongArch::PseudoCmpXchg128Acquire: + // acquire acqrel seqcst + hint = 0b10100; + break; + case LoongArch::PseudoCmpXchg128: + hint = 0x700; + break; + default: + llvm_unreachable("Unexpected opcode"); + } + + // .tail: + // dbar 0x700 | acquire + BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); + + NextMBBI = MBB.end(); + MI.eraseFromParent(); + + LivePhysRegs LiveRegs; + computeAndAddLiveIns(LiveRegs, *LoopHeadMBB); + computeAndAddLiveIns(LiveRegs, *LoopTailMBB); + computeAndAddLiveIns(LiveRegs, *TailMBB); + computeAndAddLiveIns(LiveRegs, *DoneMBB); + + return true; +} + } // end namespace INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo", diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 16bceacfaa222c..15a9dd892b1ca5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -376,6 +376,11 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, // cmpxchg sizes down to 8 bits become legal if LAMCAS is available. 
if (Subtarget.hasLAMCAS()) setMinCmpXchgSizeInBits(8); + + if (Subtarget.hasSCQ()) { + setMaxAtomicSizeInBitsSupported(128); + setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); + } } bool LoongArchTargetLowering::isOffsetFoldingLegal( @@ -2825,6 +2830,43 @@ replaceINTRINSIC_WO_CHAINResults(SDNode *N, SmallVectorImpl<SDValue> &Results, } } +static void replaceCMP_XCHG_128Results(SDNode *N, + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { + assert(N->getValueType(0) == MVT::i128 && + "AtomicCmpSwap on types less than 128 should be legal"); + MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); + + unsigned Opcode; + switch (MemOp->getMergedOrdering()) { + case AtomicOrdering::Acquire: + case AtomicOrdering::AcquireRelease: + case AtomicOrdering::SequentiallyConsistent: + Opcode = LoongArch::PseudoCmpXchg128Acquire; + break; + case AtomicOrdering::Monotonic: + case AtomicOrdering::Release: + Opcode = LoongArch::PseudoCmpXchg128; + break; + default: + llvm_unreachable("Unexpected ordering!"); + } + + SDLoc DL(N); + auto CmpVal = DAG.SplitScalar(N->getOperand(2), DL, MVT::i64, MVT::i64); + auto NewVal = DAG.SplitScalar(N->getOperand(3), DL, MVT::i64, MVT::i64); + SDValue Ops[] = {N->getOperand(1), CmpVal.first, CmpVal.second, + NewVal.first, NewVal.second, N->getOperand(0)}; + + SDNode *CmpSwap = DAG.getMachineNode( + Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i64, MVT::Other), + Ops); + DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, + SDValue(CmpSwap, 0), SDValue(CmpSwap, 1))); + Results.push_back(SDValue(CmpSwap, 3)); +} + void LoongArchTargetLowering::ReplaceNodeResults( SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { SDLoc DL(N); @@ -3135,6 +3177,10 @@ void LoongArchTargetLowering::ReplaceNodeResults( Results.push_back(Result); break; } + case ISD::ATOMIC_CMP_SWAP: { + replaceCMP_XCHG_128Results(N, Results, DAG); + 
break; + } } } diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 6134daf2fbe630..e7bd8c9a375a2e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -2024,6 +2024,20 @@ class PseudoCmpXchg def PseudoCmpXchg32 : PseudoCmpXchg; def PseudoCmpXchg64 : PseudoCmpXchg; +class PseudoCmpXchg128Pat + : Pseudo<(outs GPR:$res_lo, GPR:$res_hi, GPR:$scratch), + (ins GPR:$addr, GPR:$cmpval_lo, GPR:$cmpval_hi, + GPR:$newval_lo, GPR:$newval_hi)> { + let Constraints = "@earlyclobber $res_lo,@earlyclobber $res_hi,@earlyclobber $scratch"; + let mayLoad = 1; + let mayStore = 1; + let hasSideEffects = 0; + let Size = 36; +} + +def PseudoCmpXchg128 : PseudoCmpXchg128Pat; +def PseudoCmpXchg128Acquire : PseudoCmpXchg128Pat; + def PseudoMaskedCmpXchg32 : Pseudo<(outs GPR:$res, GPR:$scratch), (ins GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, diff --git a/llvm/lib/TargetParser/LoongArchTargetParser.cpp b/llvm/lib/TargetParser/LoongArchTargetParser.cpp index c8a07c32247cdf..e394c0c15b207c 100644 --- a/llvm/lib/TargetParser/LoongArchTargetParser.cpp +++ b/llvm/lib/TargetParser/LoongArchTargetParser.cpp @@ -56,6 +56,7 @@ bool LoongArch::getArchFeatures(StringRef Arch, Features.push_back("+lamcas"); Features.push_back("+ld-seq-sa"); Features.push_back("+div32"); + Features.push_back("+scq"); } return true; } diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll new file mode 100644 index 00000000000000..11b2622a94551a --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll @@ -0,0 +1,287 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq < %s | FileCheck %s --check-prefix=LA64-SCQ 
+ +define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_acquire_acquire: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: move $a6, $a4 +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: ori $a4, $zero, 2 +; LA64-NEXT: ori $a5, $zero, 2 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB0_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB0_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB0_1 +; LA64-SCQ-NEXT: b .LBB0_4 +; LA64-SCQ-NEXT: .LBB0_3: +; LA64-SCQ-NEXT: dbar 20 +; LA64-SCQ-NEXT: .LBB0_4: +; LA64-SCQ-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire + ret void +} + +define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_acquire_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: move $a5, $a4 +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: ori $a4, $zero, 2 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a5 +; LA64-NEXT: move $a5, $zero +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_acquire_monotonic: 
+; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB1_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB1_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB1_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB1_1 +; LA64-SCQ-NEXT: b .LBB1_4 +; LA64-SCQ-NEXT: .LBB1_3: +; LA64-SCQ-NEXT: dbar 20 +; LA64-SCQ-NEXT: .LBB1_4: +; LA64-SCQ-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire monotonic + ret void +} + +define i128 @cmpxchg_i128_acquire_acquire_reti128(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_acquire_acquire_reti128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: move $a6, $a4 +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: ori $a4, $zero, 2 +; LA64-NEXT: ori $a5, $zero, 2 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $a1, $sp, 8 +; LA64-NEXT: ld.d $a0, $sp, 0 +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire_reti128: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB2_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB2_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB2_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB2_1 +; LA64-SCQ-NEXT: b .LBB2_4 +; LA64-SCQ-NEXT: .LBB2_3: +; LA64-SCQ-NEXT: dbar 20 +; LA64-SCQ-NEXT: .LBB2_4: +; LA64-SCQ-NEXT: move $a0, $a5 +; LA64-SCQ-NEXT: move $a1, $a6 +; LA64-SCQ-NEXT: ret + %tmp = cmpxchg 
ptr %ptr, i128 %cmp, i128 %val acquire acquire + %res = extractvalue { i128, i1 } %tmp, 0 + ret i128 %res +} + +define i1 @cmpxchg_i128_acquire_acquire_reti1(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_acquire_acquire_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: move $a6, $a4 +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: ori $a4, $zero, 2 +; LA64-NEXT: ori $a5, $zero, 2 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a6 +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_acquire_acquire_reti1: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB3_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB3_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB3_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB3_1 +; LA64-SCQ-NEXT: b .LBB3_4 +; LA64-SCQ-NEXT: .LBB3_3: +; LA64-SCQ-NEXT: dbar 20 +; LA64-SCQ-NEXT: .LBB3_4: +; LA64-SCQ-NEXT: xor $a0, $a6, $a2 +; LA64-SCQ-NEXT: xor $a1, $a5, $a1 +; LA64-SCQ-NEXT: or $a0, $a1, $a0 +; LA64-SCQ-NEXT: sltui $a0, $a0, 1 +; LA64-SCQ-NEXT: ret + %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val acquire acquire + %res = extractvalue { i128, i1 } %tmp, 1 + ret i1 %res +} + + +define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_monotonic_monotonic: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: move $a2, $a3 +; 
LA64-NEXT: move $a3, $a4 +; LA64-NEXT: move $a4, $zero +; LA64-NEXT: move $a5, $zero +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB4_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB4_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB4_1 +; LA64-SCQ-NEXT: b .LBB4_4 +; LA64-SCQ-NEXT: .LBB4_3: +; LA64-SCQ-NEXT: dbar 1792 +; LA64-SCQ-NEXT: .LBB4_4: +; LA64-SCQ-NEXT: ret + %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic + ret void +} + +define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_monotonic_monotonic_reti128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a4 +; LA64-NEXT: move $a4, $zero +; LA64-NEXT: move $a5, $zero +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $a1, $sp, 8 +; LA64-NEXT: ld.d $a0, $sp, 0 +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti128: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB5_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB5_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 
+; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB5_1 +; LA64-SCQ-NEXT: b .LBB5_4 +; LA64-SCQ-NEXT: .LBB5_3: +; LA64-SCQ-NEXT: dbar 1792 +; LA64-SCQ-NEXT: .LBB5_4: +; LA64-SCQ-NEXT: move $a0, $a5 +; LA64-SCQ-NEXT: move $a1, $a6 +; LA64-SCQ-NEXT: ret + %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic + %res = extractvalue { i128, i1 } %tmp, 0 + ret i128 %res +} + +define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val) nounwind { +; LA64-LABEL: cmpxchg_i128_monotonic_monotonic_reti1: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $a2, $sp, 8 +; LA64-NEXT: st.d $a1, $sp, 0 +; LA64-NEXT: addi.d $a1, $sp, 0 +; LA64-NEXT: move $a2, $a3 +; LA64-NEXT: move $a3, $a4 +; LA64-NEXT: move $a4, $zero +; LA64-NEXT: move $a5, $zero +; LA64-NEXT: bl %plt(__atomic_compare_exchange_16) +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret +; +; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti1: +; LA64-SCQ: # %bb.0: +; LA64-SCQ-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 +; LA64-SCQ-NEXT: bne $a5, $a1, .LBB6_3 +; LA64-SCQ-NEXT: bne $a6, $a2, .LBB6_3 +; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LA64-SCQ-NEXT: move $a7, $a3 +; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 +; LA64-SCQ-NEXT: beqz $a7, .LBB6_1 +; LA64-SCQ-NEXT: b .LBB6_4 +; LA64-SCQ-NEXT: .LBB6_3: +; LA64-SCQ-NEXT: dbar 1792 +; LA64-SCQ-NEXT: .LBB6_4: +; LA64-SCQ-NEXT: xor $a0, $a6, $a2 +; LA64-SCQ-NEXT: xor $a1, $a5, $a1 +; LA64-SCQ-NEXT: or $a0, $a1, $a0 +; LA64-SCQ-NEXT: sltui $a0, $a0, 1 +; LA64-SCQ-NEXT: ret + %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic + %res = extractvalue { i128, i1 } %tmp, 1 + ret i1 %res +} >From b8d1a1975759ed05a33c70664205abf4a1bee9c8 Mon Sep 17 00:00:00 2001 From: tangaac 
<tangya...@loongson.cn> Date: Thu, 28 Nov 2024 11:19:17 +0800 Subject: [PATCH 2/3] scq work with ld-seq-sa --- .../LoongArchExpandAtomicPseudoInsts.cpp | 3 +- .../ir-instruction/atomic-cmpxchg-128.ll | 167 ++++++++++++------ 2 files changed, 113 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp index 4a5475bafd475d..f46360faf97a67 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -697,7 +697,8 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128( // .tail: // dbar 0x700 | acquire - BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); + if (!(hint == 0x700 && MF->getSubtarget<LoongArchSubtarget>().hasLD_SEQ_SA())) + BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint); NextMBBI = MBB.end(); MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll index 11b2622a94551a..d0187760dba3bd 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll @@ -1,6 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq < %s | FileCheck %s --check-prefix=LA64 -; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq < %s | FileCheck %s --check-prefix=LA64-SCQ +; RUN: llc --mtriple=loongarch64 -mattr=+d,-scq,-ld-seq-sa < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq,-ld-seq-sa < %s | FileCheck %s --check-prefixes=LA64-SCQ,NO-LD-SEQ-SA +; RUN: llc --mtriple=loongarch64 -mattr=+d,+scq,+ld-seq-sa < %s | FileCheck %s --check-prefixes=LA64-SCQ,LD-SEQ-SA define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwind { ; 
LA64-LABEL: cmpxchg_i128_acquire_acquire: @@ -182,22 +183,38 @@ define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) no ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret ; -; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic: -; LA64-SCQ: # %bb.0: -; LA64-SCQ-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 -; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 -; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 -; LA64-SCQ-NEXT: bne $a5, $a1, .LBB4_3 -; LA64-SCQ-NEXT: bne $a6, $a2, .LBB4_3 -; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 -; LA64-SCQ-NEXT: move $a7, $a3 -; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 -; LA64-SCQ-NEXT: beqz $a7, .LBB4_1 -; LA64-SCQ-NEXT: b .LBB4_4 -; LA64-SCQ-NEXT: .LBB4_3: -; LA64-SCQ-NEXT: dbar 1792 -; LA64-SCQ-NEXT: .LBB4_4: -; LA64-SCQ-NEXT: ret +; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic: +; NO-LD-SEQ-SA: # %bb.0: +; NO-LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3 +; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3 +; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; NO-LD-SEQ-SA-NEXT: move $a7, $a3 +; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB4_1 +; NO-LD-SEQ-SA-NEXT: b .LBB4_4 +; NO-LD-SEQ-SA-NEXT: .LBB4_3: +; NO-LD-SEQ-SA-NEXT: dbar 1792 +; NO-LD-SEQ-SA-NEXT: .LBB4_4: +; NO-LD-SEQ-SA-NEXT: ret +; +; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic: +; LD-SEQ-SA: # %bb.0: +; LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 +; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3 +; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3 +; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB4_1 Depth=1 +; LD-SEQ-SA-NEXT: move $a7, $a3 +; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; LD-SEQ-SA-NEXT: beqz $a7, .LBB4_1 +; LD-SEQ-SA-NEXT: b .LBB4_4 +; LD-SEQ-SA-NEXT: .LBB4_3: +; LD-SEQ-SA-NEXT: .LBB4_4: +; LD-SEQ-SA-NEXT: 
ret %res = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic ret void } @@ -221,24 +238,42 @@ define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128 ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret ; -; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti128: -; LA64-SCQ: # %bb.0: -; LA64-SCQ-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 -; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 -; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 -; LA64-SCQ-NEXT: bne $a5, $a1, .LBB5_3 -; LA64-SCQ-NEXT: bne $a6, $a2, .LBB5_3 -; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 -; LA64-SCQ-NEXT: move $a7, $a3 -; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 -; LA64-SCQ-NEXT: beqz $a7, .LBB5_1 -; LA64-SCQ-NEXT: b .LBB5_4 -; LA64-SCQ-NEXT: .LBB5_3: -; LA64-SCQ-NEXT: dbar 1792 -; LA64-SCQ-NEXT: .LBB5_4: -; LA64-SCQ-NEXT: move $a0, $a5 -; LA64-SCQ-NEXT: move $a1, $a6 -; LA64-SCQ-NEXT: ret +; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti128: +; NO-LD-SEQ-SA: # %bb.0: +; NO-LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB5_3 +; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3 +; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; NO-LD-SEQ-SA-NEXT: move $a7, $a3 +; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB5_1 +; NO-LD-SEQ-SA-NEXT: b .LBB5_4 +; NO-LD-SEQ-SA-NEXT: .LBB5_3: +; NO-LD-SEQ-SA-NEXT: dbar 1792 +; NO-LD-SEQ-SA-NEXT: .LBB5_4: +; NO-LD-SEQ-SA-NEXT: move $a0, $a5 +; NO-LD-SEQ-SA-NEXT: move $a1, $a6 +; NO-LD-SEQ-SA-NEXT: ret +; +; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti128: +; LD-SEQ-SA: # %bb.0: +; LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 +; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB5_3 +; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3 +; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB5_1 Depth=1 +; 
LD-SEQ-SA-NEXT: move $a7, $a3 +; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; LD-SEQ-SA-NEXT: beqz $a7, .LBB5_1 +; LD-SEQ-SA-NEXT: b .LBB5_4 +; LD-SEQ-SA-NEXT: .LBB5_3: +; LD-SEQ-SA-NEXT: .LBB5_4: +; LD-SEQ-SA-NEXT: move $a0, $a5 +; LD-SEQ-SA-NEXT: move $a1, $a6 +; LD-SEQ-SA-NEXT: ret %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic %res = extractvalue { i128, i1 } %tmp, 0 ret i128 %res @@ -261,26 +296,46 @@ define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val ; LA64-NEXT: addi.d $sp, $sp, 32 ; LA64-NEXT: ret ; -; LA64-SCQ-LABEL: cmpxchg_i128_monotonic_monotonic_reti1: -; LA64-SCQ: # %bb.0: -; LA64-SCQ-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 -; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 -; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 -; LA64-SCQ-NEXT: bne $a5, $a1, .LBB6_3 -; LA64-SCQ-NEXT: bne $a6, $a2, .LBB6_3 -; LA64-SCQ-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 -; LA64-SCQ-NEXT: move $a7, $a3 -; LA64-SCQ-NEXT: sc.q $a7, $a4, $a0 -; LA64-SCQ-NEXT: beqz $a7, .LBB6_1 -; LA64-SCQ-NEXT: b .LBB6_4 -; LA64-SCQ-NEXT: .LBB6_3: -; LA64-SCQ-NEXT: dbar 1792 -; LA64-SCQ-NEXT: .LBB6_4: -; LA64-SCQ-NEXT: xor $a0, $a6, $a2 -; LA64-SCQ-NEXT: xor $a1, $a5, $a1 -; LA64-SCQ-NEXT: or $a0, $a1, $a0 -; LA64-SCQ-NEXT: sltui $a0, $a0, 1 -; LA64-SCQ-NEXT: ret +; NO-LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti1: +; NO-LD-SEQ-SA: # %bb.0: +; NO-LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3 +; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3 +; NO-LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; NO-LD-SEQ-SA-NEXT: move $a7, $a3 +; NO-LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; NO-LD-SEQ-SA-NEXT: beqz $a7, .LBB6_1 +; NO-LD-SEQ-SA-NEXT: b .LBB6_4 +; NO-LD-SEQ-SA-NEXT: .LBB6_3: +; NO-LD-SEQ-SA-NEXT: dbar 1792 +; NO-LD-SEQ-SA-NEXT: .LBB6_4: +; NO-LD-SEQ-SA-NEXT: xor $a0, $a6, $a2 +; NO-LD-SEQ-SA-NEXT: xor $a1, $a5, $a1 +; 
NO-LD-SEQ-SA-NEXT: or $a0, $a1, $a0 +; NO-LD-SEQ-SA-NEXT: sltui $a0, $a0, 1 +; NO-LD-SEQ-SA-NEXT: ret +; +; LD-SEQ-SA-LABEL: cmpxchg_i128_monotonic_monotonic_reti1: +; LD-SEQ-SA: # %bb.0: +; LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 +; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 +; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3 +; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3 +; LD-SEQ-SA-NEXT: # %bb.2: # in Loop: Header=BB6_1 Depth=1 +; LD-SEQ-SA-NEXT: move $a7, $a3 +; LD-SEQ-SA-NEXT: sc.q $a7, $a4, $a0 +; LD-SEQ-SA-NEXT: beqz $a7, .LBB6_1 +; LD-SEQ-SA-NEXT: b .LBB6_4 +; LD-SEQ-SA-NEXT: .LBB6_3: +; LD-SEQ-SA-NEXT: .LBB6_4: +; LD-SEQ-SA-NEXT: xor $a0, $a6, $a2 +; LD-SEQ-SA-NEXT: xor $a1, $a5, $a1 +; LD-SEQ-SA-NEXT: or $a0, $a1, $a0 +; LD-SEQ-SA-NEXT: sltui $a0, $a0, 1 +; LD-SEQ-SA-NEXT: ret %tmp = cmpxchg ptr %ptr, i128 %cmp, i128 %val monotonic monotonic %res = extractvalue { i128, i1 } %tmp, 1 ret i1 %res >From 884776bd98b883cc4c2ec2fd1b2332e71aa35ed7 Mon Sep 17 00:00:00 2001 From: tangaac <tangya...@loongson.cn> Date: Mon, 2 Dec 2024 10:42:52 +0800 Subject: [PATCH 3/3] small change --- .../LoongArch/LoongArchExpandAtomicPseudoInsts.cpp | 1 + llvm/lib/TargetParser/Host.cpp | 2 +- .../LoongArch/ir-instruction/atomic-cmpxchg-128.ll | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp index f46360faf97a67..12d8a0f275cb54 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp @@ -654,6 +654,7 @@ bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg128( BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_D), DestLoReg) .addReg(AddrReg) .addImm(0); + BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::DBAR)).addImm(0b10100); BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LD_D), DestHiReg) .addReg(AddrReg) .addImm(8); 
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index 6e06de323fadad..3932c07a9c0813 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -2028,12 +2028,12 @@ const StringMap<bool> sys::getHostCPUFeatures() { Features["div32"] = cpucfg2 & (1U << 26); // CPUCFG.2.DIV32 Features["lam-bh"] = cpucfg2 & (1U << 27); // CPUCFG.2.LAM_BH Features["lamcas"] = cpucfg2 & (1U << 28); // CPUCFG.2.LAMCAS + Features["scq"] = cpucfg2 & (1U << 30); // CPUCFG.2.SCQ Features["ld-seq-sa"] = cpucfg3 & (1U << 23); // CPUCFG.3.LD_SEQ_SA // TODO: Need to complete. // Features["llacq-screl"] = cpucfg2 & (1U << 29); // CPUCFG.2.LLACQ_SCREL - // Features["scq"] = cpucfg2 & (1U << 30); // CPUCFG.2.SCQ return Features; } #elif defined(__linux__) && defined(__riscv) diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll index d0187760dba3bd..b2fcecc793eda3 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll +++ b/llvm/test/CodeGen/LoongArch/ir-instruction/atomic-cmpxchg-128.ll @@ -25,6 +25,7 @@ define void @cmpxchg_i128_acquire_acquire(ptr %ptr, i128 %cmp, i128 %val) nounwi ; LA64-SCQ: # %bb.0: ; LA64-SCQ-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1 ; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: dbar 20 ; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 ; LA64-SCQ-NEXT: bne $a5, $a1, .LBB0_3 ; LA64-SCQ-NEXT: bne $a6, $a2, .LBB0_3 @@ -63,6 +64,7 @@ define void @cmpxchg_i128_acquire_monotonic(ptr %ptr, i128 %cmp, i128 %val) noun ; LA64-SCQ: # %bb.0: ; LA64-SCQ-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1 ; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: dbar 20 ; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 ; LA64-SCQ-NEXT: bne $a5, $a1, .LBB1_3 ; LA64-SCQ-NEXT: bne $a6, $a2, .LBB1_3 @@ -103,6 +105,7 @@ define i128 @cmpxchg_i128_acquire_acquire_reti128(ptr %ptr, i128 %cmp, i128 %val ; LA64-SCQ: # %bb.0: ; LA64-SCQ-NEXT: .LBB2_1: #
=>This Inner Loop Header: Depth=1 ; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: dbar 20 ; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 ; LA64-SCQ-NEXT: bne $a5, $a1, .LBB2_3 ; LA64-SCQ-NEXT: bne $a6, $a2, .LBB2_3 @@ -144,6 +147,7 @@ define i1 @cmpxchg_i128_acquire_acquire_reti1(ptr %ptr, i128 %cmp, i128 %val) no ; LA64-SCQ: # %bb.0: ; LA64-SCQ-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1 ; LA64-SCQ-NEXT: ll.d $a5, $a0, 0 +; LA64-SCQ-NEXT: dbar 20 ; LA64-SCQ-NEXT: ld.d $a6, $a0, 8 ; LA64-SCQ-NEXT: bne $a5, $a1, .LBB3_3 ; LA64-SCQ-NEXT: bne $a6, $a2, .LBB3_3 @@ -187,6 +191,7 @@ define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) no ; NO-LD-SEQ-SA: # %bb.0: ; NO-LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: dbar 20 ; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3 ; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3 @@ -204,6 +209,7 @@ define void @cmpxchg_i128_monotonic_monotonic(ptr %ptr, i128 %cmp, i128 %val) no ; LD-SEQ-SA: # %bb.0: ; LD-SEQ-SA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1 ; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: dbar 20 ; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB4_3 ; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB4_3 @@ -242,6 +248,7 @@ define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128 ; NO-LD-SEQ-SA: # %bb.0: ; NO-LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: dbar 20 ; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB5_3 ; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3 @@ -261,6 +268,7 @@ define i128 @cmpxchg_i128_monotonic_monotonic_reti128(ptr %ptr, i128 %cmp, i128 ; LD-SEQ-SA: # %bb.0: ; LD-SEQ-SA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1 ; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: dbar 20 ; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; LD-SEQ-SA-NEXT: bne 
$a5, $a1, .LBB5_3 ; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB5_3 @@ -300,6 +308,7 @@ define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val ; NO-LD-SEQ-SA: # %bb.0: ; NO-LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; NO-LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; NO-LD-SEQ-SA-NEXT: dbar 20 ; NO-LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; NO-LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3 ; NO-LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3 @@ -321,6 +330,7 @@ define i1 @cmpxchg_i128_monotonic_monotonic_reti1(ptr %ptr, i128 %cmp, i128 %val ; LD-SEQ-SA: # %bb.0: ; LD-SEQ-SA-NEXT: .LBB6_1: # =>This Inner Loop Header: Depth=1 ; LD-SEQ-SA-NEXT: ll.d $a5, $a0, 0 +; LD-SEQ-SA-NEXT: dbar 20 ; LD-SEQ-SA-NEXT: ld.d $a6, $a0, 8 ; LD-SEQ-SA-NEXT: bne $a5, $a1, .LBB6_3 ; LD-SEQ-SA-NEXT: bne $a6, $a2, .LBB6_3 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits