[clang] [libc] [llvm] [libc][math] Refactor fsqrt to Header Only (PR #175444)

Jolynn Wee Zhuo Lin via cfe-commits Mon, 12 Jan 2026 07:39:54 -0800

Michał Górny <[email protected]>,Aviral Garg
 <[email protected]>,
Andrzej Warzyński <[email protected]>,Vassil
 Vassilev <[email protected]>,hev <[email protected]>,Valeriy
 Savchenko <[email protected]>,Trevor Gross <[email protected]>,
Vedran Miletić <[email protected]>,jolwnn
 <[email protected]>,jolwnn <[email protected]>,Jolynn Wee Zhuo Lin
 <[email protected]>,jolwnn <[email protected]>
Message-ID:
In-Reply-To: <llvm.org/llvm/llvm-project/pull/[email protected]>



https://github.com/jolwnn updated 
https://github.com/llvm/llvm-project/pull/175444

>From ac7fefc8062525bc580b3c6f46fe7dda15fdced9 Mon Sep 17 00:00:00 2001
From: jolwnn <[email protected]>
Date: Mon, 12 Jan 2026 00:40:46 +0800
Subject: [PATCH 01/13] refactor fsqrt to header only

---
 libc/shared/math.h                            |  1 +
 libc/shared/math/fsqrt.h                      | 24 +++++++++++++++++
 libc/src/__support/math/CMakeLists.txt        |  8 ++++++
 libc/src/__support/math/fsqrt.h               | 26 +++++++++++++++++++
 libc/src/math/generic/CMakeLists.txt          |  2 +-
 libc/src/math/generic/fsqrt.cpp               |  6 ++---
 libc/test/shared/shared_math_test.cpp         |  1 +
 .../llvm-project-overlay/libc/BUILD.bazel     | 10 ++++++-
 8 files changed, 72 insertions(+), 6 deletions(-)
 create mode 100644 libc/shared/math/fsqrt.h
 create mode 100644 libc/src/__support/math/fsqrt.h

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 7fb4c43f509c4..8fcd8ef98d9b6 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -66,5 +66,6 @@
 #include "math/rsqrtf.h"
 #include "math/rsqrtf16.h"
 #include "math/sin.h"
+#include "math/fsqrt.h"
 
 #endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/shared/math/fsqrt.h b/libc/shared/math/fsqrt.h
new file mode 100644
index 0000000000000..635b155b58e44
--- /dev/null
+++ b/libc/shared/math/fsqrt.h
@@ -0,0 +1,24 @@
+//===-- Shared fsqrt function -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_FSQRT_H
+#define LLVM_LIBC_SHARED_MATH_FSQRT_H
+
+#include "shared/libc_common.h"
+#include "src/__support/math/fsqrt.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace shared {
+
+using math::fsqrt; // Re-export the header-only implementation.
+
+} // namespace shared
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SHARED_MATH_FSQRT_H
\ No newline at end of file
diff --git a/libc/src/__support/math/CMakeLists.txt 
b/libc/src/__support/math/CMakeLists.txt
index 741da7432c94f..86811c223e497 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -594,6 +594,14 @@ add_header_library(
     libc.src.__support.math.exp10_float16_constants
 )
 
+add_header_library(
+  fsqrt
+  HDRS
+    fsqrt.h
+  DEPENDS
+    libc.src.__support.FPUtil.generic.sqrt
+)
+
 add_header_library(
   frexpf128
   HDRS
diff --git a/libc/src/__support/math/fsqrt.h b/libc/src/__support/math/fsqrt.h
new file mode 100644
index 0000000000000..8dd6afc7845f0
--- /dev/null
+++ b/libc/src/__support/math/fsqrt.h
@@ -0,0 +1,26 @@
+//===-- Implementation header for fsqrt ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FSQRT_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_FSQRT_H
+
+#include "src/__support/FPUtil/generic/sqrt.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+// Square root of a double, computed via fputil::sqrt and returned as float.
+LIBC_INLINE static constexpr float fsqrt(double x) {
+  return fputil::sqrt<float>(x);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_FSQRT_H
diff --git a/libc/src/math/generic/CMakeLists.txt 
b/libc/src/math/generic/CMakeLists.txt
index 9c0da076b6cf0..01783691a1c1c 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -5244,7 +5244,7 @@ add_entrypoint_object(
   HDRS
     ../fsqrt.h
   DEPENDS
-    libc.src.__support.FPUtil.generic.sqrt
+    libc.src.__support.math.fsqrt
 )
 
 add_entrypoint_object(
diff --git a/libc/src/math/generic/fsqrt.cpp b/libc/src/math/generic/fsqrt.cpp
index d54471fd067bf..df1d4a9add9b2 100644
--- a/libc/src/math/generic/fsqrt.cpp
+++ b/libc/src/math/generic/fsqrt.cpp
@@ -7,12 +7,10 @@
 
//===----------------------------------------------------------------------===//
 
 #include "src/math/fsqrt.h"
-#include "src/__support/FPUtil/generic/sqrt.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+#include "src/__support/math/fsqrt.h"
 
 namespace LIBC_NAMESPACE_DECL {
 
-LLVM_LIBC_FUNCTION(float, fsqrt, (double x)) { return fputil::sqrt<float>(x); }
+LLVM_LIBC_FUNCTION(float, fsqrt, (double x)) { return math::fsqrt(x); }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/test/shared/shared_math_test.cpp 
b/libc/test/shared/shared_math_test.cpp
index f823d414e2afd..1ab47b3a96bfd 100644
--- a/libc/test/shared/shared_math_test.cpp
+++ b/libc/test/shared/shared_math_test.cpp
@@ -90,6 +90,7 @@ TEST(LlvmLibcSharedMathTest, AllDouble) {
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp2(0.0));
   EXPECT_FP_EQ(0x1p+0, LIBC_NAMESPACE::shared::exp10(0.0));
   EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::expm1(0.0));
+  EXPECT_FP_EQ(0x0p+0, LIBC_NAMESPACE::shared::fsqrt(0.0));
   EXPECT_FP_EQ(0.0, LIBC_NAMESPACE::shared::sin(0.0));
 }
 
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 210c25dddd0b9..e4a36fa52a7fb 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2804,6 +2804,14 @@ libc_support_library(
     ],
 )
 
+libc_support_library(
+    name = "__support_math_fsqrt",
+    hdrs = ["src/__support/math/fsqrt.h"],
+    deps = [
+        ":__support_fputil_sqrt",
+    ],
+)
+
 libc_support_library(
     name = "__support_math_frexpf128",
     hdrs = ["src/__support/math/frexpf128.h"],
@@ -4250,7 +4258,7 @@ libc_math_function(name = "fromfpxf16")
 libc_math_function(
     name = "fsqrt",
     additional_deps = [
-        ":__support_fputil_sqrt",
+        ":__support_math_fsqrt",
     ],
 )
 

>From f97105f4901fb372a3a9f344d2ae7e5005fae5c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <[email protected]>
Date: Sun, 11 Jan 2026 14:12:10 +0100
Subject: [PATCH 02/13] [clang] [unittest] Fix linking against dylib (#175317)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a regression introduced in #174513 that caused `BasicTests` to link
directly against the static `LLVMTargetParser` library instead of going
through component linking, which is needed to respect dylib builds.

Signed-off-by: Michał Górny <[email protected]>
---
 clang/unittests/Basic/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/unittests/Basic/CMakeLists.txt 
b/clang/unittests/Basic/CMakeLists.txt
index 4918d07ace22b..058243fd3fdba 100644
--- a/clang/unittests/Basic/CMakeLists.txt
+++ b/clang/unittests/Basic/CMakeLists.txt
@@ -16,8 +16,8 @@ add_distinct_clang_unittest(BasicTests
   clangBasic
   clangLex
   LINK_LIBS
-  LLVMTargetParser
   LLVMTestingSupport
   LLVM_COMPONENTS
   Support
+  TargetParser
   )

>From c465b24b1c43036c41481bf4e8c9db79e80360dc Mon Sep 17 00:00:00 2001
From: Aviral Garg <[email protected]>
Date: Sun, 11 Jan 2026 19:32:49 +0530
Subject: [PATCH 03/13] [Support] Add KnownBits::isNonPositive() helper and
 exhaustive test coverage for sign predicates (#175284)

This patch adds:

1. KnownBits::isNonPositive() - Returns true if this value is known to
be non-positive (i.e., the signed maximum value is <= 0). This is
implemented using getSignedMaxValue().isNonPositive().

2. SignPredicatesExhaustive test - An exhaustive test that validates the
correctness of isNegative(), isNonNegative(), isStrictlyPositive(),
isNonPositive(), and isNonZero() by iterating through all possible
KnownBits combinations for 1-bit and 4-bit widths and verifying that the
predicates return true if and only if all possible values represented by
the KnownBits satisfy the predicate.

Fixes #175203
---
 llvm/include/llvm/Support/KnownBits.h    |  3 ++
 llvm/unittests/Support/KnownBitsTest.cpp | 41 ++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/llvm/include/llvm/Support/KnownBits.h 
b/llvm/include/llvm/Support/KnownBits.h
index bff944325880b..ea37b435e2848 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -115,6 +115,9 @@ struct KnownBits {
     return Zero.isSignBitSet() && !One.isZero();
   }
 
+  /// Returns true if this value is known to be non-positive (signed max <= 0).
+  bool isNonPositive() const { return getSignedMaxValue().isNonPositive(); }
+
   /// Make this value negative.
   void makeNegative() {
     One.setSignBit();
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp 
b/llvm/unittests/Support/KnownBitsTest.cpp
index ce0bf86e39dd7..abd0b3607c9fc 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -281,6 +281,47 @@ TEST(KnownBitsTest, SignBitUnknown) {
   EXPECT_TRUE(Known.isSignUnknown());
 }
 
+TEST(KnownBitsTest, SignPredicatesExhaustive) {
+  for (unsigned Bits : {1, 4}) {
+    ForeachKnownBits(Bits, [&](const KnownBits &Known) {
+      if (Known.hasConflict())
+        return;
+
+      bool AllNegative = true;
+      bool AllNonNegative = true;
+      bool AllStrictlyPositive = true;
+      bool AllNonPositive = true;
+      bool AllNonZero = true;
+
+      ForeachNumInKnownBits(Known, [&](const APInt &N) {
+        AllNegative &= N.isNegative();
+        AllNonNegative &= N.isNonNegative();
+        AllStrictlyPositive &= N.isStrictlyPositive();
+        AllNonPositive &= N.isNonPositive();
+        AllNonZero &= !N.isZero();
+      });
+
+      // isNegative() is optimal: returns true iff sign bit is known one.
+      EXPECT_EQ(AllNegative, Known.isNegative())
+          << "isNegative: Known = " << Known;
+      // isNonNegative() is optimal: returns true iff sign bit is known zero.
+      EXPECT_EQ(AllNonNegative, Known.isNonNegative())
+          << "isNonNegative: Known = " << Known;
+      // isStrictlyPositive() is optimal: returns true iff sign bit is known
+      // zero and at least one other bit is known one.
+      EXPECT_EQ(AllStrictlyPositive, Known.isStrictlyPositive())
+          << "isStrictlyPositive: Known = " << Known;
+      // isNonPositive() is optimal: returns true iff (sign bit is known one)
+      // or (known to be zero).
+      EXPECT_EQ(AllNonPositive, Known.isNonPositive())
+          << "isNonPositive: Known = " << Known;
+      // isNonZero() is optimal: returns true iff at least one bit is known 
one.
+      EXPECT_EQ(AllNonZero, Known.isNonZero())
+          << "isNonZero: Known = " << Known;
+    });
+  }
+}
+
 TEST(KnownBitsTest, BinaryExhaustive) {
   testBinaryOpExhaustive(
       "and",

>From cdae94521dd894460364734f7aff782b10fca39b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= <[email protected]>
Date: Sun, 11 Jan 2026 15:14:27 +0100
Subject: [PATCH 04/13] [CIR][AArch64] Add lowering for unpredicated svdup
 builtins (#174433)

This PR adds CIR lowering support for unpredicated `svdup` SVE builtins.
The corresponding ACLE intrinsics are documented at:
* https://developer.arm.com/architectures/instruction-sets/intrinsics

(search for svdup).

Since LLVM provides a direct intrinsic for svdup with a 1:1 mapping, CIR
lowers these builtins by emitting a call to the corresponding LLVM
intrinsic.

DESIGN NOTES
------------
With this change, ACLE intrinsics that have a corresponding LLVM intrinsic can
generally be lowered by CIR by reusing LLVM intrinsic metadata, avoiding
duplicated intrinsic-name definitions, unless codegen-relevant SVETypeFlags are
involved. As a consequence, CIR may no longer emit NYI diagnostics for
intrinsics that (a) have a known LLVM intrinsic mapping and (b) do not use such
codegen-relevant `SVETypeFlag`s; these intrinsics are lowered directly.

IMPLEMENTATION NOTES
--------------------
* Intrinsic discovery logic mirrors the approach in
  CodeGen/TargetBuiltins/ARM.cpp, but is simplified since CIR only
  requires the intrinsic name.
* Test inputs are copied from the existing svdup tests:
  clang/test/CodeGen/AArch64/sve-intrinsics/acle_sve_dup.c.
* The LLVM IR produced _with_ and _without_ `-fclangir` is identical,
  modulo basic block labels, SROA, and function attributes.

EXAMPLE LOWERING
----------------
Input:
```C

svint8_t test_svdup_n_s8(int8_t op)
{
  return svdup_n_s8(op);
}
```

OUTPUT 1 (default):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 noundef %op) #0 {
entry:
  %op.addr = alloca i8, align 1
  store i8 %op, ptr %op.addr, align 1
  %0 = load i8, ptr %op.addr, align 1
  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %0)
  ret <vscale x 16 x i8> %1
}
```

OUTPUT 2 (via `-fclangir`):
```llvm
define dso_local <vscale x 16 x i8> @test_svdup_n_s8(i8 %0) #0 {
  %2 = alloca i8, i64 1, align 1
  %3 = alloca <vscale x 16 x i8>, i64 1, align 16
  store i8 %0, ptr %2, align 1
  %4 = load i8, ptr %2, align 1
  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.x.nxv16i8(i8 %4)
  store <vscale x 16 x i8> %5, ptr %3, align 16
  %6 = load <vscale x 16 x i8>, ptr %3, align 16
  ret <vscale x 16 x i8> %6
}
```
---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  | 115 +++++++++-
 clang/lib/CIR/CodeGen/CIRGenFunction.h        |   4 +
 .../CodeGenBuiltins/AArch64/acle_sve_dup.c    | 211 ++++++++++++++++++
 3 files changed, 329 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index e28b3c6cdc2ff..7998fb6b5eaac 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -13,6 +13,7 @@
 
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/MissingFeatures.h"
 
 // TODO(cir): once all builtins are covered, decide whether we still
@@ -25,7 +26,6 @@
 #include "mlir/IR/Value.h"
 #include "clang/AST/GlobalDecl.h"
 #include "clang/Basic/Builtins.h"
-#include "clang/Basic/TargetBuiltins.h"
 
 using namespace clang;
 using namespace clang::CIRGen;
@@ -52,6 +52,80 @@ static mlir::Value genVscaleTimesFactor(mlir::Location loc,
                                builder.getUInt64(scalingFactor, loc));
 }
 
+static bool aarch64SVEIntrinsicsProvenSorted = false;
+
+namespace {
+struct AArch64BuiltinInfo {
+  unsigned builtinID;     // Clang builtin ID (SVE::BI__builtin_sve_*).
+  unsigned llvmIntrinsic; // Intrinsic:: ID, or 0 when none is mapped (SVEMAP2).
+  uint64_t typeModifier;  // Raw bits later wrapped in SVETypeFlags.
+  // Entries are ordered by builtinID only, which is what the lookup relies on.
+  bool operator<(unsigned rhsBuiltinID) const {
+    return builtinID < rhsBuiltinID;
+  }
+  bool operator<(const AArch64BuiltinInfo &te) const {
+    return builtinID < te.builtinID;
+  }
+};
+} // end anonymous namespace
+
+#define SVEMAP1(NameBase, llvmIntrinsic, TypeModifier)                         
\
+  {SVE::BI__builtin_sve_##NameBase, Intrinsic::llvmIntrinsic, TypeModifier}
+
+#define SVEMAP2(NameBase, TypeModifier)                                        
\
+  {SVE::BI__builtin_sve_##NameBase, 0, TypeModifier}
+static const AArch64BuiltinInfo aarch64SVEIntrinsicMap[] = {
+#define GET_SVE_LLVM_INTRINSIC_MAP
+#include "clang/Basic/arm_sve_builtin_cg.inc"
+#undef GET_SVE_LLVM_INTRINSIC_MAP
+};
+
+static const AArch64BuiltinInfo *
+findARMVectorIntrinsicInMap(ArrayRef<AArch64BuiltinInfo> intrinsicMap,
+                            unsigned builtinID, bool &mapProvenSorted) {
+  // Look up builtinID in intrinsicMap; returns nullptr when there is no entry.
+#ifndef NDEBUG
+  if (!mapProvenSorted) { // Assert sortedness once, then remember it.
+    assert(llvm::is_sorted(intrinsicMap));
+    mapProvenSorted = true;
+  }
+#endif
+  // Binary search; relies on the sorted order asserted above.
+  const AArch64BuiltinInfo *info = llvm::lower_bound(intrinsicMap, builtinID);
+
+  if (info != intrinsicMap.end() && info->builtinID == builtinID)
+    return info;
+
+  return nullptr;
+}
+
+bool CIRGenFunction::getAArch64SVEProcessedOperands(
+    unsigned builtinID, const CallExpr *expr, SmallVectorImpl<mlir::Value> 
&ops,
+    SVETypeFlags typeFlags) {
+  // Find out if any arguments are required to be integer constant expressions.
+  unsigned iceArguments = 0;
+  ASTContext::GetBuiltinTypeError error;
+  getContext().GetBuiltinType(builtinID, error, &iceArguments);
+  assert(error == ASTContext::GE_None && "Should not codegen an error");
+  // Emit each call argument as a scalar value and append it to ops.
+  for (unsigned i = 0, e = expr->getNumArgs(); i != e; i++) {
+    bool isIce = iceArguments & (1 << i); // Bit i set: arg i must be an ICE.
+    mlir::Value arg = emitScalarExpr(expr->getArg(i));
+    // ICE arguments are not supported yet; arg is still appended below.
+    if (isIce) {
+      cgm.errorNYI(expr->getSourceRange(),
+                   std::string("unimplemented AArch64 builtin call: ") +
+                       getContext().BuiltinInfo.getName(builtinID));
+    }
+
+    // FIXME: Handle types like svint16x2_t, which are currently incorrectly
+    // converted to i32. These should be treated as structs and unpacked.
+
+    ops.push_back(arg);
+  }
+  return true; // No failure path yet; typeFlags is currently unused.
+}
+
 std::optional<mlir::Value>
 CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned builtinID,
                                           const CallExpr *expr) {
@@ -65,8 +139,40 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned 
builtinID,
 
   assert(!cir::MissingFeatures::aarch64SVEIntrinsics());
 
+  auto *builtinIntrInfo = findARMVectorIntrinsicInMap(
+      aarch64SVEIntrinsicMap, builtinID, aarch64SVEIntrinsicsProvenSorted);
+
+  // Operands of the builtin call; builtinIntrInfo is assumed non-null below.
+  llvm::SmallVector<mlir::Value> ops;
+
+  SVETypeFlags typeFlags(builtinIntrInfo->typeModifier);
+  if (!CIRGenFunction::getAArch64SVEProcessedOperands(builtinID, expr, ops,
+                                                      typeFlags))
+    return mlir::Value{};
+  // Builtins carrying any of these flags are reported as not yet implemented.
+  if (typeFlags.isLoad() || typeFlags.isStore() || typeFlags.isGatherLoad() ||
+      typeFlags.isScatterStore() || typeFlags.isPrefetch() ||
+      typeFlags.isGatherPrefetch() || typeFlags.isStructLoad() ||
+      typeFlags.isStructStore() || typeFlags.isTupleSet() ||
+      typeFlags.isTupleGet() || typeFlags.isTupleCreate() ||
+      typeFlags.isUndef())
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented AArch64 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+
   mlir::Location loc = getLoc(expr->getExprLoc());
 
+  if (builtinIntrInfo->llvmIntrinsic != 0) {
+    std::string llvmIntrName(Intrinsic::getBaseName(
+        (llvm::Intrinsic::ID)builtinIntrInfo->llvmIntrinsic));
+
+    llvmIntrName.erase(0, /*std::strlen("llvm.")=*/5);
+
+    return emitIntrinsicCallOp(builder, loc, llvmIntrName,
+                               convertType(expr->getType()),
+                               mlir::ValueRange{ops});
+  }
+
   switch (builtinID) {
   default:
     return std::nullopt;
@@ -103,10 +209,12 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned 
builtinID,
   case SVE::BI__builtin_sve_svpmullb_u64:
   case SVE::BI__builtin_sve_svpmullb_n_u16:
   case SVE::BI__builtin_sve_svpmullb_n_u64:
+
   case SVE::BI__builtin_sve_svdup_n_b8:
   case SVE::BI__builtin_sve_svdup_n_b16:
   case SVE::BI__builtin_sve_svdup_n_b32:
   case SVE::BI__builtin_sve_svdup_n_b64:
+
   case SVE::BI__builtin_sve_svdupq_n_b8:
   case SVE::BI__builtin_sve_svdupq_n_b16:
   case SVE::BI__builtin_sve_svdupq_n_b32:
@@ -129,22 +237,27 @@ CIRGenFunction::emitAArch64SVEBuiltinExpr(unsigned 
builtinID,
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+
   case SVE::BI__builtin_sve_svlen_u8:
   case SVE::BI__builtin_sve_svlen_s8:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 
16);
+
   case SVE::BI__builtin_sve_svlen_u16:
   case SVE::BI__builtin_sve_svlen_s16:
   case SVE::BI__builtin_sve_svlen_f16:
   case SVE::BI__builtin_sve_svlen_bf16:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 8);
+
   case SVE::BI__builtin_sve_svlen_u32:
   case SVE::BI__builtin_sve_svlen_s32:
   case SVE::BI__builtin_sve_svlen_f32:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 4);
+
   case SVE::BI__builtin_sve_svlen_u64:
   case SVE::BI__builtin_sve_svlen_s64:
   case SVE::BI__builtin_sve_svlen_f64:
     return genVscaleTimesFactor(loc, builder, convertType(expr->getType()), 2);
+
   case SVE::BI__builtin_sve_svtbl2_u8:
   case SVE::BI__builtin_sve_svtbl2_s8:
   case SVE::BI__builtin_sve_svtbl2_u16:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h 
b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index 3101fc6cd228c..5fe1d9a4f2b76 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -31,6 +31,7 @@
 #include "clang/AST/Stmt.h"
 #include "clang/AST/Type.h"
 #include "clang/Basic/OperatorKinds.h"
+#include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/MissingFeatures.h"
 #include "clang/CIR/TypeEvaluationKind.h"
@@ -1265,6 +1266,9 @@ class CIRGenFunction : public CIRGenTypeCache {
   /// CIR emit functions
   /// ----------------------
 public:
+  bool getAArch64SVEProcessedOperands(unsigned builtinID, const CallExpr *expr,
+                                      SmallVectorImpl<mlir::Value> &ops,
+                                      clang::SVETypeFlags typeFlags);
   std::optional<mlir::Value>
   emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
                          ReturnValueSlot returnValue,
diff --git a/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c 
b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
new file mode 100644
index 0000000000000..3e0a892d6b368
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/AArch64/acle_sve_dup.c
@@ -0,0 +1,211 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-cir -o - %s | FileCheck %s 
--check-prefixes=ALL,CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -fclangir -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+
+// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64 -target-feature +sve 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=ALL,LLVM_OGCG_CIR
+#include <arm_sve.h>
+
+#if defined __ARM_FEATURE_SME
+#define MODE_ATTR __arm_streaming
+#else
+#define MODE_ATTR
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// ALL-LABEL: @test_svdup_n_s8
+svint8_t test_svdup_n_s8(int8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s8i {{.*}} -> !cir.vector<[16] x !s8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s8i) -> !cir.vector<[16] x !s8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s16
+svint16_t test_svdup_n_s16(int16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s16i {{.*}} -> !cir.vector<[8] x !s16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s16i) -> !cir.vector<[8] x !s16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s32
+svint32_t test_svdup_n_s32(int32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s32i {{.*}} -> !cir.vector<[4] x !s32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s32i) -> !cir.vector<[4] x !s32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_s64
+svint64_t test_svdup_n_s64(int64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !s64i {{.*}} -> !cir.vector<[2] x !s64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!s64i) -> !cir.vector<[2] x !s64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_s64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u8
+svuint8_t test_svdup_n_u8(uint8_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u8i {{.*}} -> !cir.vector<[16] x !u8i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(1) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u8i) -> !cir.vector<[16] x !u8i>
+
+// LLVM_OGCG_CIR-SAME: i8 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i8,{{([[:space:]]?i64 1,)?}} 
align 1
+// LLVM_OGCG_CIR:    store i8 [[OP]], ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i8, ptr [[OP_ADDR]], align 1
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 16 x i8> 
@llvm.aarch64.sve.dup.x.nxv16i8(i8 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u8,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u16
+svuint16_t test_svdup_n_u16(uint16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u16i {{.*}} -> !cir.vector<[8] x !u16i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u16i) -> !cir.vector<[8] x !u16i>
+
+// LLVM_OGCG_CIR-SAME: i16 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i16,{{([[:space:]]?i64 1,)?}} 
align 2
+// LLVM_OGCG_CIR:    store i16 [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i16, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x i16> 
@llvm.aarch64.sve.dup.x.nxv8i16(i16 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u32
+svuint32_t test_svdup_n_u32(uint32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u32i {{.*}} -> !cir.vector<[4] x !u32i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u32i) -> !cir.vector<[4] x !u32i>
+
+// LLVM_OGCG_CIR-SAME: i32 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i32,{{([[:space:]]?i64 1,)?}} 
align 4
+// LLVM_OGCG_CIR:    store i32 [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i32, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x i32> 
@llvm.aarch64.sve.dup.x.nxv4i32(i32 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_u64
+svuint64_t test_svdup_n_u64(uint64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !u64i {{.*}} -> !cir.vector<[2] x !u64i>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!u64i) -> !cir.vector<[2] x !u64i>
+
+// LLVM_OGCG_CIR-SAME: i64 {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca i64,{{([[:space:]]?i64 1,)?}} 
align 8
+// LLVM_OGCG_CIR:    store i64 [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load i64, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x i64> 
@llvm.aarch64.sve.dup.x.nxv2i64(i64 [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_u64,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f16
+svfloat16_t test_svdup_n_f16(float16_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.f16 {{.*}} -> !cir.vector<[8] x !cir.f16>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(2) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.f16) -> !cir.vector<[8] x !cir.f16>
+
+// LLVM_OGCG_CIR-SAME: half {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca half,{{([[:space:]]?i64 1,)?}} 
align 2
+// LLVM_OGCG_CIR:    store half [[OP]], ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load half, ptr [[OP_ADDR]], align 2
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 8 x half> 
@llvm.aarch64.sve.dup.x.nxv8f16(half [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f32
+svfloat32_t test_svdup_n_f32(float32_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.float {{.*}} -> !cir.vector<[4] x 
!cir.float>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(4) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.float) -> !cir.vector<[4] x !cir.float>
+
+// LLVM_OGCG_CIR-SAME: float {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca float,{{([[:space:]]?i64 1,)?}} 
align 4
+// LLVM_OGCG_CIR:    store float [[OP]], ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load float, ptr [[OP_ADDR]], align 4
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 4 x float> 
@llvm.aarch64.sve.dup.x.nxv4f32(float [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f32,)(op);
+}
+
+// ALL-LABEL: @test_svdup_n_f64
+svfloat64_t test_svdup_n_f64(float64_t op) MODE_ATTR
+{
+// CIR-SAME:      %[[OP:.*]]: !cir.double {{.*}} -> !cir.vector<[2] x 
!cir.double>
+// CIR:           %[[ALLOCA:.*]] = cir.alloca
+// CIR:           cir.store %[[OP]], %[[ALLOCA]]
+// CIR:           %[[LOAD:.*]] = cir.load align(8) %[[ALLOCA]]
+// CIR:           cir.call_llvm_intrinsic "aarch64.sve.dup.x" %[[LOAD]] : 
(!cir.double) -> !cir.vector<[2] x !cir.double>
+
+// LLVM_OGCG_CIR-SAME: double {{(noundef)?[[:space:]]?}}[[OP:%.*]])
+// LLVM_OGCG_CIR:    [[OP_ADDR:%.*]] = alloca double,{{([[:space:]]?i64 1,)?}} 
align 8
+// LLVM_OGCG_CIR:    store double [[OP]], ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[OP_LOAD:%.*]] = load double, ptr [[OP_ADDR]], align 8
+// LLVM_OGCG_CIR:    [[RES:%.*]] = call <vscale x 2 x double> 
@llvm.aarch64.sve.dup.x.nxv2f64(double [[OP_LOAD]])
+  return SVE_ACLE_FUNC(svdup,_n,_f64,)(op);
+}

>From 27edd2ddcadfab382589a019f09ad36d5707f731 Mon Sep 17 00:00:00 2001
From: Vassil Vassilev <[email protected]>
Date: Sun, 11 Jan 2026 16:52:58 +0200
Subject: [PATCH 05/13] [clang-repl] Fix OrcRuntime lookup for Solaris and unit
 tests. (#175435)

The out-of-process execution in the interpreter depends on the ORC
runtime. It is generally easy to discover because it lives in the clang
runtime path. However, the clang runtime path is relative to clang's
resource directory, which is in turn relative to the clang binary. That
does not work well if clang is linked into a different binary, which can
be in an arbitrary place in the build directory structure.

This patch takes a conservative approach to detecting the common
directory structure and correctly inferring the paths. That fixes the
out-of-process execution unit tests. The patch also contains a small
adjustment for Solaris.

Another take on trying to fix the issue uncovered by #175322.
---
 clang/lib/Interpreter/Interpreter.cpp         | 104 ++++++++----------
 clang/tools/clang-repl/ClangRepl.cpp          |   4 +-
 .../OutOfProcessInterpreterTests.cpp          |  22 ++--
 3 files changed, 59 insertions(+), 71 deletions(-)

diff --git a/clang/lib/Interpreter/Interpreter.cpp 
b/clang/lib/Interpreter/Interpreter.cpp
index 763d298b052f2..f69c57fe48001 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -275,16 +275,14 @@ llvm::Error 
IncrementalExecutorBuilder::UpdateOrcRuntimePath(
   if (!IsOutOfProcess)
     return llvm::Error::success();
 
-  // Candidate runtime filenames to look for (tweak as appropriate).
   static constexpr std::array<const char *, 3> OrcRTLibNames = {
       "liborc_rt.a",
       "liborc_rt_osx.a",
       "liborc_rt-x86_64.a",
   };
 
-  // Return the first file found inside 'Base' (Base may be a directory).
   auto findInDir = [&](llvm::StringRef Base) -> std::optional<std::string> {
-    if (Base.empty())
+    if (Base.empty() || !llvm::sys::fs::exists(Base))
       return std::nullopt;
     for (const char *LibName : OrcRTLibNames) {
       llvm::SmallString<256> Candidate(Base);
@@ -296,74 +294,60 @@ llvm::Error 
IncrementalExecutorBuilder::UpdateOrcRuntimePath(
   };
 
   const clang::driver::Driver &D = C.getDriver();
-
+  const clang::driver::ToolChain &TC = C.getDefaultToolChain();
   llvm::SmallVector<std::string, 8> triedPaths;
 
-  // Prefer Driver::ResourceDir-derived locations:
-  // ResourceDir is typically: <prefix>/lib/clang/<version>
-  if (!D.ResourceDir.empty()) {
-    llvm::SmallString<256> Resource(D.ResourceDir);
-
-    // Directly searching ResourceDir is cheap and sometimes sufficient.
-    if (auto F = findInDir(Resource)) {
-      OrcRuntimePath = *F;
-      return llvm::Error::success();
-    }
-    triedPaths.emplace_back(std::string(Resource.str()));
-
-    // Build <prefix>/lib/clang/<version>/lib. Resource already contains
-    // .../clang/<version>)
-    llvm::SmallString<256> ClangLibDir(Resource);
-    // ClangLibDir currently: <prefix>/lib/clang/<version>
-    // We want: <prefix>/lib/clang/<version>/lib
-    llvm::sys::path::append(ClangLibDir, "lib");
-    if (auto F = findInDir(ClangLibDir)) {
-      OrcRuntimePath = *F;
-      return llvm::Error::success();
-    }
-    triedPaths.emplace_back(std::string(ClangLibDir.str()));
-
-    // Walk up to <prefix>/lib and search there and common variants.
-    llvm::SmallString<256> PrefixLib = Resource;
-    llvm::sys::path::remove_filename(PrefixLib); // remove <version>
-    llvm::sys::path::remove_filename(PrefixLib); // remove clang
-    if (!PrefixLib.empty()) {
-      if (auto F = findInDir(PrefixLib)) {
-        OrcRuntimePath = *F;
-        return llvm::Error::success();
+  llvm::SmallString<256> Resource(D.ResourceDir);
+  if (llvm::sys::fs::exists(Resource)) {
+    // Ask the ToolChain for its runtime paths first (most authoritative).
+    for (auto RuntimePath :
+         {TC.getRuntimePath(), std::make_optional(TC.getCompilerRTPath())}) {
+      if (RuntimePath) {
+        if (auto Found = findInDir(*RuntimePath)) {
+          OrcRuntimePath = *Found;
+          return llvm::Error::success();
+        }
+        triedPaths.emplace_back(*RuntimePath);
       }
-      triedPaths.emplace_back(std::string(PrefixLib.str()));
-
-      // Also check <prefix>/<libdir_basename>/clang/<version>/lib if present 
in
-      // this environment. We extract version from the original ResourceDir
-      // filename (the '<version>' component).
-      llvm::SmallString<64> Version =
-          llvm::sys::path::filename(llvm::StringRef(Resource));
-      llvm::SmallString<256> FormalClangLib = PrefixLib;
-      llvm::sys::path::append(FormalClangLib, "lib", "clang", Version, "lib");
-      if (auto F = findInDir(FormalClangLib)) {
+    }
+
+    // Check ResourceDir and ResourceDir/lib
+    for (auto P : {Resource.str().str(), (Resource + "/lib").str()}) {
+      if (auto F = findInDir(P)) {
         OrcRuntimePath = *F;
         return llvm::Error::success();
       }
-      triedPaths.emplace_back(std::string(FormalClangLib.str()));
+      triedPaths.emplace_back(P);
     }
-  }
-
-  // ToolChain runtime/compiler-rt locations (if available).
-  const clang::driver::ToolChain &TC = C.getDefaultToolChain();
-  for (auto RuntimePath :
-       {TC.getRuntimePath(), std::make_optional(TC.getCompilerRTPath())}) {
-    if (RuntimePath && TC.getVFS().exists(*RuntimePath)) {
-      if (auto Found = findInDir(*RuntimePath)) {
-        OrcRuntimePath = *Found;
-        return llvm::Error::success();
-      } else {
-        triedPaths.emplace_back(*RuntimePath);
+  } else {
+    // The binary was misplaced. Generic Backward Search (Climbing the tree)
+    // This allows unit tests in tools/clang/unittests to find the real lib/
+    llvm::SmallString<256> Cursor = Resource;
+    // ResourceDir-derived locations
+    llvm::StringRef Version = llvm::sys::path::filename(Resource);
+    llvm::StringRef OSName = TC.getOSLibName();
+    while (llvm::sys::path::has_parent_path(Cursor)) {
+      Cursor = llvm::sys::path::parent_path(Cursor).str();
+      // At each level, try standard relative layouts
+      for (auto Rel :
+           {(llvm::Twine("lib/clang/") + Version + "/lib/" + OSName).str(),
+            (llvm::Twine("lib/clang/") + Version + "/lib").str(),
+            (llvm::Twine("lib/") + OSName).str(), std::string("lib/clang")}) {
+        llvm::SmallString<256> Candidate = Cursor;
+        llvm::sys::path::append(Candidate, Rel);
+        if (auto F = findInDir(Candidate)) {
+          OrcRuntimePath = *F;
+          return llvm::Error::success();
+        }
+        triedPaths.emplace_back(std::string(Candidate.str()));
       }
+      // Stop if we hit the root or go too far (safety check)
+      if (triedPaths.size() > 32)
+        break;
     }
   }
 
-  // If we reached here, nothing was found. Build a helpful error string.
+  // Build a helpful error string if everything failed.
   std::string Joined;
   for (size_t i = 0; i < triedPaths.size(); ++i) {
     if (i)
diff --git a/clang/tools/clang-repl/ClangRepl.cpp 
b/clang/tools/clang-repl/ClangRepl.cpp
index e94749555ad1a..95786d688b76e 100644
--- a/clang/tools/clang-repl/ClangRepl.cpp
+++ b/clang/tools/clang-repl/ClangRepl.cpp
@@ -288,6 +288,8 @@ int main(int argc, const char **argv) {
     return 0;
   }
 
+  ExitOnErr(sanitizeOopArguments(argv[0]));
+
   clang::IncrementalCompilerBuilder CB;
   CB.SetCompilerArgs(ClangArgv);
 
@@ -320,8 +322,6 @@ int main(int argc, const char **argv) {
     DeviceCI = ExitOnErr(CB.CreateCudaDevice());
   }
 
-  ExitOnErr(sanitizeOopArguments(argv[0]));
-
   // FIXME: Investigate if we could use runToolOnCodeWithArgs from tooling. It
   // can replace the boilerplate code for creation of the compiler instance.
   std::unique_ptr<clang::CompilerInstance> CI;
diff --git a/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp 
b/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp
index d33005244d8da..225d6c8c66cab 100644
--- a/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp
+++ b/clang/unittests/Interpreter/OutOfProcessInterpreterTests.cpp
@@ -102,6 +102,15 @@ static std::string getExecutorPath() {
   return ExecutorPath.str().str();
 }
 
+class OutOfProcessInterpreterTest : public InterpreterTestBase {
+protected:
+  static bool HostSupportsOutOfProcessJIT() {
+    if (!InterpreterTestBase::HostSupportsJIT())
+      return false;
+    return !getExecutorPath().empty();
+  }
+};
+
 struct OutOfProcessInterpreterInfo {
   std::string OrcRuntimePath;
   std::unique_ptr<Interpreter> Interpreter;
@@ -162,8 +171,8 @@ static size_t DeclsSize(TranslationUnitDecl *PTUDecl) {
   return std::distance(PTUDecl->decls().begin(), PTUDecl->decls().end());
 }
 
-TEST_F(InterpreterTestBase, SanityWithRemoteExecution) {
-  if (!HostSupportsJIT())
+TEST_F(OutOfProcessInterpreterTest, SanityWithRemoteExecution) {
+  if (!HostSupportsOutOfProcessJIT())
     GTEST_SKIP();
 
   auto io_ctx = std::make_shared<IOContext>();
@@ -174,11 +183,6 @@ TEST_F(InterpreterTestBase, SanityWithRemoteExecution) {
   Interpreter *Interp = Info.Interpreter.get();
   ASSERT_TRUE(Interp);
 
-  std::string ExecutorPath = getExecutorPath();
-  if (!llvm::sys::fs::exists(Info.OrcRuntimePath) ||
-      !llvm::sys::fs::exists(ExecutorPath))
-    GTEST_SKIP();
-
   using PTU = PartialTranslationUnit;
   PTU &R1(cantFail(Interp->Parse("void g(); void g() {}")));
   EXPECT_EQ(2U, DeclsSize(R1.TUPart));
@@ -192,8 +196,8 @@ TEST_F(InterpreterTestBase, SanityWithRemoteExecution) {
   EXPECT_NE(std::string::npos, captured_stdout.find("CustomizeFork executed"));
 }
 
-TEST_F(InterpreterTestBase, FindRuntimeInterface) {
-  if (!HostSupportsJIT())
+TEST_F(OutOfProcessInterpreterTest, FindRuntimeInterface) {
+  if (!HostSupportsOutOfProcessJIT())
     GTEST_SKIP();
 
   // make a fresh io context for this test

>From 9d8fcb3e3bae5142bc63c9773763bae18bb19be1 Mon Sep 17 00:00:00 2001
From: hev <[email protected]>
Date: Sun, 11 Jan 2026 23:04:14 +0800
Subject: [PATCH 06/13] [llvm][LoongArch] Add reloc types for LA32R/LA32S
 (#175352)

This patch introduces the relocation types added in la-abi-specs v2.50.

Link: https://github.com/loongson/la-abi-specs/pull/16
---
 .../llvm/BinaryFormat/ELFRelocs/LoongArch.def | 18 ++++++++++++
 .../ELF/reloc-types-loongarch64.test          | 26 +++++++++++++++++
 llvm/unittests/Object/ELFTest.cpp             | 28 +++++++++++++++++++
 3 files changed, 72 insertions(+)

diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def 
b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
index 4859057abcbb9..96e2c1645b57a 100644
--- a/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
+++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/LoongArch.def
@@ -149,3 +149,21 @@ ELF_RELOC(R_LARCH_TLS_LE_LO12_R,       123)
 ELF_RELOC(R_LARCH_TLS_LD_PCREL20_S2,   124)
 ELF_RELOC(R_LARCH_TLS_GD_PCREL20_S2,   125)
 ELF_RELOC(R_LARCH_TLS_DESC_PCREL20_S2, 126)
+
+// Relocs added in ELF for the LoongArch™ Architecture v20251210, part of the
+// v2.50 LoongArch ABI specs.
+//
+// Spec addition: https://github.com/loongson/la-abi-specs/pull/16
+ELF_RELOC(R_LARCH_CALL30, 127)
+ELF_RELOC(R_LARCH_PCADD_HI20, 128)
+ELF_RELOC(R_LARCH_PCADD_LO12, 129)
+ELF_RELOC(R_LARCH_GOT_PCADD_HI20, 130)
+ELF_RELOC(R_LARCH_GOT_PCADD_LO12, 131)
+ELF_RELOC(R_LARCH_TLS_IE_PCADD_HI20, 132)
+ELF_RELOC(R_LARCH_TLS_IE_PCADD_LO12, 133)
+ELF_RELOC(R_LARCH_TLS_LD_PCADD_HI20, 134)
+ELF_RELOC(R_LARCH_TLS_LD_PCADD_LO12, 135)
+ELF_RELOC(R_LARCH_TLS_GD_PCADD_HI20, 136)
+ELF_RELOC(R_LARCH_TLS_GD_PCADD_LO12, 137)
+ELF_RELOC(R_LARCH_TLS_DESC_PCADD_HI20, 138)
+ELF_RELOC(R_LARCH_TLS_DESC_PCADD_LO12, 139)
diff --git a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test 
b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
index 26c4e8f5ca846..6d0569b9f6437 100644
--- a/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
+++ b/llvm/test/tools/llvm-readobj/ELF/reloc-types-loongarch64.test
@@ -119,6 +119,19 @@
 # CHECK: Type: R_LARCH_TLS_LD_PCREL20_S2 (124)
 # CHECK: Type: R_LARCH_TLS_GD_PCREL20_S2 (125)
 # CHECK: Type: R_LARCH_TLS_DESC_PCREL20_S2 (126)
+# CHECK: Type: R_LARCH_CALL30 (127)
+# CHECK: Type: R_LARCH_PCADD_HI20 (128)
+# CHECK: Type: R_LARCH_PCADD_LO12 (129)
+# CHECK: Type: R_LARCH_GOT_PCADD_HI20 (130)
+# CHECK: Type: R_LARCH_GOT_PCADD_LO12 (131)
+# CHECK: Type: R_LARCH_TLS_IE_PCADD_HI20 (132)
+# CHECK: Type: R_LARCH_TLS_IE_PCADD_LO12 (133)
+# CHECK: Type: R_LARCH_TLS_LD_PCADD_HI20 (134)
+# CHECK: Type: R_LARCH_TLS_LD_PCADD_LO12 (135)
+# CHECK: Type: R_LARCH_TLS_GD_PCADD_HI20 (136)
+# CHECK: Type: R_LARCH_TLS_GD_PCADD_LO12 (137)
+# CHECK: Type: R_LARCH_TLS_DESC_PCADD_HI20 (138)
+# CHECK: Type: R_LARCH_TLS_DESC_PCADD_LO12 (139)
 
 --- !ELF
 FileHeader:
@@ -245,3 +258,16 @@ Sections:
       - Type: R_LARCH_TLS_LD_PCREL20_S2
       - Type: R_LARCH_TLS_GD_PCREL20_S2
       - Type: R_LARCH_TLS_DESC_PCREL20_S2
+      - Type: R_LARCH_CALL30
+      - Type: R_LARCH_PCADD_HI20
+      - Type: R_LARCH_PCADD_LO12
+      - Type: R_LARCH_GOT_PCADD_HI20
+      - Type: R_LARCH_GOT_PCADD_LO12
+      - Type: R_LARCH_TLS_IE_PCADD_HI20
+      - Type: R_LARCH_TLS_IE_PCADD_LO12
+      - Type: R_LARCH_TLS_LD_PCADD_HI20
+      - Type: R_LARCH_TLS_LD_PCADD_LO12
+      - Type: R_LARCH_TLS_GD_PCADD_HI20
+      - Type: R_LARCH_TLS_GD_PCADD_LO12
+      - Type: R_LARCH_TLS_DESC_PCADD_HI20
+      - Type: R_LARCH_TLS_DESC_PCADD_LO12
diff --git a/llvm/unittests/Object/ELFTest.cpp 
b/llvm/unittests/Object/ELFTest.cpp
index b0faf073e4ab5..6d659774848ee 100644
--- a/llvm/unittests/Object/ELFTest.cpp
+++ b/llvm/unittests/Object/ELFTest.cpp
@@ -253,6 +253,34 @@ TEST(ELFTest, getELFRelocationTypeNameForLoongArch) {
             getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_64_PCREL));
   EXPECT_EQ("R_LARCH_CALL36",
             getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL36));
+  EXPECT_EQ("R_LARCH_CALL30",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_CALL30));
+  EXPECT_EQ("R_LARCH_PCADD_HI20",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCADD_HI20));
+  EXPECT_EQ("R_LARCH_PCADD_LO12",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_PCADD_LO12));
+  EXPECT_EQ("R_LARCH_GOT_PCADD_HI20",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_PCADD_HI20));
+  EXPECT_EQ("R_LARCH_GOT_PCADD_LO12",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_GOT_PCADD_LO12));
+  EXPECT_EQ("R_LARCH_TLS_IE_PCADD_HI20",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_PCADD_HI20));
+  EXPECT_EQ("R_LARCH_TLS_IE_PCADD_LO12",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_IE_PCADD_LO12));
+  EXPECT_EQ("R_LARCH_TLS_LD_PCADD_HI20",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LD_PCADD_HI20));
+  EXPECT_EQ("R_LARCH_TLS_LD_PCADD_LO12",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_LD_PCADD_LO12));
+  EXPECT_EQ("R_LARCH_TLS_GD_PCADD_HI20",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_GD_PCADD_HI20));
+  EXPECT_EQ("R_LARCH_TLS_GD_PCADD_LO12",
+            getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_GD_PCADD_LO12));
+  EXPECT_EQ(
+      "R_LARCH_TLS_DESC_PCADD_HI20",
+      getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_DESC_PCADD_HI20));
+  EXPECT_EQ(
+      "R_LARCH_TLS_DESC_PCADD_LO12",
+      getELFRelocationTypeName(EM_LOONGARCH, R_LARCH_TLS_DESC_PCADD_LO12));
 }
 
 TEST(ELFTest, getRISCVVendorRelocationTypeName) {

>From f0353cdd4c05a722effd42483f1efb11c991a422 Mon Sep 17 00:00:00 2001
From: Valeriy Savchenko <[email protected]>
Date: Sun, 11 Jan 2026 15:28:31 +0000
Subject: [PATCH 07/13] [ValueTracking] Support horizontal vector add in
 computeKnownBits (#174410)

Alive2 proofs:
* Leading zeros - [4vi32](https://alive2.llvm.org/ce/z/w--S2D),
[16vi8](https://alive2.llvm.org/ce/z/hEdVks)
* Leading ones - [4vi16](https://alive2.llvm.org/ce/z/RyPdBS),
[16vi8](https://alive2.llvm.org/ce/z/UTFFt9)
---
 llvm/include/llvm/Support/KnownBits.h         |  5 +++
 llvm/lib/Analysis/ValueTracking.cpp           |  8 ++++
 llvm/lib/Support/KnownBits.cpp                | 40 +++++++++++++++++
 .../vector-reduce-add-known-bits.ll           | 45 +++++++++++++++++++
 .../PhaseOrdering/AArch64/udotabd.ll          | 20 ++++-----
 llvm/unittests/Support/KnownBitsTest.cpp      | 34 ++++++++++++++
 6 files changed, 142 insertions(+), 10 deletions(-)
 create mode 100644 
llvm/test/Transforms/InstCombine/vector-reduce-add-known-bits.ll

diff --git a/llvm/include/llvm/Support/KnownBits.h 
b/llvm/include/llvm/Support/KnownBits.h
index ea37b435e2848..b9bde435d2ee5 100644
--- a/llvm/include/llvm/Support/KnownBits.h
+++ b/llvm/include/llvm/Support/KnownBits.h
@@ -514,6 +514,11 @@ struct KnownBits {
   /// Compute known bits for the absolute value.
   LLVM_ABI KnownBits abs(bool IntMinIsPoison = false) const;
 
+  /// Compute known bits for horizontal add for a vector with NumElts
+  /// elements, where each element has the known bits represented by this
+  /// object.
+  LLVM_ABI KnownBits reduceAdd(unsigned NumElts) const;
+
   KnownBits byteSwap() const {
     return KnownBits(Zero.byteSwap(), One.byteSwap());
   }
diff --git a/llvm/lib/Analysis/ValueTracking.cpp 
b/llvm/lib/Analysis/ValueTracking.cpp
index 90e23bf81d99e..dbb44c8828545 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -2133,6 +2133,14 @@ static void computeKnownBitsFromOperator(const Operator 
*I,
           Known.One.clearAllBits();
         break;
       }
+      case Intrinsic::vector_reduce_add: {
+        auto *VecTy = dyn_cast<FixedVectorType>(I->getOperand(0)->getType());
+        if (!VecTy)
+          break;
+        computeKnownBits(I->getOperand(0), Known, Q, Depth + 1);
+        Known = Known.reduceAdd(VecTy->getNumElements());
+        break;
+      }
       case Intrinsic::umin:
         computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);
         computeKnownBits(I->getOperand(1), DemandedElts, Known2, Q, Depth + 1);
diff --git a/llvm/lib/Support/KnownBits.cpp b/llvm/lib/Support/KnownBits.cpp
index 7db8e1641462e..c7eb37808d166 100644
--- a/llvm/lib/Support/KnownBits.cpp
+++ b/llvm/lib/Support/KnownBits.cpp
@@ -601,6 +601,46 @@ KnownBits KnownBits::abs(bool IntMinIsPoison) const {
   return KnownAbs;
 }
 
+KnownBits KnownBits::reduceAdd(unsigned NumElts) const {
+  if (NumElts == 0)
+    return KnownBits(getBitWidth());
+
+  unsigned BitWidth = getBitWidth();
+  KnownBits Result(BitWidth);
+
+  if (isConstant())
+    // If all elements are the same constant, we can simply compute it
+    return KnownBits::makeConstant(NumElts * getConstant());
+
+  // The main idea is as follows.
+  //
+  // If KnownBits for each element has L leading zeros then
+  // X_i < 2^(W - L) for every i from [1, N].
+  //
+  //   ADD X_i <= ADD max(X_i) = N * max(X_i)
+  //           <  N * 2^(W - L)
+  //           <  2^(W - L + ceil(log2(N)))
+  //
+  // As the result, we can conclude that
+  //
+  //   L' = L - ceil(log2(N))
+  //
+  // Similar logic can be applied to leading ones.
+  unsigned LostBits = Log2_32_Ceil(NumElts);
+
+  if (isNonNegative()) {
+    unsigned LeadingZeros = countMinLeadingZeros();
+    LeadingZeros = LeadingZeros > LostBits ? LeadingZeros - LostBits : 0;
+    Result.Zero.setHighBits(LeadingZeros);
+  } else if (isNegative()) {
+    unsigned LeadingOnes = countMinLeadingOnes();
+    LeadingOnes = LeadingOnes > LostBits ? LeadingOnes - LostBits : 0;
+    Result.One.setHighBits(LeadingOnes);
+  }
+
+  return Result;
+}
+
 static KnownBits computeForSatAddSub(bool Add, bool Signed,
                                      const KnownBits &LHS,
                                      const KnownBits &RHS) {
diff --git a/llvm/test/Transforms/InstCombine/vector-reduce-add-known-bits.ll 
b/llvm/test/Transforms/InstCombine/vector-reduce-add-known-bits.ll
new file mode 100644
index 0000000000000..60b898b492063
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/vector-reduce-add-known-bits.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 
UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @reduce_add_eliminate_mask(ptr %p) {
+; CHECK-LABEL: define i32 @reduce_add_eliminate_mask(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[VEC:%.*]] = load <4 x i32>, ptr [[P]], align 16
+; CHECK-NEXT:    [[AND:%.*]] = and <4 x i32> [[VEC]], splat (i32 268435455)
+; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x 
i32> [[AND]])
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %vec = load <4 x i32>, ptr %p
+  %and = and <4 x i32> %vec, splat (i32 268435455)
+  %sum = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %and)
+  %masked = and i32 %sum, 1073741823
+  ret i32 %masked
+}
+
+define i1 @reduce_add_simplify_comparison(ptr %p) {
+; CHECK-LABEL: define i1 @reduce_add_simplify_comparison(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    ret i1 true
+;
+  %vec = load <8 x i32>, ptr %p
+  %and = and <8 x i32> %vec, splat (i32 16777215)
+  %sum = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %and)
+  %cmp = icmp ult i32 %sum, 134217728
+  ret i1 %cmp
+}
+
+define i64 @reduce_add_sext(ptr %p) {
+; CHECK-LABEL: define i64 @reduce_add_sext(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:    [[VEC:%.*]] = load <2 x i32>, ptr [[P]], align 8
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[VEC]], splat (i32 4194303)
+; CHECK-NEXT:    [[SUM:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x 
i32> [[AND]])
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i32 [[SUM]] to i64
+; CHECK-NEXT:    ret i64 [[EXT]]
+;
+  %vec = load <2 x i32>, ptr %p
+  %and = and <2 x i32> %vec, splat (i32 4194303)
+  %sum = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %and)
+  %ext = sext i32 %sum to i64
+  ret i64 %ext
+}
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll 
b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
index 4c7e39d31b5c6..e2f7f8f7e5cac 100644
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/udotabd.ll
@@ -29,7 +29,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-O3-NEXT:    [[TMP13:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP12]], i1 false)
 ; CHECK-O3-NEXT:    [[TMP14:%.*]] = zext <16 x i16> [[TMP13]] to <16 x i32>
 ; CHECK-O3-NEXT:    [[TMP15:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP14]])
-; CHECK-O3-NEXT:    [[OP_RDX_1:%.*]] = add i32 [[TMP15]], [[TMP7]]
+; CHECK-O3-NEXT:    [[OP_RDX_1:%.*]] = add nuw nsw i32 [[TMP15]], [[TMP7]]
 ; CHECK-O3-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-O3-NEXT:    [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9]], i64 [[IDX_EXT8]]
 ; CHECK-O3-NEXT:    [[TMP16:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -40,7 +40,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-O3-NEXT:    [[TMP21:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP20]], i1 false)
 ; CHECK-O3-NEXT:    [[TMP22:%.*]] = zext <16 x i16> [[TMP21]] to <16 x i32>
 ; CHECK-O3-NEXT:    [[TMP23:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP22]])
-; CHECK-O3-NEXT:    [[OP_RDX_2:%.*]] = add i32 [[TMP23]], [[OP_RDX_1]]
+; CHECK-O3-NEXT:    [[OP_RDX_2:%.*]] = add nuw nsw i32 [[TMP23]], [[OP_RDX_1]]
 ; CHECK-O3-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_1]], i64 [[IDX_EXT]]
 ; CHECK-O3-NEXT:    [[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_1]], i64 [[IDX_EXT8]]
 ; CHECK-O3-NEXT:    [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -51,7 +51,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-O3-NEXT:    [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP28]], i1 false)
 ; CHECK-O3-NEXT:    [[TMP30:%.*]] = zext <16 x i16> [[TMP29]] to <16 x i32>
 ; CHECK-O3-NEXT:    [[TMP31:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP30]])
-; CHECK-O3-NEXT:    [[OP_RDX_3:%.*]] = add i32 [[TMP31]], [[OP_RDX_2]]
+; CHECK-O3-NEXT:    [[OP_RDX_3:%.*]] = add nuw nsw i32 [[TMP31]], [[OP_RDX_2]]
 ; CHECK-O3-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_2]], i64 [[IDX_EXT]]
 ; CHECK-O3-NEXT:    [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_2]], i64 [[IDX_EXT8]]
 ; CHECK-O3-NEXT:    [[TMP32:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -62,7 +62,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-O3-NEXT:    [[TMP37:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP36]], i1 false)
 ; CHECK-O3-NEXT:    [[TMP38:%.*]] = zext <16 x i16> [[TMP37]] to <16 x i32>
 ; CHECK-O3-NEXT:    [[TMP39:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP38]])
-; CHECK-O3-NEXT:    [[OP_RDX_4:%.*]] = add i32 [[TMP39]], [[OP_RDX_3]]
+; CHECK-O3-NEXT:    [[OP_RDX_4:%.*]] = add nuw nsw i32 [[TMP39]], [[OP_RDX_3]]
 ; CHECK-O3-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_3]], i64 [[IDX_EXT]]
 ; CHECK-O3-NEXT:    [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_3]], i64 [[IDX_EXT8]]
 ; CHECK-O3-NEXT:    [[TMP40:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -73,7 +73,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-O3-NEXT:    [[TMP45:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP44]], i1 false)
 ; CHECK-O3-NEXT:    [[TMP46:%.*]] = zext <16 x i16> [[TMP45]] to <16 x i32>
 ; CHECK-O3-NEXT:    [[TMP47:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP46]])
-; CHECK-O3-NEXT:    [[OP_RDX_5:%.*]] = add i32 [[TMP47]], [[OP_RDX_4]]
+; CHECK-O3-NEXT:    [[OP_RDX_5:%.*]] = add nuw nsw i32 [[TMP47]], [[OP_RDX_4]]
 ; CHECK-O3-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_4]], i64 [[IDX_EXT]]
 ; CHECK-O3-NEXT:    [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_4]], i64 [[IDX_EXT8]]
 ; CHECK-O3-NEXT:    [[TMP48:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -209,7 +209,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-LTO-NEXT:    [[TMP11:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP10]], i1 true)
 ; CHECK-LTO-NEXT:    [[TMP52:%.*]] = zext nneg <16 x i16> [[TMP11]] to <16 x 
i32>
 ; CHECK-LTO-NEXT:    [[TMP60:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP52]])
-; CHECK-LTO-NEXT:    [[OP_RDX_1:%.*]] = add i32 [[TMP60]], [[TMP44]]
+; CHECK-LTO-NEXT:    [[OP_RDX_1:%.*]] = add nuw nsw i32 [[TMP60]], [[TMP44]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR_1:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR]], i64 [[IDX_EXT]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR9_1:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9]], i64 [[IDX_EXT8]]
 ; CHECK-LTO-NEXT:    [[TMP12:%.*]] = load <16 x i8>, ptr [[ADD_PTR_1]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -220,7 +220,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-LTO-NEXT:    [[TMP17:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP16]], i1 true)
 ; CHECK-LTO-NEXT:    [[TMP68:%.*]] = zext nneg <16 x i16> [[TMP17]] to <16 x 
i32>
 ; CHECK-LTO-NEXT:    [[TMP76:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP68]])
-; CHECK-LTO-NEXT:    [[OP_RDX_2:%.*]] = add i32 [[OP_RDX_1]], [[TMP76]]
+; CHECK-LTO-NEXT:    [[OP_RDX_2:%.*]] = add nuw nsw i32 [[OP_RDX_1]], [[TMP76]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR_2:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_1]], i64 [[IDX_EXT]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR9_2:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_1]], i64 [[IDX_EXT8]]
 ; CHECK-LTO-NEXT:    [[TMP18:%.*]] = load <16 x i8>, ptr [[ADD_PTR_2]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -231,7 +231,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-LTO-NEXT:    [[TMP23:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP22]], i1 true)
 ; CHECK-LTO-NEXT:    [[TMP84:%.*]] = zext nneg <16 x i16> [[TMP23]] to <16 x 
i32>
 ; CHECK-LTO-NEXT:    [[TMP92:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP84]])
-; CHECK-LTO-NEXT:    [[OP_RDX_3:%.*]] = add i32 [[OP_RDX_2]], [[TMP92]]
+; CHECK-LTO-NEXT:    [[OP_RDX_3:%.*]] = add nuw nsw i32 [[OP_RDX_2]], [[TMP92]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR_3:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_2]], i64 [[IDX_EXT]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR9_3:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_2]], i64 [[IDX_EXT8]]
 ; CHECK-LTO-NEXT:    [[TMP24:%.*]] = load <16 x i8>, ptr [[ADD_PTR_3]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -242,7 +242,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-LTO-NEXT:    [[TMP29:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP28]], i1 true)
 ; CHECK-LTO-NEXT:    [[TMP100:%.*]] = zext nneg <16 x i16> [[TMP29]] to <16 x 
i32>
 ; CHECK-LTO-NEXT:    [[TMP108:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP100]])
-; CHECK-LTO-NEXT:    [[OP_RDX_4:%.*]] = add i32 [[OP_RDX_3]], [[TMP108]]
+; CHECK-LTO-NEXT:    [[OP_RDX_4:%.*]] = add nuw nsw i32 [[OP_RDX_3]], 
[[TMP108]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR_4:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_3]], i64 [[IDX_EXT]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR9_4:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_3]], i64 [[IDX_EXT8]]
 ; CHECK-LTO-NEXT:    [[TMP30:%.*]] = load <16 x i8>, ptr [[ADD_PTR_4]], align 
1, !tbaa [[CHAR_TBAA0]]
@@ -253,7 +253,7 @@ define dso_local i32 @test(ptr noundef %p1, i32 noundef 
%s_p1, ptr noundef %p2,
 ; CHECK-LTO-NEXT:    [[TMP35:%.*]] = tail call <16 x i16> @llvm.abs.v16i16(<16 
x i16> [[TMP34]], i1 true)
 ; CHECK-LTO-NEXT:    [[TMP116:%.*]] = zext nneg <16 x i16> [[TMP35]] to <16 x 
i32>
 ; CHECK-LTO-NEXT:    [[TMP117:%.*]] = tail call i32 
@llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP116]])
-; CHECK-LTO-NEXT:    [[OP_RDX_5:%.*]] = add i32 [[OP_RDX_4]], [[TMP117]]
+; CHECK-LTO-NEXT:    [[OP_RDX_5:%.*]] = add nuw nsw i32 [[OP_RDX_4]], 
[[TMP117]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR_5:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR_4]], i64 [[IDX_EXT]]
 ; CHECK-LTO-NEXT:    [[ADD_PTR9_5:%.*]] = getelementptr inbounds i8, ptr 
[[ADD_PTR9_4]], i64 [[IDX_EXT8]]
 ; CHECK-LTO-NEXT:    [[TMP37:%.*]] = load <16 x i8>, ptr [[ADD_PTR_5]], align 
1, !tbaa [[CHAR_TBAA0]]
diff --git a/llvm/unittests/Support/KnownBitsTest.cpp 
b/llvm/unittests/Support/KnownBitsTest.cpp
index abd0b3607c9fc..8da300a947b1d 100644
--- a/llvm/unittests/Support/KnownBitsTest.cpp
+++ b/llvm/unittests/Support/KnownBitsTest.cpp
@@ -886,4 +886,38 @@ TEST(KnownBitsTest, MulExhaustive) {
   }
 }
 
+TEST(KnownBitsTest, ReduceAddExhaustive) {
+  unsigned Bits = 4;
+  for (unsigned NumElts : {2, 4, 5}) {
+    ForeachKnownBits(Bits, [&](const KnownBits &EltKnown) {
+      KnownBits Computed = EltKnown.reduceAdd(NumElts);
+      KnownBits Exact(Bits);
+      Exact.Zero.setAllBits();
+      Exact.One.setAllBits();
+
+      llvm::function_ref<void(unsigned, APInt)> EnumerateCombinations;
+      auto EnumerateCombinationsImpl = [&](unsigned Depth, APInt CurrentSum) {
+        if (Depth == NumElts) {
+          Exact.One &= CurrentSum;
+          Exact.Zero &= ~CurrentSum;
+          return;
+        }
+        ForeachNumInKnownBits(EltKnown, [&](const APInt &Elt) {
+          EnumerateCombinations(Depth + 1, CurrentSum + Elt);
+        });
+      };
+      EnumerateCombinations = EnumerateCombinationsImpl;
+
+      // Recursively enumerate every NumElts-tuple of values matching EltKnown
+      // and intersect the bits of each sum to get the exact known bits.
+      EnumerateCombinations(0, APInt(Bits, 0));
+
+      if (!Exact.hasConflict()) {
+        EXPECT_TRUE(checkResult("reduceAdd", Exact, Computed, {EltKnown},
+                                /*CheckOptimality=*/false));
+      }
+    });
+  }
+}
+
 } // end anonymous namespace

>From edefafbb34b203800d6b67857ccab09556376c73 Mon Sep 17 00:00:00 2001
From: Trevor Gross <[email protected]>
Date: Sun, 11 Jan 2026 09:43:40 -0600
Subject: [PATCH 08/13] [SystemZ] Remove the `softPromoteHalfType` override
 (#175410)

`softPromoteHalfType` is being phased out because it is prone to
miscompilations (further context at [1]). SystemZ is one of the few
remaining platforms to override the default, so remove it here.

This only affects SystemZ when the `soft-float` option is used.

[1]: https://github.com/llvm/llvm-project/pull/175149
---
 llvm/lib/Target/SystemZ/SystemZISelLowering.h |  1 -
 .../CodeGen/SystemZ/fmuladd-soft-float.ll     | 22 +++++++++++--------
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h 
b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 4a45153a23756..13a1cd1614a53 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -41,7 +41,6 @@ class SystemZTargetLowering : public TargetLowering {
                                  const SystemZSubtarget &STI);
 
   bool useSoftFloat() const override;
-  bool softPromoteHalfType() const override { return false; }
 
   // Override TargetLowering.
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
diff --git a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll 
b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll
index a982f9af52358..c1185acc9cfee 100644
--- a/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll
+++ b/llvm/test/CodeGen/SystemZ/fmuladd-soft-float.ll
@@ -11,24 +11,28 @@ define half @fmuladd_intrinsic_f16(half %a, half %b, half 
%c) #0 {
 ; SOFT-FLOAT-NEXT:    .cfi_offset %r15, -40
 ; SOFT-FLOAT-NEXT:    aghi %r15, -160
 ; SOFT-FLOAT-NEXT:    .cfi_def_cfa_offset 320
-; SOFT-FLOAT-NEXT:    # kill: def $r4l killed $r4l def $r4d
-; SOFT-FLOAT-NEXT:    llghr %r0, %r4
-; SOFT-FLOAT-NEXT:    lr %r13, %r3
-; SOFT-FLOAT-NEXT:    lr %r12, %r2
-; SOFT-FLOAT-NEXT:    lgr %r2, %r0
+; SOFT-FLOAT-NEXT:    # kill: def $r2l killed $r2l def $r2d
+; SOFT-FLOAT-NEXT:    llghr %r2, %r2
+; SOFT-FLOAT-NEXT:    lr %r13, %r4
+; SOFT-FLOAT-NEXT:    lr %r12, %r3
 ; SOFT-FLOAT-NEXT:    brasl %r14, __extendhfsf2@PLT
 ; SOFT-FLOAT-NEXT:    llghr %r0, %r12
 ; SOFT-FLOAT-NEXT:    lgr %r12, %r2
 ; SOFT-FLOAT-NEXT:    lgr %r2, %r0
 ; SOFT-FLOAT-NEXT:    brasl %r14, __extendhfsf2@PLT
+; SOFT-FLOAT-NEXT:    lgr %r3, %r2
+; SOFT-FLOAT-NEXT:    lgr %r2, %r12
+; SOFT-FLOAT-NEXT:    brasl %r14, __mulsf3@PLT
+; SOFT-FLOAT-NEXT:    brasl %r14, __truncsfhf2@PLT
 ; SOFT-FLOAT-NEXT:    llghr %r0, %r13
 ; SOFT-FLOAT-NEXT:    lgr %r13, %r2
 ; SOFT-FLOAT-NEXT:    lgr %r2, %r0
 ; SOFT-FLOAT-NEXT:    brasl %r14, __extendhfsf2@PLT
-; SOFT-FLOAT-NEXT:    lgr %r3, %r2
-; SOFT-FLOAT-NEXT:    lgr %r2, %r13
-; SOFT-FLOAT-NEXT:    brasl %r14, __mulsf3@PLT
-; SOFT-FLOAT-NEXT:    lgr %r3, %r12
+; SOFT-FLOAT-NEXT:    llghr %r0, %r13
+; SOFT-FLOAT-NEXT:    lgr %r13, %r2
+; SOFT-FLOAT-NEXT:    lgr %r2, %r0
+; SOFT-FLOAT-NEXT:    brasl %r14, __extendhfsf2@PLT
+; SOFT-FLOAT-NEXT:    lgr %r3, %r13
 ; SOFT-FLOAT-NEXT:    brasl %r14, __addsf3@PLT
 ; SOFT-FLOAT-NEXT:    brasl %r14, __truncsfhf2@PLT
 ; SOFT-FLOAT-NEXT:    # kill: def $r2l killed $r2l killed $r2d

>From 04cf043a7e7d5efb25f0dbd00f9cdc6da35a8c70 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Vedran=20Mileti=C4=87?= <[email protected]>
Date: Sun, 11 Jan 2026 16:48:23 +0100
Subject: [PATCH 09/13] [llvm][CAS] Fixed build with
 -D_LIBCPP_REMOVE_TRANSITIVE_INCLUDES (#173797)

---
 llvm/include/llvm/CAS/OnDiskGraphDB.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/include/llvm/CAS/OnDiskGraphDB.h 
b/llvm/include/llvm/CAS/OnDiskGraphDB.h
index 162aa8cad0fe2..15afd2c0889ff 100644
--- a/llvm/include/llvm/CAS/OnDiskGraphDB.h
+++ b/llvm/include/llvm/CAS/OnDiskGraphDB.h
@@ -19,6 +19,7 @@
 #include "llvm/ADT/PointerUnion.h"
 #include "llvm/CAS/OnDiskDataAllocator.h"
 #include "llvm/CAS/OnDiskTrieRawHashMap.h"
+#include <atomic>
 
 namespace llvm::cas::ondisk {
 

>From 9f68b8637cc8c9638d604a6b5504a3c8d8763f5d Mon Sep 17 00:00:00 2001
From: jolwnn <[email protected]>
Date: Mon, 12 Jan 2026 00:56:21 +0800
Subject: [PATCH 10/13] fix clang-format issues

---
 libc/shared/math.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/shared/math.h b/libc/shared/math.h
index 8fcd8ef98d9b6..05c901e3984cf 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -60,12 +60,12 @@
 #include "math/frexpf.h"
 #include "math/frexpf128.h"
 #include "math/frexpf16.h"
+#include "math/fsqrt.h"
 #include "math/ldexpf.h"
 #include "math/ldexpf128.h"
 #include "math/ldexpf16.h"
 #include "math/rsqrtf.h"
 #include "math/rsqrtf16.h"
 #include "math/sin.h"
-#include "math/fsqrt.h"
 
 #endif // LLVM_LIBC_SHARED_MATH_H

>From 5b1269877ed34d2efbf49ceffc193795adda75c6 Mon Sep 17 00:00:00 2001
From: jolwnn <[email protected]>
Date: Mon, 12 Jan 2026 01:19:16 +0800
Subject: [PATCH 11/13] fix build error

---
 libc/shared/math/fsqrt.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/libc/shared/math/fsqrt.h b/libc/shared/math/fsqrt.h
index 635b155b58e44..12b6249b03b55 100644
--- a/libc/shared/math/fsqrt.h
+++ b/libc/shared/math/fsqrt.h
@@ -6,8 +6,8 @@
 //
 
//===----------------------------------------------------------------------===//
 
-#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_FSQRT_H
-#define LLVM_LIBC_SRC___SUPPORT_MATH_FSQRT_H
+#ifndef LLVM_LIBC_SHARED_MATH_FSQRT_H
+#define LLVM_LIBC_SHARED_MATH_FSQRT_H
 
 #include "shared/libc_common.h"
 #include "src/__support/math/fsqrt.h"
@@ -21,4 +21,4 @@ using math::fsqrt;
 } // namespace shared
 } // namespace LIBC_NAMESPACE_DECL
 
-#endif // LLVM_LIBC_SHARED_MATH_FSQRT_H
\ No newline at end of file
+#endif // LLVM_LIBC_SHARED_MATH_FSQRT_H

>From 76fa142694b8ff2b354d9cc1fd34e17f73ca5154 Mon Sep 17 00:00:00 2001
From: Jolynn Wee Zhuo Lin <[email protected]>
Date: Mon, 12 Jan 2026 23:03:51 +0800
Subject: [PATCH 12/13] Update libc/shared/math/fsqrt.h

Co-authored-by: Muhammad Bassiouni 
<[email protected]>
---
 libc/shared/math/fsqrt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libc/shared/math/fsqrt.h b/libc/shared/math/fsqrt.h
index 12b6249b03b55..c8d6f262622de 100644
--- a/libc/shared/math/fsqrt.h
+++ b/libc/shared/math/fsqrt.h
@@ -1,4 +1,4 @@
-//===-- Implementation header for fsqrt ------------------------*- C++ -*-===//
+//===-- Shared header for fsqrt ---------------------------------*- C++ 
-*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.

>From ff6c97ffd668c07a2e2cb599fd041578da455e4d Mon Sep 17 00:00:00 2001
From: jolwnn <[email protected]>
Date: Mon, 12 Jan 2026 23:38:30 +0800
Subject: [PATCH 13/13] fix order and update cmake with fsqrt test

---
 libc/test/shared/CMakeLists.txt                   |  1 +
 utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt
index 0f23162798a8b..bc830cb6d5998 100644
--- a/libc/test/shared/CMakeLists.txt
+++ b/libc/test/shared/CMakeLists.txt
@@ -56,6 +56,7 @@ add_fp_unittest(
     libc.src.__support.math.frexpf
     libc.src.__support.math.frexpf128
     libc.src.__support.math.frexpf16
+    libc.src.__support.math.fsqrt
     libc.src.__support.math.ldexpf
     libc.src.__support.math.ldexpf128
     libc.src.__support.math.ldexpf16
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel 
b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index e4a36fa52a7fb..e517364ce602b 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2805,19 +2805,19 @@ libc_support_library(
 )
 
 libc_support_library(
-    name = "__support_math_fsqrt",
-    hdrs = ["src/__support/math/fsqrt.h"],
+    name = "__support_math_frexpf128",
+    hdrs = ["src/__support/math/frexpf128.h"],
     deps = [
-        ":__support_fputil_sqrt",
+        ":__support_fputil_manipulation_functions",
+        ":__support_macros_properties_types",
     ],
 )
 
 libc_support_library(
-    name = "__support_math_frexpf128",
-    hdrs = ["src/__support/math/frexpf128.h"],
+    name = "__support_math_fsqrt",
+    hdrs = ["src/__support/math/fsqrt.h"],
     deps = [
-        ":__support_fputil_manipulation_functions",
-        ":__support_macros_properties_types",
+        ":__support_fputil_sqrt",
     ],
 )
 

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [libc] [llvm] [libc][math] Refactor fsqrt to Header Only (PR #175444)

Reply via email to