https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/154464

>From 8e19054cf3888342fa853f9fae30032a60cd22d9 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Tue, 19 Aug 2025 23:01:03 -0500
Subject: [PATCH] [Clang] Add builtins for masked vector loads / stores

Summary:
Clang has support for boolean vectors; these builtins expose the LLVM
masked load / store intrinsics of the same name. This differs from a manual
load and select by potentially suppressing traps from deactivated lanes.

Fixes: https://github.com/llvm/llvm-project/issues/107753

Fix builtin attributes

Cleanup
---
 clang/docs/LanguageExtensions.rst             | 18 ++++
 clang/docs/ReleaseNotes.rst                   |  3 +
 clang/include/clang/Basic/Builtins.td         | 12 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |  9 +-
 clang/lib/CodeGen/CGBuiltin.cpp               | 38 ++++++++
 clang/lib/Sema/SemaChecking.cpp               | 87 +++++++++++++++++++
 clang/test/CodeGen/builtin-masked.c           | 53 +++++++++++
 clang/test/Sema/builtin-masked.c              | 25 ++++++
 8 files changed, 243 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-masked.c
 create mode 100644 clang/test/Sema/builtin-masked.c

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 12ca4cf42f7cc..df256c7f8c063 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -941,6 +941,24 @@ Let ``VT`` be a vector type and ``ET`` the element type of 
``VT``.
                                          for the comparison.
 ======================================= 
====================================================================== 
==================================
 
+*Masked Builtins*
+
+Each builtin accesses memory according to a provided boolean mask. These are
+provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first
+argument is always a boolean mask vector.
+
+Example:
+
+.. code-block:: c++
+
+    using v8b = bool [[clang::ext_vector_type(8)]];
+    using v8i = int [[clang::ext_vector_type(8)]];
+
+    v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); }
+
+    void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); }
+
+
 Matrix Types
 ============
 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index e88d68fa99664..6f92ce8d1ba44 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -140,6 +140,9 @@ Non-comprehensive list of changes in this release
 - A vector of booleans is now a valid condition for the ternary ``?:`` 
operator.
   This binds to a simple vector select operation.
 
+- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional
+  vector loads and stores. Binds to the LLVM intrinsics of the same name.
+
 - Use of ``__has_feature`` to detect the ``ptrauth_qualifier`` and 
``ptrauth_intrinsics``
   features has been deprecated, and is restricted to the arm64e target only. 
The
   correct method to check for these features is to test for the ``__PTRAUTH__``
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index ad340e2ed0eec..56f380ceba4ce 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1232,6 +1232,18 @@ def ConvertVector : Builtin {
   let Prototype = "void(...)";
 }
 
+// Masked vector load. The "void(...)" prototype is only a placeholder: the
+// arguments are validated and the result type is set by custom checking in Sema.
+def MaskedLoad : Builtin {
+  let Spellings = ["__builtin_masked_load"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+// Masked vector store. The "void(...)" prototype is only a placeholder: the
+// arguments are validated by custom checking in Sema and the call yields void.
+def MaskedStore : Builtin {
+  let Spellings = ["__builtin_masked_store"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def AllocaUninitialized : Builtin {
   let Spellings = ["__builtin_alloca_uninitialized"];
   let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c733e8823cea6..3b34b7174b65f 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -10994,10 +10994,15 @@ def err_block_on_vm : Error<
 def err_sizeless_nonlocal : Error<
   "non-local variable with sizeless type %0">;
 
+// Diagnostics for the masked load / store builtins.
+// err_vec_masked_load_store_ptr: %0 is the 1-based argument position and %1
+// describes the expected kind of argument (e.g. "pointer to vector").
+// err_vec_masked_load_store_size: %0 is the builtin name, %1 and %2 are the
+// two vector types whose element counts disagree.
+def err_vec_masked_load_store_ptr : Error<
+ "%ordinal0 argument must be a %1">;
+def err_vec_masked_load_store_size : Error<
+ "all arguments to %0 must have the same number of elements (was %1 and %2)">;
+
 def err_vec_builtin_non_vector : Error<
  "%select{first two|all}1 arguments to %0 must be vectors">;
 def err_vec_builtin_incompatible_vector : Error<
-  "%select{first two|all}1 arguments to %0 must have the same type">;
+  "%select{first two|all|last two}1 arguments to %0 must have the same type">;
 def err_vsx_builtin_nonconstant_argument : Error<
   "argument %0 to %1 must be a 2-bit unsigned literal (i.e. 0, 1, 2 or 3)">;
 
@@ -12859,7 +12864,7 @@ def err_builtin_invalid_arg_type: Error<
   "%plural{0:|: }1"
   // Second component: integer-like types
   "%select{|integer|signed integer|unsigned integer|'int'|"
-  "pointer to a valid matrix element}2"
+  "pointer to a valid matrix element|boolean}2"
   // A space after a non-empty second component
   "%plural{0:|: }2"
   // An 'or' if non-empty second and third components are combined
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 59414fe466704..d9cc37d123fb4 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4255,6 +4255,44 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
+  case Builtin::BI__builtin_masked_load: {
+    // Lower __builtin_masked_load(mask, ptr) to a call to llvm.masked.load.
+    llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
+    llvm::Value *Ptr = EmitScalarExpr(E->getArg(1));
+
+    // The call's type is the loaded vector type; use its natural alignment.
+    llvm::Type *RetTy = CGM.getTypes().ConvertType(E->getType());
+    CharUnits Align = CGM.getNaturalTypeAlignment(E->getType(), nullptr);
+    llvm::Value *AlignVal =
+        llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
+
+    // Lanes that are masked off take their value from the passthru operand;
+    // the builtin does not define one, so poison is used.
+    llvm::Value *PassThru = llvm::PoisonValue::get(RetTy);
+
+    // Overload on the pointer's actual type rather than UnqualPtrTy so that
+    // pointers outside the default address space select a matching
+    // declaration (the masked store lowering does the same).
+    llvm::Function *F =
+        CGM.getIntrinsic(llvm::Intrinsic::masked_load, {RetTy, Ptr->getType()});
+
+    llvm::Value *Result =
+        Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load");
+    return RValue::get(Result);
+  }
+  case Builtin::BI__builtin_masked_store: {
+    // Lower __builtin_masked_store(mask, value, ptr) to llvm.masked.store.
+    // Operands are emitted in source order: mask, value, pointer.
+    llvm::Value *MaskV = EmitScalarExpr(E->getArg(0));
+    llvm::Value *StoredV = EmitScalarExpr(E->getArg(1));
+    llvm::Value *DestPtr = EmitScalarExpr(E->getArg(2));
+
+    // The intrinsic is overloaded on the stored vector and pointer types.
+    QualType StoredTy = E->getArg(1)->getType();
+    llvm::Type *StoredLLTy = CGM.getTypes().ConvertType(StoredTy);
+
+    // Alignment comes from the stored vector type's natural alignment.
+    llvm::Value *AlignV = llvm::ConstantInt::get(
+        Int32Ty, CGM.getNaturalTypeAlignment(StoredTy, nullptr).getQuantity());
+
+    llvm::Function *StoreFn = CGM.getIntrinsic(
+        llvm::Intrinsic::masked_store, {StoredLLTy, DestPtr->getType()});
+
+    Builder.CreateCall(StoreFn, {StoredV, DestPtr, AlignV, MaskV});
+    return RValue::get(nullptr);
+  }
+
   case Builtin::BI__builtin_isinf_sign: {
     // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
     CodeGenFunction::CGFPOptionsRAII FPOptsRAII(*this, E);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index c21c40e707008..d90b86944aa03 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2266,6 +2266,89 @@ static bool BuiltinCountZeroBitsGeneric(Sema &S, 
CallExpr *TheCall) {
   return false;
 }
 
+/// Shared argument validation for the masked load / store builtins.
+///
+/// \p MaskArg must be an ext-vector of booleans and \p PtrArg a pointer to a
+/// vector type. \p Pos is the 1-based position of \p PtrArg in the call and is
+/// only used in the diagnostic. Returns true if an error was diagnosed.
+static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg,
+                                   unsigned Pos) {
+  const QualType MaskQT = MaskArg->getType();
+  if (!MaskQT->isExtVectorBoolType()) {
+    S.Diag(MaskArg->getBeginLoc(), diag::err_builtin_invalid_arg_type)
+        << 1 << /* vector of */ 4 << /* booleans */ 6 << /* no fp */ 0
+        << MaskQT;
+    return true;
+  }
+
+  const QualType PtrQT = PtrArg->getType();
+  const bool IsVectorPtr =
+      PtrQT->isPointerType() && PtrQT->getPointeeType()->isVectorType();
+  if (IsVectorPtr)
+    return false;
+
+  S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
+      << Pos << "pointer to vector";
+  return true;
+}
+
+/// Type-check a call to __builtin_masked_load(mask, ptr).
+///
+/// Requires exactly two arguments: a boolean ext-vector mask and a pointer to
+/// a vector with the same number of elements as the mask. On success the
+/// call's type is set to the pointee vector type.
+static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) {
+  if (S.checkArgCount(TheCall, 2))
+    return ExprError();
+
+  Expr *MaskArg = TheCall->getArg(0);
+  Expr *PtrArg = TheCall->getArg(1);
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 2))
+    return ExprError();
+
+  // CheckMaskedBuiltinArgs has already verified that the mask and the pointee
+  // are vector types, so both getAs<VectorType>() results are used unchecked.
+  QualType MaskTy = MaskArg->getType();
+  QualType PtrTy = PtrArg->getType();
+  QualType PointeeTy = PtrTy->getPointeeType();
+  const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
+  const VectorType *DataVecTy = PointeeTy->getAs<VectorType>();
+  if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
+    return ExprError(
+        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << "__builtin_masked_load" << MaskTy << PointeeTy);
+
+  TheCall->setType(PointeeTy);
+  return TheCall;
+}
+
+/// Type-check a call to __builtin_masked_store(mask, value, ptr).
+///
+/// Requires exactly three arguments: a boolean ext-vector mask, a vector value,
+/// and a pointer to a vector of the same type as the value. All three vectors
+/// must have the same number of elements. On success the call's type is void.
+static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) {
+  if (S.checkArgCount(TheCall, 3))
+    return ExprError();
+
+  Expr *MaskArg = TheCall->getArg(0);
+  Expr *ValArg = TheCall->getArg(1);
+  Expr *PtrArg = TheCall->getArg(2);
+
+  // The pointer is the third argument here; Pos only affects the diagnostic.
+  if (CheckMaskedBuiltinArgs(S, MaskArg, PtrArg, 3))
+    return ExprError();
+
+  QualType MaskTy = MaskArg->getType();
+  QualType PtrTy = PtrArg->getType();
+  QualType ValTy = ValArg->getType();
+  if (!ValTy->isVectorType())
+    return ExprError(
+        S.Diag(ValArg->getExprLoc(), diag::err_vec_masked_load_store_ptr)
+        << 2 << "vector");
+
+  QualType PointeeTy = PtrTy->getPointeeType();
+  const VectorType *MaskVecTy = MaskTy->getAs<VectorType>();
+  const VectorType *ValVecTy = ValTy->getAs<VectorType>();
+  const VectorType *PtrVecTy = PointeeTy->getAs<VectorType>();
+
+  // Report the mask together with whichever vector disagrees with it, so the
+  // two types named in the diagnostic really are the mismatching pair.
+  const unsigned NumElts = MaskVecTy->getNumElements();
+  if (NumElts != ValVecTy->getNumElements() ||
+      NumElts != PtrVecTy->getNumElements()) {
+    QualType BadTy = NumElts != ValVecTy->getNumElements() ? ValTy : PointeeTy;
+    return ExprError(
+        S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << "__builtin_masked_store" << MaskTy << BadTy);
+  }
+
+  // Index 2 selects the "last two" wording of the diagnostic.
+  if (!S.Context.hasSameType(ValTy, PointeeTy))
+    return ExprError(S.Diag(TheCall->getBeginLoc(),
+                            diag::err_vec_builtin_incompatible_vector)
+                     << TheCall->getDirectCallee() << /*last two*/ 2
+                     << SourceRange(ValArg->getBeginLoc(),
+                                    ValArg->getEndLoc()));
+
+  TheCall->setType(S.Context.VoidTy);
+  return TheCall;
+}
+
 static ExprResult BuiltinInvoke(Sema &S, CallExpr *TheCall) {
   SourceLocation Loc = TheCall->getBeginLoc();
   MutableArrayRef Args(TheCall->getArgs(), TheCall->getNumArgs());
@@ -2518,6 +2601,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
     return BuiltinShuffleVector(TheCall);
     // TheCall will be freed by the smart pointer here, but that's fine, since
     // BuiltinShuffleVector guts it, but then doesn't release it.
+  case Builtin::BI__builtin_masked_load:
+    return BuiltinMaskedLoad(*this, TheCall);
+  case Builtin::BI__builtin_masked_store:
+    return BuiltinMaskedStore(*this, TheCall);
   case Builtin::BI__builtin_invoke:
     return BuiltinInvoke(*this, TheCall);
   case Builtin::BI__builtin_prefetch:
diff --git a/clang/test/CodeGen/builtin-masked.c 
b/clang/test/CodeGen/builtin-masked.c
new file mode 100644
index 0000000000000..67071ba19bd25
--- /dev/null
+++ b/clang/test/CodeGen/builtin-masked.c
@@ -0,0 +1,53 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | 
FileCheck %s
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+typedef _Bool v8b __attribute__((ext_vector_type(8)));
+
+// __builtin_masked_load is expected to lower to @llvm.masked.load with a
+// 32-byte alignment operand and a poison passthru.
+// CHECK-LABEL: define dso_local <8 x i32> @test_load(
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i8 [[M_COERCE]], ptr [[M]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
+// CHECK-NEXT:    [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i1> [[M1]] to i8
+// CHECK-NEXT:    store i8 [[TMP0]], ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[MASKED_LOAD:%.*]] = call <8 x i32> 
@llvm.masked.load.v8i32.p0(ptr [[TMP2]], i32 32, <8 x i1> [[TMP1]], <8 x i32> 
poison)
+// CHECK-NEXT:    ret <8 x i32> [[MASKED_LOAD]]
+//
+v8i test_load(v8b m, v8i *p) {
+  return __builtin_masked_load(m, p);
+}
+
+// __builtin_masked_store is expected to lower to @llvm.masked.store with a
+// 32-byte alignment operand.
+// CHECK-LABEL: define dso_local void @test_store(
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 
32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i8 [[M_COERCE]], ptr [[M]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
+// CHECK-NEXT:    [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    store <8 x i32> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0(<8 x i32> [[TMP3]], 
ptr [[TMP4]], i32 32, <8 x i1> [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+void test_store(v8b m, v8i v, v8i *p) {
+  __builtin_masked_store(m, v, p);
+}
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
new file mode 100644
index 0000000000000..0935b8359af2a
--- /dev/null
+++ b/clang/test/Sema/builtin-masked.c
@@ -0,0 +1,25 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fsyntax-only -verify %s
+
+typedef int v8i __attribute__((ext_vector_type(8)));
+typedef _Bool v8b __attribute__((ext_vector_type(8)));
+typedef _Bool v2b __attribute__((ext_vector_type(2)));
+typedef float v8f __attribute__((ext_vector_type(8)));
+
+// Each call below breaks exactly one argument rule of __builtin_masked_load:
+// argument count, mask/data element count, pointer kind, and mask kind.
+void test_masked_load(v8i *pf, v8b mask, v2b mask2) {
+  (void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}}
+  (void)__builtin_masked_load(mask, pf, pf); // expected-error {{too many arguments to function call, expected 2, have 3}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}}
+  (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}}
+  (void)__builtin_masked_load(mask, 0); // expected-error {{2nd argument must be a pointer to vector}}
+  (void)__builtin_masked_load(0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}}
+}
+
+// Each call below is expected to be rejected with the diagnostic named in its
+// expected-error directive.
+void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
+  __builtin_masked_store(mask); // expected-error {{too few arguments to 
function call, expected 3, have 1}}
+  __builtin_masked_store(mask, 0, 0, 0); // expected-error {{too many 
arguments to function call, expected 3, have 4}}
+  __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a 
vector of boolean types (was 'int')}}
+  __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must 
be a vector}}
+  __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must 
be a pointer to vector}}
+  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to 
__builtin_masked_store must have the same number of elements}}
+  __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two 
arguments to '__builtin_masked_store' must have the same type}}
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to