https://github.com/jhuber6 updated 
https://github.com/llvm/llvm-project/pull/156042

>From 1221affdc11f757ced2303a894950badde4b9833 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Fri, 29 Aug 2025 09:57:19 -0500
Subject: [PATCH 1/3] [Clang] Add masked vector builtins for expand and
 compress access

Summary:
The interface here is nearly identical to the already added masked
loads and stores. These bind to very similar intrinsics so we add them
here.
---
 clang/docs/LanguageExtensions.rst             | 11 ++++
 clang/docs/ReleaseNotes.rst                   |  6 ++-
 clang/include/clang/Basic/Builtins.td         | 12 +++++
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/CodeGen/CGBuiltin.cpp               | 35 ++++++++----
 clang/lib/Sema/SemaChecking.cpp               |  6 ++-
 clang/test/CodeGen/builtin-masked.c           | 54 ++++++++++++++++++-
 clang/test/Sema/builtin-masked.c              | 26 +++++++--
 8 files changed, 132 insertions(+), 20 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 2ce60de05fff2..33c26b014b6d0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -950,6 +950,11 @@ argument is always boolean mask vector. The 
``__builtin_masked_load`` builtin
 takes an optional third vector argument that will be used for the result of the
 masked-off lanes. These builtins assume the memory is always aligned.
 
+The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
+builtins have the same interface but store the result in consecutive indices.
+Effectively this performs the ``if (cond.i) v.i = a[j++]`` and ``if (cond.i)
+a[j++] = v.i`` pattern respectively.
+
 Example:
 
 .. code-block:: c++
@@ -959,8 +964,14 @@ Example:
 
     v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); }
 
+    v8i load_expand(v8b m, v8i *p) { return __builtin_masked_expand_load(m, 
p); }
+    
     void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); }
 
+    void store_compress(v8b m, v8i v, v8i *p) {
+      __builtin_masked_compress_store(m, v, p);
+    }
+
 
 Matrix Types
 ============
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 84e499e5d0ab9..12c848cc68b43 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -169,8 +169,10 @@ Non-comprehensive list of changes in this release
 - A vector of booleans is now a valid condition for the ternary ``?:`` 
operator.
   This binds to a simple vector select operation.
 
-- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for 
conditional
-  memory loads from vectors. Binds to the LLVM intrinsic of the same name.
+- Added ``__builtin_masked_load``, ``__builtin_masked_expand_load``,
+  ``__builtin_masked_store``, ``__builtin_masked_compress_store`` for
+  conditional memory loads from vectors. Binds to the LLVM intrinsics of the
+  same name.
 
 - The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg``
   functions now accept fixed-size boolean vectors.
diff --git a/clang/include/clang/Basic/Builtins.td 
b/clang/include/clang/Basic/Builtins.td
index 6d21c620bfc80..af0e8242f1e0d 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1244,6 +1244,18 @@ def MaskedStore : Builtin {
   let Prototype = "void(...)";
 }
 
+def MaskedExpandLoad : Builtin {
+  let Spellings = ["__builtin_masked_expand_load"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
+def MaskedCompressStore : Builtin {
+  let Spellings = ["__builtin_masked_compress_store"];
+  let Attributes = [NoThrow, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 def AllocaUninitialized : Builtin {
   let Spellings = ["__builtin_alloca_uninitialized"];
   let Attributes = [FunctionWithBuiltinPrefix, NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index c934fed2c7462..f59573a2b88d7 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11016,7 +11016,7 @@ def err_sizeless_nonlocal : Error<
 def err_vec_masked_load_store_ptr : Error<
  "%ordinal0 argument must be a %1">;
 def err_vec_masked_load_store_size : Error<
- "all arguments to %0 must have the same number of elements (was %1 and %2)">;
+ "all arguments must have the same number of elements (was %0 and %1)">;
 
 def err_vec_builtin_non_vector : Error<
  "%select{first two|all}1 arguments to %0 must be vectors">;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 16c059122c84a..172a521e63c17 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4271,7 +4271,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl 
GD, unsigned BuiltinID,
     return RValue::get(Result);
   }
 
-  case Builtin::BI__builtin_masked_load: {
+  case Builtin::BI__builtin_masked_load:
+  case Builtin::BI__builtin_masked_expand_load: {
     llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
     llvm::Value *Ptr = EmitScalarExpr(E->getArg(1));
 
@@ -4284,14 +4285,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID,
     if (E->getNumArgs() > 2)
       PassThru = EmitScalarExpr(E->getArg(2));
 
-    Function *F =
-        CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy});
-
-    llvm::Value *Result =
-        Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load");
+    llvm::Value *Result;
+    if (BuiltinID == Builtin::BI__builtin_masked_load) {
+      Function *F =
+          CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy});
+      Result =
+          Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, 
"masked_load");
+    } else {
+      Function *F = CGM.getIntrinsic(Intrinsic::masked_expandload, {RetTy});
+      Result =
+          Builder.CreateCall(F, {Ptr, Mask, PassThru}, "masked_expand_load");
+    }
     return RValue::get(Result);
   };
-  case Builtin::BI__builtin_masked_store: {
+  case Builtin::BI__builtin_masked_store:
+  case Builtin::BI__builtin_masked_compress_store: {
     llvm::Value *Mask = EmitScalarExpr(E->getArg(0));
     llvm::Value *Val = EmitScalarExpr(E->getArg(1));
     llvm::Value *Ptr = EmitScalarExpr(E->getArg(2));
@@ -4304,10 +4312,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const 
GlobalDecl GD, unsigned BuiltinID,
     llvm::Value *AlignVal =
         llvm::ConstantInt::get(Int32Ty, Align.getQuantity());
 
-    llvm::Function *F =
-        CGM.getIntrinsic(llvm::Intrinsic::masked_store, {ValLLTy, PtrTy});
-
-    Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask});
+    if (BuiltinID == Builtin::BI__builtin_masked_store) {
+      llvm::Function *F =
+          CGM.getIntrinsic(llvm::Intrinsic::masked_store, {ValLLTy, PtrTy});
+      Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask});
+    } else {
+      llvm::Function *F =
+          CGM.getIntrinsic(llvm::Intrinsic::masked_compressstore, {ValLLTy});
+      Builder.CreateCall(F, {Val, Ptr, Mask});
+    }
     return RValue::get(nullptr);
   }
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 3139c4d0e92df..829bd90d0895d 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2307,7 +2307,7 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr 
*TheCall) {
   if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << "__builtin_masked_load" << MaskTy << PointeeTy);
+        << MaskTy << PointeeTy);
 
   TheCall->setType(PointeeTy);
   return TheCall;
@@ -2341,7 +2341,7 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr 
*TheCall) {
       MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << "__builtin_masked_store" << MaskTy << PointeeTy);
+        << MaskTy << PointeeTy);
 
   if (!S.Context.hasSameType(ValTy, PointeeTy))
     return ExprError(S.Diag(TheCall->getBeginLoc(),
@@ -2607,8 +2607,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, 
unsigned BuiltinID,
     // TheCall will be freed by the smart pointer here, but that's fine, since
     // BuiltinShuffleVector guts it, but then doesn't release it.
   case Builtin::BI__builtin_masked_load:
+  case Builtin::BI__builtin_masked_expand_load:
     return BuiltinMaskedLoad(*this, TheCall);
   case Builtin::BI__builtin_masked_store:
+  case Builtin::BI__builtin_masked_compress_store:
     return BuiltinMaskedStore(*this, TheCall);
   case Builtin::BI__builtin_invoke:
     return BuiltinInvoke(*this, TheCall);
diff --git a/clang/test/CodeGen/builtin-masked.c 
b/clang/test/CodeGen/builtin-masked.c
index e52716ae0a69e..579cf5c413c9b 100644
--- a/clang/test/CodeGen/builtin-masked.c
+++ b/clang/test/CodeGen/builtin-masked.c
@@ -52,8 +52,34 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) {
   return __builtin_masked_load(m, p, t);
 }
 
+// CHECK-LABEL: define dso_local <8 x i32> @test_load_expand(
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]], ptr noundef 
byval(<8 x i32>) align 32 [[TMP0:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[T_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    store i8 [[M_COERCE]], ptr [[M]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
+// CHECK-NEXT:    [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[T:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    store <8 x i32> [[T]], ptr [[T_ADDR]], align 32
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32
+// CHECK-NEXT:    [[MASKED_EXPAND_LOAD:%.*]] = call <8 x i32> 
@llvm.masked.expandload.v8i32(ptr [[TMP3]], <8 x i1> [[TMP2]], <8 x i32> 
[[TMP4]])
+// CHECK-NEXT:    ret <8 x i32> [[MASKED_EXPAND_LOAD]]
+//
+v8i test_load_expand(v8b m, v8i *p, v8i t) {
+  return __builtin_masked_expand_load(m, p, t);
+}
+
 // CHECK-LABEL: define dso_local void @test_store(
-// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 
32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] {
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 
32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[M:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i8, align 1
@@ -77,3 +103,29 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) {
 void test_store(v8b m, v8i v, v8i *p) {
   __builtin_masked_store(m, v, p);
 }
+
+// CHECK-LABEL: define dso_local void @test_compress_store(
+// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 
32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <8 x i32>, align 32
+// CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i8 [[M_COERCE]], ptr [[M]], align 1
+// CHECK-NEXT:    [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1
+// CHECK-NEXT:    [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1>
+// CHECK-NEXT:    [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8
+// CHECK-NEXT:    store i8 [[TMP1]], ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    store <8 x i32> [[V]], ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    store ptr [[P]], ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1>
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+// CHECK-NEXT:    call void @llvm.masked.compressstore.v8i32(<8 x i32> 
[[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]])
+// CHECK-NEXT:    ret void
+//
+void test_compress_store(v8b m, v8i v, v8i *p) {
+  __builtin_masked_compress_store(m, v, p);
+}
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
index 1f1395793ee38..242a9132c8b40 100644
--- a/clang/test/Sema/builtin-masked.c
+++ b/clang/test/Sema/builtin-masked.c
@@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8)));
 void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_load(mask); // expected-error {{too few arguments to 
function call, expected 2, have 1}}
   (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many 
arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments 
must have the same number of elements}}
   (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument 
must be a pointer to vector}}
   (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments 
must have the same number of elements}}
 }
 
 void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
@@ -21,6 +21,26 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b 
mask2) {
   __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a 
vector of boolean types (was 'int')}}
   __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must 
be a vector}}
   __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must 
be a pointer to vector}}
-  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to 
__builtin_masked_store must have the same number of elements}}
+  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments 
must have the same number of elements}}
   __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two 
arguments to '__builtin_masked_store' must have the same type}}
 }
+
+void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
+  (void)__builtin_masked_expand_load(mask); // expected-error {{too few 
arguments to function call, expected 2, have 1}}
+  (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error 
{{too many arguments to function call, expected at most 3, have 4}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments must have the same number of elements}}
+  (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd 
argument must be a pointer to vector}}
+  (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
+  (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments must have the same number of elements}}
+}
+
+void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
+  __builtin_masked_compress_store(mask); // expected-error {{too few arguments 
to function call, expected 3, have 1}}
+  __builtin_masked_compress_store(mask, 0, 0, 0); // expected-error {{too many 
arguments to function call, expected 3, have 4}}
+  __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument 
must be a vector of boolean types (was 'int')}}
+  __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd 
argument must be a vector}}
+  __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd 
argument must be a pointer to vector}}
+  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all 
arguments must have the same number of elements}}
+  __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last 
two arguments to '__builtin_masked_compress_store' must have the same type}}
+}

>From 805d96c2b9d19d9a7183261b5601335c49b63526 Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Fri, 29 Aug 2025 10:33:01 -0500
Subject: [PATCH 2/3] comments

---
 clang/docs/LanguageExtensions.rst                |  4 ++--
 clang/include/clang/Basic/DiagnosticSemaKinds.td |  2 +-
 clang/lib/Sema/SemaChecking.cpp                  |  2 ++
 clang/test/Sema/builtin-masked.c                 | 12 ++++++------
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 33c26b014b6d0..2025d0f7dd8b7 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -952,8 +952,8 @@ masked-off lanes. These builtins assume the memory is 
always aligned.
 
 The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
 builtins have the same interface but store the result in consecutive indices.
-Effectively this performs the ``if (cond.i) v.i = a[j++]`` and ``if (cond.i)
-a[j++] = v.i`` pattern respectively.
+Effectively this performs the ``if (m[i]) v[i] = p[j++]`` and ``if (m[i])
+p[j++] = v[i]`` pattern respectively.
 
 Example:
 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td 
b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index f59573a2b88d7..c934fed2c7462 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11016,7 +11016,7 @@ def err_sizeless_nonlocal : Error<
 def err_vec_masked_load_store_ptr : Error<
  "%ordinal0 argument must be a %1">;
 def err_vec_masked_load_store_size : Error<
- "all arguments must have the same number of elements (was %0 and %1)">;
+ "all arguments to %0 must have the same number of elements (was %1 and %2)">;
 
 def err_vec_builtin_non_vector : Error<
  "%select{first two|all}1 arguments to %0 must be vectors">;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 829bd90d0895d..2e04c4b19dc63 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2307,6 +2307,7 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr 
*TheCall) {
   if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee())
         << MaskTy << PointeeTy);
 
   TheCall->setType(PointeeTy);
@@ -2341,6 +2342,7 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr 
*TheCall) {
       MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
+        << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee())
         << MaskTy << PointeeTy);
 
   if (!S.Context.hasSameType(ValTy, PointeeTy))
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
index 242a9132c8b40..c444457d7d98c 100644
--- a/clang/test/Sema/builtin-masked.c
+++ b/clang/test/Sema/builtin-masked.c
@@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8)));
 void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_load(mask); // expected-error {{too few arguments to 
function call, expected 2, have 1}}
   (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many 
arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments 
must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
   (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument 
must be a pointer to vector}}
   (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments 
must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
 }
 
 void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
@@ -21,18 +21,18 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b 
mask2) {
   __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a 
vector of boolean types (was 'int')}}
   __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must 
be a vector}}
   __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must 
be a pointer to vector}}
-  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments 
must have the same number of elements}}
+  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to 
__builtin_masked_store must have the same number of elements}}
   __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two 
arguments to '__builtin_masked_store' must have the same type}}
 }
 
 void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_expand_load(mask); // expected-error {{too few 
arguments to function call, expected 2, have 1}}
   (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error 
{{too many arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments must have the same number of elements}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to __builtin_masked_expand_load must have the same number of 
elements}}
   (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments must have the same number of elements}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to __builtin_masked_expand_load must have the same number of 
elements}}
 }
 
 void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
@@ -41,6 +41,6 @@ void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, 
v2b mask2) {
   __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument 
must be a vector of boolean types (was 'int')}}
   __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd 
argument must be a vector}}
   __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd 
argument must be a pointer to vector}}
-  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all 
arguments must have the same number of elements}}
+  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all 
arguments to __builtin_masked_compress_store must have the same number of 
elements}}
   __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last 
two arguments to '__builtin_masked_compress_store' must have the same type}}
 }

>From 9caae4b0982a87f12170e3dedbf80d287bc62b4a Mon Sep 17 00:00:00 2001
From: Joseph Huber <hube...@outlook.com>
Date: Fri, 29 Aug 2025 10:54:39 -0500
Subject: [PATCH 3/3] comments

---
 clang/docs/LanguageExtensions.rst | 22 +++++++++++++---------
 clang/lib/Sema/SemaChecking.cpp   |  6 ++++--
 clang/test/Sema/builtin-masked.c  | 12 ++++++------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/clang/docs/LanguageExtensions.rst 
b/clang/docs/LanguageExtensions.rst
index 2025d0f7dd8b7..cbe59124d5b99 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -952,8 +952,8 @@ masked-off lanes. These builtins assume the memory is 
always aligned.
 
 The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store``
 builtins have the same interface but store the result in consecutive indices.
-Effectively this performs the ``if (m[i]) v[i] = p[j++]`` and ``if (m[i])
-p[j++] = v[i]`` pattern respectively.
+Effectively this performs the ``if (mask[i]) val[i] = ptr[j++]`` and ``if
+(mask[i]) ptr[j++] = val[i]`` pattern respectively.
 
 Example:
 
@@ -962,14 +962,18 @@ Example:
     using v8b = bool [[clang::ext_vector_type(8)]];
     using v8i = int [[clang::ext_vector_type(8)]];
 
-    v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); }
-
-    v8i load_expand(v8b m, v8i *p) { return __builtin_masked_expand_load(m, 
p); }
+    v8i load(v8b mask, v8i *ptr) { return __builtin_masked_load(mask, ptr); }
     
-    void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); }
-
-    void store_compress(v8b m, v8i v, v8i *p) {
-      __builtin_masked_compress_store(m, v, p);
+    v8i load_expand(v8b mask, v8i *ptr) {
+      return __builtin_masked_expand_load(mask, ptr);
+    }
+    
+    void store(v8b mask, v8i val, v8i *ptr) {
+      __builtin_masked_store(mask, val, ptr);
+    }
+    
+    void store_compress(v8b mask, v8i val, v8i *ptr) {
+      __builtin_masked_compress_store(mask, val, ptr);
     }
 
 
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 2e04c4b19dc63..7fbb059a827fe 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2307,7 +2307,8 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr 
*TheCall) {
   if (MaskVecTy->getNumElements() != DataVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee())
+        << S.getASTContext().BuiltinInfo.getQuotedName(
+               TheCall->getBuiltinCallee())
         << MaskTy << PointeeTy);
 
   TheCall->setType(PointeeTy);
@@ -2342,7 +2343,8 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr 
*TheCall) {
       MaskVecTy->getNumElements() != PtrVecTy->getNumElements())
     return ExprError(
         S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size)
-        << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee())
+        << S.getASTContext().BuiltinInfo.getQuotedName(
+               TheCall->getBuiltinCallee())
         << MaskTy << PointeeTy);
 
   if (!S.Context.hasSameType(ValTy, PointeeTy))
diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c
index c444457d7d98c..05c6580651964 100644
--- a/clang/test/Sema/builtin-masked.c
+++ b/clang/test/Sema/builtin-masked.c
@@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8)));
 void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_load(mask); // expected-error {{too few arguments to 
function call, expected 2, have 1}}
   (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many 
arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
'__builtin_masked_load' must have the same number of elements}}
   (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument 
must be a pointer to vector}}
   (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
__builtin_masked_load must have the same number of elements}}
+  (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to 
'__builtin_masked_load' must have the same number of elements}}
 }
 
 void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
@@ -21,18 +21,18 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b 
mask2) {
   __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a 
vector of boolean types (was 'int')}}
   __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must 
be a vector}}
   __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must 
be a pointer to vector}}
-  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to 
__builtin_masked_store must have the same number of elements}}
+  __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to 
'__builtin_masked_store' must have the same number of elements}}
   __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two 
arguments to '__builtin_masked_store' must have the same type}}
 }
 
 void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) {
   (void)__builtin_masked_expand_load(mask); // expected-error {{too few 
arguments to function call, expected 2, have 1}}
   (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error 
{{too many arguments to function call, expected at most 3, have 4}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to __builtin_masked_expand_load must have the same number of 
elements}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to '__builtin_masked_expand_load' must have the same number of 
elements}}
   (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd 
argument must be a pointer to vector}}
   (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd 
argument must be a 'v8i' (vector of 8 'int' values)}}
-  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to __builtin_masked_expand_load must have the same number of 
elements}}
+  (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all 
arguments to '__builtin_masked_expand_load' must have the same number of 
elements}}
 }
 
 void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) {
@@ -41,6 +41,6 @@ void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, 
v2b mask2) {
   __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument 
must be a vector of boolean types (was 'int')}}
   __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd 
argument must be a vector}}
   __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd 
argument must be a pointer to vector}}
-  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all 
arguments to __builtin_masked_compress_store must have the same number of 
elements}}
+  __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all 
arguments to '__builtin_masked_compress_store' must have the same number of 
elements}}
   __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last 
two arguments to '__builtin_masked_compress_store' must have the same type}}
 }

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to