https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/156042
>From 1221affdc11f757ced2303a894950badde4b9833 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 29 Aug 2025 09:57:19 -0500 Subject: [PATCH 1/3] [Clang] Add masked vector builtins for expand and compress access Summary: The interface here is nearly identical to the already added masked loads and stores. These bind to very similar intrinsics so we add them here. --- clang/docs/LanguageExtensions.rst | 11 ++++ clang/docs/ReleaseNotes.rst | 6 ++- clang/include/clang/Basic/Builtins.td | 12 +++++ .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/CodeGen/CGBuiltin.cpp | 35 ++++++++---- clang/lib/Sema/SemaChecking.cpp | 6 ++- clang/test/CodeGen/builtin-masked.c | 54 ++++++++++++++++++- clang/test/Sema/builtin-masked.c | 26 +++++++-- 8 files changed, 132 insertions(+), 20 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 2ce60de05fff2..33c26b014b6d0 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -950,6 +950,11 @@ argument is always boolean mask vector. The ``__builtin_masked_load`` builtin takes an optional third vector argument that will be used for the result of the masked-off lanes. These builtins assume the memory is always aligned. +The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store`` +builtins have the same interface but store the result in consecutive indices. +Effectively this performs the ``if (cond.i) v.i = a[j++]`` and ``if (cond.i) +a[j++] = v.i`` pattern respectively. + Example: .. 
code-block:: c++ @@ -959,8 +964,14 @@ Example: v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); } + v8i load_expand(v8b m, v8i *p) { return __builtin_masked_expand_load(m, p); } + void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); } + void store_compress(v8b m, v8i v, v8i *p) { + __builtin_masked_compress_store(m, v, p); + } + Matrix Types ============ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 84e499e5d0ab9..12c848cc68b43 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -169,8 +169,10 @@ Non-comprehensive list of changes in this release - A vector of booleans is now a valid condition for the ternary ``?:`` operator. This binds to a simple vector select operation. -- Added ``__builtin_masked_load`` and ``__builtin_masked_store`` for conditional - memory loads from vectors. Binds to the LLVM intrinsic of the same name. +- Added ``__builtin_masked_load``, ``__builtin_masked_expand_load``, + ``__builtin_masked_store``, ``__builtin_masked_compress_store`` for + conditional memory loads from vectors. Binds to the LLVM intrinsics of the + same name. - The ``__builtin_popcountg``, ``__builtin_ctzg``, and ``__builtin_clzg`` functions now accept fixed-size boolean vectors. 
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 6d21c620bfc80..af0e8242f1e0d 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -1244,6 +1244,18 @@ def MaskedStore : Builtin { let Prototype = "void(...)"; } +def MaskedExpandLoad : Builtin { + let Spellings = ["__builtin_masked_expand_load"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void(...)"; +} + +def MaskedCompressStore : Builtin { + let Spellings = ["__builtin_masked_compress_store"]; + let Attributes = [NoThrow, CustomTypeChecking]; + let Prototype = "void(...)"; +} + def AllocaUninitialized : Builtin { let Spellings = ["__builtin_alloca_uninitialized"]; let Attributes = [FunctionWithBuiltinPrefix, NoThrow]; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index c934fed2c7462..f59573a2b88d7 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11016,7 +11016,7 @@ def err_sizeless_nonlocal : Error< def err_vec_masked_load_store_ptr : Error< "%ordinal0 argument must be a %1">; def err_vec_masked_load_store_size : Error< - "all arguments to %0 must have the same number of elements (was %1 and %2)">; + "all arguments must have the same number of elements (was %0 and %1)">; def err_vec_builtin_non_vector : Error< "%select{first two|all}1 arguments to %0 must be vectors">; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 16c059122c84a..172a521e63c17 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4271,7 +4271,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, return RValue::get(Result); } - case Builtin::BI__builtin_masked_load: { + case Builtin::BI__builtin_masked_load: + case Builtin::BI__builtin_masked_expand_load: { llvm::Value *Mask = 
EmitScalarExpr(E->getArg(0)); llvm::Value *Ptr = EmitScalarExpr(E->getArg(1)); @@ -4284,14 +4285,21 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, if (E->getNumArgs() > 2) PassThru = EmitScalarExpr(E->getArg(2)); - Function *F = - CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy}); - - llvm::Value *Result = - Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load"); + llvm::Value *Result; + if (BuiltinID == Builtin::BI__builtin_masked_load) { + Function *F = + CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy}); + Result = + Builder.CreateCall(F, {Ptr, AlignVal, Mask, PassThru}, "masked_load"); + } else { + Function *F = CGM.getIntrinsic(Intrinsic::masked_expandload, {RetTy}); + Result = + Builder.CreateCall(F, {Ptr, Mask, PassThru}, "masked_expand_load"); + } return RValue::get(Result); }; - case Builtin::BI__builtin_masked_store: { + case Builtin::BI__builtin_masked_store: + case Builtin::BI__builtin_masked_compress_store: { llvm::Value *Mask = EmitScalarExpr(E->getArg(0)); llvm::Value *Val = EmitScalarExpr(E->getArg(1)); llvm::Value *Ptr = EmitScalarExpr(E->getArg(2)); @@ -4304,10 +4312,15 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::Value *AlignVal = llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); - llvm::Function *F = - CGM.getIntrinsic(llvm::Intrinsic::masked_store, {ValLLTy, PtrTy}); - - Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask}); + if (BuiltinID == Builtin::BI__builtin_masked_store) { + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::masked_store, {ValLLTy, PtrTy}); + Builder.CreateCall(F, {Val, Ptr, AlignVal, Mask}); + } else { + llvm::Function *F = + CGM.getIntrinsic(llvm::Intrinsic::masked_compressstore, {ValLLTy}); + Builder.CreateCall(F, {Val, Ptr, Mask}); + } return RValue::get(nullptr); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 3139c4d0e92df..829bd90d0895d 100644 --- 
a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2307,7 +2307,7 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) - << "__builtin_masked_load" << MaskTy << PointeeTy); + << MaskTy << PointeeTy); TheCall->setType(PointeeTy); return TheCall; @@ -2341,7 +2341,7 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) { MaskVecTy->getNumElements() != PtrVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) - << "__builtin_masked_store" << MaskTy << PointeeTy); + << MaskTy << PointeeTy); if (!S.Context.hasSameType(ValTy, PointeeTy)) return ExprError(S.Diag(TheCall->getBeginLoc(), @@ -2607,8 +2607,10 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, // TheCall will be freed by the smart pointer here, but that's fine, since // BuiltinShuffleVector guts it, but then doesn't release it. 
case Builtin::BI__builtin_masked_load: + case Builtin::BI__builtin_masked_expand_load: return BuiltinMaskedLoad(*this, TheCall); case Builtin::BI__builtin_masked_store: + case Builtin::BI__builtin_masked_compress_store: return BuiltinMaskedStore(*this, TheCall); case Builtin::BI__builtin_invoke: return BuiltinInvoke(*this, TheCall); diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c index e52716ae0a69e..579cf5c413c9b 100644 --- a/clang/test/CodeGen/builtin-masked.c +++ b/clang/test/CodeGen/builtin-masked.c @@ -52,8 +52,34 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) { return __builtin_masked_load(m, p, t); } +// CHECK-LABEL: define dso_local <8 x i32> @test_load_expand( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[T:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: store <8 x i32> [[T]], ptr [[T_ADDR]], align 32 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32 +// CHECK-NEXT: [[MASKED_EXPAND_LOAD:%.*]] = call <8 x i32> @llvm.masked.expandload.v8i32(ptr [[TMP3]], <8 x i1> [[TMP2]], <8 x i32> 
[[TMP4]]) +// CHECK-NEXT: ret <8 x i32> [[MASKED_EXPAND_LOAD]] +// +v8i test_load_expand(v8b m, v8i *p, v8i t) { + return __builtin_masked_expand_load(m, p, t); +} + // CHECK-LABEL: define dso_local void @test_store( -// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] { +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 // CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 @@ -77,3 +103,29 @@ v8i test_load_passthru(v8b m, v8i *p, v8i t) { void test_store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); } + +// CHECK-LABEL: define dso_local void @test_compress_store( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR3]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[V_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[V:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store <8 x i32> [[V]], ptr [[V_ADDR]], align 32 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[V_ADDR]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: call 
void @llvm.masked.compressstore.v8i32(<8 x i32> [[TMP3]], ptr [[TMP4]], <8 x i1> [[TMP2]]) +// CHECK-NEXT: ret void +// +void test_compress_store(v8b m, v8i v, v8i *p) { + __builtin_masked_compress_store(m, v, p); +} diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c index 1f1395793ee38..242a9132c8b40 100644 --- a/clang/test/Sema/builtin-masked.c +++ b/clang/test/Sema/builtin-masked.c @@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8))); void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { (void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} } void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { @@ -21,6 +21,26 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a 
vector}} __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} - __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_store must have the same number of elements}} + __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments must have the same number of elements}} __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}} } + +void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { + (void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} + (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} + (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} + (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} + (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} +} + +void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { + __builtin_masked_compress_store(mask); // expected-error {{too few arguments to function call, expected 3, have 1}} + __builtin_masked_compress_store(mask, 0, 0, 0); // expected-error {{too many arguments to function call, expected 3, have 4}} + __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} + __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a 
vector}} + __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} + __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments must have the same number of elements}} + __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}} +} >From 805d96c2b9d19d9a7183261b5601335c49b63526 Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 29 Aug 2025 10:33:01 -0500 Subject: [PATCH 2/3] comments --- clang/docs/LanguageExtensions.rst | 4 ++-- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/Sema/SemaChecking.cpp | 2 ++ clang/test/Sema/builtin-masked.c | 12 ++++++------ 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 33c26b014b6d0..2025d0f7dd8b7 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -952,8 +952,8 @@ masked-off lanes. These builtins assume the memory is always aligned. The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store`` builtins have the same interface but store the result in consecutive indices. -Effectively this performs the ``if (cond.i) v.i = a[j++]`` and ``if (cond.i) -a[j++] = v.i`` pattern respectively. +Effectively this performs the ``if (m[i]) v[i] = p[j++]`` and ``if (m[i]) +p[j++] = v[i]`` pattern respectively. 
Example: diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f59573a2b88d7..c934fed2c7462 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11016,7 +11016,7 @@ def err_sizeless_nonlocal : Error< def err_vec_masked_load_store_ptr : Error< "%ordinal0 argument must be a %1">; def err_vec_masked_load_store_size : Error< - "all arguments must have the same number of elements (was %0 and %1)">; + "all arguments to %0 must have the same number of elements (was %1 and %2)">; def err_vec_builtin_non_vector : Error< "%select{first two|all}1 arguments to %0 must be vectors">; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 829bd90d0895d..2e04c4b19dc63 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2307,6 +2307,7 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) + << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee()) << MaskTy << PointeeTy); TheCall->setType(PointeeTy); @@ -2341,6 +2342,7 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) { MaskVecTy->getNumElements() != PtrVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) + << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee()) << MaskTy << PointeeTy); if (!S.Context.hasSameType(ValTy, PointeeTy)) diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c index 242a9132c8b40..c444457d7d98c 100644 --- a/clang/test/Sema/builtin-masked.c +++ b/clang/test/Sema/builtin-masked.c @@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8))); void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { 
(void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} } void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { @@ -21,18 +21,18 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}} __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} - __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments must have the same number of elements}} + __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_store must have the same number of elements}} __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}} } void test_masked_expand_load(v8i *pf, v8b mask, v2b 
mask2, v2b thru) { (void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} - (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_expand_load must have the same number of elements}} (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} - (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments must have the same number of elements}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_expand_load must have the same number of elements}} } void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { @@ -41,6 +41,6 @@ void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}} __builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} - __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments must have the same number of elements}} + __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_compress_store must have the same number of elements}} 
__builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}} } >From 9caae4b0982a87f12170e3dedbf80d287bc62b4a Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Fri, 29 Aug 2025 10:54:39 -0500 Subject: [PATCH 3/3] comments --- clang/docs/LanguageExtensions.rst | 22 +++++++++++++--------- clang/lib/Sema/SemaChecking.cpp | 6 ++++-- clang/test/Sema/builtin-masked.c | 12 ++++++------ 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 2025d0f7dd8b7..cbe59124d5b99 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -952,8 +952,8 @@ masked-off lanes. These builtins assume the memory is always aligned. The ``__builtin_masked_expand_load`` and ``__builtin_masked_compress_store`` builtins have the same interface but store the result in consecutive indices. -Effectively this performs the ``if (m[i]) v[i] = p[j++]`` and ``if (m[i]) -p[j++] = v[i]`` pattern respectively. +Effectively this performs the ``if (mask[i]) val[i] = ptr[j++]`` and ``if +(mask[i]) ptr[j++] = val[i]`` pattern respectively. 
Example: @@ -962,14 +962,18 @@ Example: using v8b = bool [[clang::ext_vector_type(8)]]; using v8i = int [[clang::ext_vector_type(8)]]; - v8i load(v8b m, v8i *p) { return __builtin_masked_load(m, p); } - - v8i load_expand(v8b m, v8i *p) { return __builtin_masked_expand_load(m, p); } + v8i load(v8b mask, v8i *ptr) { return __builtin_masked_load(mask, ptr); } - void store(v8b m, v8i v, v8i *p) { __builtin_masked_store(m, v, p); } - - void store_compress(v8b m, v8i v, v8i *p) { - __builtin_masked_compress_store(m, v, p); + v8i load_expand(v8b mask, v8i *ptr) { + return __builtin_masked_expand_load(mask, ptr); + } + + void store(v8b mask, v8i val, v8i *ptr) { + __builtin_masked_store(mask, val, ptr); + } + + void store_compress(v8b mask, v8i val, v8i *ptr) { + __builtin_masked_compress_store(mask, val, ptr); } diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 2e04c4b19dc63..7fbb059a827fe 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2307,7 +2307,8 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) - << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee()) + << S.getASTContext().BuiltinInfo.getQuotedName( + TheCall->getBuiltinCallee()) << MaskTy << PointeeTy); TheCall->setType(PointeeTy); @@ -2342,7 +2343,8 @@ static ExprResult BuiltinMaskedStore(Sema &S, CallExpr *TheCall) { MaskVecTy->getNumElements() != PtrVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) - << S.getASTContext().BuiltinInfo.getName(TheCall->getBuiltinCallee()) + << S.getASTContext().BuiltinInfo.getQuotedName( + TheCall->getBuiltinCallee()) << MaskTy << PointeeTy); if (!S.Context.hasSameType(ValTy, PointeeTy)) diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c 
index c444457d7d98c..05c6580651964 100644 --- a/clang/test/Sema/builtin-masked.c +++ b/clang/test/Sema/builtin-masked.c @@ -8,11 +8,11 @@ typedef float v8f __attribute__((ext_vector_type(8))); void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { (void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}} (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} - (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} + (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_load' must have the same number of elements}} } void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { @@ -21,18 +21,18 @@ void test_masked_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { __builtin_masked_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} __builtin_masked_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}} __builtin_masked_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} - __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_store must have the same number of 
elements}} + __builtin_masked_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_store' must have the same number of elements}} __builtin_masked_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_store' must have the same type}} } void test_masked_expand_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { (void)__builtin_masked_expand_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} (void)__builtin_masked_expand_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} - (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_expand_load must have the same number of elements}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}} (void)__builtin_masked_expand_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_expand_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_expand_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} - (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_expand_load must have the same number of elements}} + (void)__builtin_masked_expand_load(mask2, pf); // expected-error {{all arguments to '__builtin_masked_expand_load' must have the same number of elements}} } void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { @@ -41,6 +41,6 @@ void test_masked_compress_store(v8i *pf, v8f *pf2, v8b mask, v2b mask2) { __builtin_masked_compress_store(0, 0, pf); // expected-error {{1st argument must be a vector of boolean types (was 'int')}} __builtin_masked_compress_store(mask, 0, pf); // expected-error {{2nd argument must be a vector}} 
__builtin_masked_compress_store(mask, *pf, 0); // expected-error {{3rd argument must be a pointer to vector}} - __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to __builtin_masked_compress_store must have the same number of elements}} + __builtin_masked_compress_store(mask2, *pf, pf); // expected-error {{all arguments to '__builtin_masked_compress_store' must have the same number of elements}} __builtin_masked_compress_store(mask, *pf, pf2); // expected-error {{last two arguments to '__builtin_masked_compress_store' must have the same type}} } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits