https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/155652
>From 3a62febbcf70485bc287f3ea713a8eff61cf215d Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Wed, 27 Aug 2025 11:35:56 -0500 Subject: [PATCH 1/2] [Clang] Update `__builtin_masked_load` to accept passthrough argument Summary: It's important to be able to define the result of the masked-off lanes, add this as an optional argument to the builtin. --- clang/docs/LanguageExtensions.rst | 4 +++- clang/lib/CodeGen/CGBuiltin.cpp | 2 ++ clang/lib/Sema/SemaChecking.cpp | 11 ++++++++++- clang/test/CodeGen/builtin-masked.c | 26 ++++++++++++++++++++++++++ clang/test/Sema/builtin-masked.c | 5 +++-- 5 files changed, 44 insertions(+), 4 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 1299582b2f5ea..5330d4d44bbf9 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -946,7 +946,9 @@ Let ``VT`` be a vector type and ``ET`` the element type of ``VT``. Each builtin accesses memory according to a provided boolean mask. These are provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first -argument is always boolean mask vector. +argument is always boolean mask vector. The ``__builtin_masked_load`` builtin +takes an optional third argument for the result of the masked-off lanes, +otherwise it is poison. These builtins assume the memory is always aligned. 
Example: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b76d0d3c33f12..16c059122c84a 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4281,6 +4281,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, llvm::ConstantInt::get(Int32Ty, Align.getQuantity()); llvm::Value *PassThru = llvm::PoisonValue::get(RetTy); + if (E->getNumArgs() > 2) + PassThru = EmitScalarExpr(E->getArg(2)); Function *F = CGM.getIntrinsic(Intrinsic::masked_load, {RetTy, UnqualPtrTy}); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 4cba4108c3500..738919ced5462 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2282,7 +2282,7 @@ static bool CheckMaskedBuiltinArgs(Sema &S, Expr *MaskArg, Expr *PtrArg, } static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { - if (S.checkArgCount(TheCall, 2)) + if (S.checkArgCountRange(TheCall, 2, 3)) return ExprError(); Expr *MaskArg = TheCall->getArg(0); @@ -2295,6 +2295,15 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { QualType PointeeTy = PtrTy->getPointeeType(); const VectorType *MaskVecTy = MaskTy->getAs<VectorType>(); const VectorType *DataVecTy = PointeeTy->getAs<VectorType>(); + + if (TheCall->getNumArgs() == 3) { + Expr *PassThruArg = TheCall->getArg(2); + QualType PassThruTy = PassThruArg->getType(); + if (!S.Context.hasSameType(PassThruTy, PointeeTy)) + return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr) + << 3 << PointeeTy; + } + if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) return ExprError( S.Diag(TheCall->getBeginLoc(), diag::err_vec_masked_load_store_size) diff --git a/clang/test/CodeGen/builtin-masked.c b/clang/test/CodeGen/builtin-masked.c index 67071ba19bd25..e52716ae0a69e 100644 --- a/clang/test/CodeGen/builtin-masked.c +++ b/clang/test/CodeGen/builtin-masked.c @@ -26,6 +26,32 @@ v8i test_load(v8b 
m, v8i *p) { return __builtin_masked_load(m, p); } +// CHECK-LABEL: define dso_local <8 x i32> @test_load_passthru( +// CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef [[P:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[M:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[M_ADDR:%.*]] = alloca i8, align 1 +// CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: [[T_ADDR:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: store i8 [[M_COERCE]], ptr [[M]], align 1 +// CHECK-NEXT: [[LOAD_BITS:%.*]] = load i8, ptr [[M]], align 1 +// CHECK-NEXT: [[M1:%.*]] = bitcast i8 [[LOAD_BITS]] to <8 x i1> +// CHECK-NEXT: [[T:%.*]] = load <8 x i32>, ptr [[TMP0]], align 32 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i1> [[M1]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], ptr [[M_ADDR]], align 1 +// CHECK-NEXT: store ptr [[P]], ptr [[P_ADDR]], align 8 +// CHECK-NEXT: store <8 x i32> [[T]], ptr [[T_ADDR]], align 32 +// CHECK-NEXT: [[LOAD_BITS2:%.*]] = load i8, ptr [[M_ADDR]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[LOAD_BITS2]] to <8 x i1> +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr [[T_ADDR]], align 32 +// CHECK-NEXT: [[MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[TMP3]], i32 32, <8 x i1> [[TMP2]], <8 x i32> [[TMP4]]) +// CHECK-NEXT: ret <8 x i32> [[MASKED_LOAD]] +// +v8i test_load_passthru(v8b m, v8i *p, v8i t) { + return __builtin_masked_load(m, p, t); +} + // CHECK-LABEL: define dso_local void @test_store( // CHECK-SAME: i8 noundef [[M_COERCE:%.*]], ptr noundef byval(<8 x i32>) align 32 [[TMP0:%.*]], ptr noundef [[P:%.*]]) #[[ATTR2:[0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/Sema/builtin-masked.c b/clang/test/Sema/builtin-masked.c index 81f5323bbe260..1f1395793ee38 100644 --- a/clang/test/Sema/builtin-masked.c +++ b/clang/test/Sema/builtin-masked.c @@ -5,12 +5,13 @@ typedef _Bool 
v8b __attribute__((ext_vector_type(8))); typedef _Bool v2b __attribute__((ext_vector_type(2))); typedef float v8f __attribute__((ext_vector_type(8))); -void test_masked_load(v8i *pf, v8b mask, v2b mask2) { +void test_masked_load(v8i *pf, v8b mask, v2b mask2, v2b thru) { (void)__builtin_masked_load(mask); // expected-error {{too few arguments to function call, expected 2, have 1}} - (void)__builtin_masked_load(mask, pf, pf); // expected-error {{too many arguments to function call, expected 2, have 3}} + (void)__builtin_masked_load(mask, pf, pf, pf); // expected-error {{too many arguments to function call, expected at most 3, have 4}} (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} (void)__builtin_masked_load(mask, mask); // expected-error {{2nd argument must be a pointer to vector}} (void)__builtin_masked_load(mask, (void *)0); // expected-error {{2nd argument must be a pointer to vector}} + (void)__builtin_masked_load(mask2, pf, thru); // expected-error {{3rd argument must be a 'v8i' (vector of 8 'int' values)}} (void)__builtin_masked_load(mask2, pf); // expected-error {{all arguments to __builtin_masked_load must have the same number of elements}} } >From a3d2f0432a3cab27f801a2205af9aff62b1408cf Mon Sep 17 00:00:00 2001 From: Joseph Huber <hube...@outlook.com> Date: Wed, 27 Aug 2025 11:47:19 -0500 Subject: [PATCH 2/2] comments --- clang/docs/LanguageExtensions.rst | 3 ++- clang/lib/Sema/SemaChecking.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 5330d4d44bbf9..f7c7c1d7a3ea1 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -948,7 +948,8 @@ Each builtin accesses memory according to a provided boolean mask. These are provided as ``__builtin_masked_load`` and ``__builtin_masked_store``. The first argument is always boolean mask vector. 
The ``__builtin_masked_load`` builtin takes an optional third argument for the result of the masked-off lanes, -otherwise it is poison. These builtins assume the memory is always aligned. +otherwise the masked-off lanes are considered undefined in the result. These +builtins assume the memory is always aligned. Example: diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 738919ced5462..e0c3b1a9f9941 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2301,7 +2301,7 @@ static ExprResult BuiltinMaskedLoad(Sema &S, CallExpr *TheCall) { QualType PassThruTy = PassThruArg->getType(); if (!S.Context.hasSameType(PassThruTy, PointeeTy)) return S.Diag(PtrArg->getExprLoc(), diag::err_vec_masked_load_store_ptr) - << 3 << PointeeTy; + << /* third */ 3 << PointeeTy; } if (MaskVecTy->getNumElements() != DataVecTy->getNumElements()) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits