[clang] [HLSL][Matrix] Add Matrix splat support for booleans (PR #175809)

Deric C. via cfe-commits Wed, 14 Jan 2026 09:13:49 -0800

https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/175809


>From 5ac9ffcda15af7c341477fbb4887dcecbd2d900b Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Mon, 12 Jan 2026 11:26:09 -0800
Subject: [PATCH 1/9] Add type conversions to support bool matrix single
 subscript operators

---
 clang/lib/CodeGen/CGExpr.cpp                  |  8 +++
 clang/lib/CodeGen/CGExprScalar.cpp            |  4 +-
 .../MatrixSingleSubscriptGetter.hlsl          | 69 +++++++++++++++++++
 .../MatrixSingleSubscriptSetter.hlsl          | 63 +++++++++++++++++
 4 files changed, 142 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 999726340aaed..a80c31692ca0d 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2744,6 +2744,14 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
 
       llvm::Value *Row = Dst.getMatrixRowIdx();
       llvm::Value *RowVal = Src.getScalarVal(); // <NumCols x T>
+      if (getLangOpts().HLSL && RowVal->getType()->isIntOrIntVectorTy(1)) {
+        auto* RowValVecTy = dyn_cast<llvm::FixedVectorType>(RowVal->getType());
+        assert(RowValVecTy);
+        llvm::Type *StorageElmTy =
+            llvm::FixedVectorType::get(MatrixVec->getType()->getScalarType(),
+                                       RowValVecTy->getNumElements());
+        RowVal = Builder.CreateZExt(RowVal, StorageElmTy);
+      }
       llvm::MatrixBuilder MB(Builder);
 
       llvm::Constant *ColConstsIndices = nullptr;
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 6fd94752f5126..74df69b5bdf49 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2130,7 +2130,7 @@ Value *ScalarExprEmitter::VisitMatrixSingleSubscriptExpr(
     MB.CreateIndexAssumption(RowIdx, NumRows);
 
   Value *FlatMatrix = Visit(E->getBase());
-  llvm::Type *ElemTy = CGF.ConvertType(MatrixTy->getElementType());
+  llvm::Type *ElemTy = CGF.ConvertTypeForMem(MatrixTy->getElementType());
   auto *ResultTy = llvm::FixedVectorType::get(ElemTy, NumColumns);
   Value *RowVec = llvm::PoisonValue::get(ResultTy);
 
@@ -2146,7 +2146,7 @@ Value *ScalarExprEmitter::VisitMatrixSingleSubscriptExpr(
     RowVec = Builder.CreateInsertElement(RowVec, Elt, Lane, "matrix_row_ins");
   }
 
-  return RowVec;
+  return CGF.EmitFromMemory(RowVec, E->getType());
 }
 
 Value *ScalarExprEmitter::VisitMatrixSubscriptExpr(MatrixSubscriptExpr *E) {
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
index 341a5bbaf0147..dff73fe434b62 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptGetter.hlsl
@@ -203,3 +203,72 @@ float4 AddFloatMatrixConstant(float4x4 M) {
 int4 AddIntMatrixConstant(int4x4 M) {
    return M[0] + M[1] + M[2] + M[3];
 }
+
+// CHECK-LABEL: define hidden noundef <3 x i1> 
@_Z23getBoolVecMatrixDynamicu11matrix_typeILm2ELm3EbEi(
+// CHECK-SAME: <6 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <6 x i1> [[M]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load <6 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = add i32 0, [[TMP1]]
+// CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <6 x i32> [[TMP2]], i32 
[[TMP3]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS:%.*]] = insertelement <3 x i32> poison, i32 
[[MATRIX_ELEM]], i32 0
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 2, [[TMP1]]
+// CHECK-NEXT:    [[MATRIX_ELEM1:%.*]] = extractelement <6 x i32> [[TMP2]], 
i32 [[TMP4]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS2:%.*]] = insertelement <3 x i32> 
[[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
+// CHECK-NEXT:    [[TMP5:%.*]] = add i32 4, [[TMP1]]
+// CHECK-NEXT:    [[MATRIX_ELEM3:%.*]] = extractelement <6 x i32> [[TMP2]], 
i32 [[TMP5]]
+// CHECK-NEXT:    [[MATRIX_ROW_INS4:%.*]] = insertelement <3 x i32> 
[[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc <3 x i32> [[MATRIX_ROW_INS4]] to <3 
x i1>
+// CHECK-NEXT:    ret <3 x i1> [[LOADEDV]]
+//
+bool3 getBoolVecMatrixDynamic(bool2x3 M, int index) {
+    return M[index];
+}
+
+// CHECK-LABEL: define hidden noundef <4 x i1> 
@_Z24getBoolVecMatrixConstantu11matrix_typeILm4ELm4EbEi(
+// CHECK-SAME: <16 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <16 x i1> [[M]] to <16 x i32>
+// CHECK-NEXT:    store <16 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <16 x i32> [[TMP1]], 
i32 0
+// CHECK-NEXT:    [[MATRIX_ROW_INS:%.*]] = insertelement <4 x i32> poison, i32 
[[MATRIX_ELEM]], i32 0
+// CHECK-NEXT:    [[MATRIX_ELEM1:%.*]] = extractelement <16 x i32> [[TMP1]], 
i32 4
+// CHECK-NEXT:    [[MATRIX_ROW_INS2:%.*]] = insertelement <4 x i32> 
[[MATRIX_ROW_INS]], i32 [[MATRIX_ELEM1]], i32 1
+// CHECK-NEXT:    [[MATRIX_ELEM3:%.*]] = extractelement <16 x i32> [[TMP1]], 
i32 8
+// CHECK-NEXT:    [[MATRIX_ROW_INS4:%.*]] = insertelement <4 x i32> 
[[MATRIX_ROW_INS2]], i32 [[MATRIX_ELEM3]], i32 2
+// CHECK-NEXT:    [[MATRIX_ELEM5:%.*]] = extractelement <16 x i32> [[TMP1]], 
i32 12
+// CHECK-NEXT:    [[MATRIX_ROW_INS6:%.*]] = insertelement <4 x i32> 
[[MATRIX_ROW_INS4]], i32 [[MATRIX_ELEM5]], i32 3
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc <4 x i32> [[MATRIX_ROW_INS6]] to <4 
x i1>
+// CHECK-NEXT:    ret <4 x i1> [[LOADEDV]]
+//
+bool4 getBoolVecMatrixConstant(bool4x4 M, int index) {
+    return M[0];
+}
+
+// CHECK-LABEL: define hidden noundef i1 
@_Z27getBoolScalarMatrixConstantu11matrix_typeILm3ELm1EbEi(
+// CHECK-SAME: <3 x i1> noundef [[M:%.*]], i32 noundef [[INDEX:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca [3 x i32], align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <3 x i1> [[M]] to <3 x i32>
+// CHECK-NEXT:    store <3 x i32> [[TMP0]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_ELEM:%.*]] = extractelement <3 x i32> [[TMP1]], i32 
1
+// CHECK-NEXT:    [[MATRIX_ROW_INS:%.*]] = insertelement <1 x i32> poison, i32 
[[MATRIX_ELEM]], i32 0
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc <1 x i32> [[MATRIX_ROW_INS]] to <1 x 
i1>
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <1 x i1> [[LOADEDV]], 
i32 0
+// CHECK-NEXT:    ret i1 [[CAST_VTRUNC]]
+//
+bool getBoolScalarMatrixConstant(bool3x1 M, int index) {
+    return M[1];
+}
diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptSetter.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptSetter.hlsl
index 49746531ddccc..d314f3a87d619 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptSetter.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSingleSubscriptSetter.hlsl
@@ -58,6 +58,69 @@ void setMatrixScalar(out float2x1 M, int index, float S) {
     M[index] = S;
 }
 
+// CHECK-LABEL: define hidden void 
@_Z13setBoolMatrixRu11matrix_typeILm4ELm4EbEiDv4_b(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) 
[[M:%.*]], i32 noundef [[INDEX:%.*]], <4 x i1> noundef [[V:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[V_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = zext <4 x i1> [[V]] to <4 x i32>
+// CHECK-NEXT:    store <4 x i32> [[TMP0]], ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[V_ADDR]], align 16
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc <4 x i32> [[TMP1]] to <4 x i1>
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull 
[[META3]], !align [[META4]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_LOAD:%.*]] = load <16 x i32>, ptr [[TMP2]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = zext <4 x i1> [[LOADEDV]] to <4 x i32>
+// CHECK-NEXT:    [[TMP5:%.*]] = add i32 0, [[TMP3]]
+// CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
+// CHECK-NEXT:    [[TMP7:%.*]] = insertelement <16 x i32> [[MATRIX_LOAD]], i32 
[[TMP6]], i32 [[TMP5]]
+// CHECK-NEXT:    [[TMP8:%.*]] = add i32 4, [[TMP3]]
+// CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 1
+// CHECK-NEXT:    [[TMP10:%.*]] = insertelement <16 x i32> [[TMP7]], i32 
[[TMP9]], i32 [[TMP8]]
+// CHECK-NEXT:    [[TMP11:%.*]] = add i32 8, [[TMP3]]
+// CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i32> [[TMP4]], i32 2
+// CHECK-NEXT:    [[TMP13:%.*]] = insertelement <16 x i32> [[TMP10]], i32 
[[TMP12]], i32 [[TMP11]]
+// CHECK-NEXT:    [[TMP14:%.*]] = add i32 12, [[TMP3]]
+// CHECK-NEXT:    [[TMP15:%.*]] = extractelement <4 x i32> [[TMP4]], i32 3
+// CHECK-NEXT:    [[TMP16:%.*]] = insertelement <16 x i32> [[TMP13]], i32 
[[TMP15]], i32 [[TMP14]]
+// CHECK-NEXT:    store <16 x i32> [[TMP16]], ptr [[TMP2]], align 4
+// CHECK-NEXT:    ret void
+//
+void setBoolMatrix(out bool4x4 M, int index, bool4 V) {
+    M[index] = V;
+}
+
+// CHECK-LABEL: define hidden void 
@_Z19setBoolMatrixScalarRu11matrix_typeILm2ELm1EbEib(
+// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(8) 
[[M:%.*]], i32 noundef [[INDEX:%.*]], i1 noundef [[S:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M_ADDR:%.*]] = alloca ptr, align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[S_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[M]], ptr [[M_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[S]] to i32
+// CHECK-NEXT:    store i32 [[STOREDV]], ptr [[S_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[S_ADDR]], align 4
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[TMP0]] to i1
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <1 x i1> poison, 
i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <1 x i1> 
[[SPLAT_SPLATINSERT]], <1 x i1> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[M_ADDR]], align 4, !nonnull 
[[META3]], !align [[META4]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[MATRIX_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = zext <1 x i1> [[SPLAT_SPLAT]] to <1 x i32>
+// CHECK-NEXT:    [[TMP4:%.*]] = add i32 0, [[TMP2]]
+// CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i32> [[TMP3]], i32 0
+// CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[MATRIX_LOAD]], i32 
[[TMP5]], i32 [[TMP4]]
+// CHECK-NEXT:    store <2 x i32> [[TMP6]], ptr [[TMP1]], align 4
+// CHECK-NEXT:    ret void
+//
+void setBoolMatrixScalar(out bool2x1 M, int index, bool S) {
+    M[index] = S;
+}
+
 // CHECK-LABEL: define hidden void 
@_Z19setMatrixConstIndexRu11matrix_typeILm4ELm4EiES_(
 // CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(64) 
[[M:%.*]], <16 x i32> noundef [[N:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]

>From ab754a17624f8df99394059cd7912b45b23b4b7c Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Mon, 12 Jan 2026 12:51:32 -0800
Subject: [PATCH 2/9] Apply clang-format

---
 clang/lib/CodeGen/CGExpr.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a80c31692ca0d..13f2200be77c8 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2745,7 +2745,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
       llvm::Value *Row = Dst.getMatrixRowIdx();
       llvm::Value *RowVal = Src.getScalarVal(); // <NumCols x T>
       if (getLangOpts().HLSL && RowVal->getType()->isIntOrIntVectorTy(1)) {
-        auto* RowValVecTy = dyn_cast<llvm::FixedVectorType>(RowVal->getType());
+        auto *RowValVecTy = dyn_cast<llvm::FixedVectorType>(RowVal->getType());
         assert(RowValVecTy);
         llvm::Type *StorageElmTy =
             llvm::FixedVectorType::get(MatrixVec->getType()->getScalarType(),

>From 14cbbb5804237087f6ac301ec62930b8db15afa8 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Mon, 12 Jan 2026 15:14:52 -0800
Subject: [PATCH 3/9] Use cast instead of dyn_cast followed by assert

---
 clang/lib/CodeGen/CGExpr.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 13f2200be77c8..a8e53e77b2e8c 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2745,8 +2745,7 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst,
       llvm::Value *Row = Dst.getMatrixRowIdx();
       llvm::Value *RowVal = Src.getScalarVal(); // <NumCols x T>
       if (getLangOpts().HLSL && RowVal->getType()->isIntOrIntVectorTy(1)) {
-        auto *RowValVecTy = dyn_cast<llvm::FixedVectorType>(RowVal->getType());
-        assert(RowValVecTy);
+        auto *RowValVecTy = cast<llvm::FixedVectorType>(RowVal->getType());
         llvm::Type *StorageElmTy =
             llvm::FixedVectorType::get(MatrixVec->getType()->getScalarType(),
                                        RowValVecTy->getNumElements());

>From ed8c130a8f975f293eb1299e73cb9a2edce2204e Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 09:55:35 -0800
Subject: [PATCH 4/9] If already the correct type, do not mutate the Value of
 bool vec/matrix

---
 clang/lib/CodeGen/CGExpr.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index a8e53e77b2e8c..30308b5b109a6 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2218,6 +2218,10 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) {
 
   if (Ty->isExtVectorBoolType() || Ty->isConstantMatrixBoolType()) {
     llvm::Type *StoreTy = convertTypeForLoadStore(Ty, Value->getType());
+
+    if (Value->getType() == StoreTy)
+      return Value;
+
     if (StoreTy->isVectorTy() && StoreTy->getScalarSizeInBits() >
                                      Value->getType()->getScalarSizeInBits())
       return Builder.CreateZExt(Value, StoreTy);

>From a7f7f9d323111239d0c7cf867b980df638540160 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 10:20:38 -0800
Subject: [PATCH 5/9] Add boolean matrix splat tests

---
 .../BasicFeatures/MatrixSplat.hlsl            | 49 +++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
index 802c418f1dad5..618bdbdfe4102 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
@@ -23,6 +23,17 @@ void ConstantFloatSplat() {
     float2x2 M = 3.25;
 }
 
+// CHECK-LABEL: define hidden void @_Z17ConstantBoolSplatv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca [9 x i32], align 4
+// CHECK-NEXT:    store <9 x i32> splat (i32 1), ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void ConstantBoolSplat() {
+    bool3x3 M = true;
+}
+
 // CHECK-LABEL: define hidden void @_Z12DynamicSplatf(
 // CHECK-SAME: float noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -39,6 +50,25 @@ void DynamicSplat(float Value) {
     float3x3 M = Value;
 }
 
+// CHECK-LABEL: define hidden void @_Z16DynamicBoolSplatb(
+// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[M:%.*]] = alloca [16 x i32], align 4
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
+// CHECK-NEXT:    store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[TMP0]] to i1
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i1> poison, 
i1 [[LOADEDV]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i1> 
[[SPLAT_SPLATINSERT]], <16 x i1> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = zext <16 x i1> [[SPLAT_SPLAT]] to <16 x i32>
+// CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void DynamicBoolSplat(bool Value) {
+    bool4x4 M = Value;
+}
+
 // CHECK-LABEL: define hidden void @_Z13CastThenSplatDv4_f(
 // CHECK-SAME: <4 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -55,3 +85,22 @@ void DynamicSplat(float Value) {
 void CastThenSplat(float4 Value) {
     float3x3 M = (float) Value;
 }
+
+// CHECK-LABEL: define hidden void @_Z17BoolCastThenSplatDv3_i(
+// CHECK-SAME: <3 x i32> noundef [[VALUE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca <3 x i32>, align 16
+// CHECK-NEXT:    [[M:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT:    store <3 x i32> [[VALUE]], ptr [[VALUE_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[VALUE_ADDR]], align 16
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne <3 x i32> [[TMP0]], zeroinitializer
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <3 x i1> [[TOBOOL]], 
i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, 
i1 [[CAST_VTRUNC]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i1> 
[[SPLAT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i1> [[SPLAT_SPLAT]] to <4 x i32>
+// CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void BoolCastThenSplat(int3 Value) {
+    bool2x2 M = (bool) Value;
+}

>From d0c32af087073671d0a5f774448d0e40399ae339 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 10:23:27 -0800
Subject: [PATCH 6/9] Add boolean splat case to prepareMatrixSplat

This commit re-adds Farzon's changes to support boolean splats in prepareMatrixSplat.
https://github.com/llvm/llvm-project/pull/170885/changes/e80fe5c7cb993aa6e380abec94302477d56ac03a

Co-authored-by: Farzon Lotfi <[email protected]>
---
 clang/lib/Sema/SemaExpr.cpp | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4d787a60eba3b..5e849ee69379d 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7899,14 +7899,26 @@ ExprResult Sema::prepareMatrixSplat(QualType MatrixTy, Expr *SplattedExpr) {
   assert(DestElemTy->isFloatingType() ||
          DestElemTy->isIntegralOrEnumerationType());
 
-  // TODO: Add support for boolean matrix once exposed
-  // https://github.com/llvm/llvm-project/issues/170920
-  ExprResult CastExprRes = SplattedExpr;
-  CastKind CK = PrepareScalarCast(CastExprRes, DestElemTy);
-  if (CastExprRes.isInvalid())
-    return ExprError();
-  SplattedExpr = CastExprRes.get();
-
+  CastKind CK;
+  if (SplattedExpr->getType()->isBooleanType()) {
+    // As with vectors, we want `true` to become -1 when splatting, and we
+    // need a two-step cast if the destination element type is floating.
+    if (DestElemTy->isFloatingType()) {
+      // Cast boolean to signed integral, then to floating.
+      ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy,
+                                                 CK_BooleanToSignedIntegral);
+      SplattedExpr = CastExprRes.get();
+      CK = CK_IntegralToFloating;
+    } else {
+      CK = CK_BooleanToSignedIntegral;
+    }
+  } else {
+    ExprResult CastExprRes = SplattedExpr;
+    CK = PrepareScalarCast(CastExprRes, DestElemTy);
+    if (CastExprRes.isInvalid())
+      return ExprError();
+    SplattedExpr = CastExprRes.get();
+  }
   return ImpCastExprToType(SplattedExpr, DestElemTy, CK);
 }
 

>From 2bfb6623eed97827d2ad637d92ba3c92c9fe5497 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 11:09:10 -0800
Subject: [PATCH 7/9] Revert "Add boolean splat case to prepareMatrixSplat"

This reverts commit d0c32af087073671d0a5f774448d0e40399ae339.
---
 clang/lib/Sema/SemaExpr.cpp | 28 ++++++++--------------------
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 5e849ee69379d..4d787a60eba3b 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7899,26 +7899,14 @@ ExprResult Sema::prepareMatrixSplat(QualType MatrixTy, Expr *SplattedExpr) {
   assert(DestElemTy->isFloatingType() ||
          DestElemTy->isIntegralOrEnumerationType());
 
-  CastKind CK;
-  if (SplattedExpr->getType()->isBooleanType()) {
-    // As with vectors, we want `true` to become -1 when splatting, and we
-    // need a two-step cast if the destination element type is floating.
-    if (DestElemTy->isFloatingType()) {
-      // Cast boolean to signed integral, then to floating.
-      ExprResult CastExprRes = ImpCastExprToType(SplattedExpr, Context.IntTy,
-                                                 CK_BooleanToSignedIntegral);
-      SplattedExpr = CastExprRes.get();
-      CK = CK_IntegralToFloating;
-    } else {
-      CK = CK_BooleanToSignedIntegral;
-    }
-  } else {
-    ExprResult CastExprRes = SplattedExpr;
-    CK = PrepareScalarCast(CastExprRes, DestElemTy);
-    if (CastExprRes.isInvalid())
-      return ExprError();
-    SplattedExpr = CastExprRes.get();
-  }
+  // TODO: Add support for boolean matrix once exposed
+  // https://github.com/llvm/llvm-project/issues/170920
+  ExprResult CastExprRes = SplattedExpr;
+  CastKind CK = PrepareScalarCast(CastExprRes, DestElemTy);
+  if (CastExprRes.isInvalid())
+    return ExprError();
+  SplattedExpr = CastExprRes.get();
+
   return ImpCastExprToType(SplattedExpr, DestElemTy, CK);
 }
 

>From ac51a0c11f52e73d9e185d0bf45356c38518d90a Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 11:10:10 -0800
Subject: [PATCH 8/9] Remove TODO comment that is not needed for boolean matrix
 splats

---
 clang/lib/Sema/SemaExpr.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 4d787a60eba3b..51739c3b49ac9 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7899,8 +7899,6 @@ ExprResult Sema::prepareMatrixSplat(QualType MatrixTy, Expr *SplattedExpr) {
   assert(DestElemTy->isFloatingType() ||
          DestElemTy->isIntegralOrEnumerationType());
 
-  // TODO: Add support for boolean matrix once exposed
-  // https://github.com/llvm/llvm-project/issues/170920
   ExprResult CastExprRes = SplattedExpr;
   CastKind CK = PrepareScalarCast(CastExprRes, DestElemTy);
   if (CastExprRes.isInvalid())

>From b12f6d3bd445628963c57d3930f013e3ebde6c11 Mon Sep 17 00:00:00 2001
From: Deric Cheung <[email protected]>
Date: Tue, 13 Jan 2026 11:58:25 -0800
Subject: [PATCH 9/9] Add more tests for boolean-related matrix splats

---
 .../BasicFeatures/MatrixSplat.hlsl            | 57 +++++++++++++++++--
 1 file changed, 53 insertions(+), 4 deletions(-)

diff --git a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
index 618bdbdfe4102..9b9538e0afdd1 100644
--- a/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
+++ b/clang/test/CodeGenHLSL/BasicFeatures/MatrixSplat.hlsl
@@ -23,17 +23,28 @@ void ConstantFloatSplat() {
     float2x2 M = 3.25;
 }
 
-// CHECK-LABEL: define hidden void @_Z17ConstantBoolSplatv(
+// CHECK-LABEL: define hidden void @_Z21ConstantTrueBoolSplatv(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[M:%.*]] = alloca [9 x i32], align 4
 // CHECK-NEXT:    store <9 x i32> splat (i32 1), ptr [[M]], align 4
 // CHECK-NEXT:    ret void
 //
-void ConstantBoolSplat() {
+void ConstantTrueBoolSplat() {
     bool3x3 M = true;
 }
 
+// CHECK-LABEL: define hidden void @_Z22ConstantFalseBoolSplatv(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[M:%.*]] = alloca [9 x i32], align 4
+// CHECK-NEXT:    store <9 x i32> zeroinitializer, ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void ConstantFalseBoolSplat() {
+    bool3x3 M = false;
+}
+
 // CHECK-LABEL: define hidden void @_Z12DynamicSplatf(
 // CHECK-SAME: float noundef nofpclass(nan inf) [[VALUE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
@@ -86,7 +97,7 @@ void CastThenSplat(float4 Value) {
     float3x3 M = (float) Value;
 }
 
-// CHECK-LABEL: define hidden void @_Z17BoolCastThenSplatDv3_i(
+// CHECK-LABEL: define hidden void @_Z30ExplicitIntToBoolCastThenSplatDv3_i(
 // CHECK-SAME: <3 x i32> noundef [[VALUE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca <3 x i32>, align 16
@@ -101,6 +112,44 @@ void CastThenSplat(float4 Value) {
 // CHECK-NEXT:    store <4 x i32> [[TMP1]], ptr [[M]], align 4
 // CHECK-NEXT:    ret void
 //
-void BoolCastThenSplat(int3 Value) {
+void ExplicitIntToBoolCastThenSplat(int3 Value) {
     bool2x2 M = (bool) Value;
 }
+
+// CHECK-LABEL: define hidden void @_Z32ExplicitFloatToBoolCastThenSplatDv2_f(
+// CHECK-SAME: <2 x float> noundef nofpclass(nan inf) [[VALUE:%.*]]) 
#[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca <2 x float>, align 8
+// CHECK-NEXT:    [[M:%.*]] = alloca [6 x i32], align 4
+// CHECK-NEXT:    store <2 x float> [[VALUE]], ptr [[VALUE_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[VALUE_ADDR]], align 8
+// CHECK-NEXT:    [[TOBOOL:%.*]] = fcmp reassoc nnan ninf nsz arcp afn une <2 
x float> [[TMP0]], zeroinitializer
+// CHECK-NEXT:    [[CAST_VTRUNC:%.*]] = extractelement <2 x i1> [[TOBOOL]], 
i32 0
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x i1> poison, 
i1 [[CAST_VTRUNC]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <6 x i1> 
[[SPLAT_SPLATINSERT]], <6 x i1> poison, <6 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = zext <6 x i1> [[SPLAT_SPLAT]] to <6 x i32>
+// CHECK-NEXT:    store <6 x i32> [[TMP1]], ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void ExplicitFloatToBoolCastThenSplat(float2 Value) {
+    bool2x3 M = (bool) Value;
+}
+
+// CHECK-LABEL: define hidden void @_Z32ExplicitBoolToFloatCastThenSplatb(
+// CHECK-SAME: i1 noundef [[VALUE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[M:%.*]] = alloca [6 x float], align 4
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[VALUE]] to i32
+// CHECK-NEXT:    store i32 [[STOREDV]], ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[LOADEDV:%.*]] = trunc i32 [[TMP0]] to i1
+// CHECK-NEXT:    [[CONV:%.*]] = uitofp i1 [[LOADEDV]] to float
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <6 x float> 
poison, float [[CONV]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <6 x float> 
[[SPLAT_SPLATINSERT]], <6 x float> poison, <6 x i32> zeroinitializer
+// CHECK-NEXT:    store <6 x float> [[SPLAT_SPLAT]], ptr [[M]], align 4
+// CHECK-NEXT:    ret void
+//
+void ExplicitBoolToFloatCastThenSplat(bool Value) {
+    float3x2 M = (float) Value;
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [HLSL][Matrix] Add Matrix splat support for booleans (PR #175809)

Reply via email to