https://github.com/Icohedron updated https://github.com/llvm/llvm-project/pull/178315
>From 3c3844dbe7d6b79de4f7a86eca7d1ad9480ca21c Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 27 Jan 2026 14:51:39 -0800 Subject: [PATCH 1/2] EmitFromMemory when emitting load vector and matrix element LValue --- clang/lib/CodeGen/CGExpr.cpp | 19 ++++--- .../BasicFeatures/VectorElementwiseCast.hlsl | 53 +++++++++++++++++++ 2 files changed, 65 insertions(+), 7 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 490377c04b034..7f817000acb68 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2445,8 +2445,9 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { if (LV.isVectorElt()) { llvm::LoadInst *Load = Builder.CreateLoad(LV.getVectorAddress(), LV.isVolatileQualified()); - return RValue::get(Builder.CreateExtractElement(Load, LV.getVectorIdx(), - "vecext")); + llvm::Value *Elt = + Builder.CreateExtractElement(Load, LV.getVectorIdx(), "vecext"); + return RValue::get(EmitFromMemory(Elt, LV.getType())); } // If this is a reference to a subset of the elements of a vector, either @@ -2461,14 +2462,18 @@ RValue CodeGenFunction::EmitLoadOfLValue(LValue LV, SourceLocation Loc) { if (LV.isMatrixElt()) { llvm::Value *Idx = LV.getMatrixIdx(); - if (CGM.getCodeGenOpts().OptimizationLevel > 0) { - const auto *const MatTy = LV.getType()->castAs<ConstantMatrixType>(); - llvm::MatrixBuilder MB(Builder); - MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened()); + QualType EltTy = LV.getType(); + if (const auto *MatTy = EltTy->getAs<ConstantMatrixType>()) { + EltTy = MatTy->getElementType(); + if (CGM.getCodeGenOpts().OptimizationLevel > 0) { + llvm::MatrixBuilder MB(Builder); + MB.CreateIndexAssumption(Idx, MatTy->getNumElementsFlattened()); + } } llvm::LoadInst *Load = Builder.CreateLoad(LV.getMatrixAddress(), LV.isVolatileQualified()); - return RValue::get(Builder.CreateExtractElement(Load, Idx, "matrixext")); + llvm::Value *Elt = Builder.CreateExtractElement(Load, Idx, "matrixext"); + return RValue::get(EmitFromMemory(Elt, EltTy)); } if (LV.isMatrixRow()) { QualType MatTy = LV.getType(); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl index c11c8498ada45..881e6b5dd525a 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -180,3 +180,56 @@ export void call8(int3x1 M) { int3 V = (int3)M; } +// vector flat cast from matrix of same size (bool) +// CHECK-LABEL: call9 +// CHECK: [[M_ADDR:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[V:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[M_ADDR]], align 4 +// CHECK-NEXT: store <2 x i32> [[TMP1]], ptr [[HLSL_EWCAST_SRC]], align 4 +// CHECK-NEXT: [[MATRIX_GEP:%.*]] = getelementptr inbounds <2 x i32>, ptr [[HLSL_EWCAST_SRC]], i32 0 +// CHECK-NEXT: [[TMP2:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT:%.*]] = extractelement <2 x i32> [[TMP3]], i32 0 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[MATRIXEXT]] to i1 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV]], i64 0 +// CHECK-NEXT: [[TMP5:%.*]] = load <2 x i32>, ptr [[MATRIX_GEP]], align 4 +// CHECK-NEXT: [[MATRIXEXT1:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 +// CHECK-NEXT: [[LOADEDV2:%.*]] = trunc i32 [[MATRIXEXT1]] to i1 +// CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i1> [[TMP4]], i1 [[LOADEDV2]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i1> [[TMP6]] to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[TMP7]], ptr [[V]], align 8 +// CHECK-NEXT: ret void +export void call9(bool1x2 M) { + bool2 V = (bool2)M; +} + +struct BoolVecStruct { + bool2 V; +}; + +// vector flat cast from struct containing bool vector +// CHECK-LABEL: call10 +// CHECK: [[V:%.*]] = alloca <2 x i32>, align 8 +// CHECK-NEXT: [[AGG_TEMP:%.*]] = alloca %struct.BoolVecStruct, align 1 +// CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8 +// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 1 [[AGG_TEMP]], ptr align 1 %s, i32 8, i1 false) +// CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr inbounds %struct.BoolVecStruct, ptr [[AGG_TEMP]], i32 0, i32 0 +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i1>, ptr [[FLATCAST_TMP]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8 +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0 +// CHECK-NEXT: [[LOADEDV:%.*]] = trunc i32 [[VECEXT]] to i1 +// CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i1> [[TMP0]], i1 [[LOADEDV]], i64 0 +// CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[VECTOR_GEP]], align 8 +// CHECK-NEXT: [[VECEXT1:%.*]] = extractelement <2 x i32> [[TMP3]], i32 1 +// CHECK-NEXT: [[LOADEDV2:%.*]] = trunc i32 [[VECEXT1]] to i1 +// CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i1> [[TMP2]], i1 [[LOADEDV2]], i64 1 +// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i32> +// CHECK-NEXT: store <2 x i32> [[TMP5]], ptr [[V]], align 8 +// CHECK-NEXT: ret void +export void call10(BoolVecStruct s) { + bool2 V = (bool2)s; +} >From 341252791338a90604c41543834add90cd9d680a Mon Sep 17 00:00:00 2001 From: Deric Cheung <[email protected]> Date: Tue, 17 Feb 2026 11:37:14 -0800 Subject: [PATCH 2/2] Update matrix allocas in test for array of vectors representation --- .../CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl index 83776b816211d..e232223b185c2 100644 --- a/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl +++ b/clang/test/CodeGenHLSL/BasicFeatures/VectorElementwiseCast.hlsl @@ -184,9 +184,11 @@ export void call8(int3x1 M) { // vector flat cast from matrix of same size (bool) // CHECK-LABEL: call9 -// CHECK: [[M_ADDR:%.*]] = alloca [2 x i32], align 4 +// COL-CHECK: [[M_ADDR:%.*]] = alloca [2 x <1 x i32>], align 4 +// ROW-CHECK: [[M_ADDR:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: [[V:%.*]] = alloca <2 x i32>, align 8 -// CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x i32], align 4 +// COL-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [2 x <1 x i32>], align 4 +// ROW-CHECK-NEXT: [[HLSL_EWCAST_SRC:%.*]] = alloca [1 x <2 x i32>], align 4 // CHECK-NEXT: [[FLATCAST_TMP:%.*]] = alloca <2 x i1>, align 8 // CHECK-NEXT: [[TMP0:%.*]] = zext <2 x i1> %M to <2 x i32> // CHECK-NEXT: store <2 x i32> [[TMP0]], ptr [[M_ADDR]], align 4 _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
