[clang] [clang] Fix size and alignment of packed sub-byte integer vectors (PR #161796)

Steffen Larsen via cfe-commits Mon, 10 Nov 2025 07:18:26 -0800

https://github.com/steffenlarsen updated 
https://github.com/llvm/llvm-project/pull/161796


>From 545a9dfc6abcb1c9a5c39c65c2aa1de2436f4ac9 Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen" <[email protected]>
Date: Thu, 2 Oct 2025 22:56:47 -0700
Subject: [PATCH 1/4] [clang] Fix size and alignment of packed sub-byte integer
 vectors

When using sub-byte integer types in vectors, the data is packed into
the first N bits, where N is the bit-size of the sub-byte integer type
multiplied by the number of vector elements. However, clang currently
reports the size as if each element were one byte wide, because the
element type, considered in isolation, occupies a full byte.

This commit fixes the reported size and alignment of the sub-byte vector
types, so they correspond to the bit-packed layout they employ.

Signed-off-by: Larsen, Steffen <[email protected]>
---
 clang/lib/AST/ASTContext.cpp                  | 13 +++++---
 clang/test/CodeGenCXX/ext-int.cpp             | 28 +++++++++--------
 .../test/CodeGenCXX/matrix-vector-bit-int.cpp | 28 ++++++++---------
 clang/test/SemaCXX/ext-int.cpp                | 30 +++++++++++++++++++
 4 files changed, 67 insertions(+), 32 deletions(-)

diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 056bfe36b2a0a..451a87c1cfc63 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2093,10 +2093,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) 
const {
   case Type::ExtVector:
   case Type::Vector: {
     const auto *VT = cast<VectorType>(T);
-    TypeInfo EltInfo = getTypeInfo(VT->getElementType());
-    Width = VT->isPackedVectorBoolType(*this)
-                ? VT->getNumElements()
-                : EltInfo.Width * VT->getNumElements();
+    QualType Elt = VT->getElementType();
+    uint64_t EltWidth = [&]() -> uint64_t {
+      if (VT->isPackedVectorBoolType(*this))
+        return 1;
+      if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
+        return Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
+      return getTypeInfo(Elt).Width;
+    }();
+    Width = EltWidth * VT->getNumElements();
     // Enforce at least byte size and alignment.
     Width = std::max<unsigned>(8, Width);
     Align = std::max<unsigned>(8, Width);
diff --git a/clang/test/CodeGenCXX/ext-int.cpp 
b/clang/test/CodeGenCXX/ext-int.cpp
index a75b3701e36ef..0454363ca7f80 100644
--- a/clang/test/CodeGenCXX/ext-int.cpp
+++ b/clang/test/CodeGenCXX/ext-int.cpp
@@ -573,7 +573,7 @@ void VectorTest(uint16_t4 first, uint16_t4 second) {
 
 typedef unsigned _BitInt(4) uint4_t4 __attribute__((ext_vector_type(4)));
 void VectorTest(uint4_t4 first, uint4_t4 second) {
-  // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i32 %{{.+}}, i32 
%{{.+}})
+  // LIN64: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(i16 %{{.+}}, i16 
%{{.+}})
   // LIN32: define{{.*}} void @_Z10VectorTestDv4_DU4_S0_(<4 x i4> %{{.+}}, <4 
x i4> %{{.+}})
   // WIN64: define dso_local void 
@"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x 
i4> %{{.+}}, <4 x i4> %{{.+}})
   // WIN32: define dso_local void 
@"?VectorTest@@YAXT?$__vector@U?$_UBitInt@$03@__clang@@$03@__clang@@0@Z"(<4 x 
i4> inreg %{{.+}}, <4 x i4> inreg %{{.+}})
@@ -585,23 +585,25 @@ void VectorTest(uint4_t4 first, uint4_t4 second) {
 
 typedef unsigned _BitInt(2) uint2_t2 __attribute__((ext_vector_type(2)));
 uint2_t2 TestBitIntVector2x2Alloca(uint2_t2 v1, uint2_t2 v2) {
-  // LIN64: define dso_local i16 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i16 
%[[V1Coerce:.+]], i16 %[[V2Coerce:.+]])
-  // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN64: %[[RetValCoerce:.+]] = alloca i16, align 2
-  // LIN64: call void @llvm.memcpy.p0.p0.i64(ptr align 2 %[[RetValCoerce]], 
ptr align 2 %[[RetVal]], i64 1, i1 false)
-  // LIN64: %[[Ret:.+]] = load i16, ptr %[[RetValCoerce]], align 2
-  // LIN64: ret i16 %[[Ret]]
+  // LIN64: define dso_local i8 @_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(i8 
%[[V1Coerce:.+]], i8 %[[V2Coerce:.+]])
+  // LIN64: %[[RetVal:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN64: %[[V1Val:.+]] = load <2 x i2>, ptr %[[V1Addr]], align 1
+  // LIN64: %[[V2Val:.+]] = load <2 x i2>, ptr %[[V2Addr]], align 1
+  // LIN64: %[[AddVal:.+]] = add <2 x i2> %0, %1
+  // LIN64: store <2 x i2> %[[AddVal]], ptr %[[RetVal]], align 1
+  // LIN64: %[[Ret:.+]] = load i8, ptr %[[RetVal]], align 1
+  // LIN64: ret i8 %[[Ret]]
 
   // LIN32: define dso_local <2 x i2> 
@_Z25TestBitIntVector2x2AllocaDv2_DU2_S0_(<2 x i2> %{{.+}}, <2 x i2> %{{.+}})
-  // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 2
-  // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 2
+  // LIN32: %[[V1Addr:.+]] = alloca <2 x i2>, align 1
+  // LIN32: %[[V2Addr:.+]] = alloca <2 x i2>, align 1
   // LIN32: ret <2 x i2> %[[Ret:.+]]
 
   // WIN: define dso_local <2 x i2> 
@"?TestBitIntVector2x2Alloca@@YAT?$__vector@U?$_UBitInt@$01@__clang@@$01@__clang@@T12@0@Z"(<2
 x i2>{{.*}}, <2 x i2>{{.*}})
-  // WIN: %[[V1:.+]] = alloca <2 x i2>, align 2
-  // WIN: %[[V2:.+]] = alloca <2 x i2>, align 2
+  // WIN: %[[V1:.+]] = alloca <2 x i2>, align 1
+  // WIN: %[[V2:.+]] = alloca <2 x i2>, align 1
   // WIN: ret <2 x i2> %[[Ret:.+]]
   return v1 + v2;
 }
diff --git a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp 
b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
index 2e7531b334ecb..98b868fcd5bc2 100644
--- a/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
+++ b/clang/test/CodeGenCXX/matrix-vector-bit-int.cpp
@@ -70,27 +70,25 @@ i512x3 v3(i512x3 a) {
   return a + a;
 }
 
-// CHECK-LABEL: define dso_local i32 @_Z2v4Dv3_DB4_(
-// CHECK-SAME: i32 [[A_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-LABEL: define dso_local i16 @_Z2v4Dv3_DB4_(
+// CHECK-SAME: i16 [[A_COERCE:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[A:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i4>, align 4
-// CHECK-NEXT:    [[RETVAL_COERCE:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A_COERCE]], ptr [[A]], align 4
-// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 4
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    [[A:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <3 x i4>, align 2
+// CHECK-NEXT:    store i16 [[A_COERCE]], ptr [[A]], align 2
+// CHECK-NEXT:    [[LOADVECN:%.*]] = load <4 x i4>, ptr [[A]], align 2
 // CHECK-NEXT:    [[A1:%.*]] = shufflevector <4 x i4> [[LOADVECN]], <4 x i4> 
poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i4> [[A1]], <3 x i4> 
poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-// CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i4> [[EXTRACTVEC]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[LOADVECN2:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[EXTRACTVEC3:%.*]] = shufflevector <4 x i4> [[LOADVECN2]], 
<4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
-// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[LOADVECN4:%.*]] = load <4 x i4>, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[EXTRACTVEC5:%.*]] = shufflevector <4 x i4> [[LOADVECN4]], 
<4 x i4> poison, <3 x i32> <i32 0, i32 1, i32 2>
 // CHECK-NEXT:    [[ADD:%.*]] = add <3 x i4> [[EXTRACTVEC3]], [[EXTRACTVEC5]]
-// CHECK-NEXT:    store <3 x i4> [[ADD]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 
[[RETVAL_COERCE]], ptr align 4 [[RETVAL]], i64 2, i1 false)
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[RETVAL_COERCE]], align 4
-// CHECK-NEXT:    ret i32 [[TMP0]]
+// CHECK-NEXT:    store <3 x i4> [[ADD]], ptr [[RETVAL]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[RETVAL]], align 2
+// CHECK-NEXT:    ret i16 [[TMP0]]
 //
 i4x3 v4(i4x3 a) {
   return a + a;
diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp
index 5c566dafed931..d3b72761402d0 100644
--- a/clang/test/SemaCXX/ext-int.cpp
+++ b/clang/test/SemaCXX/ext-int.cpp
@@ -293,3 +293,33 @@ void FromPaper1() {
 void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) {
   static_assert(is_same<decltype(a1 * (_BitInt(32))a2), _BitInt(32)>::value, 
"");
 }
+
+// Check sub-byte integer vector size and alignment, expecting packing.
+template <int Bits, int N>
+using packed_vec_t = _BitInt(Bits) __attribute__((ext_vector_type(N)));
+void SubByteVecPacking() {
+  static_assert(sizeof(packed_vec_t<2, 2>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 3>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 4>) == 1);
+  static_assert(sizeof(packed_vec_t<2, 8>) == 2);
+  static_assert(sizeof(packed_vec_t<2, 16>) == 4);
+  static_assert(sizeof(packed_vec_t<2, 32>) == 8);
+  static_assert(sizeof(packed_vec_t<4, 2>) == 1);
+  static_assert(sizeof(packed_vec_t<4, 4>) == 2);
+  static_assert(sizeof(packed_vec_t<4, 8>) == 4);
+  static_assert(sizeof(packed_vec_t<4, 16>) == 8);
+  static_assert(sizeof(packed_vec_t<4, 32>) == 16);
+
+  static_assert(alignof(packed_vec_t<2, 2>) == 1);
+  static_assert(alignof(packed_vec_t<2, 3>) == 1);
+  static_assert(alignof(packed_vec_t<2, 4>) == 1);
+  static_assert(alignof(packed_vec_t<2, 8>) == 2);
+  static_assert(alignof(packed_vec_t<2, 16>) == 4);
+  static_assert(alignof(packed_vec_t<2, 32>) == 8);
+  static_assert(alignof(packed_vec_t<4, 2>) == 1);
+  static_assert(alignof(packed_vec_t<4, 3>) == 2);
+  static_assert(alignof(packed_vec_t<4, 4>) == 2);
+  static_assert(alignof(packed_vec_t<4, 8>) == 4);
+  static_assert(alignof(packed_vec_t<4, 16>) == 8);
+  static_assert(alignof(packed_vec_t<4, 32>) == 16);
+}

>From 4c2d4e4b179a7e464b276b9f00ee95c0292d7dda Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen" <[email protected]>
Date: Mon, 6 Oct 2025 09:30:48 -0700
Subject: [PATCH 2/4] Split packed size selection into a separate function and
 fix bit-casts

Signed-off-by: Larsen, Steffen <[email protected]>
---
 clang/include/clang/AST/ASTContext.h          | 10 ++++
 clang/include/clang/AST/TypeBase.h            |  6 +++
 clang/lib/AST/ASTContext.cpp                  |  9 +---
 clang/lib/AST/ExprConstant.cpp                | 48 ++++++++++---------
 .../SemaCXX/constexpr-builtin-bit-cast.cpp    | 22 +++++++++
 5 files changed, 64 insertions(+), 31 deletions(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index 12351e98e5a2b..de73f31dfbd95 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2677,6 +2677,16 @@ class ASTContext : public RefCountedBase<ASTContext> {
     return getTypeSizeInCharsIfKnown(QualType(Ty, 0));
   }
 
+  /// Return the size of an element inside a given vector type.
+  uint64_t getVectorElementSize(const VectorType *VTy) const {
+    QualType EltTy = VTy->getElementType();
+    if (VTy->isPackedVectorBoolType(*this))
+      return 1;
+    if (EltTy.getTypePtrOrNull() && EltTy->isBitIntType())
+      return EltTy->castAs<BitIntType>()->getNumBits();
+    return getTypeSize(EltTy);
+  }
+
   /// Return the ABI-specified alignment of a (complete) type \p T, in
   /// bits.
   unsigned getTypeAlign(QualType T) const { return getTypeInfo(T).Align; }
diff --git a/clang/include/clang/AST/TypeBase.h 
b/clang/include/clang/AST/TypeBase.h
index 6786b2f6cbc78..3d904c71b12af 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -2637,6 +2637,7 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isVectorType() const;                    // GCC vector type.
   bool isExtVectorType() const;                 // Extended vector type.
   bool isExtVectorBoolType() const;             // Extended vector type with 
bool element.
+  bool isBitIntVectorType() const;              // Vector type with _BitInt 
element.
   // Extended vector type with bool element that is packed. HLSL doesn't pack
   // its bool vectors.
   bool isPackedVectorBoolType(const ASTContext &ctx) const;
@@ -8681,6 +8682,11 @@ inline bool Type::isExtVectorBoolType() const {
   return cast<ExtVectorType>(CanonicalType)->getElementType()->isBooleanType();
 }
 
+inline bool Type::isBitIntVectorType() const {
+  return isVectorType() &&
+         cast<VectorType>(CanonicalType)->getElementType()->isBitIntType();
+}
+
 inline bool Type::isSubscriptableVectorType() const {
   return isVectorType() || isSveVLSBuiltinType();
 }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 451a87c1cfc63..40371ee26ac13 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2094,14 +2094,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) 
const {
   case Type::Vector: {
     const auto *VT = cast<VectorType>(T);
     QualType Elt = VT->getElementType();
-    uint64_t EltWidth = [&]() -> uint64_t {
-      if (VT->isPackedVectorBoolType(*this))
-        return 1;
-      if (Elt.getTypePtrOrNull() && Elt.getTypePtr()->isBitIntType())
-        return Elt.getTypePtr()->castAs<BitIntType>()->getNumBits();
-      return getTypeInfo(Elt).Width;
-    }();
-    Width = EltWidth * VT->getNumElements();
+    Width = getVectorElementSize(VT) * VT->getNumElements();
     // Enforce at least byte size and alignment.
     Width = std::max<unsigned>(8, Width);
     Align = std::max<unsigned>(8, Width);
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..4fdea501e0b57 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -7589,8 +7589,8 @@ class APValueToBufferConverter {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
 
-    if (VTy->isPackedVectorBoolType(Info.Ctx)) {
-      // Special handling for OpenCL bool vectors:
+    if (VTy->isPackedVectorBoolType(Info.Ctx) || VTy->isBitIntVectorType()) {
+      // Special handling for OpenCL bool and sub-byte vectors:
       // Since these vectors are stored as packed bits, but we can't write
       // individual bits to the BitCastBuffer, we'll buffer all of the elements
       // together into an appropriately sized APInt and write them all out at
@@ -7599,18 +7599,21 @@ class APValueToBufferConverter {
       // have to worry about writing data which should have been left
       // uninitialized.
       bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
+      uint64_t EltSize = Info.Ctx.getVectorElementSize(VTy);
 
-      llvm::APInt Res = llvm::APInt::getZero(NElts);
+      llvm::APInt Res = llvm::APInt::getZero(NElts * EltSize);
       for (unsigned I = 0; I < NElts; ++I) {
         const llvm::APSInt &EltAsInt = Val.getVectorElt(I).getInt();
-        assert(EltAsInt.isUnsigned() && EltAsInt.getBitWidth() == 1 &&
-               "bool vector element must be 1-bit unsigned integer!");
-
-        Res.insertBits(EltAsInt, BigEndian ? (NElts - I - 1) : I);
+        assert(!VTy->isPackedVectorBoolType(Info.Ctx) ||
+               (EltAsInt.isUnsigned() && EltAsInt.getBitWidth()) == 1 &&
+                   "bool vector element must be 1-bit unsigned integer!");
+        uint64_t BitOffset = EltSize * (BigEndian ? (NElts - I - 1) : I);
+        Res.insertBits(EltAsInt, BitOffset);
       }
 
-      SmallVector<uint8_t, 8> Bytes(NElts / 8);
-      llvm::StoreIntToMemory(Res, &*Bytes.begin(), NElts / 8);
+      uint64_t NumBytes = NElts * EltSize / 8;
+      SmallVector<uint8_t, 8> Bytes(NumBytes);
+      llvm::StoreIntToMemory(Res, &*Bytes.begin(), NumBytes);
       Buffer.writeObject(Offset, Bytes);
     } else {
       // Iterate over each of the elements and write them out to the buffer at
@@ -7852,13 +7855,11 @@ class BufferToAPValueConverter {
   std::optional<APValue> visit(const VectorType *VTy, CharUnits Offset) {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
-    unsigned EltSize =
-        VTy->isPackedVectorBoolType(Info.Ctx) ? 1 : 
Info.Ctx.getTypeSize(EltTy);
 
     SmallVector<APValue, 4> Elts;
     Elts.reserve(NElts);
-    if (VTy->isPackedVectorBoolType(Info.Ctx)) {
-      // Special handling for OpenCL bool vectors:
+    if (VTy->isPackedVectorBoolType(Info.Ctx) || VTy->isBitIntVectorType()) {
+      // Special handling for OpenCL bool and sub-byte vectors:
       // Since these vectors are stored as packed bits, but we can't read
       // individual bits from the BitCastBuffer, we'll buffer all of the
       // elements together into an appropriately sized APInt and write them all
@@ -7867,20 +7868,22 @@ class BufferToAPValueConverter {
       // we don't have to worry about reading any padding data which didn't
       // actually need to be accessed.
       bool BigEndian = Info.Ctx.getTargetInfo().isBigEndian();
+      uint64_t EltSize = Info.Ctx.getVectorElementSize(VTy);
+      bool IsSigned = EltTy->isSignedIntegerType();
 
+      uint64_t NumBytes = NElts * EltSize / 8;
       SmallVector<uint8_t, 8> Bytes;
-      Bytes.reserve(NElts / 8);
-      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NElts / 8), 
Bytes))
+      Bytes.reserve(NumBytes);
+      if (!Buffer.readObject(Offset, CharUnits::fromQuantity(NumBytes), Bytes))
         return std::nullopt;
 
-      APSInt SValInt(NElts, true);
-      llvm::LoadIntFromMemory(SValInt, &*Bytes.begin(), Bytes.size());
+      APSInt SValInt(NElts * EltSize);
+      llvm::LoadIntFromMemory(SValInt, Bytes.data(), Bytes.size());
 
       for (unsigned I = 0; I < NElts; ++I) {
-        llvm::APInt Elt =
-            SValInt.extractBits(1, (BigEndian ? NElts - I - 1 : I) * EltSize);
-        Elts.emplace_back(
-            APSInt(std::move(Elt), !EltTy->isSignedIntegerType()));
+        uint64_t BitOffset = EltSize * (BigEndian ? (NElts - I - 1) : I);
+        llvm::APInt Elt = SValInt.extractBits(EltSize, BitOffset);
+        Elts.emplace_back(APSInt(std::move(Elt), !IsSigned));
       }
     } else {
       // Iterate over each of the elements and read them from the buffer at
@@ -7986,8 +7989,7 @@ static bool 
checkBitCastConstexprEligibilityType(SourceLocation Loc,
   if (const auto *VTy = Ty->getAs<VectorType>()) {
     QualType EltTy = VTy->getElementType();
     unsigned NElts = VTy->getNumElements();
-    unsigned EltSize =
-        VTy->isPackedVectorBoolType(Ctx) ? 1 : Ctx.getTypeSize(EltTy);
+    unsigned EltSize = Ctx.getVectorElementSize(VTy);
 
     if ((NElts * EltSize) % Ctx.getCharWidth() != 0) {
       // The vector's size in bits is not a multiple of the target's byte size,
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp 
b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
index 7a6d7cb353158..aedd27f433bb8 100644
--- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
@@ -510,6 +510,28 @@ constexpr bool9 bad_short_to_bool9 = 
__builtin_bit_cast(bool9, static_cast<unsig
 // expected-note@+1 {{bit_cast involving type 'bool 
__attribute__((ext_vector_type(17)))' (vector of 17 'bool' values) is not 
allowed in a constant expression; element size 1 * element count 17 is not a 
multiple of the byte size 8}}
 constexpr bool17 bad_int_to_bool17 = __builtin_bit_cast(bool17, 0x0001CAFEU);
 
+template <int Bits, int N> using packed_vec_t = _BitInt(Bits) 
__attribute__((ext_vector_type(N)));
+
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned char>(0)), 
"");
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned char>(1)), 
"");
+static_assert(round_trip<packed_vec_t<2, 4>>(static_cast<unsigned 
char>(0x55)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<short>(0)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<short>(-1)), "");
+static_assert(round_trip<packed_vec_t<2, 8>>(static_cast<short>(0x5555)), "");
+
+static_assert(bit_cast<unsigned char>(packed_vec_t<2, 4>{1, -2, 0, -1}) == 
(LITTLE_END ? 0xC9 : 0x63), "");
+static_assert(bit_cast<unsigned short>(packed_vec_t<2, 8>{1, -2, 0, -1, -2, 
-1, 1, 0}) == (LITTLE_END ? 0x1EC9 : 0x63B4), "");
+
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned char>(0)), 
"");
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned char>(1)), 
"");
+static_assert(round_trip<packed_vec_t<4, 2>>(static_cast<unsigned 
char>(0x55)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<short>(0)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<short>(-1)), "");
+static_assert(round_trip<packed_vec_t<4, 4>>(static_cast<short>(0x5555)), "");
+
+static_assert(bit_cast<unsigned char>(packed_vec_t<4, 2>{-4, -7}) == 
(LITTLE_END ? 0x9C : 0xC9), "");
+static_assert(bit_cast<unsigned short>(packed_vec_t<4, 4>{3, -5, -1, 7}) == 
(LITTLE_END ? 0x7FB3 : 0x3BF7), "");
+
 }
 
 namespace test_complex {

>From e13eff33cecca27e748664774f87eaeab4597d08 Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen" <[email protected]>
Date: Sun, 12 Oct 2025 21:36:52 -0700
Subject: [PATCH 3/4] Remove unused variable and fix formatting

Signed-off-by: Larsen, Steffen <[email protected]>
---
 clang/include/clang/AST/TypeBase.h | 3 ++-
 clang/lib/AST/ASTContext.cpp       | 1 -
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/AST/TypeBase.h 
b/clang/include/clang/AST/TypeBase.h
index 3d904c71b12af..283af7d97960f 100644
--- a/clang/include/clang/AST/TypeBase.h
+++ b/clang/include/clang/AST/TypeBase.h
@@ -2637,10 +2637,11 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isVectorType() const;                    // GCC vector type.
   bool isExtVectorType() const;                 // Extended vector type.
   bool isExtVectorBoolType() const;             // Extended vector type with 
bool element.
-  bool isBitIntVectorType() const;              // Vector type with _BitInt 
element.
   // Extended vector type with bool element that is packed. HLSL doesn't pack
   // its bool vectors.
   bool isPackedVectorBoolType(const ASTContext &ctx) const;
+  // Vector type with packed _BitInt elements.
+  bool isBitIntVectorType() const;
   bool isSubscriptableVectorType() const;
   bool isMatrixType() const;                    // Matrix type.
   bool isConstantMatrixType() const;            // Constant matrix type.
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 40371ee26ac13..cc91960cfe01b 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2093,7 +2093,6 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const 
{
   case Type::ExtVector:
   case Type::Vector: {
     const auto *VT = cast<VectorType>(T);
-    QualType Elt = VT->getElementType();
     Width = getVectorElementSize(VT) * VT->getNumElements();
     // Enforce at least byte size and alignment.
     Width = std::max<unsigned>(8, Width);

>From 3176e9eb55a772ab26da85b34bc501206cf70f7b Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen" <[email protected]>
Date: Tue, 14 Oct 2025 05:51:10 -0700
Subject: [PATCH 4/4] Remove null-type check and add tests for padded type
 bitcasts

Signed-off-by: Larsen, Steffen <[email protected]>
---
 clang/include/clang/AST/ASTContext.h          |  2 +-
 .../SemaCXX/constexpr-builtin-bit-cast.cpp    | 28 +++++++++++++++++++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/AST/ASTContext.h 
b/clang/include/clang/AST/ASTContext.h
index de73f31dfbd95..7caccf2a22f0a 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2682,7 +2682,7 @@ class ASTContext : public RefCountedBase<ASTContext> {
     QualType EltTy = VTy->getElementType();
     if (VTy->isPackedVectorBoolType(*this))
       return 1;
-    if (EltTy.getTypePtrOrNull() && EltTy->isBitIntType())
+    if (EltTy->isBitIntType())
       return EltTy->castAs<BitIntType>()->getNumBits();
     return getTypeSize(EltTy);
   }
diff --git a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp 
b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
index aedd27f433bb8..893abf0636a5d 100644
--- a/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
+++ b/clang/test/SemaCXX/constexpr-builtin-bit-cast.cpp
@@ -532,6 +532,34 @@ static_assert(round_trip<packed_vec_t<4, 
4>>(static_cast<short>(0x5555)), "");
 static_assert(bit_cast<unsigned char>(packed_vec_t<4, 2>{-4, -7}) == 
(LITTLE_END ? 0x9C : 0xC9), "");
 static_assert(bit_cast<unsigned short>(packed_vec_t<4, 4>{3, -5, -1, 7}) == 
(LITTLE_END ? 0x7FB3 : 0x3BF7), "");
 
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_2_3_to_char' must be 
initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) 
__attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(2)' values) is not 
allowed in a constant expression; element size 2 * element count 3 is not a 
multiple of the byte size 8}}
+constexpr unsigned char bad_packed_vec_2_3_to_char = 
__builtin_bit_cast(unsigned char, packed_vec_t<2, 3>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_char_to_packed_vec_2_3' must be 
initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) 
__attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(2)' values) is not 
allowed in a constant expression; element size 2 * element count 3 is not a 
multiple of the byte size 8}}
+constexpr packed_vec_t<2, 3> bad_char_to_packed_vec_2_3 = 
__builtin_bit_cast(packed_vec_t<2, 3>, static_cast<unsigned char>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_2_6_to_short' must 
be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) 
__attribute__((ext_vector_type(6)))' (vector of 6 '_BitInt(2)' values) is not 
allowed in a constant expression; element size 2 * element count 6 is not a 
multiple of the byte size 8}}
+constexpr unsigned short bad_packed_vec_2_6_to_short = 
__builtin_bit_cast(unsigned short, packed_vec_t<2, 6>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_short_to_packed_vec_2_6' must 
be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(2) 
__attribute__((ext_vector_type(6)))' (vector of 6 '_BitInt(2)' values) is not 
allowed in a constant expression; element size 2 * element count 6 is not a 
multiple of the byte size 8}}
+constexpr packed_vec_t<2, 6> bad_short_to_packed_vec_2_6 = 
__builtin_bit_cast(packed_vec_t<2, 6>, static_cast<unsigned short>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_4_3_to_short' must 
be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) 
__attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(4)' values) is not 
allowed in a constant expression; element size 4 * element count 3 is not a 
multiple of the byte size 8}}
+constexpr unsigned short bad_packed_vec_4_3_to_short = 
__builtin_bit_cast(unsigned short, packed_vec_t<4, 3>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_short_to_packed_vec_4_3' must 
be initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) 
__attribute__((ext_vector_type(3)))' (vector of 3 '_BitInt(4)' values) is not 
allowed in a constant expression; element size 4 * element count 3 is not a 
multiple of the byte size 8}}
+constexpr packed_vec_t<4, 3> bad_short_to_packed_vec_4_3 = 
__builtin_bit_cast(packed_vec_t<4, 3>, static_cast<unsigned short>(0));
+
+// expected-error@+2 {{constexpr variable 'bad_packed_vec_4_5_to_int' must be 
initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) 
__attribute__((ext_vector_type(5)))' (vector of 5 '_BitInt(4)' values) is not 
allowed in a constant expression; element size 4 * element count 5 is not a 
multiple of the byte size 8}}
+constexpr unsigned int bad_packed_vec_4_5_to_int = __builtin_bit_cast(unsigned 
int, packed_vec_t<4, 5>{1, 0, 1});
+// expected-error@+2 {{constexpr variable 'bad_int_to_packed_vec_4_5' must be 
initialized by a constant expression}}
+// expected-note@+1 {{bit_cast involving type '_BitInt(4) 
__attribute__((ext_vector_type(5)))' (vector of 5 '_BitInt(4)' values) is not 
allowed in a constant expression; element size 4 * element count 5 is not a 
multiple of the byte size 8}}
+constexpr packed_vec_t<4, 5> bad_int_to_packed_vec_4_5 = 
__builtin_bit_cast(packed_vec_t<4, 5>, static_cast<unsigned int>(0));
+
 }
 
 namespace test_complex {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [clang] Fix size and alignment of packed sub-byte integer vectors (PR #161796)

Reply via email to