https://github.com/chaitanyav updated https://github.com/llvm/llvm-project/pull/168100
>From 71dbbff93b2abafd5bccf64479522f5aaa1ff7f6 Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Mon, 10 Nov 2025 22:59:08 -0800 Subject: [PATCH 1/2] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 VPSHUFBITQMB intrinsics to be used in constexpr Resolves: #161337 --- clang/include/clang/Basic/BuiltinsX86.td | 6 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 72 +++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 46 ++++++++++++ clang/lib/Headers/avx512bitalgintrin.h | 27 ++++--- clang/lib/Headers/avx512vlbitalgintrin.h | 46 ++++++------ .../test/CodeGen/X86/avx512bitalg-builtins.c | 10 +++ .../CodeGen/X86/avx512vlbitalg-builtins.c | 14 ++++ 7 files changed, 179 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index b760c3e06b8f7..d9059fc730b5b 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1363,15 +1363,15 @@ let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVect def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">; } -let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpshufbitqmb128_mask : X86Builtin<"unsigned short(_Vector<16, char>, _Vector<16, char>, unsigned short)">; } -let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vpshufbitqmb256_mask : X86Builtin<"unsigned int(_Vector<32, char>, _Vector<32, char>, unsigned int)">; } -let Features = "avx512bitalg", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512bitalg", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpshufbitqmb512_mask : X86Builtin<"unsigned long long int(_Vector<64, char>, _Vector<64, char>, unsigned long long int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index eba71d66bc4d6..b36599101d5f1 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3468,6 +3468,72 @@ static bool interp__builtin_ia32_shuffle_generic( return true; } +static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + + assert(Call->getNumArgs() == 3); + + QualType SourceType = Call->getArg(0)->getType(); + QualType ShuffleMaskType = Call->getArg(1)->getType(); + QualType ZeroMaskType = Call->getArg(2)->getType(); + if (!SourceType->isVectorType() || !ShuffleMaskType->isVectorType() || + !ZeroMaskType->isIntegerType()) { + return false; + } + + Pointer Source, ShuffleMask; + APSInt ZeroMask = popToAPSInt(S, Call->getArg(2)); + ShuffleMask = S.Stk.pop<Pointer>(); + Source = S.Stk.pop<Pointer>(); + + const auto *SourceVecT = SourceType->castAs<VectorType>(); + const auto *ShuffleMaskVecT = ShuffleMaskType->castAs<VectorType>(); + assert(SourceVecT->getNumElements() == ShuffleMaskVecT->getNumElements()); + assert(ZeroMask.getBitWidth() == SourceVecT->getNumElements()); + + PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType()); + PrimType ShuffleMaskElemT = + *S.getContext().classify(ShuffleMaskVecT->getElementType()); + + const unsigned NumBytesInQWord = 8; + const unsigned NumBitsInByte = 8; + const unsigned NumBytes = SourceVecT->getNumElements(); + const unsigned NumQWords = NumBytes / NumBytesInQWord; + const unsigned RetWidth = ZeroMask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + + APInt SourceQWord(64, 0); + for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; + ++ByteInQWord) { + uint64_t Byte = 0; + INT_TYPE_SWITCH(SourceElemT, { + Byte = static_cast<uint64_t>( + Source.elem<T>(QWordId * NumBytesInQWord + ByteInQWord)); + }); + SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte); + } + + for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; + ++ByteInQWord) { + unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord; + unsigned M = 0; + INT_TYPE_SWITCH(ShuffleMaskElemT, { + M = static_cast<unsigned>(ShuffleMask.elem<T>(ByteIdx)) & 0x3F; + }); + + if (ZeroMask[ByteIdx]) { + RetMask.setBitVal(ByteIdx, SourceQWord[M]); + } + } + } + + pushInteger(S, RetMask, Call->getType()); + + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -4844,6 +4910,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_ucmpq512_mask: return interp__builtin_ia32_cmp_mask(S, OpPC, Call, BuiltinID, /*IsUnsigned=*/true); + + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: + return interp__builtin_ia32_shufbitqmb_mask(S, OpPC, Call); + case X86::BI__builtin_ia32_pslldqi128_byteshift: case X86::BI__builtin_ia32_pslldqi256_byteshift: case X86::BI__builtin_ia32_pslldqi512_byteshift: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index ce5301f17b3e7..962d4b91c1d45 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16706,6 +16706,52 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(APValue(RetMask), E); } + case X86::BI__builtin_ia32_vpshufbitqmb128_mask: + case X86::BI__builtin_ia32_vpshufbitqmb256_mask: + case X86::BI__builtin_ia32_vpshufbitqmb512_mask: { + assert(E->getNumArgs() == 3); + + APValue Source, ShuffleMask; + APSInt ZeroMask; + if (!EvaluateVector(E->getArg(0), Source, Info) || + !EvaluateVector(E->getArg(1), ShuffleMask, Info) || + !EvaluateInteger(E->getArg(2), ZeroMask, Info)) + return false; + + assert(Source.getVectorLength() == ShuffleMask.getVectorLength()); + assert(ZeroMask.getBitWidth() == Source.getVectorLength()); + + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = Source.getVectorLength(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned RetWidth = ZeroMask.getBitWidth(); + APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); + + for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { + + APInt SourceQWord(64, 0); + for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; + ++ByteInQWord) { + uint64_t Byte = + Source.getVectorElt(QWordId * NumBytesInQWord + ByteInQWord) + .getInt() + .getZExtValue(); + SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte); + } + + for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; + ++ByteInQWord) { + unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord; + unsigned M = + ShuffleMask.getVectorElt(ByteIdx).getInt().getZExtValue() & 0x3F; + if (ZeroMask[ByteIdx]) { + RetMask.setBitVal(ByteIdx, SourceQWord[M]); + } + } + } + return Success(APValue(RetMask), E); + } } } diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 98197e468370d..6a3e47814cb93 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -15,44 +15,43 @@ #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512bitalg"), __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \ __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr -#else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi16(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v32hu)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectw_512( (__mmask32)__U, (__v32hi)_mm512_popcnt_epi16(__B), (__v32hi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { return _mm512_mask_popcnt_epi16((__m512i)_mm512_setzero_si512(), __U, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_popcnt_epi8(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi8(__m512i __A) { return (__m512i)__builtin_elementwise_popcount((__v64qu)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { return (__m512i)__builtin_ia32_selectb_512( (__mmask64)__U, (__v64qi)_mm512_popcnt_epi8(__B), (__v64qi)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { return _mm512_mask_popcnt_epi8((__m512i)_mm512_setzero_si512(), __U, __B); } @@ -74,6 +73,4 @@ _mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR - #endif diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 1874adce4ea6e..624630f76e484 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -15,6 +15,16 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bitalg"), \ @@ -23,75 +33,66 @@ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512bitalg"), \ __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#else -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 #endif -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v16hu)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256( (__mmask16)__U, (__v16hi)_mm256_popcnt_epi16(__B), (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i)_mm256_setzero_si256(), __U, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi16(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v8hu)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128( (__mmask8)__U, (__v8hi)_mm_popcnt_epi16(__B), (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { return _mm_mask_popcnt_epi16((__m128i)_mm_setzero_si128(), __U, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { return (__m256i)__builtin_elementwise_popcount((__v32qu)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256( (__mmask32)__U, (__v32qi)_mm256_popcnt_epi8(__B), (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i)_mm256_setzero_si256(), __U, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_popcnt_epi8(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { return (__m128i)__builtin_elementwise_popcount((__v16qu)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128( (__mmask16)__U, (__v16qi)_mm_popcnt_epi8(__B), (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { return _mm_mask_popcnt_epi8((__m128i)_mm_setzero_si128(), __U, __B); } @@ -131,7 +132,4 @@ _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR - #endif diff --git a/clang/test/CodeGen/X86/avx512bitalg-builtins.c b/clang/test/CodeGen/X86/avx512bitalg-builtins.c index 3ac8674421d93..7d524ab156500 100644 --- a/clang/test/CodeGen/X86/avx512bitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512bitalg-builtins.c @@ -70,4 +70,14 @@ __mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) { // CHECK: @llvm.x86.avx512.vpshufbitqmb.512 return _mm512_bitshuffle_epi64_mask(__A, __B); } +TEST_CONSTEXPR(_mm512_bitshuffle_epi64_mask( + (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85, + 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85}, + (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7, + 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010155ff0101ULL); +TEST_CONSTEXPR(_mm512_mask_bitshuffle_epi64_mask(0xFFFFFFFF00000000ULL, + (__m512i)(__v64qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85, + 1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85}, + (__m512i)(__v64qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7, + 0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff010100000000ULL); diff --git a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c index e0b55c6fde81a..d19d2c28f3649 100644 --- a/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbitalg-builtins.c @@ -116,6 +116,13 @@ __mmask32 test_mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { // CHECK: @llvm.x86.avx512.vpshufbitqmb.256 return _mm256_bitshuffle_epi64_mask(__A, __B); } +TEST_CONSTEXPR(_mm256_bitshuffle_epi64_mask( + (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85}, + (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0101); + +TEST_CONSTEXPR(_mm256_mask_bitshuffle_epi64_mask(0xFFFF0000, + (__m256i)(__v32qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128, -1,0,0,0,0,0,0,0, 85,85,85,85,85,85,85,85}, + (__m256i)(__v32qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56, 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,7}) == 0x55ff0000); __mmask16 test_mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_bitshuffle_epi64_mask @@ -129,4 +136,11 @@ __mmask16 test_mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { // CHECK: @llvm.x86.avx512.vpshufbitqmb.128 return _mm_bitshuffle_epi64_mask(__A, __B); } +TEST_CONSTEXPR(_mm_bitshuffle_epi64_mask( + (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128}, + (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0101); + +TEST_CONSTEXPR(_mm_mask_bitshuffle_epi64_mask(0xFF00, + (__m128i)(__v16qi){1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,-128}, + (__m128i)(__v16qi){0,1,2,3,4,5,6,7, 63,62,61,60,59,58,57,56}) == 0x0100); >From 457e5481c9b54d35d352e851f4b61764bc2621f2 Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki <[email protected]> Date: Thu, 20 Nov 2025 09:45:55 -0800 Subject: [PATCH 2/2] Address review feedback - Rename ByteInQWord to ByteIdx for loop counter within qword - Add SelIdx for absolute byte index used in mask selection - Use APInt::insertBits() instead of manual shift/OR for building SourceQWord - Remove unnecessary const from local variables - Move constexpr after __attribute__ in header macros - Minor formatting cleanup --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 29 +++++++++++------------- clang/lib/AST/ExprConstant.cpp | 24 ++++++++------------ clang/lib/Headers/avx512bitalgintrin.h | 5 ++-- clang/lib/Headers/avx512vlbitalgintrin.h | 12 +++++----- 4 files changed, 31 insertions(+), 39 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b36599101d5f1..831c05768f3ec 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3495,36 +3495,33 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC, PrimType ShuffleMaskElemT = *S.getContext().classify(ShuffleMaskVecT->getElementType()); - const unsigned NumBytesInQWord = 8; - const unsigned NumBitsInByte = 8; - const unsigned NumBytes = SourceVecT->getNumElements(); - const unsigned NumQWords = NumBytes / NumBytesInQWord; - const unsigned RetWidth = ZeroMask.getBitWidth(); + unsigned NumBytesInQWord = 8; + unsigned NumBitsInByte = 8; + unsigned NumBytes = SourceVecT->getNumElements(); + unsigned NumQWords = NumBytes / NumBytesInQWord; + unsigned RetWidth = ZeroMask.getBitWidth(); APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { - APInt SourceQWord(64, 0); - for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; - ++ByteInQWord) { + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { uint64_t Byte = 0; INT_TYPE_SWITCH(SourceElemT, { Byte = static_cast<uint64_t>( - Source.elem<T>(QWordId * NumBytesInQWord + ByteInQWord)); + Source.elem<T>(QWordId * NumBytesInQWord + ByteIdx)); }); - SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte); + SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); } - for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; - ++ByteInQWord) { - unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord; + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx; unsigned M = 0; INT_TYPE_SWITCH(ShuffleMaskElemT, { - M = static_cast<unsigned>(ShuffleMask.elem<T>(ByteIdx)) & 0x3F; + M = static_cast<unsigned>(ShuffleMask.elem<T>(SelIdx)) & 0x3F; }); - if (ZeroMask[ByteIdx]) { - RetMask.setBitVal(ByteIdx, SourceQWord[M]); + if (ZeroMask[SelIdx]) { + RetMask.setBitVal(SelIdx, SourceQWord[M]); } } } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 962d4b91c1d45..60f3245483d29 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16729,24 +16729,20 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, APSInt RetMask(llvm::APInt(RetWidth, 0), /*isUnsigned=*/true); for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { - APInt SourceQWord(64, 0); - for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; - ++ByteInQWord) { - uint64_t Byte = - Source.getVectorElt(QWordId * NumBytesInQWord + ByteInQWord) - .getInt() - .getZExtValue(); - SourceQWord |= (Byte & 0xFF) << (ByteInQWord * NumBitsInByte); + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + uint64_t Byte = Source.getVectorElt(QWordId * NumBytesInQWord + ByteIdx) + .getInt() + .getZExtValue(); + SourceQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); } - for (unsigned ByteInQWord = 0; ByteInQWord != NumBytesInQWord; - ++ByteInQWord) { - unsigned ByteIdx = QWordId * NumBytesInQWord + ByteInQWord; + for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) { + unsigned SelIdx = QWordId * NumBytesInQWord + ByteIdx; unsigned M = - ShuffleMask.getVectorElt(ByteIdx).getInt().getZExtValue() & 0x3F; - if (ZeroMask[ByteIdx]) { - RetMask.setBitVal(ByteIdx, SourceQWord[M]); + ShuffleMask.getVectorElt(SelIdx).getInt().getZExtValue() & 0x3F; + if (ZeroMask[SelIdx]) { + RetMask.setBitVal(SelIdx, SourceQWord[M]); } } } diff --git a/clang/lib/Headers/avx512bitalgintrin.h b/clang/lib/Headers/avx512bitalgintrin.h index 6a3e47814cb93..f5e9b1a84fcfe 100644 --- a/clang/lib/Headers/avx512bitalgintrin.h +++ b/clang/lib/Headers/avx512bitalgintrin.h @@ -17,9 +17,8 @@ /* Define the default attributes for the functions in this file. */ #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS \ - constexpr \ - __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512bitalg"), __min_vector_width__(512))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \ + __min_vector_width__(512))) constexpr #else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), \ diff --git a/clang/lib/Headers/avx512vlbitalgintrin.h b/clang/lib/Headers/avx512vlbitalgintrin.h index 624630f76e484..edfb9c1e1f241 100644 --- a/clang/lib/Headers/avx512vlbitalgintrin.h +++ b/clang/lib/Headers/avx512vlbitalgintrin.h @@ -17,13 +17,13 @@ /* Define the default attributes for the functions in this file. */ #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS128 \ - constexpr __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bitalg"), \ - __min_vector_width__(128))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(128))) constexpr #define __DEFAULT_FN_ATTRS256 \ - constexpr __attribute__((__always_inline__, __nodebug__, \ - __target__("avx512vl,avx512bitalg"), \ - __min_vector_width__(256))) + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512bitalg"), \ + __min_vector_width__(256))) constexpr #else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
