https://github.com/chaitanyav updated 
https://github.com/llvm/llvm-project/pull/168995

>From 4a5b92c667bc8a2a2346463bd34041c280213aee Mon Sep 17 00:00:00 2001
From: NagaChaitanya Vellanki <[email protected]>
Date: Thu, 20 Nov 2025 16:42:22 -0800
Subject: [PATCH] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin
 - Allow AVX512 VPMULTISHIFTQB intrinsics to be used in constexpr

Resolves: #167477
---
 clang/include/clang/Basic/BuiltinsX86.td      |   6 +-
 clang/lib/AST/ByteCode/InterpBuiltin.cpp      |  63 +++++++++
 clang/lib/AST/ExprConstant.cpp                |  45 ++++++
 clang/lib/Headers/avx512vbmiintrin.h          |  38 +++--
 clang/lib/Headers/avx512vbmivlintrin.h        |  72 +++++-----
 clang/test/CodeGen/X86/avx512vbmi-builtins.c  | 133 +++++++++++++++++-
 clang/test/CodeGen/X86/avx512vbmivl-builtin.c |  94 ++++++++++++-
 7 files changed, 378 insertions(+), 73 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b760c3e06b8f7..93fb511a508f3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -3358,15 +3358,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
   def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned 
int, _Constant int)">;
 }
 
-let Features = "avx512vbmi", Attributes = [NoThrow, Const, 
RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<512>] in {
   def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, 
_Vector<64, char>)">;
 }
 
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<128>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<128>] in {
   def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, 
_Vector<16, char>)">;
 }
 
-let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, 
RequiredVectorWidth<256>] in {
+let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, 
RequiredVectorWidth<256>] in {
   def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, 
_Vector<32, char>)">;
 }
 
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index eba71d66bc4d6..5be31239fd597 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3468,6 +3468,65 @@ static bool interp__builtin_ia32_shuffle_generic(
   return true;
 }
 
+static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC, // Evaluates a two-operand multishift builtin; see the vpmultishiftqb128/256/512 cases in InterpretBuiltin.
+                                              const CallExpr *Call) { // Returns false if either operand is not a vector type, true once Dst is filled.
+  assert(Call->getNumArgs() == 2);
+
+  QualType ATy = Call->getArg(0)->getType();
+  QualType BTy = Call->getArg(1)->getType();
+  if (!ATy->isVectorType() || !BTy->isVectorType()) { // Bail out before anything is popped from the stack.
+    return false;
+  }
+
+  const Pointer &BPtr = S.Stk.pop<Pointer>(); // Operands come off the stack in reverse: B first, then A.
+  const Pointer &APtr = S.Stk.pop<Pointer>();
+  const auto *AVecT = ATy->castAs<VectorType>();
+  const auto *BVecT = BTy->castAs<VectorType>();
+  assert(AVecT->getNumElements() == BVecT->getNumElements());
+
+  PrimType ElemT = *S.getContext().classify(AVecT->getElementType()); // A's element type; also used below to read B and to write Dst.
+
+  unsigned NumBytesInQWord = 8;
+  unsigned NumBitsInByte = 8;
+  unsigned NumBytes = AVecT->getNumElements();
+  unsigned NumQWords = NumBytes / NumBytesInQWord; // Elements are handled in groups of 8, one 64-bit word per group.
+  const Pointer &Dst = S.Stk.peek<Pointer>(); // Result vector is peeked, not popped, so it stays on the stack.
+
+  for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { // Each 64-bit group is computed independently of the others.
+    APInt AQWord(64, 0); // Packed control bytes from A (element 0 of the group in bits 0-7).
+    APInt BQWord(64, 0); // Packed data bytes from B, same layout; the controls select bits out of this word.
+    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+      unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
+      uint64_t Byte = 0;
+      INT_TYPE_SWITCH(ElemT, {
+        Byte = static_cast<uint64_t>(APtr.elem<T>(Idx));
+        AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); // Only the low 8 bits of each element are kept.
+
+        Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
+        BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
+      });
+    }
+
+    for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+      uint64_t Ctrl =
+          AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & 0x3F; // Low 6 bits of the control byte: starting bit position 0-63 in BQWord.
+
+      APInt Byte(8, 0);
+      for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
+        Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), BitIdx); // Result bit BitIdx = data bit (Ctrl + BitIdx) mod 64, wrapping past bit 63 to bit 0.
+      }
+      INT_TYPE_SWITCH(ElemT, {
+        Dst.elem<T>(QWordId * NumBytesInQWord + ByteIdx) =
+            T::from(Byte.getZExtValue()); // Store the assembled 8-bit value at the same element index in Dst.
+      });
+    }
+  }
+
+  Dst.initializeAllElements();
+
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4669,6 +4728,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
           return std::make_pair(0, static_cast<int>(LaneOffset + Index));
         });
 
+  case X86::BI__builtin_ia32_vpmultishiftqb128:
+  case X86::BI__builtin_ia32_vpmultishiftqb256:
+  case X86::BI__builtin_ia32_vpmultishiftqb512:
+    return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
   case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ce5301f17b3e7..21a664fefde49 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13062,6 +13062,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
     return Success(R, E);
   }
 
+  case X86::BI__builtin_ia32_vpmultishiftqb128:
+  case X86::BI__builtin_ia32_vpmultishiftqb256:
+  case X86::BI__builtin_ia32_vpmultishiftqb512: { // One shared implementation for the 128-, 256- and 512-bit multishift builtins.
+    assert(E->getNumArgs() == 2);
+
+    APValue A, B; // A supplies the per-byte control values, B the data bits (see the loops below).
+    if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1))) // Give up unless both operands evaluate.
+      return false;
+
+    assert(A.getVectorLength() == B.getVectorLength());
+    unsigned NumBytesInQWord = 8;
+    unsigned NumBitsInByte = 8;
+    unsigned NumBytes = A.getVectorLength();
+    unsigned NumQWords = NumBytes / NumBytesInQWord; // Elements are handled in groups of 8, one 64-bit word per group.
+    SmallVector<APValue, 64> Result;
+    Result.reserve(NumBytes);
+
+    for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) { // Each 64-bit group is computed independently of the others.
+      APInt AQWord(64, 0); // Packed control bytes from A (element 0 of the group in bits 0-7).
+      APInt BQWord(64, 0); // Packed data bytes from B, same layout.
+      for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+        unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
+        uint64_t Byte = A.getVectorElt(Idx).getInt().getZExtValue();
+        AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte); // Only the low 8 bits of each element are kept.
+
+        Byte = B.getVectorElt(Idx).getInt().getZExtValue();
+        BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
+      }
+
+      for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
+        uint64_t Ctrl =
+            AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() &
+            0x3F; // Low 6 bits of the control byte: starting bit position 0-63 in BQWord.
+
+        APInt Byte(8, 0);
+        for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
+          Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), // Result bit BitIdx = data bit (Ctrl + BitIdx) mod 64, wrapping past bit 63 to bit 0.
+                          BitIdx);
+        }
+        Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true))); // Each result element is pushed as an unsigned 8-bit integer.
+      }
+    }
+    return Success(APValue(Result.data(), Result.size()), E); // Result holds NumQWords * 8 elements, in element order.
+  }
+
   case X86::BI__builtin_ia32_phminposuw128: {
     APValue Source;
     if (!Evaluate(Source, Info, E->getArg(0)))
diff --git a/clang/lib/Headers/avx512vbmiintrin.h b/clang/lib/Headers/avx512vbmiintrin.h
index 84fda5c5849e8..5ac78f0849c26 100644
--- a/clang/lib/Headers/avx512vbmiintrin.h
+++ b/clang/lib/Headers/avx512vbmiintrin.h
@@ -15,61 +15,57 @@
 #define __VBMIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
 #define __DEFAULT_FN_ATTRS                                                     
\
   __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"),     
\
-                 __min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
+                 __min_vector_width__(512))) constexpr
 #else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
+#define __DEFAULT_FN_ATTRS                                                     
\
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"),     
\
+                 __min_vector_width__(512)))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
                                                  (__v64qi) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
-                              __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(
+    __m512i __A, __mmask64 __U, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, 
__B),
                                (__v64qi)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
-                               __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(
+    __m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, 
__B),
                                (__v64qi)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
-                               __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(
+    __mmask64 __U, __m512i __A, __m512i __I, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512(__U,
                                (__v64qi)_mm512_permutex2var_epi8(__A, __I, 
__B),
                                (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                      (__v64qi)_mm512_permutexvar_epi8(__A, 
__B),
                                      (__v64qi)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
-                             __m512i __B) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8(
+    __m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
                                      (__v64qi)_mm512_permutexvar_epi8(__A, 
__B),
                                      (__v64qi)__W);
@@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
                                 (__v64qi)_mm512_multishift_epi64_epi8(__X, 
__Y),
                                 (__v64qi)_mm512_setzero_si512());
 }
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
+
 #undef __DEFAULT_FN_ATTRS
 #endif
diff --git a/clang/lib/Headers/avx512vbmivlintrin.h b/clang/lib/Headers/avx512vbmivlintrin.h
index 58a48dadff863..40a67bd63ca49 100644
--- a/clang/lib/Headers/avx512vbmivlintrin.h
+++ b/clang/lib/Headers/avx512vbmivlintrin.h
@@ -15,6 +15,16 @@
 #define __VBMIVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512vbmi,avx512vl"),                            
\
+                 __min_vector_width__(128))) constexpr
+#define __DEFAULT_FN_ATTRS256                                                  
\
+  __attribute__((__always_inline__, __nodebug__,                               
\
+                 __target__("avx512vbmi,avx512vl"),                            
\
+                 __min_vector_width__(256))) constexpr
+#else
 #define __DEFAULT_FN_ATTRS128                                                  
\
   __attribute__((__always_inline__, __nodebug__,                               
\
                  __target__("avx512vbmi,avx512vl"),                            
\
@@ -23,111 +33,96 @@
   __attribute__((__always_inline__, __nodebug__,                               
\
                  __target__("avx512vbmi,avx512vl"),                            
\
                  __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#else
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
 #endif
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
                                                  (__v16qi)__I,
                                                  (__v16qi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
-                           __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(
+    __m128i __A, __mmask16 __U, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, 
__B),
                                   (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
-                            __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(
+    __m128i __A, __m128i __I, __mmask16 __U, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, 
__B),
                                   (__v16qi)__I);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
-                            __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(
+    __mmask16 __U, __m128i __A, __m128i __I, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128(__U,
                                   (__v16qi)_mm_permutex2var_epi8(__A, __I, 
__B),
                                   (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
                                                  (__v32qi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
-                              __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(
+    __m256i __A, __mmask32 __U, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, 
__B),
                                (__v32qi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
-                               __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(
+    __m256i __A, __m256i __I, __mmask32 __U, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, 
__B),
                                (__v32qi)__I);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
-                               __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(
+    __mmask32 __U, __m256i __A, __m256i __I, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256(__U,
                                (__v32qi)_mm256_permutex2var_epi8(__A, __I, 
__B),
                                (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutexvar_epi8(__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                         (__v16qi)_mm_permutexvar_epi8(__A, 
__B),
                                         (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
-                          __m128i __B) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8(
+    __m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                         (__v16qi)_mm_permutexvar_epi8(__A, 
__B),
                                         (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                      (__v32qi)_mm256_permutexvar_epi8(__A, 
__B),
                                      (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
-_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
-                             __m256i __B) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8(
+    __m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
                                      (__v32qi)_mm256_permutexvar_epi8(__A, 
__B),
                                      (__v32qi)__W);
@@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
                                 (__v32qi)_mm256_setzero_si256());
 }
 
-#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
-#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
 #undef __DEFAULT_FN_ATTRS128
 #undef __DEFAULT_FN_ATTRS256
-
 #endif
diff --git a/clang/test/CodeGen/X86/avx512vbmi-builtins.c b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
index 7d506db92faeb..fcce58b63737b 100644
--- a/clang/test/CodeGen/X86/avx512vbmi-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi-builtins.c
@@ -211,18 +211,145 @@ __m512i test_mm512_mask_multishift_epi64_epi8(__m512i __W, __mmask64 __M, __m512
   // CHECK-LABEL: test_mm512_mask_multishift_epi64_epi8
   // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> 
%{{.*}}, <64 x i8> %{{.*}})
   // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-  return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y); 
+  return _mm512_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v64qu(
+    _mm512_mask_multishift_epi64_epi8(
+        (__m512i)(__v64qu){
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+            0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+        0xAAAAAAAAAAAAAAAAULL,
+        (__m512i)(__v64qu){
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56},
+        (__m512i)(__v64qu){
+            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+            0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+            0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+            0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+            0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+    0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+    0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18,
+    0xFF, 0x22, 0xFF, 0x24, 0xFF, 0x26, 0xFF, 0x28,
+    0xFF, 0x32, 0xFF, 0x34, 0xFF, 0x36, 0xFF, 0x38,
+    0xFF, 0x42, 0xFF, 0x44, 0xFF, 0x46, 0xFF, 0x48,
+    0xFF, 0x52, 0xFF, 0x54, 0xFF, 0x56, 0xFF, 0x58,
+    0xFF, 0x62, 0xFF, 0x64, 0xFF, 0x66, 0xFF, 0x68,
+    0xFF, 0x72, 0xFF, 0x74, 0xFF, 0x76, 0xFF, 0x78));
+
 __m512i test_mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, 
__m512i __Y) {
   // CHECK-LABEL: test_mm512_maskz_multishift_epi64_epi8
   // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> 
%{{.*}}, <64 x i8> %{{.*}})
   // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
-  return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y); 
+  return _mm512_maskz_multishift_epi64_epi8(__M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v64qu(
+    _mm512_maskz_multishift_epi64_epi8(
+        0x5555555555555555ULL,
+        (__m512i)(__v64qu){
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56},
+        (__m512i)(__v64qu){
+            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+            0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+            0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+            0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+            0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+    0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+    0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0,
+    0x21, 0, 0x23, 0, 0x25, 0, 0x27, 0,
+    0x31, 0, 0x33, 0, 0x35, 0, 0x37, 0,
+    0x41, 0, 0x43, 0, 0x45, 0, 0x47, 0,
+    0x51, 0, 0x53, 0, 0x55, 0, 0x57, 0,
+    0x61, 0, 0x63, 0, 0x65, 0, 0x67, 0,
+    0x71, 0, 0x73, 0, 0x75, 0, 0x77, 0));
+
 __m512i test_mm512_multishift_epi64_epi8(__m512i __X, __m512i __Y) {
   // CHECK-LABEL: test_mm512_multishift_epi64_epi8
   // CHECK: call <64 x i8> @llvm.x86.avx512.pmultishift.qb.512(<64 x i8> 
%{{.*}}, <64 x i8> %{{.*}})
-  return _mm512_multishift_epi64_epi8(__X, __Y); 
+  return _mm512_multishift_epi64_epi8(__X, __Y);
 }
+
+TEST_CONSTEXPR(match_v64qu(
+    _mm512_multishift_epi64_epi8(
+        (__m512i)(__v64qu){
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56,
+            0, 8, 16, 24, 32, 40, 48, 56},
+        (__m512i)(__v64qu){
+            0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+            0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+            0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+            0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+            0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+            0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+            0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+            0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78}),
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+    0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+    0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78));
+
+TEST_CONSTEXPR(match_v64qu(
+    _mm512_multishift_epi64_epi8(
+        (__m512i)(__v64qu){
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4,
+            4, 4, 4, 4, 4, 4, 4, 4},
+        (__m512i)(__v64qu){
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE,
+            0x10, 0x32, 0x54, 0x76, 0x98, 0xBA, 0xDC, 0xFE}),
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21,
+    0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21, 0x21));
diff --git a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
index 49b7a1a721195..f55ab4154c752 100644
--- a/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
+++ b/clang/test/CodeGen/X86/avx512vbmivl-builtin.c
@@ -166,39 +166,121 @@ __m128i test_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i _
   // CHECK-LABEL: test_mm_mask_multishift_epi64_epi8
   // CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> 
%{{.*}}, <16 x i8> %{{.*}})
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
-  return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y); 
+  return _mm_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v16qu(
+    _mm_mask_multishift_epi64_epi8(
+        (__m128i)(__v16qu){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                          0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+        0xAAAA,
+        (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+    0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+    0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18));
+
 __m128i test_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, 
__m128i __Y) {
   // CHECK-LABEL: test_mm_maskz_multishift_epi64_epi8
   // CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> 
%{{.*}}, <16 x i8> %{{.*}})
   // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
-  return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y); 
+  return _mm_maskz_multishift_epi64_epi8(__M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v16qu(
+    _mm_maskz_multishift_epi64_epi8(
+        0x5555,
+        (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+    0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+    0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0));
+
 __m128i test_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) {
   // CHECK-LABEL: test_mm_multishift_epi64_epi8
   // CHECK: call <16 x i8> @llvm.x86.avx512.pmultishift.qb.128(<16 x i8> 
%{{.*}}, <16 x i8> %{{.*}})
-  return _mm_multishift_epi64_epi8(__X, __Y); 
+  return _mm_multishift_epi64_epi8(__X, __Y);
 }
 
+TEST_CONSTEXPR(match_v16qu(
+    _mm_multishift_epi64_epi8(
+        (__m128i)(__v16qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m128i)(__v16qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18}),
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18));
+
 __m256i test_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, 
__m256i __X, __m256i __Y) {
   // CHECK-LABEL: test_mm256_mask_multishift_epi64_epi8
   // CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> 
%{{.*}}, <32 x i8> %{{.*}})
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
-  return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y); 
+  return _mm256_mask_multishift_epi64_epi8(__W, __M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v32qu(
+    _mm256_mask_multishift_epi64_epi8(
+        (__m256i)(__v32qu){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                          0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                          0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+                          0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF},
+        0xAAAAAAAA,
+        (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+                          0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+                          0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+    0xFF, 0x02, 0xFF, 0x04, 0xFF, 0x06, 0xFF, 0x08,
+    0xFF, 0x12, 0xFF, 0x14, 0xFF, 0x16, 0xFF, 0x18,
+    0xFF, 0x22, 0xFF, 0x24, 0xFF, 0x26, 0xFF, 0x28,
+    0xFF, 0x32, 0xFF, 0x34, 0xFF, 0x36, 0xFF, 0x38));
+
 __m256i test_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, 
__m256i __Y) {
   // CHECK-LABEL: test_mm256_maskz_multishift_epi64_epi8
   // CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> 
%{{.*}}, <32 x i8> %{{.*}})
   // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
-  return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y); 
+  return _mm256_maskz_multishift_epi64_epi8(__M, __X, __Y);
 }
 
+TEST_CONSTEXPR(match_v32qu(
+    _mm256_maskz_multishift_epi64_epi8(
+        0x55555555,
+        (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+                          0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+                          0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+    0x01, 0, 0x03, 0, 0x05, 0, 0x07, 0,
+    0x11, 0, 0x13, 0, 0x15, 0, 0x17, 0,
+    0x21, 0, 0x23, 0, 0x25, 0, 0x27, 0,
+    0x31, 0, 0x33, 0, 0x35, 0, 0x37, 0));
+
 __m256i test_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) {
   // CHECK-LABEL: test_mm256_multishift_epi64_epi8
   // CHECK: call <32 x i8> @llvm.x86.avx512.pmultishift.qb.256(<32 x i8> 
%{{.*}}, <32 x i8> %{{.*}})
-  return _mm256_multishift_epi64_epi8(__X, __Y); 
+  return _mm256_multishift_epi64_epi8(__X, __Y);
 }
 
+TEST_CONSTEXPR(match_v32qu(
+    _mm256_multishift_epi64_epi8(
+        (__m256i)(__v32qu){0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56,
+                          0, 8, 16, 24, 32, 40, 48, 56},
+        (__m256i)(__v32qu){0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+                          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+                          0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+                          0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38}),
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+    0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+    0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38));
+

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to