https://github.com/tbaederr created https://github.com/llvm/llvm-project/pull/130420
LLVM has recently started to use `__builtin_memchr` at compile time, so implement this. Still needs some work but the basics are done. >From 956594d8c47169a9f45eb2aae03085f79d295390 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbae...@redhat.com> Date: Sat, 8 Mar 2025 16:11:37 +0100 Subject: [PATCH] [clang][bytecode] Implement __builtin_{memchr,strchr,char_memchr} --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 107 +++++++++++++++- clang/test/AST/ByteCode/builtin-functions.cpp | 118 ++++++++++++++++++ 2 files changed, 224 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 00f99745862ee..b8c4ef2f48a79 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -1960,13 +1960,103 @@ static bool interp__builtin_memcmp(InterpState &S, CodePtr OpPC, // However, if we read all the available bytes but were instructed to read // even more, diagnose this as a "read of dereferenced one-past-the-end - // pointer". This is what would happen if we called CheckRead() on every array + // pointer". This is what would happen if we called CheckLoad() on every array // element. 
S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_past_end) << AK_Read << S.Current->getRange(OpPC); return false; } +static bool interp__builtin_memchr(InterpState &S, CodePtr OpPC, + const InterpFrame *Frame, + const Function *Func, const CallExpr *Call) { + unsigned ID = Func->getBuiltinID(); + if (ID == Builtin::BImemchr || ID == Builtin::BIwcschr || + ID == Builtin::BIstrchr || ID == Builtin::BIwmemchr) + diagnoseNonConstexprBuiltin(S, OpPC, ID); + + const Pointer &Ptr = getParam<Pointer>(Frame, 0); + APSInt Desired; + std::optional<APSInt> MaxLength; + if (Call->getNumArgs() == 3) { + MaxLength = + peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(2)), 0); + Desired = peekToAPSInt( + S.Stk, *S.getContext().classify(Call->getArg(1)), + align(primSize(*S.getContext().classify(Call->getArg(2)))) + + align(primSize(*S.getContext().classify(Call->getArg(1))))); + } else { + Desired = peekToAPSInt(S.Stk, *S.getContext().classify(Call->getArg(1))); + } + + if (MaxLength && MaxLength->isZero()) { + S.Stk.push<Pointer>(); + return true; + } + + if (Ptr.isDummy()) + return false; + + // Null is only okay if the given size is 0. + if (Ptr.isZero()) { + S.FFDiag(S.Current->getSource(OpPC), diag::note_constexpr_access_null) + << AK_Read; + return false; + } + + QualType ElemTy = Ptr.getFieldDesc()->isArray() + ? Ptr.getFieldDesc()->getElemQualType() + : Ptr.getFieldDesc()->getType(); + bool IsRawByte = ID == Builtin::BImemchr || ID == Builtin::BI__builtin_memchr; + + // Give up on byte-oriented matching against multibyte elements. + if (IsRawByte && !isOneByteCharacterType(ElemTy)) { + S.FFDiag(S.Current->getSource(OpPC), + diag::note_constexpr_memchr_unsupported) + << S.getASTContext().BuiltinInfo.getQuotedName(ID) << ElemTy; + return false; + } + + if (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr) { + // strchr compares directly to the passed integer, and therefore + // always fails if given an int that is not a char. 
+ if (Desired != + Desired.trunc(S.getASTContext().getCharWidth()).getSExtValue()) { + S.Stk.push<Pointer>(); + return true; + } + } + + uint64_t DesiredVal = + Desired.trunc(S.getASTContext().getCharWidth()).getZExtValue(); + bool StopAtZero = + (ID == Builtin::BIstrchr || ID == Builtin::BI__builtin_strchr); + + size_t Index = Ptr.getIndex(); + for (;;) { + const Pointer &ElemPtr = Index > 0 ? Ptr.atIndex(Index) : Ptr; + + if (!CheckLoad(S, OpPC, ElemPtr)) + return false; + + unsigned char V = static_cast<unsigned char>(ElemPtr.deref<char>()); + if (V == DesiredVal) { + S.Stk.push<Pointer>(ElemPtr); + return true; + } + + if (StopAtZero && V == 0) + break; + + ++Index; + if (MaxLength && Index == MaxLength->getZExtValue()) + break; + } + + S.Stk.push<Pointer>(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, const CallExpr *Call, uint32_t BuiltinID) { const InterpFrame *Frame = S.Current; @@ -2445,6 +2535,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, return false; break; + case Builtin::BImemchr: + case Builtin::BI__builtin_memchr: + case Builtin::BIstrchr: + case Builtin::BI__builtin_strchr: +#if 0 + case Builtin::BIwcschr: + case Builtin::BI__builtin_wcschr: + case Builtin::BImemchr: + case Builtin::BI__builtin_wmemchr: +#endif + case Builtin::BI__builtin_char_memchr: + if (!interp__builtin_memchr(S, OpPC, Frame, F, Call)) + return false; + break; + default: S.FFDiag(S.Current->getLocation(OpPC), diag::note_invalid_subexpr_in_const_expr) diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index 75380f99901a2..dbff9164a91c1 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -18,6 +18,8 @@ extern "C" { typedef decltype(sizeof(int)) size_t; extern size_t wcslen(const wchar_t *p); + extern void *memchr(const void *s, int c, size_t n); + extern char *strchr(const char *s, int 
c); } namespace strcmp { @@ -1351,3 +1353,119 @@ namespace Memcmp { static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 6) == -1); static_assert(__builtin_wmemcmp(L"abab\0banana", L"abab\0canada", 5) == 0); } + +namespace Memchr { + constexpr const char *kStr = "abca\xff\0d"; + constexpr char kFoo[] = {'f', 'o', 'o'}; + + static_assert(__builtin_memchr(kStr, 'a', 0) == nullptr); + static_assert(__builtin_memchr(kStr, 'a', 1) == kStr); + static_assert(__builtin_memchr(kStr, '\0', 5) == nullptr); + static_assert(__builtin_memchr(kStr, '\0', 6) == kStr + 5); + static_assert(__builtin_memchr(kStr, '\xff', 8) == kStr + 4); + static_assert(__builtin_memchr(kStr, '\xff' + 256, 8) == kStr + 4); + static_assert(__builtin_memchr(kStr, '\xff' - 256, 8) == kStr + 4); + static_assert(__builtin_memchr(kFoo, 'x', 3) == nullptr); + static_assert(__builtin_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced one-past-the-end}} + static_assert(__builtin_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced null}} + static_assert(__builtin_memchr(nullptr, 'x', 0) == nullptr); + + +#if defined(CHAR8_T) + constexpr const char8_t *kU8Str = u8"abca\xff\0d"; + constexpr char8_t kU8Foo[] = {u8'f', u8'o', u8'o'}; + static_assert(__builtin_memchr(kU8Str, u8'a', 0) == nullptr); + static_assert(__builtin_memchr(kU8Str, u8'a', 1) == kU8Str); + static_assert(__builtin_memchr(kU8Str, u8'\0', 5) == nullptr); + static_assert(__builtin_memchr(kU8Str, u8'\0', 6) == kU8Str + 5); + static_assert(__builtin_memchr(kU8Str, u8'\xff', 8) == kU8Str + 4); + static_assert(__builtin_memchr(kU8Str, u8'\xff' + 256, 8) == kU8Str + 4); + static_assert(__builtin_memchr(kU8Str, u8'\xff' - 256, 8) == kU8Str + 4); + static_assert(__builtin_memchr(kU8Foo, u8'x', 3) == nullptr); + static_assert(__builtin_memchr(kU8Foo, u8'x', 4) == nullptr); // both-error {{not an integral constant}} \ + // 
both-note {{dereferenced one-past-the-end}} + static_assert(__builtin_memchr(nullptr, u8'x', 3) == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced null}} + static_assert(__builtin_memchr(nullptr, u8'x', 0) == nullptr); +#endif + + extern struct Incomplete incomplete; + static_assert(__builtin_memchr(&incomplete, 0, 0u) == nullptr); + static_assert(__builtin_memchr(&incomplete, 0, 1u) == nullptr); // both-error {{not an integral constant}} \ + // ref-note {{read of incomplete type 'struct Incomplete'}} + + const unsigned char &u1 = 0xf0; + auto &&i1 = (const signed char []){-128}; + static_assert(__builtin_memchr(&u1, -(0x0f + 1), 1) == &u1); + static_assert(__builtin_memchr(i1, 0x80, 1) == i1); + + enum class E : unsigned char {}; + struct EPair { E e, f; }; + constexpr EPair ee{E{240}}; + static_assert(__builtin_memchr(&ee.e, 240, 1) == &ee.e); // both-error {{constant}} \ + // both-note {{not supported}} + + constexpr bool kBool[] = {false, true, false}; + constexpr const bool *const kBoolPastTheEndPtr = kBool + 3; + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, 1, 99) == kBool + 1); // both-error {{constant}} \ + // both-note {{not supported}} + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBool + 1, 0, 99) == kBoolPastTheEndPtr - 1); // both-error {{constant}} \ + // both-note {{not supported}} + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr - 3, -1, 3) == nullptr); // both-error {{constant}} \ + // both-note {{not supported}} + static_assert(sizeof(bool) != 1u || __builtin_memchr(kBoolPastTheEndPtr, 0, 1) == nullptr); // both-error {{constant}} \ + // both-note {{not supported}} + + static_assert(__builtin_char_memchr(kStr, 'a', 0) == nullptr); + static_assert(__builtin_char_memchr(kStr, 'a', 1) == kStr); + static_assert(__builtin_char_memchr(kStr, '\0', 5) == nullptr); + static_assert(__builtin_char_memchr(kStr, '\0', 6) == kStr + 5); + 
static_assert(__builtin_char_memchr(kStr, '\xff', 8) == kStr + 4); + static_assert(__builtin_char_memchr(kStr, '\xff' + 256, 8) == kStr + 4); + static_assert(__builtin_char_memchr(kStr, '\xff' - 256, 8) == kStr + 4); + static_assert(__builtin_char_memchr(kFoo, 'x', 3) == nullptr); + static_assert(__builtin_char_memchr(kFoo, 'x', 4) == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced one-past-the-end}} + static_assert(__builtin_char_memchr(nullptr, 'x', 3) == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced null}} + static_assert(__builtin_char_memchr(nullptr, 'x', 0) == nullptr); + + static_assert(*__builtin_char_memchr(kStr, '\xff', 8) == '\xff'); + constexpr bool char_memchr_mutable() { + char buffer[] = "mutable"; + *__builtin_char_memchr(buffer, 't', 8) = 'r'; + *__builtin_char_memchr(buffer, 'm', 8) = 'd'; + return __builtin_strcmp(buffer, "durable") == 0; + } + static_assert(char_memchr_mutable()); + + constexpr bool b = !memchr("hello", 'h', 3); // both-error {{constant expression}} \ + // both-note {{non-constexpr function 'memchr' cannot be used in a constant expression}} + +} + +namespace Strchr { + constexpr const char *kStr = "abca\xff\0d"; + constexpr char kFoo[] = {'f', 'o', 'o'}; + static_assert(__builtin_strchr(kStr, 'a') == kStr); + static_assert(__builtin_strchr(kStr, 'b') == kStr + 1); + static_assert(__builtin_strchr(kStr, 'c') == kStr + 2); + static_assert(__builtin_strchr(kStr, 'd') == nullptr); + static_assert(__builtin_strchr(kStr, 'e') == nullptr); + static_assert(__builtin_strchr(kStr, '\0') == kStr + 5); + static_assert(__builtin_strchr(kStr, 'a' + 256) == nullptr); + static_assert(__builtin_strchr(kStr, 'a' - 256) == nullptr); + static_assert(__builtin_strchr(kStr, '\xff') == kStr + 4); + static_assert(__builtin_strchr(kStr, '\xff' + 256) == nullptr); + static_assert(__builtin_strchr(kStr, '\xff' - 256) == nullptr); + static_assert(__builtin_strchr(kFoo, 'o') == kFoo 
+ 1); + static_assert(__builtin_strchr(kFoo, 'x') == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced one-past-the-end}} + static_assert(__builtin_strchr(nullptr, 'x') == nullptr); // both-error {{not an integral constant}} \ + // both-note {{dereferenced null}} + + constexpr bool a = !strchr("hello", 'h'); // both-error {{constant expression}} \ + // both-note {{non-constexpr function 'strchr' cannot be used in a constant expression}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits