Author: Corentin Jabot Date: 2022-10-24T18:10:43+02:00 New Revision: d0d2772379bd89f1dce3c456520272678cf4b966
URL: https://github.com/llvm/llvm-project/commit/d0d2772379bd89f1dce3c456520272678cf4b966 DIFF: https://github.com/llvm/llvm-project/commit/d0d2772379bd89f1dce3c456520272678cf4b966.diff LOG: [Clang] Implement P2513 Implement P2513 This change allows initializing an array of char or unsigned char from a u8 string literal. This was done both to support legacy code and for compatibility with C, where char8_t will be a typedef for unsigned char. This is backported to C++20 as per WG21 guidance. Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D136449 Added: Modified: clang/docs/ReleaseNotes.rst clang/include/clang/Basic/DiagnosticSemaKinds.td clang/lib/Frontend/InitPreprocessor.cpp clang/lib/Sema/SemaInit.cpp clang/test/Lexer/cxx-features.cpp clang/test/SemaCXX/char8_t.cpp clang/test/SemaCXX/cxx2a-compat.cpp clang/www/cxx_status.html Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 68cee534513a..49ef53f33877 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -557,6 +557,8 @@ C++2b Feature Support - Support label at end of compound statement (`P2324 <https://wg21.link/p2324r2>`_). - Implemented `P1169R4: static operator() <https://wg21.link/P1169R4>`_. +- Implemented "char8_t Compatibility and Portability Fix" (`P2513R3 <https://wg21.link/P2513R3>`_). + This Change was applied to C++20 as a Defect Report. CUDA/HIP Language Changes in Clang ---------------------------------- @@ -654,8 +656,8 @@ libclang the behavior of ``QualType::getNonReferenceType`` for ``CXType``. - Introduced the new function ``clang_CXXMethod_isDeleted``, which queries whether the method is declared ``= delete``. 
-- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, - ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and +- ``clang_Cursor_getNumTemplateArguments``, ``clang_Cursor_getTemplateArgumentKind``, + ``clang_Cursor_getTemplateArgumentType``, ``clang_Cursor_getTemplateArgumentValue`` and ``clang_Cursor_getTemplateArgumentUnsignedValue`` now work on struct, class, and partial template specialization cursors in addition to function cursors. diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index b1d475772502..8cf73784d97b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6868,8 +6868,8 @@ def err_array_init_plain_string_into_char8_t : Error< def note_array_init_plain_string_into_char8_t : Note< "add 'u8' prefix to form a 'char8_t' string literal">; def err_array_init_utf8_string_into_char : Error< - "%select{|ISO C++20 does not permit }0initialization of char array with " - "UTF-8 string literal%select{ is not permitted by '-fchar8_t'|}0">; + "initialization of %select{|signed }0char array with " + "UTF-8 string literal is not permitted by %select{'-fchar8_t'|C++20}1">; def warn_cxx20_compat_utf8_string : Warning< "type of UTF-8 string literal will change from array of const char to " "array of const char8_t in C++20">, InGroup<CXX20Compat>, DefaultIgnore; diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 2273fb113fb2..96b93dcdf044 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ b/clang/lib/Frontend/InitPreprocessor.cpp @@ -705,7 +705,7 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_named_character_escapes", "202207L"); if (LangOpts.Char8) - Builder.defineMacro("__cpp_char8_t", "201811L"); + Builder.defineMacro("__cpp_char8_t", "202207L"); 
Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); // TS features. diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp index db5580c9e55c..7ebf6997e27e 100644 --- a/clang/lib/Sema/SemaInit.cpp +++ b/clang/lib/Sema/SemaInit.cpp @@ -81,10 +81,20 @@ static StringInitFailureKind IsStringInit(Expr *Init, const ArrayType *AT, const QualType ElemTy = Context.getCanonicalType(AT->getElementType()).getUnqualifiedType(); + auto IsCharOrUnsignedChar = [](const QualType &T) { + const BuiltinType *BT = dyn_cast<BuiltinType>(T.getTypePtr()); + return BT && BT->isCharType() && BT->getKind() != BuiltinType::SChar; + }; + switch (SL->getKind()) { case StringLiteral::UTF8: // char8_t array can be initialized with a UTF-8 string. - if (ElemTy->isChar8Type()) + // - C++20 [dcl.init.string] (DR) + // Additionally, an array of char or unsigned char may be initialized + // by a UTF-8 string literal. + if (ElemTy->isChar8Type() || + (Context.getLangOpts().Char8 && + IsCharOrUnsignedChar(ElemTy.getCanonicalType()))) return SIF_None; [[fallthrough]]; case StringLiteral::Ordinary: @@ -9114,9 +9124,8 @@ bool InitializationSequence::Diagnose(Sema &S, << FixItHint::CreateInsertion(Args.front()->getBeginLoc(), "u8"); break; case FK_UTF8StringIntoPlainChar: - S.Diag(Kind.getLocation(), - diag::err_array_init_utf8_string_into_char) - << S.getLangOpts().CPlusPlus20; + S.Diag(Kind.getLocation(), diag::err_array_init_utf8_string_into_char) + << DestType->isSignedIntegerType() << S.getLangOpts().CPlusPlus20; break; case FK_ArrayTypeMismatch: case FK_NonConstantArrayInit: diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index ee52017a2201..c12f2d20b66c 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -66,9 +66,9 @@ #error "wrong value for __cpp_aggregate_paren_init" #endif -#if defined(CHAR8_T) ? check(char8_t, 201811, 201811, 201811, 201811, 201811, 201811) : \ +#if defined(CHAR8_T) ? 
check(char8_t, 202207, 202207, 202207, 202207, 202207, 202207) : \ defined(NO_CHAR8_T) ? check(char8_t, 0, 0, 0, 0, 0, 0) : \ - check(char8_t, 0, 0, 0, 0, 201811, 201811) + check(char8_t, 0, 0, 0, 0, 202207, 202207) #error "wrong value for __cpp_char8_t" #endif diff --git a/clang/test/SemaCXX/char8_t.cpp b/clang/test/SemaCXX/char8_t.cpp index f60a66dbe84d..5ffa550847de 100644 --- a/clang/test/SemaCXX/char8_t.cpp +++ b/clang/test/SemaCXX/char8_t.cpp @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -fchar8_t -std=c++17 -verify %s -// RUN: %clang_cc1 -std=c++2a -verify %s +// RUN: %clang_cc1 -std=c++2a -verify=expected %s +// RUN: %clang_cc1 -std=c++2a -verify=expected -fno-signed-char %s + char8_t a = u8'a'; char8_t b[] = u8"foo"; @@ -7,15 +9,35 @@ char8_t c = 'a'; char8_t d[] = "foo"; // expected-error {{initializing 'char8_t' array with plain string literal}} expected-note {{add 'u8' prefix}} char e = u8'a'; -char f[] = u8"foo"; -#if __cplusplus <= 201703L -// expected-error@-2 {{initialization of char array with UTF-8 string literal is not permitted by '-fchar8_t'}} -#else -// expected-error@-4 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -#endif char g = 'a'; char h[] = "foo"; +unsigned char i[] = u8"foo"; +unsigned char j[] = { u8"foo" }; +char k[] = u8"foo"; +char l[] = { u8"foo" }; +signed char m[] = u8"foo"; // expected-error {{initialization of char array with UTF-8 string literal is not permitted}} +signed char n[] = { u8"foo" }; // expected-error {{cannot initialize an array element of type 'signed char' with an lvalue of type 'const char8_t[4]'}} + +const unsigned char* uptr = u8"foo"; // expected-error {{cannot initialize}} +const signed char* sptr = u8"foo"; // expected-error {{cannot initialize}} +const char* ptr = u8"foo"; // expected-error {{cannot initialize}} + +template <typename T> +void check_values() { + constexpr T c[] = {0, static_cast<T>(0xFF), 0x42}; + constexpr T a[] = u8"\x00\xFF\x42"; + + static_assert(a[0] == 
c[0]); + static_assert(a[1] == c[1]); + static_assert(a[2] == c[2]); +} + +void call_check_values() { + check_values<char>(); + check_values<unsigned char>(); +} + void disambig() { char8_t (a) = u8'x'; } @@ -48,3 +70,21 @@ void check_deduction() { static_assert(sizeof(char8_t) == 1); static_assert(char8_t(-1) > 0); static_assert(u8"\u0080"[0] > 0); + +namespace ambiguous { + +struct A { + char8_t s[10]; +}; +struct B { + char s[10]; +}; + +void f(A); // expected-note {{candidate}} +void f(B); // expected-note {{candidate}} + +int test() { + f({u8"foo"}); // expected-error {{call to 'f' is ambiguous}} +} + +} diff --git a/clang/test/SemaCXX/cxx2a-compat.cpp b/clang/test/SemaCXX/cxx2a-compat.cpp index 0e9eafdc9b70..4f20cf59b65f 100644 --- a/clang/test/SemaCXX/cxx2a-compat.cpp +++ b/clang/test/SemaCXX/cxx2a-compat.cpp @@ -33,9 +33,8 @@ string u8str = u8"test" u8"test"; // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} // expected-warning@-4 {{type of UTF-8 string literal will change}} expected-note@-4 {{remove 'u8' prefix}} #else -// expected-error@-8 {{ISO C++20 does not permit initialization of char array with UTF-8 string literal}} -// expected-error@-8 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} -// expected-error@-8 {{no viable conversion from 'const char8_t[9]' to 'string'}} +// expected-error@-7 {{cannot initialize a variable of type 'const char *' with an lvalue of type 'const char8_t[6]'}} +// expected-error@-7 {{no viable conversion from 'const char8_t[9]' to 'string'}} #endif template<bool b> diff --git a/clang/www/cxx_status.html b/clang/www/cxx_status.html index cbea6bb5e1e2..d46e7bb0b46a 100755 --- a/clang/www/cxx_status.html +++ b/clang/www/cxx_status.html @@ -1502,7 +1502,7 @@ <h2 id="cxx23">C++2b implementation status</h2> <tr> <td><code>char8_t</code> Compatibility and Portability Fix</td> <td><a 
href="https://wg21.link/P2513R3">P2513R3</a></td> - <td class="none" align="center">No</td> + <td class="unreleased" align="center">Clang 16</td> </tr> <tr> <td>Relax requirements on <code>wchar_t</code> to match existing practices</td> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits