https://github.com/cor3ntin updated https://github.com/llvm/llvm-project/pull/93216
>From 556c622275c630b74c0f9000c5c599ff665595e1 Mon Sep 17 00:00:00 2001 From: Corentin Jabot <corentinja...@gmail.com> Date: Thu, 23 May 2024 18:45:58 +0200 Subject: [PATCH 1/2] [Clang] allow `` `@$ `` in raw string delimiters in C++26 And as an extension in older language modes. Per https://eel.is/c++draft/lex.string#nt:d-char Fixes #93130 --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/Basic/CharInfo.h | 15 +++++++------- .../include/clang/Basic/DiagnosticLexKinds.td | 8 ++++++++ clang/lib/Basic/CharInfo.cpp | 20 +++++++++---------- clang/lib/Lex/Lexer.cpp | 11 +++++++++- clang/test/Lexer/cxx2c-raw-strings.cpp | 12 +++++++++++ 6 files changed, 49 insertions(+), 18 deletions(-) create mode 100644 clang/test/Lexer/cxx2c-raw-strings.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 7bcdee96e213e..2e298cd9cdb82 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -771,6 +771,7 @@ Bug Fixes to C++ Support Fixes (#GH87210), (GH89541). - Clang no longer tries to check if an expression is immediate-escalating in an unevaluated context. Fixes (#GH91308). +- Clang now allow ``@$``` in raw string literals. Fixes (#GH93130). Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/CharInfo.h b/clang/include/clang/Basic/CharInfo.h index d807955311828..4d90528f7992e 100644 --- a/clang/include/clang/Basic/CharInfo.h +++ b/clang/include/clang/Basic/CharInfo.h @@ -28,8 +28,7 @@ namespace charinfo { CHAR_LOWER = 0x0040, // a-z CHAR_UNDER = 0x0080, // _ CHAR_PERIOD = 0x0100, // . - CHAR_RAWDEL = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"' - CHAR_PUNCT = 0x0400 // `$@() + CHAR_PUNCT = 0x0200, // {}[]#<>%:;?*+-/^&|~!=,"'`$@() }; enum { @@ -152,7 +151,8 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) { /// Note that '_' is both a punctuation character and an identifier character! LLVM_READONLY inline bool isPunctuation(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0; + return (InfoTable[c] & + (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT | CHAR_PUNCT)) != 0; } /// Return true if this character is an ASCII printable character; that is, a @@ -160,8 +160,8 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) { /// terminal. LLVM_READONLY inline bool isPrintable(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT | + CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0; } /// Return true if this is the body character of a C preprocessing number, @@ -175,8 +175,9 @@ LLVM_READONLY inline bool isPreprocessingNumberBody(unsigned char c) { /// Return true if this is the body character of a C++ raw string delimiter. LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) { using namespace charinfo; - return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD| - CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0; + return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT | + CHAR_UNDER | CHAR_PUNCT)) != 0 && + c != '(' && c != ')'; } enum class EscapeChar { diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index ad6bacfb118d4..8411842490c4e 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning< "raw string literals are incompatible with C++98">, InGroup<CXX98Compat>, DefaultIgnore; +def warn_cxx26_compat_raw_string_literal_character_set : Warning< + "'%0'in a raw string literal delimiter is incompatible " + "with standards before C++2c">, + InGroup<CXXPre26Compat>, DefaultIgnore; +def ext_cxx26_raw_string_literal_character_set : Extension< + "'%0'in a raw string literal delimiter is a C++2c extension">, + InGroup<CXX26>, DefaultIgnore; + def warn_multichar_character_literal : Warning< "multi-character character constant">, InGroup<MultiChar>; def warn_four_char_character_literal : Warning< diff --git a/clang/lib/Basic/CharInfo.cpp b/clang/lib/Basic/CharInfo.cpp index d02054c9718f5..26d693b8e9b94 100644 --- a/clang/lib/Basic/CharInfo.cpp +++ b/clang/lib/Basic/CharInfo.cpp @@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = { 0 , 0 , 0 , 0 , //32 SP 33 ! 34 " 35 # //36 $ 37 % 38 & 39 ' - CHAR_SPACE , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_SPACE , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //40 ( 41 ) 42 * 43 + //44 , 45 - 46 . 47 / - CHAR_PUNCT , CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PERIOD , CHAR_PUNCT , //48 0 49 1 50 2 51 3 //52 4 53 5 54 6 55 7 CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , CHAR_DIGIT , //56 8 57 9 58 : 59 ; //60 < 61 = 62 > 63 ? - CHAR_DIGIT , CHAR_DIGIT , CHAR_RAWDEL , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , + CHAR_DIGIT , CHAR_DIGIT , CHAR_PUNCT , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , //64 @ 65 A 66 B 67 C //68 D 69 E 70 F 71 G CHAR_PUNCT , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER , @@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , //88 X 89 Y 90 Z 91 [ //92 \ 93 ] 94 ^ 95 _ - CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_RAWDEL , - CHAR_PUNCT , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER , + CHAR_UPPER , CHAR_UPPER , CHAR_UPPER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , CHAR_UNDER , //96 ` 97 a 98 b 99 c //100 d 101 e 102 f 103 g CHAR_PUNCT , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER , @@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = { CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , //120 x 121 y 122 z 123 { //124 | 125 } 126 ~ 127 DEL - CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_RAWDEL , - CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0 + CHAR_LOWER , CHAR_LOWER , CHAR_LOWER , CHAR_PUNCT , + CHAR_PUNCT , CHAR_PUNCT , CHAR_PUNCT , 0 }; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c98645993abe0..c7543a48c0b50 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const char *CurPtr, unsigned PrefixLen = 0; - while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) + while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) { ++PrefixLen; + if (!isLexingRawMode() && + llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) { + const char *Pos = &CurPtr[PrefixLen]; + Diag(Pos, LangOpts.CPlusPlus26 + ? diag::warn_cxx26_compat_raw_string_literal_character_set + : diag::ext_cxx26_raw_string_literal_character_set) + << StringRef(Pos, 1); + } + } // If the last character was not a '(', then we didn't lex a valid delimiter. if (CurPtr[PrefixLen] != '(') { diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp new file mode 100644 index 0000000000000..9181cbc7cf8d4 --- /dev/null +++ b/clang/test/Lexer/cxx2c-raw-strings.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wc++26-extensions %s +// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s + +int main() { + (void) R"abc`@$(foobar)abc`@$"; + //expected-warning@-1 {{'`'in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-2 {{'@'in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-3 {{'$'in a raw string literal delimiter is a C++2c extension}} + //cxx26-warning@-4 {{'`'in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-5 {{'@'in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-6 {{'$'in a raw string literal delimiter is incompatible with standards before C++2c}} +} >From d3c22bd70c1e9a505ee6c2bf5fd01a6b778b3da3 Mon Sep 17 00:00:00 2001 From: Corentin Jabot <corentinja...@gmail.com> Date: Fri, 24 May 2024 08:26:03 +0200 Subject: [PATCH 2/2] add space --- clang/include/clang/Basic/DiagnosticLexKinds.td | 4 ++-- clang/test/Lexer/cxx2c-raw-strings.cpp | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 8411842490c4e..e10ffbabd1da6 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -112,11 +112,11 @@ def warn_cxx98_compat_raw_string_literal : Warning< InGroup<CXX98Compat>, DefaultIgnore; def warn_cxx26_compat_raw_string_literal_character_set : Warning< - "'%0'in a raw string literal delimiter is incompatible " + " '%0' in a raw string literal delimiter is incompatible " "with standards before C++2c">, InGroup<CXXPre26Compat>, DefaultIgnore; def ext_cxx26_raw_string_literal_character_set : Extension< - "'%0'in a raw string literal delimiter is a C++2c extension">, + " '%0' in a raw string literal delimiter is a C++2c extension">, InGroup<CXX26>, DefaultIgnore; def warn_multichar_character_literal : Warning< diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp b/clang/test/Lexer/cxx2c-raw-strings.cpp index 9181cbc7cf8d4..e72dbfb63c390 100644 --- a/clang/test/Lexer/cxx2c-raw-strings.cpp +++ b/clang/test/Lexer/cxx2c-raw-strings.cpp @@ -3,10 +3,10 @@ int main() { (void) R"abc`@$(foobar)abc`@$"; - //expected-warning@-1 {{'`'in a raw string literal delimiter is a C++2c extension}} - //expected-warning@-2 {{'@'in a raw string literal delimiter is a C++2c extension}} - //expected-warning@-3 {{'$'in a raw string literal delimiter is a C++2c extension}} - //cxx26-warning@-4 {{'`'in a raw string literal delimiter is incompatible with standards before C++2c}} - //cxx26-warning@-5 {{'@'in a raw string literal delimiter is incompatible with standards before C++2c}} - //cxx26-warning@-6 {{'$'in a raw string literal delimiter is incompatible with standards before C++2c}} + //expected-warning@-1 {{'` 'in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-2 {{'@' in a raw string literal delimiter is a C++2c extension}} + //expected-warning@-3 {{'$' in a raw string literal delimiter is a C++2c extension}} + //cxx26-warning@-4 {{'`' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-5 {{'@' in a raw string literal delimiter is incompatible with standards before C++2c}} + //cxx26-warning@-6 {{'$' in a raw string literal delimiter is incompatible with standards before C++2c}} } _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits