llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-driver Author: None (Sirraide) <details> <summary>Changes</summary> This is a tentative implementation of support for raw string literals in C following the discussion on #<!-- -->85703. GCC supports raw string literals in C in `-std=gnuXY` mode. This PR both enables raw string literals in `-std=gnuXY` mode in C and adds a `-f[no-]raw-string-literals` flag to override this behaviour. There are a few questions I still have, though: 1. GCC does not seem to support raw string literals in C++ before C++11, even if e.g. `-std=gnu++03` is passed. Should we follow this behaviour or should we enable raw string literals in earlier C++ language modes as well if `-std=gnu++XY` is passed? `-fraw-string-literals` currently makes it possible to enable them in e.g. C++03. 2. `-fno-raw-string-literals` allows users to *disable* raw string literals in `-std=gnuXY` mode. I thought it might be useful to have this, but do we want it? 3. The implementation of this currently adds a `RawStringLiterals` option to the LangOpts; `-f[no-]raw-string-literals` overrides its default value, which depends on the language standard. As a consequence, passing e.g. `-std=c++11 -fno-raw-string-literals` will *disable* raw string literals even though we’re in C++11 mode. Do we want to allow this or should we just ignore `-f[no-]raw-string-literals` if we’re in C++11 or later? 4. This probably deserves a note in `LanguageExtensions.rst`, but I’m not exactly sure where. 5. Should we add a flag for this to `__has_feature`/`__has_extension`? 
--- Full diff: https://github.com/llvm/llvm-project/pull/88265.diff 9 Files Affected: - (modified) clang/docs/ReleaseNotes.rst (+3) - (modified) clang/include/clang/Basic/LangOptions.def (+2) - (modified) clang/include/clang/Basic/LangStandard.h (+6) - (modified) clang/include/clang/Driver/Options.td (+6) - (modified) clang/lib/Basic/LangOptions.cpp (+1) - (modified) clang/lib/Driver/ToolChains/Clang.cpp (+2) - (modified) clang/lib/Format/Format.cpp (+1) - (modified) clang/lib/Lex/Lexer.cpp (+5-5) - (added) clang/test/Lexer/raw-string-ext.c (+18) ``````````diff diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index f96cebbde3d825..20d14130fb62bc 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -43,6 +43,9 @@ code bases. C/C++ Language Potentially Breaking Changes ------------------------------------------- +- Clang now supports raw string literals in ``-std=gnuXY`` mode as an extension in + C. This behaviour can also be overridden using ``-f[no-]raw-string-literals``. + C++ Specific Potentially Breaking Changes ----------------------------------------- - Clang now diagnoses function/variable templates that shadow their own template parameters, e.g. ``template<class T> void T();``. 
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 8ef6700ecdc78e..96bd339bb1851d 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -454,6 +454,8 @@ LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") LANGOPT(CXXAssumptions, 1, 1, "Enable or disable codegen and compile-time checks for C++23's [[assume]] attribute") +LANGOPT(RawStringLiterals, 1, 0, "Enable or disable raw string literals") + ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2, StrictFlexArraysLevelKind::Default, "Rely on strict definition of flexible arrays") diff --git a/clang/include/clang/Basic/LangStandard.h b/clang/include/clang/Basic/LangStandard.h index 8e25afc833661c..0a308b93ada746 100644 --- a/clang/include/clang/Basic/LangStandard.h +++ b/clang/include/clang/Basic/LangStandard.h @@ -130,6 +130,12 @@ struct LangStandard { /// hasDigraphs - Language supports digraphs. bool hasDigraphs() const { return Flags & Digraphs; } + /// hasRawStringLiterals - Language supports R"()" raw string literals. + bool hasRawStringLiterals() const { + // GCC supports raw string literals in C, but not in C++ before C++11. + return isCPlusPlus11() || (!isCPlusPlus() && isGNUMode()); + } + /// isGNUMode - Language includes GNU extensions. 
bool isGNUMode() const { return Flags & GNUMode; } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f745e573eb2686..32e6c10e1251b7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4142,6 +4142,12 @@ def fenable_matrix : Flag<["-"], "fenable-matrix">, Group<f_Group>, HelpText<"Enable matrix data type and related builtin functions">, MarshallingInfoFlag<LangOpts<"MatrixTypes">>; +defm raw_string_literals : BoolFOption<"raw-string-literals", + LangOpts<"RawStringLiterals">, Default<std#".hasRawStringLiterals()">, + PosFlag<SetTrue, [], [], "Enable">, + NegFlag<SetFalse, [], [], "Disable">, + BothFlags<[], [ClangOption, CC1Option], " raw string literals">>; + def fzero_call_used_regs_EQ : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index a0adfbf61840e3..c34f0ed5ed7174 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -124,6 +124,7 @@ void LangOptions::setLangDefaults(LangOptions &Opts, Language Lang, Opts.HexFloats = Std.hasHexFloats(); Opts.WChar = Std.isCPlusPlus(); Opts.Digraphs = Std.hasDigraphs(); + Opts.RawStringLiterals = Std.hasRawStringLiterals(); Opts.HLSL = Lang == Language::HLSL; if (Opts.HLSL && Opts.IncludeDefaultHeader) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 766a9b91e3c0ad..c99bfe4efc4137 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6536,6 +6536,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fheinous_gnu_extensions); Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs); Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ); + Args.AddLastArg(CmdArgs, options::OPT_fraw_string_literals, + 
options::OPT_fno_raw_string_literals); if (Args.hasFlag(options::OPT_femulated_tls, options::OPT_fno_emulated_tls, Triple.hasDefaultEmulatedTLS())) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 89e6c19b0af45c..71865bb061f57e 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3850,6 +3850,7 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { // the sequence "<::" will be unconditionally treated as "[:". // Cf. Lexer::LexTokenInternal. LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; + LangOpts.RawStringLiterals = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp(); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c98645993abe07..67d75c1140b232 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3867,7 +3867,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::utf16_char_constant); // UTF-16 raw string literal - if (Char == 'R' && LangOpts.CPlusPlus11 && + if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), @@ -3889,7 +3889,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { SizeTmp2, Result), tok::utf8_char_constant); - if (Char2 == 'R' && LangOpts.CPlusPlus11) { + if (Char2 == 'R' && LangOpts.RawStringLiterals) { unsigned SizeTmp3; char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3); // UTF-8 raw string literal @@ -3925,7 +3925,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::utf32_char_constant); // UTF-32 raw string literal - if (Char == 'R' && LangOpts.CPlusPlus11 && + if (Char == 'R' && LangOpts.RawStringLiterals && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), 
@@ -3940,7 +3940,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { // Notify MIOpt that we read a non-whitespace/non-comment token. MIOpt.ReadToken(); - if (LangOpts.CPlusPlus11) { + if (LangOpts.RawStringLiterals) { Char = getCharAndSize(CurPtr, SizeTmp); if (Char == '"') @@ -3963,7 +3963,7 @@ bool Lexer::LexTokenInternal(Token &Result, bool TokAtPhysicalStartOfLine) { tok::wide_string_literal); // Wide raw string literal. - if (LangOpts.CPlusPlus11 && Char == 'R' && + if (LangOpts.RawStringLiterals && Char == 'R' && getCharAndSize(CurPtr + SizeTmp, SizeTmp2) == '"') return LexRawStringLiteral(Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result), diff --git a/clang/test/Lexer/raw-string-ext.c b/clang/test/Lexer/raw-string-ext.c new file mode 100644 index 00000000000000..45e3990cadf3d2 --- /dev/null +++ b/clang/test/Lexer/raw-string-ext.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -verify=gnu -DGNU %s +// RUN: %clang_cc1 -fsyntax-only -std=c11 -fraw-string-literals -verify=gnu -DGNU %s +// RUN: %clang_cc1 -fsyntax-only -std=c11 -verify=std %s +// RUN: %clang_cc1 -fsyntax-only -std=gnu11 -fno-raw-string-literals -verify=std %s + +void f() { + (void) R"foo()foo"; // std-error {{use of undeclared identifier 'R'}} + (void) LR"foo()foo"; // std-error {{use of undeclared identifier 'LR'}} + (void) uR"foo()foo"; // std-error {{use of undeclared identifier 'uR'}} + (void) u8R"foo()foo"; // std-error {{use of undeclared identifier 'u8R'}} + (void) UR"foo()foo"; // std-error {{use of undeclared identifier 'UR'}} +} + +// gnu-error@* {{missing terminating delimiter}} +// gnu-error@* {{expected expression}} +// gnu-error@* {{expected ';' after top level declarator}} +#define R "bar" +const char* s = R"foo("; `````````` </details> https://github.com/llvm/llvm-project/pull/88265 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org 
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits