Author: Aaron Ballman Date: 2025-03-18T07:28:59-04:00 New Revision: 9cf46fb2303627fd2c74ed88dcd9f3f8cbfe0c93
URL: https://github.com/llvm/llvm-project/commit/9cf46fb2303627fd2c74ed88dcd9f3f8cbfe0c93 DIFF: https://github.com/llvm/llvm-project/commit/9cf46fb2303627fd2c74ed88dcd9f3f8cbfe0c93.diff LOG: [C2y] Add octal prefixes, deprecate unprefixed octals (#131626) WG14 N3353 added support for 0o and 0O as octal literal prefixes. It also deprecates use of octal literals without a prefix, except for the literal 0. This feature is being exposed as an extension in older C language modes as well as in all C++ language modes. Added: clang/test/C/C2y/n3353.c Modified: clang/docs/LanguageExtensions.rst clang/docs/ReleaseNotes.rst clang/include/clang/Basic/DiagnosticGroups.td clang/include/clang/Basic/DiagnosticLexKinds.td clang/include/clang/Lex/Lexer.h clang/lib/Lex/Lexer.cpp clang/lib/Lex/LiteralSupport.cpp clang/www/c_status.html Removed: ################################################################################ diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 6d03e6de461e7..20203dcfc1c2d 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -1652,6 +1652,7 @@ Designated initializers (N494) C Array & element qualification (N2607) C23 C89 Attributes (N2335) C23 C89 ``#embed`` (N3017) C23 C89, C++ +Octal literals prefixed with ``0o`` or ``0O`` C2y C89, C++ ============================================= ================================ ============= ============= Builtin type aliases diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index d9f1c95533c9c..0adbc19f40096 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -129,6 +129,13 @@ C2y Feature Support - Implemented `WG14 N3411 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3411.pdf>`_ which allows a source file to not end with a newline character. This is still reported as a conforming extension in earlier language modes. 
+- Implemented `WG14 N3353 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3353.htm>`_ + which adds the new ``0o`` and ``0O`` octal literal prefixes and deprecates + octal literals other than ``0`` which do not start with the new prefix. This + feature is exposed in earlier language modes and in C++ as an extension. The + paper also introduced octal and hexadecimal delimited escape sequences (e.g., + ``"\x{12}\o{12}"``) which are also supported as an extension in older C + language modes. C23 Feature Support ^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index e54f921741269..b9f08d96151c9 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -92,6 +92,7 @@ def EnumCompare : DiagGroup<"enum-compare", [EnumCompareSwitch, def DeprecatedAnonEnumEnumConversion : DiagGroup<"deprecated-anon-enum-enum-conversion">; def DeprecatedEnumEnumConversion : DiagGroup<"deprecated-enum-enum-conversion">; def DeprecatedEnumFloatConversion : DiagGroup<"deprecated-enum-float-conversion">; +def DeprecatedOctalLiterals : DiagGroup<"deprecated-octal-literals">; def AnonEnumEnumConversion : DiagGroup<"anon-enum-enum-conversion", [DeprecatedAnonEnumEnumConversion]>; def EnumEnumConversion : DiagGroup<"enum-enum-conversion", @@ -235,7 +236,8 @@ def Deprecated : DiagGroup<"deprecated", [DeprecatedAnonEnumEnumConversion, DeprecatedVolatile, DeprecatedWritableStr, DeprecatedRedundantConstexprStaticDef, - DeprecatedMissingCommaVariadicParam + DeprecatedMissingCommaVariadicParam, + DeprecatedOctalLiterals ]>, DiagCategory<"Deprecations">; diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td b/clang/include/clang/Basic/DiagnosticLexKinds.td index 0e5592d65669b..bdb7e9350b5f7 100644 --- a/clang/include/clang/Basic/DiagnosticLexKinds.td +++ b/clang/include/clang/Basic/DiagnosticLexKinds.td @@ -148,14 +148,14 @@ def ext_mathematical_notation : ExtWarn< 
InGroup<DiagGroup<"mathematical-notation-identifier-extension">>; def ext_delimited_escape_sequence : Extension< - "%select{delimited|named}0 escape sequences are a " - "%select{Clang|C++23}1 extension">, - InGroup<DiagGroup<"delimited-escape-sequence-extension">>; - + "%select{delimited|named}0 escape sequences are a %select{C++23|C2y|Clang}1 " + "extension">, InGroup<DiagGroup<"delimited-escape-sequence-extension">>; def warn_cxx23_delimited_escape_sequence : Warning< - "%select{delimited|named}0 escape sequences are " - "incompatible with C++ standards before C++23">, - InGroup<CXXPre23Compat>, DefaultIgnore; + "%select{delimited|named}0 escape sequences are incompatible with C++ " + "standards before C++23">, InGroup<CXXPre23Compat>, DefaultIgnore; +def warn_c2y_delimited_escape_sequence : Warning< + "delimited escape sequences are incompatible with C standards before C2y">, + InGroup<CPre2yCompat>, DefaultIgnore; def err_delimited_escape_empty : Error< "delimited escape sequence cannot be empty">; @@ -256,6 +256,17 @@ def warn_cxx17_hex_literal : Warning< "hexadecimal floating literals are incompatible with " "C++ standards before C++17">, InGroup<CXXPre17CompatPedantic>, DefaultIgnore; +def ext_octal_literal : Extension< + "octal integer literals are a C2y extension">, InGroup<C2y>; +def ext_cpp_octal_literal : Extension< + "octal integer literals are a Clang extension">, + InGroup<DiagGroup<"octal-prefix-extension">>; +def warn_c2y_compat_octal_literal : Warning< + "octal integer literals are incompatible with standards before C2y">, + InGroup<CPre2yCompat>, DefaultIgnore; +def warn_unprefixed_octal_deprecated : Warning< + "octal literals without a '0o' prefix are deprecated">, + InGroup<DeprecatedOctalLiterals>; def ext_binary_literal : Extension< "binary integer literals are a C23 extension">, InGroup<C23>; def warn_c23_compat_binary_literal : Warning< diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index 
89c8ae354dafc..bb65ae010cffa 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -582,6 +582,12 @@ class Lexer : public PreprocessorLexer { /// sequence. static bool isNewLineEscaped(const char *BufferStart, const char *Str); + /// Diagnose use of a delimited or named escape sequence. + static void DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, + bool Named, + const LangOptions &Opts, + DiagnosticsEngine &Diags); + /// Represents a char and the number of bytes parsed to produce it. struct SizedChar { char Char; diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index c62a9f5041183..96d5d4f440768 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3405,6 +3405,30 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const { return false; } +void Lexer::DiagnoseDelimitedOrNamedEscapeSequence(SourceLocation Loc, + bool Named, + const LangOptions &Opts, + DiagnosticsEngine &Diags) { + unsigned DiagId; + if (Opts.CPlusPlus23) + DiagId = diag::warn_cxx23_delimited_escape_sequence; + else if (Opts.C2y && !Named) + DiagId = diag::warn_c2y_delimited_escape_sequence; + else + DiagId = diag::ext_delimited_escape_sequence; + + // The trailing arguments are only used by the extension warning; either this + // is a C2y extension or a C++23 extension, unless it's a named escape + // sequence in C, then it's a Clang extension. + unsigned Ext; + if (!Opts.CPlusPlus) + Ext = Named ? 2 /* Clang extension */ : 1 /* C2y extension */; + else + Ext = 0; // C++23 extension + + Diags.Report(Loc, DiagId) << Named << Ext; +} + std::optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, const char *SlashLoc, Token *Result) { @@ -3496,12 +3520,10 @@ std::optional<uint32_t> Lexer::tryReadNumericUCN(const char *&StartPtr, return std::nullopt; } - if (Delimited && PP) { - Diag(SlashLoc, PP->getLangOpts().CPlusPlus23 - ? 
diag::warn_cxx23_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) - << /*delimited*/ 0 << (PP->getLangOpts().CPlusPlus ? 1 : 0); - } + if (Delimited && PP) + DiagnoseDelimitedOrNamedEscapeSequence(getSourceLocation(SlashLoc), false, + PP->getLangOpts(), + PP->getDiagnostics()); if (Result) { Result->setFlag(Token::HasUCN); @@ -3585,10 +3607,9 @@ std::optional<uint32_t> Lexer::tryReadNamedUCN(const char *&StartPtr, } if (Diagnose && Match) - Diag(SlashLoc, PP->getLangOpts().CPlusPlus23 - ? diag::warn_cxx23_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) - << /*named*/ 1 << (PP->getLangOpts().CPlusPlus ? 1 : 0); + DiagnoseDelimitedOrNamedEscapeSequence(getSourceLocation(SlashLoc), true, + PP->getLangOpts(), + PP->getDiagnostics()); // If no diagnostic has been emitted yet, likely because we are doing a // tentative lexing, we do not want to recover here to make sure the token diff --git a/clang/lib/Lex/LiteralSupport.cpp b/clang/lib/Lex/LiteralSupport.cpp index 69dc057d0df4b..20933cc8dee69 100644 --- a/clang/lib/Lex/LiteralSupport.cpp +++ b/clang/lib/Lex/LiteralSupport.cpp @@ -21,6 +21,7 @@ #include "clang/Lex/Preprocessor.h" #include "clang/Lex/Token.h" #include "llvm/ADT/APInt.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -353,10 +354,8 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, diag::err_expected) << tok::r_brace; else if (!HadError) { - Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf, - Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) - << /*delimited*/ 0 << (Features.CPlusPlus ? 
1 : 0); + Lexer::DiagnoseDelimitedOrNamedEscapeSequence(Loc, false, Features, + *Diags); } } @@ -709,11 +708,8 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, diag::warn_ucn_not_valid_in_c89_literal); if ((IsDelimitedEscapeSequence || IsNamedEscapeSequence) && Diags) - Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf, - Features.CPlusPlus23 ? diag::warn_cxx23_delimited_escape_sequence - : diag::ext_delimited_escape_sequence) - << (IsNamedEscapeSequence ? 1 : 0) << (Features.CPlusPlus ? 1 : 0); - + Lexer::DiagnoseDelimitedOrNamedEscapeSequence(Loc, IsNamedEscapeSequence, + Features, *Diags); return true; } @@ -1423,6 +1419,29 @@ void NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) { return; } + // Parse a potential octal literal prefix. + bool SawOctalPrefix = false; + if ((c1 == 'O' || c1 == 'o') && (s[1] >= '0' && s[1] <= '7')) { + unsigned DiagId; + if (LangOpts.C2y) + DiagId = diag::warn_c2y_compat_octal_literal; + else if (LangOpts.CPlusPlus) + DiagId = diag::ext_cpp_octal_literal; + else + DiagId = diag::ext_octal_literal; + Diags.Report(TokLoc, DiagId); + ++s; + DigitsBegin = s; + SawOctalPrefix = true; + } + + auto _ = llvm::make_scope_exit([&] { + // If we still have an octal value but we did not see an octal prefix, + // diagnose as being an obsolescent feature starting in C2y. + if (radix == 8 && LangOpts.C2y && !SawOctalPrefix && !hadError) + Diags.Report(TokLoc, diag::warn_unprefixed_octal_deprecated); + }); + // For now, the radix is set to 8. If we discover that we have a // floating point constant, the radix will change to 10. Octal floating // point constants are not permitted (only decimal and hexadecimal). 
diff --git a/clang/test/C/C2y/n3353.c b/clang/test/C/C2y/n3353.c new file mode 100644 index 0000000000000..fb7f9439ac21b --- /dev/null +++ b/clang/test/C/C2y/n3353.c @@ -0,0 +1,134 @@ +// RUN: %clang_cc1 -verify=expected,c2y,c -pedantic -std=c2y %s +// RUN: %clang_cc1 -verify=expected,c2y,compat -Wpre-c2y-compat -std=c2y %s +// RUN: %clang_cc1 -verify=expected,ext,c -pedantic -std=c23 %s +// RUN: %clang_cc1 -verify=expected,cpp -pedantic -x c++ -Wno-c11-extensions %s + + +/* WG14 N3353: Clang 21 + * Obsolete implicitly octal literals and add delimited escape sequences + */ + +constexpr int i = 0234; // c2y-warning {{octal literals without a '0o' prefix are deprecated}} +constexpr int j = 0o234; /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ + +static_assert(i == 156); +static_assert(j == 156); + +// Show that 0O is the same as 0o (tested above) +static_assert(0O1234 == 0o1234); /* ext-warning 2 {{octal integer literals are a C2y extension}} + cpp-warning 2 {{octal integer literals are a Clang extension}} + compat-warning 2 {{octal integer literals are incompatible with standards before C2y}} + */ + +// Show that you can use them with the usual integer literal suffixes. +static_assert(0o234ull == 156); /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ + +// And it's still a valid null pointer constant. +static const void *ptr = 0o0; /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ + +// Demonstrate that it works fine in the preprocessor. 
+#if 0o123 != 0x53 /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ +#error "oh no, math stopped working!" +#endif + +// 0 by itself is not deprecated, of course. +int k = 0; + +// Make sure there are no surprises with auto and type deduction. Promotion +// turns this into an 'int', and 'constexpr' implies 'const'. +constexpr auto l = 0o1234567; /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ +static_assert(l == 0x53977); +static_assert(__extension__ _Generic(typeof(0o1), typeof(01) : 1, default : 0)); /* c2y-warning {{octal literals without a '0o' prefix are deprecated}} + compat-warning {{passing a type argument as the first operand to '_Generic' is incompatible with C standards before C2y}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ +static_assert(__extension__ _Generic(typeof(l), const int : 1, default : 0)); // compat-warning {{passing a type argument as the first operand to '_Generic' is incompatible with C standards before C2y}} + +// Note that 0o by itself is an invalid literal. +int m = 0o; /* expected-error {{invalid suffix 'o' on integer constant}} + c2y-warning {{octal literals without a '0o' prefix are deprecated}} + */ + +// Ensure negation works as expected. +static_assert(-0o1234 == -668); /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ + +// FIXME: it would be better to not diagnose the compat and ext warnings when +// the octal literal is invalid. +// We expect diagnostics for non-octal digits. 
+int n = 0o18; /* expected-error {{invalid digit '8' in octal constant}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + */ +int o1 = 0o8; /* expected-error {{invalid suffix 'o8' on integer constant}} + c2y-warning {{octal literals without a '0o' prefix are deprecated}} + */ +// FIXME: however, it matches the behavior for hex literals in terms of the +// error reported. Unfortunately, we then go on to think 0 is an octal literal +// without a prefix, which is again a bit confusing. +int o2 = 0xG; /* expected-error {{invalid suffix 'xG' on integer constant}} + c2y-warning {{octal literals without a '0o' prefix are deprecated}} + */ + +// Ensure digit separators work as expected. +constexpr int p = 0o0'1'2'3'4'5'6'7; /* compat-warning {{octal integer literals are incompatible with standards before C2y}} + ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + */ +static_assert(p == 01234567); // c2y-warning {{octal literals without a '0o' prefix are deprecated}} +int q = 0o'0'1; /* expected-error {{invalid suffix 'o'0'1' on integer constant}} + c2y-warning {{octal literals without a '0o' prefix are deprecated}} + */ + +#define M 0o123 +int r = M; /* compat-warning {{octal integer literals are incompatible with standards before C2y}} + ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + */ + +// Also, test delimited escape sequences. Note, this paper added a delimited +// escape sequence for octal *and* hex. 
+auto a = "\x{12}\o{12}\N{SPARKLES}"; /* compat-warning 2 {{delimited escape sequences are incompatible with C standards before C2y}} + ext-warning 2 {{delimited escape sequences are a C2y extension}} + cpp-warning 2 {{delimited escape sequences are a C++23 extension}} + cpp-warning {{named escape sequences are a C++23 extension}} + c-warning {{named escape sequences are a Clang extension}} + */ + +#ifdef __cplusplus +template <unsigned N> +struct S { + static_assert(N == 0o567); /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ +}; + +void foo() { + S<0o567> s; /* ext-warning {{octal integer literals are a C2y extension}} + cpp-warning {{octal integer literals are a Clang extension}} + compat-warning {{octal integer literals are incompatible with standards before C2y}} + */ +} +#endif + +#line 0123 // expected-warning {{#line directive interprets number as decimal, not octal}} +#line 0o123 // expected-error {{#line directive requires a simple digit sequence}} diff --git a/clang/www/c_status.html b/clang/www/c_status.html index d68e8d6441ed2..7cf50bfdb6639 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -176,7 +176,7 @@ <h2 id="c2y">C2y implementation status</h2> <tr> <td>Obsolete implicitly octal literals and add delimited escape sequences</td> <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3353.htm">N3353</a></td> - <td class="none" align="center">No</td> + <td class="unreleased" align="center">Clang 21</td> </tr> <tr> <td>'if' declarations, v2</td> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits