On 8/17/22 00:17, Jakub Jelinek wrote:
Hi! The following patch implements the C++23 P2290R3 paper. Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? 2022-08-17 Jakub Jelinek <ja...@redhat.com> PR c++/106645 libcpp/ * include/cpplib.h (struct cpp_options): Implement P2290R3 - Delimited escape sequences. Add delimited_escape_seqs member. * init.cc (struct lang_flags): Likewise. (lang_defaults): Add delim column. (cpp_set_lang): Copy over delimited_escape_seqs. * charset.cc (_cpp_valid_ucn): Handle delimited escape sequences. (convert_hex): Likewise. (convert_oct): Likewise. (convert_escape): Call convert_oct even for \o. (_cpp_interpret_identifier): Handle delimited escape sequences. * lex.cc (get_bidi_ucn_1): Likewise. Add end argument, fill it in. (get_bidi_ucn): Adjust get_bidi_ucn_1 caller. Use end argument to compute num_bytes. gcc/testsuite/ * c-c++-common/cpp/delimited-escape-seq-1.c: New test. * c-c++-common/cpp/delimited-escape-seq-2.c: New test. * c-c++-common/cpp/delimited-escape-seq-3.c: New test. * c-c++-common/Wbidi-chars-24.c: New test. * gcc.dg/cpp/delimited-escape-seq-1.c: New test. * gcc.dg/cpp/delimited-escape-seq-2.c: New test. * g++.dg/cpp/delimited-escape-seq-1.C: New test. * g++.dg/cpp/delimited-escape-seq-2.C: New test. --- libcpp/include/cpplib.h.jj 2022-08-10 09:06:53.268209449 +0200 +++ libcpp/include/cpplib.h 2022-08-15 19:32:53.743213474 +0200 @@ -519,6 +519,9 @@ struct cpp_options /* Nonzero for C++23 size_t literals. */ unsigned char size_t_literals;+ /* Nonzero for C++23 delimited escape sequences. */+ unsigned char delimited_escape_seqs; + /* Holds the name of the target (execution) character set. 
*/ const char *narrow_charset;--- libcpp/init.cc.jj 2022-08-10 09:06:53.268209449 +0200+++ libcpp/init.cc 2022-08-15 16:09:01.403020485 +0200 @@ -96,34 +96,35 @@ struct lang_flags char dfp_constants; char size_t_literals; char elifdef; + char delimited_escape_seqs; };static const struct lang_flags lang_defaults[] =-{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef */ - /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1 }, - /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 }, - /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0 }, - /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0 }, - /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0 }, - /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 0, 1, 1, 1, 0, 0, 0 }, - /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, - /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1 }, - /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } +{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef delim */ + /* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0 }, + /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 }, + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0 }, + /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 }, + /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0 }, + /* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0 }, + /* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0 }, + /* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 }, + /* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0 }, + /* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 }, + /* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0 }, + /* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, + /* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1 }, + /* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } };/* Sets internal flags correctly for a given language. */@@ -153,6 +154,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_ CPP_OPTION (pfile, dfp_constants) = l->dfp_constants; CPP_OPTION (pfile, size_t_literals) = l->size_t_literals; CPP_OPTION (pfile, elifdef) = l->elifdef; + CPP_OPTION (pfile, delimited_escape_seqs) = l->delimited_escape_seqs; }/* Initialize library global state. */--- libcpp/charset.cc.jj 2022-08-15 12:52:43.213902801 +0200 +++ libcpp/charset.cc 2022-08-16 11:42:27.729948705 +0200 @@ -1081,6 +1081,7 @@ _cpp_valid_ucn (cpp_reader *pfile, const unsigned int length; const uchar *str = *pstr; const uchar *base = str - 2; + bool delimited = false;if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))cpp_error (pfile, CPP_DL_WARNING, @@ -1095,7 +1096,17 @@ _cpp_valid_ucn (cpp_reader *pfile, const (int) str[-1]);if (str[-1] == 'u')- length = 4; + { + length = 4; + if (str < limit && *str == '{') + { + str++; + length = 32;
/* Magic value to indicate no digits seen. */
+ delimited = true; + if (loc_reader) + char_range->m_finish = loc_reader->get_next ().m_finish; + } + } else if (str[-1] == 'U') length = 8; else @@ -1107,6 +1118,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const result = 0; do { + if (str == limit) + break; c = *str; if (!ISXDIGIT (c)) break; @@ -1116,9 +1129,41 @@ _cpp_valid_ucn (cpp_reader *pfile, const gcc_assert (char_range); char_range->m_finish = loc_reader->get_next ().m_finish; } + if (delimited) + { + if (!result) + /* Accept arbitrary number of leading zeros. */ + length = 16; + else if (length == 8) + { + /* Make sure we detect overflows. */ + result |= 0x8000000; + ++length; + }
Is the length set to 16 above so that this case happens after we read 8 digits after the leading zeroes?
+ } + result = (result << 4) + hex_value (c); } - while (--length && str < limit); + while (--length); + + if (delimited + && str < limit + && *str == '}' + && (length != 32 || !identifier_pos)) + { + if (length == 32) + cpp_error (pfile, CPP_DL_ERROR, + "empty delimited escape sequence"); + else if (!CPP_OPTION (pfile, delimited_escape_seqs) + && CPP_OPTION (pfile, cpp_pedantic)) + cpp_error (pfile, CPP_DL_PEDWARN, + "delimited escape sequences are only valid in C++23"); + str++; + length = 0; + delimited = false; + if (loc_reader) + char_range->m_finish = loc_reader->get_next ().m_finish;
Here and in other functions, the pattern of incrementing the input pointer and updating m_finish seems like it should be a macro?
+ }/* Partial UCNs are not valid in strings, but decompose intomultiple tokens in identifiers, so we can't give a helpful @@ -1132,9 +1177,14 @@ _cpp_valid_ucn (cpp_reader *pfile, const *pstr = str; if (length) { - cpp_error (pfile, CPP_DL_ERROR, - "incomplete universal character name %.*s", - (int) (str - base), base); + if (!delimited) + cpp_error (pfile, CPP_DL_ERROR, + "incomplete universal character name %.*s", + (int) (str - base), base); + else + cpp_error (pfile, CPP_DL_ERROR, + "'\\u{' not terminated with '}' after %.*s", + (int) (str - base), base); result = 1; } /* The C99 standard permits $, @ and ` to be specified as UCNs. We use @@ -1392,6 +1442,8 @@ convert_hex (cpp_reader *pfile, const uc int digits_found = 0; size_t width = cvt.width; size_t mask = width_to_mask (width); + bool delimited = false; + const uchar *base = from - 1;/* loc_reader and ranges must either be both NULL, or both be non-NULL. */gcc_assert ((loc_reader != NULL) == (ranges != NULL)); @@ -1407,6 +1459,14 @@ convert_hex (cpp_reader *pfile, const uc if (loc_reader) char_range.m_finish = loc_reader->get_next ().m_finish;+ if (from < limit && *from == '{')+ { + delimited = true; + from++; + if (loc_reader) + char_range.m_finish = loc_reader->get_next ().m_finish; + } + while (from < limit) { c = *from; @@ -1420,12 +1480,37 @@ convert_hex (cpp_reader *pfile, const uc digits_found = 1; }+ if (delimited && from < limit && *from == '}')+ { + from++; + if (!digits_found) + { + cpp_error (pfile, CPP_DL_ERROR, + "empty delimited escape sequence"); + return from; + } + else if (!CPP_OPTION (pfile, delimited_escape_seqs) + && CPP_OPTION (pfile, cpp_pedantic)) + cpp_error (pfile, CPP_DL_PEDWARN, + "delimited escape sequences are only valid in C++23"); + delimited = false; + if (loc_reader) + char_range.m_finish = loc_reader->get_next ().m_finish; + } + if (!digits_found) { cpp_error (pfile, CPP_DL_ERROR, "\\x used with no following hex digits"); return from; } + else if (delimited) + { + 
cpp_error (pfile, CPP_DL_ERROR, + "'\\x{' not terminated with '}' after %.*s", + (int) (from - base), base); + return from; + }if (overflow | (n != (n & mask))){ @@ -1459,13 +1544,31 @@ convert_oct (cpp_reader *pfile, const uc cpp_substring_ranges *ranges) { size_t count = 0; - cppchar_t c, n = 0; + cppchar_t c, n = 0, overflow = 0; size_t width = cvt.width; size_t mask = width_to_mask (width); + bool delimited = false; + const uchar *base = from - 1;/* loc_reader and ranges must either be both NULL, or both be non-NULL. */gcc_assert ((loc_reader != NULL) == (ranges != NULL));+ if (from < limit && *from == 'o')+ { + from++; + if (loc_reader) + char_range.m_finish = loc_reader->get_next ().m_finish; + if (from == limit || *from != '{') + cpp_error (pfile, CPP_DL_ERROR, "'\\o' not followed by '{'"); + else + { + from++; + if (loc_reader) + char_range.m_finish = loc_reader->get_next ().m_finish; + delimited = true; + } + } + while (from < limit && count++ < 3) { c = *from; @@ -1474,10 +1577,42 @@ convert_oct (cpp_reader *pfile, const uc from++; if (loc_reader) char_range.m_finish = loc_reader->get_next ().m_finish; + if (delimited) + { + count = 2; + overflow |= n ^ (n << 3 >> 3); + } n = (n << 3) + c - '0'; }- if (n != (n & mask))+ if (delimited) + { + if (from < limit && *from == '}') + { + from++; + if (count == 1) + { + cpp_error (pfile, CPP_DL_ERROR, + "empty delimited escape sequence"); + return from; + } + else if (!CPP_OPTION (pfile, delimited_escape_seqs) + && CPP_OPTION (pfile, cpp_pedantic)) + cpp_error (pfile, CPP_DL_PEDWARN, + "delimited escape sequences are only valid in C++23"); + if (loc_reader) + char_range.m_finish = loc_reader->get_next ().m_finish; + } + else + { + cpp_error (pfile, CPP_DL_ERROR, + "'\\o{' not terminated with '}' after %.*s", + (int) (from - base), base); + return from; + } + } + + if (overflow | (n != (n & mask))) { cpp_error (pfile, CPP_DL_PEDWARN, "octal escape sequence out of range"); @@ -1535,6 +1670,7 @@ convert_escape 
(cpp_reader *pfile, constcase '0': case '1': case '2': case '3':case '4': case '5': case '6': case '7': + case 'o': return convert_oct (pfile, from, limit, tbuf, cvt, char_range, loc_reader, ranges);@@ -2119,15 +2255,23 @@ _cpp_interpret_identifier (cpp_reader *pcppchar_t value = 0; size_t bufleft = len - (bufp - buf); int rval; + bool delimited = false;idp += 2;+ if (length == 4 && id[idp] == '{') + { + delimited = true; + idp++; + } while (length && idp < len && ISXDIGIT (id[idp])) { value = (value << 4) + hex_value (id[idp]); idp++; - length--; + if (!delimited) + length--; } - idp--; + if (!delimited) + idp--;
Don't we need to check that the first non-xdigit is a '}'?
/* Special case for EBCDIC: if the identifier containsa '$' specified using a UCN, translate it to EBCDIC. */ --- libcpp/lex.cc.jj 2022-05-23 10:59:06.235591348 +0200 +++ libcpp/lex.cc 2022-08-16 11:57:53.772823661 +0200 @@ -1426,19 +1426,35 @@ get_bidi_utf8 (cpp_reader *pfile, const /* Parse a UCN where P points just past \u or \U and return its bidi code. */static bidi::kind-get_bidi_ucn_1 (const unsigned char *p, bool is_U) +get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end) { /* 6.4.3 Universal Character Names \u hex-quad \U hex-quad hex-quad + \u { simple-hexadecimal-digit-sequence } where \unnnn means \U0000nnnn. */+ *end = p + 4;if (is_U) { if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0') return bidi::kind::NONE; /* Skip 4B so we can treat \u and \U the same below. */ p += 4; + *end += 4; + } + else if (p[0] == '{') + { + p++; + while (*p == '0') + p++; + if (p[0] != '2' + || p[1] != '0' + || !ISXDIGIT (p[2]) + || !ISXDIGIT (p[3]) + || p[4] != '}') + return bidi::kind::NONE; + *end = p + 5; }/* All code points we are looking for start with 20xx. */@@ -1499,14 +1515,15 @@ get_bidi_ucn_1 (const unsigned char *p, If the kind is not NONE, write the location to *OUT.*/static bidi::kind-get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U, +get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U, location_t *out) { - bidi::kind result = get_bidi_ucn_1 (p, is_U); + const unsigned char *end; + bidi::kind result = get_bidi_ucn_1 (p, is_U, &end); if (result != bidi::kind::NONE) { const unsigned char *start = p - 2; - size_t num_bytes = 2 + (is_U ? 
8 : 4); + size_t num_bytes = end - start; *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes); } return result; --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c.jj 2022-08-16 10:47:38.693022740 +0200 +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-1.c 2022-08-16 12:18:42.235477632 +0200 @@ -0,0 +1,92 @@ +/* P2290R3 - Delimited escape sequences */ +/* { dg-do run } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */ +/* { dg-options "-std=c++23" { target c++ } } */ + +#ifndef __cplusplus +#include <wchar.h> +typedef __CHAR16_TYPE__ char16_t; +typedef __CHAR32_TYPE__ char32_t; +#endif + +const char32_t *a = U"\u{1234}\u{10fffd}\u{000000000000000000000000000000000000000000000000000000000001234}\u{10FFFD}"; +const char32_t *b = U"\x{1234}\x{10fffd}\x{000000000000000000000000000000000000000000000000000000000001234}"; +const char32_t *c = U"\o{1234}\o{4177775}\o{000000000000000000000000000000000000000000000000000000000000000000000000004177775}"; +const char16_t *d = u"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}"; +const char16_t *e = u"\x{1234}\x{BffD}\x{000001234}"; +const char16_t *f = u"\o{1234}\o{137775}\o{000000000000000137775}"; +const wchar_t *g = L"\u{1234}\u{bFFd}\u{00000000000000000000000000000001234}"; +const wchar_t *h = L"\x{1234}\x{bFFd}\x{000001234}"; +const wchar_t *i = L"\o{1234}\o{137775}\o{000000000000000137775}"; +#ifdef __cplusplus +const char *j = "\u{34}\u{000000000000000003D}"; +#endif +const char *k = "\x{34}\x{000000000000000003D}"; +const char *l = "\o{34}\o{000000000000000176}"; + +#if U'\u{1234}' != U'\u1234' || U'\u{10fffd}' != U'\U0010FFFD' \ + || U'\x{00000001234}' != U'\x1234' || U'\x{010fffd}' != U'\x10FFFD' \ + || U'\o{1234}' != U'\x29c' || U'\o{004177775}' != U'\x10FFFD' \ + || u'\u{1234}' != u'\u1234' || u'\u{0bffd}' != u'\uBFFD' \ + || u'\x{00000001234}' != u'\x1234' || u'\x{0Bffd}' != u'\x0bFFD' \ + || u'\o{1234}' != 
u'\x29c' || u'\o{00137775}' != u'\xBFFD' \ + || L'\u{1234}' != L'\u1234' || L'\u{0bffd}' != L'\uBFFD' \ + || L'\x{00000001234}' != L'\x1234' || L'\x{0bffd}' != L'\x0bFFD' \ + || L'\o{1234}' != L'\x29c' || L'\o{00137775}' != L'\xBFFD' \ + || '\x{34}' != '\x034' || '\x{0003d}' != '\x003D' \ + || '\o{34}' != '\x1C' || '\o{176}' != '\x007E' +#error Bad +#endif +#ifdef __cplusplus +#if '\u{0000000034}' != '\u0034' || '\u{3d}' != '\u003D' +#error Bad +#endif +#endif + +int +main () +{ + if (a[0] != U'\u1234' || a[0] != U'\u{1234}' + || a[1] != U'\U0010FFFD' || a[1] != U'\u{000010fFfD}' + || a[2] != a[0] + || a[3] != a[1] + || b[0] != U'\x1234' || b[0] != U'\x{001234}' + || b[1] != U'\x10FFFD' || b[1] != U'\x{0010fFfD}' + || b[2] != b[0] + || c[0] != U'\x29c' || c[0] != U'\o{001234}' + || c[1] != U'\x10FFFD' || c[1] != U'\o{4177775}' + || c[2] != c[1]) + __builtin_abort (); + if (d[0] != u'\u1234' || d[0] != u'\u{1234}' + || d[1] != u'\U0000BFFD' || d[1] != u'\u{00000bFfD}' + || d[2] != d[0] + || e[0] != u'\x1234' || e[0] != u'\x{001234}' + || e[1] != u'\xBFFD' || e[1] != u'\x{00bFfD}' + || e[2] != e[0] + || f[0] != u'\x29c' || f[0] != u'\o{001234}' + || f[1] != u'\xbFFD' || f[1] != u'\o{137775}' + || f[2] != f[1]) + __builtin_abort (); + if (g[0] != L'\u1234' || g[0] != L'\u{1234}' + || g[1] != L'\U0000BFFD' || g[1] != L'\u{00000bFfD}' + || g[2] != g[0] + || h[0] != L'\x1234' || h[0] != L'\x{001234}' + || h[1] != L'\xBFFD' || h[1] != L'\x{00bFfD}' + || h[2] != h[0] + || i[0] != L'\x29c' || i[0] != L'\o{001234}' + || i[1] != L'\xbFFD' || i[1] != L'\o{137775}' + || i[2] != i[1]) + __builtin_abort (); +#ifdef __cplusplus + if (j[0] != '\u0034' || j[0] != '\u{034}' + || j[1] != '\U0000003D' || j[1] != '\u{000003d}') + __builtin_abort (); +#endif + if (k[0] != '\x034' || k[0] != '\x{0034}' + || k[1] != '\x3D' || k[1] != '\x{3d}' + || l[0] != '\x1c' || l[0] != '\o{0034}' + || l[1] != '\x07e' || l[1] != '\o{176}' || l[1] != '\176') + __builtin_abort (); + return 0; +} --- 
gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c.jj 2022-08-16 10:47:41.846981390 +0200 +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-2.c 2022-08-16 12:18:58.807260607 +0200 @@ -0,0 +1,18 @@ +/* P2290R3 - Delimited escape sequences */ +/* { dg-do compile } */ +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */ +/* { dg-options "-std=c++23" { target c++ } } */ + +int jalape\u{f1}o = 42; + +int +caf\u{000e9} (void) +{ + return jalape\u00F1o; +} + +int +test (void) +{ + return caf\u00e9 (); +} --- gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c.jj 2022-08-16 12:18:19.308777922 +0200 +++ gcc/testsuite/c-c++-common/cpp/delimited-escape-seq-3.c 2022-08-16 12:41:23.693648138 +0200 @@ -0,0 +1,33 @@ +/* P2290R3 - Delimited escape sequences */ +/* { dg-do compile } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=gnu99 -Wno-c++-compat" { target c } } */ +/* { dg-options "-std=c++23" { target c++ } } */ + +#ifndef __cplusplus +typedef __CHAR32_TYPE__ char32_t; +#endif + +const char32_t *a = U"\u{}"; /* { dg-error "empty delimited escape sequence" } */ + /* { dg-error "is not a valid universal character" "" { target c } .-1 } */ +const char32_t *b = U"\u{12" "34}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */ +const char32_t *c = U"\u{0000ffffffff}"; /* { dg-error "is not a valid universal character" } */ +const char32_t *d = U"\u{010000edcb}"; /* { dg-error "is not a valid universal character" } */ +const char32_t *e = U"\u{02000000000000000000edcb}"; /* { dg-error "is not a valid universal character" } */ +const char32_t *f = U"\u{123ghij}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */ +const char32_t *g = U"\u{123.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */ +const char32_t *h = U"\u{.}"; /* { dg-error "'\\\\u\\{' not terminated with '\\}' after" } */ +const char32_t *i = U"\x{}"; /* { dg-error "empty delimited escape sequence" } */ +const char32_t *j = 
U"\x{12" "34}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */ +const char32_t *k = U"\x{0000ffffffff}"; +const char32_t *l = U"\x{010000edcb}"; /* { dg-warning "hex escape sequence out of range" } */ +const char32_t *m = U"\x{02000000000000000000edcb}"; /* { dg-warning "hex escape sequence out of range" } */ +const char32_t *n = U"\x{123ghij}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */ +const char32_t *o = U"\x{123.}"; /* { dg-error "'\\\\x\\{' not terminated with '\\}' after" } */ +const char32_t *p = U"\o{}"; /* { dg-error "empty delimited escape sequence" } */ +const char32_t *q = U"\o{12" "34}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */ +const char32_t *r = U"\o{0000037777777777}"; +const char32_t *s = U"\o{040000166713}"; /* { dg-warning "octal escape sequence out of range" } */ +const char32_t *t = U"\o{02000000000000000000000166713}";/* { dg-warning "octal escape sequence out of range" } */ +const char32_t *u = U"\o{1238}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */ +const char32_t *v = U"\o{.}"; /* { dg-error "'\\\\o\\{' not terminated with '\\}' after" } */ --- gcc/testsuite/c-c++-common/Wbidi-chars-24.c.jj 2022-08-16 12:03:19.350561676 +0200 +++ gcc/testsuite/c-c++-common/Wbidi-chars-24.c 2022-08-16 12:06:46.381851525 +0200 @@ -0,0 +1,28 @@ +/* PR preprocessor/103026 */ +/* { dg-do compile } */ +/* { dg-options "-Wbidi-chars=ucn,unpaired" } */ +/* Test nesting of bidi chars in various contexts. 
*/ + +void +g1 () +{ + const char *s1 = "a b c LRE\u{202a} 1 2 3 PDI\u{00000000000000000000000002069} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s2 = "a b c RLE\u{00202b} 1 2 3 PDI\u{2069} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s3 = "a b c LRO\u{000000202d} 1 2 3 PDI\u{02069} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s4 = "a b c RLO\u{202e} 1 2 3 PDI\u{00000002069} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s5 = "a b c LRI\u{002066} 1 2 3 PDF\u{202C} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s6 = "a b c RLI\u{02067} 1 2 3 PDF\u{202c} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ + const char *s7 = "a b c FSI\u{0002068} 1 2 3 PDF\u{0202c} x y z"; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +} + +int A\u{202a}B\u{2069}C; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ +int a\u{00000202b}B\u{000000002069}c; +/* { dg-warning "unpaired" "" { target *-*-* } .-1 } */ --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c.jj 2022-08-16 10:47:38.693022740 +0200 +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-1.c 2022-08-16 12:46:56.508291006 +0200 @@ -0,0 +1,10 @@ +/* P2290R3 - Delimited escape sequences */ +/* { dg-do compile } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic" } */ + +typedef __CHAR32_TYPE__ char32_t; + +const char32_t *a = U"\u{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */ +const char32_t *b = U"\x{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */ +const char32_t *c = U"\o{1234}"; /* { dg-warning "delimited escape sequences are only valid in" } */ --- gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c.jj 2022-08-16 10:47:41.846981390 +0200 +++ gcc/testsuite/gcc.dg/cpp/delimited-escape-seq-2.c 2022-08-16 12:47:05.955167423 
+0200 @@ -0,0 +1,10 @@ +/* P2290R3 - Delimited escape sequences */ +/* { dg-do compile } */ +/* { dg-require-effective-target wchar } */ +/* { dg-options "-std=gnu99 -Wno-c++-compat -pedantic-errors" } */ + +typedef __CHAR32_TYPE__ char32_t; + +const char32_t *a = U"\u{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */ +const char32_t *b = U"\x{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */ +const char32_t *c = U"\o{1234}"; /* { dg-error "delimited escape sequences are only valid in" } */ --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C.jj 2022-08-16 12:46:43.368462901 +0200 +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-1.C 2022-08-16 12:49:21.532393786 +0200 @@ -0,0 +1,8 @@ +// P2290R3 - Delimited escape sequences +// { dg-do compile { target c++11 } } +// { dg-require-effective-target wchar } +// { dg-options "-pedantic" } + +const char32_t *a = U"\u{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } } +const char32_t *b = U"\x{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } } +const char32_t *c = U"\o{1234}"; // { dg-warning "delimited escape sequences are only valid in" "" { target c++20_down } } --- gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C.jj 2022-08-16 12:46:46.281424798 +0200 +++ gcc/testsuite/g++.dg/cpp/delimited-escape-seq-2.C 2022-08-16 12:49:33.761233803 +0200 @@ -0,0 +1,8 @@ +// P2290R3 - Delimited escape sequences +// { dg-do compile { target c++11 } } +// { dg-require-effective-target wchar } +// { dg-options "-pedantic-errors" } + +const char32_t *a = U"\u{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } } +const char32_t *b = U"\x{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } } +const char32_t *c = U"\o{1234}"; // { dg-error "delimited escape sequences are only valid in" "" { target c++20_down } } 
Jakub