On Mon, Nov 17, 2025 at 01:08:49PM +0100, Jakub Jelinek wrote: > On Mon, Nov 17, 2025 at 04:45:44PM +0530, Jason Merrill wrote: > > On 11/16/25 4:28 AM, Jakub Jelinek wrote: > > > I've tried to test a patch to switch -std=gnu++17 C++ default > > > to -std=gnu++20 (will post momentarily), but ran into various problems > > > during GCC bootstraps, our codebase isn't fully C++20 ready. > > > > > > The most common problems are arithmetic or bitwise operations > > > between enumerators of different enum types (or between enumerator > > > and floating point in the testsuite), ambiguous overloaded > > > operator == because of forgotten const qualification of const inside > > > of the argument and then libcody being largely stuck in C++11 and > > > incompatible > > > with C++20 which introduced char8_t type and uses it for u8 literals. > > > > > > The following patch fixes various issues I've run into, for libcody > > > this patch just makes sure code including cody.hh can be compiled > > > with -std=gnu++20, libcody itself I have a tweak in the other patch. > > > > Frankly I'm not sure what the point of libcody's u8 dance is; UTF-8 for all > > the ASCII characters that it uses S2C(u8"x") for is the same byte as the 'x' > > plain character literal. I'd be inclined to strip that all out. > > It isn't the same for -fexec-charset=IBM1047 (or other non-ASCII execution > charsets), so perhaps the intent is to communicate using UTF-8 (or ASCII?) > instead of using an arbitrary other character set. > Detail::S2C is, from what I understand, just a workaround for C++11 and one can use > (char) u8'x' instead for C++14 and later (perhaps conditionally based on > feature test macros). > I don't know what other projects use libcody. > If it is just GCC, perhaps it is time to update it to minimum C++14 like > the rest of GCC and the workarounds can go. > But for C++20 another thing is that I think we'd need to add some extra > overloads for const char8_t * etc. and just cast to const char *. 
Plus > there are some uses of std::string created from u8 literals, dunno if it > should use std::u8string in that case or have casts to const char * first.
The following builds with -std=c++11 and c++14 and c++17 and c++20 and c++23 and c++26. I see the u8 string literals are mixed e.g. with strerror, so in -fexec-charset=IBM1047 there will still be garbage, so I am not 100% sure if the u8 literals everywhere are worth it either. 2025-11-17 Jakub Jelinek <[email protected]> * cody.hh (S2C): For __cpp_char8_t >= 201811 use char8_t instead of char in argument type. (MessageBuffer::Space): Revert 2025-11-15 change. (MessageBuffer::AppendWord): For __cpp_char8_t >= 201811 add overload with char8_t const * type of first argument. (Packet::Packet): Similarly for second argument. * client.cc (CommunicationError, Client::ProcessResponse, Client::Connect, ConnectResponse, PathnameResponse, OKResponse, IncludeTranslateResponse): Cast u8 string literals to (const char *) where needed. * server.cc (Server::ProcessRequests, ConnectRequest): Likewise. --- libcody/cody.hh.jj 2025-11-15 19:23:39.520293500 +0100 +++ libcody/cody.hh 2025-11-17 16:58:55.949656716 +0100 @@ -47,12 +47,21 @@ namespace Detail { // C++11 doesn't have utf8 character literals :( +#if __cpp_char8_t >= 201811 +template<unsigned I> +constexpr char S2C (char8_t const (&s)[I]) +{ + static_assert (I == 2, "only single octet strings may be converted"); + return s[0]; +} +#else template<unsigned I> constexpr char S2C (char const (&s)[I]) { static_assert (I == 2, "only single octet strings may be converted"); return s[0]; } +#endif /// Internal buffering class. Used to concatenate outgoing messages /// and Lex incoming ones. @@ -110,11 +119,7 @@ public: /// Add whitespace word separator. Multiple adjacent whitespace is fine. 
void Space () { -#if __cpp_unicode_characters >= 201411 - Append ((char) u8' '); -#else Append (Detail::S2C(u8" ")); -#endif } public: @@ -127,6 +132,13 @@ public: Space (); Append (str, maybe_quote, len); } +#if __cpp_char8_t >= 201811 + void AppendWord (char8_t const *str, bool maybe_quote = false, + size_t len = ~size_t (0)) + { + AppendWord ((const char *) str, maybe_quote, len); + } +#endif /// Add a word as with AppendWord /// @param str the string to append /// @param maybe_quote string might need quoting, as for Append @@ -268,6 +280,12 @@ public: : string (s), cat (STRING), code (c) { } +#if __cpp_char8_t >= 201811 + Packet (unsigned c, const char8_t *s) + : string ((const char *) s), cat (STRING), code (c) + { + } +#endif Packet (unsigned c, std::vector<std::string> &&v) : vector (std::move (v)), cat (VECTOR), code (c) { --- libcody/client.cc.jj 2021-01-05 00:13:58.118299183 +0100 +++ libcody/client.cc 2025-11-17 17:01:27.069591926 +0100 @@ -97,7 +97,7 @@ int Client::CommunicateWithServer () static Packet CommunicationError (int err) { - std::string e {u8"communication error:"}; + std::string e {(const char *) u8"communication error:"}; e.append (strerror (err)); return Packet (Client::PC_ERROR, std::move (e)); @@ -110,33 +110,34 @@ Packet Client::ProcessResponse (std::vec { if (e == EINVAL) { - std::string msg (u8"malformed string '"); + std::string msg ((const char *) u8"malformed string '"); msg.append (words[0]); - msg.append (u8"'"); + msg.append ((const char *) u8"'"); return Packet (Client::PC_ERROR, std::move (msg)); } else - return Packet (Client::PC_ERROR, u8"missing response"); + return Packet (Client::PC_ERROR, (const char *) u8"missing response"); } Assert (!words.empty ()); - if (words[0] == u8"ERROR") + if (words[0] == (const char *) u8"ERROR") return Packet (Client::PC_ERROR, - words.size () == 2 ? words[1]: u8"malformed error response"); + words.size () == 2 ? 
words[1] + : (const char *) u8"malformed error response"); if (isLast && !read.IsAtEnd ()) return Packet (Client::PC_ERROR, - std::string (u8"unexpected extra response")); + std::string ((const char *) u8"unexpected extra response")); Assert (code < Detail::RC_HWM); Packet result (responseTable[code] (words)); result.SetRequest (code); if (result.GetCode () == Client::PC_ERROR && result.GetString ().empty ()) { - std::string msg {u8"malformed response '"}; + std::string msg {(const char *) u8"malformed response '"}; read.LexedLine (msg); - msg.append (u8"'"); + msg.append ((const char *) u8"'"); result.GetString () = std::move (msg); } else if (result.GetCode () == Client::PC_CONNECT) @@ -199,7 +200,7 @@ Packet Client::Connect (char const *agen size_t alen, size_t ilen) { write.BeginLine (); - write.AppendWord (u8"HELLO"); + write.AppendWord ((const char *) u8"HELLO"); write.AppendInteger (Version); write.AppendWord (agent, true, alen); write.AppendWord (ident, true, ilen); @@ -211,7 +212,8 @@ Packet Client::Connect (char const *agen // HELLO $version $agent [$flags] Packet ConnectResponse (std::vector<std::string> &words) { - if (words[0] == u8"HELLO" && (words.size () == 3 || words.size () == 4)) + if (words[0] == (const char *) u8"HELLO" + && (words.size () == 3 || words.size () == 4)) { char *eptr; unsigned long val = strtoul (words[1].c_str (), &eptr, 10); @@ -247,7 +249,7 @@ Packet Client::ModuleRepo () // PATHNAME $dir | ERROR Packet PathnameResponse (std::vector<std::string> &words) { - if (words[0] == u8"PATHNAME" && words.size () == 2) + if (words[0] == (const char *) u8"PATHNAME" && words.size () == 2) return Packet (Client::PC_PATHNAME, std::move (words[1])); return Packet (Client::PC_ERROR, u8""); @@ -256,7 +258,7 @@ Packet PathnameResponse (std::vector<std // OK or ERROR Packet OKResponse (std::vector<std::string> &words) { - if (words[0] == u8"OK") + if (words[0] == (const char *) u8"OK") return Packet (Client::PC_OK); else return Packet 
(Client::PC_ERROR, @@ -319,11 +321,11 @@ Packet Client::IncludeTranslate (char co // PATHNAME $cmifile Packet IncludeTranslateResponse (std::vector<std::string> &words) { - if (words[0] == u8"BOOL" && words.size () == 2) + if (words[0] == (const char *) u8"BOOL" && words.size () == 2) { - if (words[1] == u8"FALSE") - return Packet (Client::PC_BOOL, 0); - else if (words[1] == u8"TRUE") + if (words[1] == (const char *) u8"FALSE") + return Packet (Client::PC_BOOL); + else if (words[1] == (const char *) u8"TRUE") return Packet (Client::PC_BOOL, 1); else return Packet (Client::PC_ERROR, u8""); --- libcody/server.cc.jj 2020-12-21 22:20:04.143490902 +0100 +++ libcody/server.cc 2025-11-17 17:04:10.535350717 +0100 @@ -36,12 +36,12 @@ static RequestPair const requestTable[Detail::RC_HWM] = { // Same order as enum RequestCode - RequestPair {u8"HELLO", nullptr}, - RequestPair {u8"MODULE-REPO", ModuleRepoRequest}, - RequestPair {u8"MODULE-EXPORT", ModuleExportRequest}, - RequestPair {u8"MODULE-IMPORT", ModuleImportRequest}, - RequestPair {u8"MODULE-COMPILED", ModuleCompiledRequest}, - RequestPair {u8"INCLUDE-TRANSLATE", IncludeTranslateRequest}, + RequestPair {(const char *) u8"HELLO", nullptr}, + RequestPair {(const char *) u8"MODULE-REPO", ModuleRepoRequest}, + RequestPair {(const char *) u8"MODULE-EXPORT", ModuleExportRequest}, + RequestPair {(const char *) u8"MODULE-IMPORT", ModuleImportRequest}, + RequestPair {(const char *) u8"MODULE-COMPILED", ModuleCompiledRequest}, + RequestPair {(const char *) u8"INCLUDE-TRANSLATE", IncludeTranslateRequest}, }; } @@ -135,21 +135,21 @@ void Server::ProcessRequests (void) std::string msg; if (err > 0) - msg = u8"error processing '"; + msg = (const char *) u8"error processing '"; else if (ix >= Detail::RC_HWM) - msg = u8"unrecognized '"; + msg = (const char *) u8"unrecognized '"; else if (IsConnected () && ix == Detail::RC_CONNECT) - msg = u8"already connected '"; + msg = (const char *) u8"already connected '"; else if (!IsConnected () 
&& ix != Detail::RC_CONNECT) - msg = u8"not connected '"; + msg = (const char *) u8"not connected '"; else - msg = u8"malformed '"; + msg = (const char *) u8"malformed '"; read.LexedLine (msg); - msg.append (u8"'"); + msg.append ((const char *) u8"'"); if (err > 0) { - msg.append (u8" "); + msg.append ((const char *) u8" "); msg.append (strerror (err)); } resolver->ErrorResponse (this, std::move (msg)); @@ -176,7 +176,7 @@ Resolver *ConnectRequest (Server *s, Res return nullptr; if (words.size () == 3) - words.emplace_back (u8""); + words.emplace_back ((const char *) u8""); unsigned version = ParseUnsigned (words[1]); if (version == ~0u) return nullptr; Jakub
