On Mon, Nov 17, 2025 at 01:08:49PM +0100, Jakub Jelinek wrote:
> On Mon, Nov 17, 2025 at 04:45:44PM +0530, Jason Merrill wrote:
> > On 11/16/25 4:28 AM, Jakub Jelinek wrote:
> > > I've tried to test a patch to switch -std=gnu++17 C++ default
> > > to -std=gnu++20 (will post momentarily), but ran into various problems
> > > during GCC bootstraps, our codebase isn't fully C++20 ready.
> > > 
> > > The most common problems are arithmetic or bitwise operations
> > > between enumerators of different enum types (or between enumerator
> > > and floating point in the testsuite), ambiguous overloaded
> > > operator == because of forgotten const qualification of const inside
> > > of the argument and then libcody being largely stuck in C++11 and
> > > incompatible with C++20 which introduced char8_t type and uses it for
> > > u8 literals.
> > > 
> > > The following patch fixes various issues I've run into, for libcody
> > > this patch just makes sure code including cody.hh can be compiled
> > > with -std=gnu++20, libcody itself I have a tweak in the other patch.
> > 
> > Frankly I'm not sure what the point of libcody's u8 dance is; UTF-8 for all
> > the ASCII characters that it uses S2C(u8"x") for is the same byte as the 'x'
> > plain character literal.  I'd be inclined to strip that all out.
> 
> It isn't the same for -fexec-charset=IBM1047 (or other non-ASCII execution
> charsets), so perhaps the intent is to communicate using UTF-8 (or ASCII?)
> instead of using arbitrary other character set.
> Detail::S2C is, from what I understand, just a workaround for C++11 and one can use
> (char) u8'x' instead for C++14 and later (perhaps conditionally based on
> feature test macros).
> I don't know what other projects use libcody.
> If it is just GCC, perhaps it is time to update it to minimum C++14 like
> rest of GCC and the workarounds can go.
> But for C++20 another thing is that I think we'd need to add some extra
> overloads for const char8_t * etc. and just cast to const char *.  Plus
> there are some uses of std::string created from u8 literals, dunno if it
> should use std::u8string in that case or have casts to const char * first.

The following builds with -std=c++11 and c++14 and c++17 and c++20 and c++23
and c++26.

I see the u8 string literals are mixed e.g. with strerror, so in
-fexec-charset=IBM1047 there will still be garbage, so I am not 100% sure if
the u8 literals everywhere are worth it either.

2025-11-17  Jakub Jelinek  <[email protected]>

        * cody.hh (S2C): For __cpp_char8_t >= 201811 use char8_t instead of
        char in argument type.
        (MessageBuffer::Space): Revert 2025-11-15 change.
        (MessageBuffer::AppendWord): For __cpp_char8_t >= 201811 add overload
        with char8_t const * type of first argument.
        (Packet::Packet): Similarly for second argument.
        * client.cc (CommunicationError, Client::ProcessResponse,
        Client::Connect, ConnectResponse, PathnameResponse, OKResponse,
        IncludeTranslateResponse): Cast u8 string literals to (const char *)
        where needed.
        * server.cc (requestTable, Server::ProcessRequests, ConnectRequest):
        Likewise.

--- libcody/cody.hh.jj  2025-11-15 19:23:39.520293500 +0100
+++ libcody/cody.hh     2025-11-17 16:58:55.949656716 +0100
@@ -47,12 +47,21 @@ namespace Detail  {
 
 // C++11 doesn't have utf8 character literals :(
 
+#if __cpp_char8_t >= 201811
+template<unsigned I>
+constexpr char S2C (char8_t const (&s)[I])
+{
+  static_assert (I == 2, "only single octet strings may be converted");
+  return s[0];
+}
+#else
 template<unsigned I>
 constexpr char S2C (char const (&s)[I])
 {
   static_assert (I == 2, "only single octet strings may be converted");
   return s[0];
 }
+#endif
 
 /// Internal buffering class.  Used to concatenate outgoing messages
 /// and Lex incoming ones.
@@ -110,11 +119,7 @@ public:
   /// Add whitespace word separator.  Multiple adjacent whitespace is fine.
   void Space ()
   {
-#if __cpp_unicode_characters >= 201411
-    Append ((char) u8' ');
-#else
     Append (Detail::S2C(u8" "));
-#endif
   }
 
 public:
@@ -127,6 +132,13 @@ public:
       Space ();
     Append (str, maybe_quote, len);
   }
+#if __cpp_char8_t >= 201811
+  void AppendWord (char8_t const *str, bool maybe_quote = false,
+                  size_t len = ~size_t (0))
+  {
+    AppendWord ((const  char *) str, maybe_quote, len);
+  }
+#endif
   /// Add a word as with AppendWord
   /// @param str the string to append
   /// @param maybe_quote string might need quoting, as for Append
@@ -268,6 +280,12 @@ public:
     : string (s), cat (STRING), code (c)
   {
   }
+#if __cpp_char8_t >= 201811
+  Packet (unsigned c, const char8_t *s)
+    : string ((const char *) s), cat (STRING), code (c)
+  {
+  }
+#endif
   Packet (unsigned c, std::vector<std::string> &&v)
     : vector (std::move (v)), cat (VECTOR), code (c)
   {
--- libcody/client.cc.jj        2021-01-05 00:13:58.118299183 +0100
+++ libcody/client.cc   2025-11-17 17:01:27.069591926 +0100
@@ -97,7 +97,7 @@ int Client::CommunicateWithServer ()
 
 static Packet CommunicationError (int err)
 {
-  std::string e {u8"communication error:"};
+  std::string e {(const char *) u8"communication error:"};
   e.append (strerror (err));
 
   return Packet (Client::PC_ERROR, std::move (e));
@@ -110,33 +110,34 @@ Packet Client::ProcessResponse (std::vec
     {
       if (e == EINVAL)
        {
-         std::string msg (u8"malformed string '");
+         std::string msg ((const char *) u8"malformed string '");
          msg.append (words[0]);
-         msg.append (u8"'");
+         msg.append ((const char *) u8"'");
          return Packet (Client::PC_ERROR, std::move (msg));
        }
       else
-       return Packet (Client::PC_ERROR, u8"missing response");
+       return Packet (Client::PC_ERROR, (const char *) u8"missing response");
     }
 
   Assert (!words.empty ());
-  if (words[0] == u8"ERROR")
+  if (words[0] == (const char *) u8"ERROR")
     return Packet (Client::PC_ERROR,
-                  words.size () == 2 ? words[1]: u8"malformed error response");
+                  words.size () == 2 ? words[1]
+                  : (const char *) u8"malformed error response");
 
   if (isLast && !read.IsAtEnd ())
     return Packet (Client::PC_ERROR,
-                  std::string (u8"unexpected extra response"));
+                  std::string ((const char *) u8"unexpected extra response"));
 
   Assert (code < Detail::RC_HWM);
   Packet result (responseTable[code] (words));
   result.SetRequest (code);
   if (result.GetCode () == Client::PC_ERROR && result.GetString ().empty ())
     {
-      std::string msg {u8"malformed response '"};
+      std::string msg {(const char *) u8"malformed response '"};
 
       read.LexedLine (msg);
-      msg.append (u8"'");
+      msg.append ((const char *) u8"'");
       result.GetString () = std::move (msg);
     }
   else if (result.GetCode () == Client::PC_CONNECT)
@@ -199,7 +200,7 @@ Packet Client::Connect (char const *agen
                          size_t alen, size_t ilen)
 {
   write.BeginLine ();
-  write.AppendWord (u8"HELLO");
+  write.AppendWord ((const char *) u8"HELLO");
   write.AppendInteger (Version);
   write.AppendWord (agent, true, alen);
   write.AppendWord (ident, true, ilen);
@@ -211,7 +212,8 @@ Packet Client::Connect (char const *agen
 // HELLO $version $agent [$flags]
 Packet ConnectResponse (std::vector<std::string> &words)
 {
-  if (words[0] == u8"HELLO" && (words.size () == 3 || words.size () == 4))
+  if (words[0] == (const char *) u8"HELLO"
+      && (words.size () == 3 || words.size () == 4))
     {
       char *eptr;
       unsigned long val = strtoul (words[1].c_str (), &eptr, 10);
@@ -247,7 +249,7 @@ Packet Client::ModuleRepo ()
 // PATHNAME $dir | ERROR
 Packet PathnameResponse (std::vector<std::string> &words)
 {
-  if (words[0] == u8"PATHNAME" && words.size () == 2)
+  if (words[0] == (const char *) u8"PATHNAME" && words.size () == 2)
     return Packet (Client::PC_PATHNAME, std::move (words[1]));
 
   return Packet (Client::PC_ERROR, u8"");
@@ -256,7 +258,7 @@ Packet PathnameResponse (std::vector<std
 // OK or ERROR
 Packet OKResponse (std::vector<std::string> &words)
 {
-  if (words[0] == u8"OK")
+  if (words[0] == (const char *) u8"OK")
     return Packet (Client::PC_OK);
   else
     return Packet (Client::PC_ERROR,
@@ -319,11 +321,11 @@ Packet Client::IncludeTranslate (char co
 // PATHNAME $cmifile
 Packet IncludeTranslateResponse (std::vector<std::string> &words)
 {
-  if (words[0] == u8"BOOL" && words.size () == 2)
+  if (words[0] == (const char *) u8"BOOL" && words.size () == 2)
     {
-      if (words[1] == u8"FALSE")
-       return Packet (Client::PC_BOOL, 0);
-      else if (words[1] == u8"TRUE")
+      if (words[1] == (const char *) u8"FALSE")
+       return Packet (Client::PC_BOOL);
+      else if (words[1] == (const char *) u8"TRUE")
        return Packet (Client::PC_BOOL, 1);
       else
        return Packet (Client::PC_ERROR, u8"");
--- libcody/server.cc.jj        2020-12-21 22:20:04.143490902 +0100
+++ libcody/server.cc   2025-11-17 17:04:10.535350717 +0100
@@ -36,12 +36,12 @@ static RequestPair
   const requestTable[Detail::RC_HWM] =
   {
     // Same order as enum RequestCode
-    RequestPair {u8"HELLO", nullptr},
-    RequestPair {u8"MODULE-REPO", ModuleRepoRequest},
-    RequestPair {u8"MODULE-EXPORT", ModuleExportRequest},
-    RequestPair {u8"MODULE-IMPORT", ModuleImportRequest},
-    RequestPair {u8"MODULE-COMPILED", ModuleCompiledRequest},
-    RequestPair {u8"INCLUDE-TRANSLATE", IncludeTranslateRequest},
+    RequestPair {(const char *) u8"HELLO", nullptr},
+    RequestPair {(const char *) u8"MODULE-REPO", ModuleRepoRequest},
+    RequestPair {(const char *) u8"MODULE-EXPORT", ModuleExportRequest},
+    RequestPair {(const char *) u8"MODULE-IMPORT", ModuleImportRequest},
+    RequestPair {(const char *) u8"MODULE-COMPILED", ModuleCompiledRequest},
+    RequestPair {(const char *) u8"INCLUDE-TRANSLATE", IncludeTranslateRequest},
   };
 }
 
@@ -135,21 +135,21 @@ void Server::ProcessRequests (void)
          std::string msg;
 
          if (err > 0)
-           msg = u8"error processing '";
+           msg = (const char *) u8"error processing '";
          else if (ix >= Detail::RC_HWM)
-           msg = u8"unrecognized '";
+           msg = (const char *) u8"unrecognized '";
          else if (IsConnected () && ix == Detail::RC_CONNECT)
-           msg = u8"already connected '";
+           msg = (const char *) u8"already connected '";
          else if (!IsConnected () && ix != Detail::RC_CONNECT)
-           msg = u8"not connected '";
+           msg = (const char *) u8"not connected '";
          else
-           msg = u8"malformed '";
+           msg = (const char *) u8"malformed '";
 
          read.LexedLine (msg);
-         msg.append (u8"'");
+         msg.append ((const char *) u8"'");
          if (err > 0)
            {
-             msg.append (u8" ");
+             msg.append ((const char *) u8" ");
              msg.append (strerror (err));
            }
          resolver->ErrorResponse (this, std::move (msg));
@@ -176,7 +176,7 @@ Resolver *ConnectRequest (Server *s, Res
     return nullptr;
 
   if (words.size () == 3)
-    words.emplace_back (u8"");
+    words.emplace_back ((const char *) u8"");
   unsigned version = ParseUnsigned (words[1]);
   if (version == ~0u)
     return nullptr;


        Jakub

Reply via email to