[PATCH] libcpp: Small incremental patch for P1854R4 [PR110341]

Jakub Jelinek via Gcc-patches Sat, 26 Aug 2023 04:11:26 -0700

Hi!

The following incremental patch, on top of the previously posted PR110341
patch, uses a special conversion callback instead of converting from the
host charset (UTF-8/UTF-EBCDIC) to UTF-32, and also ignores all diagnostics
from the second cpp_interpret_string call, which should just count
characters.  The UTF-EBCDIC path is untested, but it is simple enough that
it should just work.


2023-08-26  Jakub Jelinek  <ja...@redhat.com>

        PR c++/110341
        * charset.cc (one_count_chars, convert_count_chars): New functions.
        (narrow_str_to_charconst): Call cpp_interpret_string with type
        rather than CPP_STRING32, temporarily override for that call
        pfile->cb.diagnostic to noop_diagnostic_cb and
        pfile->narrow_cset_desc.func to convert_count_chars and just compare
        str.len against str2.len.

--- libcpp/charset.cc.jj        2023-08-25 17:14:14.098733396 +0200
+++ libcpp/charset.cc   2023-08-26 12:57:44.858858994 +0200
@@ -446,6 +446,74 @@ one_utf16_to_utf8 (iconv_t bigend, const
   return 0;
 }
 
+
+/* Special routine which just counts the number of characters in the
+   string; what exactly is stored into the output doesn't matter
+   as long as it is one uchar per character.  */
+
+static inline int
+one_count_chars (iconv_t, const uchar **inbufp, size_t *inbytesleftp,
+                uchar **outbufp, size_t *outbytesleftp)
+{
+  uchar *outbuf;
+  cppchar_t s = 0;
+  int rval;
+
+  /* Check for space first, since we know exactly how much we need.  */
+  if (*outbytesleftp < 1)
+    return E2BIG;
+
+#if HOST_CHARSET == HOST_CHARSET_ASCII
+  rval = one_utf8_to_cppchar (inbufp, inbytesleftp, &s);
+  if (rval)
+    return rval;
+#else
+  if (*inbytesleftp < 1)
+    return EINVAL;
+  static const uchar utf_ebcdic_map[256] = {
+    /* See table 4 in http://unicode.org/reports/tr16/tr16-7.2.html  */
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+    1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1, 1,
+    1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1, 1, 1, 1, 1,
+    9, 9, 9, 9, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
+    2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 2, 2,
+    2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 1, 3, 3,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4,
+    1, 4, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 4, 5, 5, 5,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 6, 6, 7, 7, 0
+  };
+  rval = utf_ebcdic_map[**inbufp];
+  if (rval == 9)
+    return EILSEQ;
+  if (rval == 0)
+    rval = 1;
+  if (rval >= 2)
+    {
+      if (*inbytesleftp < rval)
+       return EINVAL;
+      for (int i = 1; i < rval; ++i)
+       if (utf_ebcdic_map[(*inbufp)[i]] != 9)
+         return EILSEQ;
+    }
+  *inbytesleftp -= rval;
+  *inbufp += rval;
+#endif
+
+  **outbufp = ' ';
+
+  *outbufp += 1;
+  *outbytesleftp -= 1;
+  return 0;
+}
+
+
 /* Helper routine for the next few functions.  The 'const' on
    one_conversion means that we promise not to modify what function is
    pointed to, which lets the inliner see through it.  */
@@ -529,6 +597,15 @@ convert_utf32_utf8 (iconv_t cd, const uc
   return conversion_loop (one_utf32_to_utf8, cd, from, flen, to);
 }
 
+/* Magic conversion which just counts characters from input, so
+   only to->len is significant.  */
+static bool
+convert_count_chars (iconv_t cd, const uchar *from,
+                    size_t flen, struct _cpp_strbuf *to)
+{
+  return conversion_loop (one_count_chars, cd, from, flen, to);
+}
+
 /* Identity conversion, used when we have no alternative.  */
 static bool
 convert_no_conversion (iconv_t cd ATTRIBUTE_UNUSED,
@@ -2613,15 +2690,22 @@ narrow_str_to_charconst (cpp_reader *pfi
         ill-formed.  We need to count the number of c-chars and compare
         that to str.len.  */
       cpp_string str2 = { 0, 0 };
-      if (cpp_interpret_string (pfile, &token->val.str, 1, &str2,
-                               CPP_STRING32))
+      bool (*saved_diagnostic_handler) (cpp_reader *, enum cpp_diagnostic_level,
+                                       enum cpp_warning_reason, rich_location *,
+                                       const char *, va_list *)
+       ATTRIBUTE_FPTR_PRINTF(5,0);
+      saved_diagnostic_handler = pfile->cb.diagnostic;
+      pfile->cb.diagnostic = noop_diagnostic_cb;
+      convert_f save_func = pfile->narrow_cset_desc.func;
+      pfile->narrow_cset_desc.func = convert_count_chars;
+      bool ret = cpp_interpret_string (pfile, &token->val.str, 1, &str2, type);
+      pfile->narrow_cset_desc.func = save_func;
+      pfile->cb.diagnostic = saved_diagnostic_handler;
+      if (ret)
        {
-         size_t width32 = converter_for_type (pfile, CPP_STRING32).width;
-         size_t nbwc = width32 / width;
-         size_t len = str2.len / nbwc;
          if (str2.text != token->val.str.text)
            free ((void *)str2.text);
-         if (str.len > len)
+         if (str.len > str2.len)
            {
              diagnosed
                = cpp_error (pfile, CPP_DL_PEDWARN,

        Jakub

[PATCH] libcpp: Small incremental patch for P1854R4 [PR110341]

Reply via email to