Hi!

Here is a new version of the gnu::base64 parameter support; the only change
is that it now uses the EMBED_PARAMS registry of parameters.

2024-08-15  Jakub Jelinek  <ja...@redhat.com>

libcpp/
        * internal.h (struct cpp_embed_params): Add base64 member.
        (_cpp_free_embed_params_tokens): Declare.
        * directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED.
        (save_token_for_embed, _cpp_free_embed_params_tokens): New functions.
        (EMBED_PARAMS): Add gnu::base64 entry.
        (_cpp_parse_embed_params): Parse gnu::base64 parameter.  If
        -fpreprocessed without -fdirectives-only, require #embed to have
        gnu::base64 parameter.  Diagnose conflict between gnu::base64 and
        limit or gnu::offset parameters.
        (do_embed): Use _cpp_free_embed_params_tokens.
        * files.cc (finish_embed, base64_dec_fn): New functions.
        (base64_dec): New array.
        (B64D0, B64D1, B64D2, B64D3): Define.
        (finish_base64_embed): New function.
        (_cpp_stack_embed): Use finish_embed.  Handle params->base64
        using finish_base64_embed.
        * macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens.
gcc/
        * doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64
        parameter.
gcc/testsuite/
        * c-c++-common/cpp/embed-17.c: New test.
        * c-c++-common/cpp/embed-18.c: New test.
        * c-c++-common/cpp/embed-19.c: New test.
        * gcc.dg/cpp/embed-6.c: New test.
        * gcc.dg/cpp/embed-7.c: New test.

--- libcpp/internal.h.jj        2024-08-15 11:26:00.726026264 +0200
+++ libcpp/internal.h   2024-08-15 11:35:50.559664877 +0200
@@ -631,7 +631,7 @@ struct cpp_embed_params
   location_t loc;
   bool has_embed;
   cpp_num_part limit, offset;
-  cpp_embed_params_tokens prefix, suffix, if_empty;
+  cpp_embed_params_tokens prefix, suffix, if_empty, base64;
 };
 
 /* Character classes.  Based on the more primitive macros in safe-ctype.h.
@@ -805,6 +805,7 @@ extern void _cpp_restore_pragma_names (c
 extern int _cpp_do__Pragma (cpp_reader *, location_t);
 extern void _cpp_init_directives (cpp_reader *);
 extern void _cpp_init_internal_pragmas (cpp_reader *);
+extern void _cpp_free_embed_params_tokens (cpp_embed_params_tokens *);
 extern bool _cpp_parse_embed_params (cpp_reader *, struct cpp_embed_params *);
 extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *,
                                 linenum_type, unsigned int);
--- libcpp/directives.cc.jj     2024-08-15 11:39:49.476685559 +0200
+++ libcpp/directives.cc        2024-08-15 11:49:30.107446324 +0200
@@ -159,7 +159,7 @@ static void cpp_pop_definition (cpp_read
   D(error,     T_ERROR,        STDC89,    0)                           \
   D(pragma,    T_PRAGMA,       STDC89,    IN_I)                        \
   D(warning,   T_WARNING,      STDC23,    0)                           \
-  D(embed,     T_EMBED,        STDC23,    INCL | EXPAND)               \
+  D(embed,     T_EMBED,        STDC23,    IN_I | INCL | EXPAND)        \
   D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND)            \
   D(ident,     T_IDENT,        EXTENSION, IN_I)                        \
   D(import,    T_IMPORT,       EXTENSION, INCL | EXPAND)  /* ObjC */   \
@@ -932,6 +932,50 @@ do_include_next (cpp_reader *pfile)
   do_include_common (pfile, type);
 }
 
+/* Helper function for skip_balanced_token_seq and _cpp_parse_embed_params.
+   Save one token *TOKEN into *SAVE.  */
+
+static void
+save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token)
+{
+  if (save->count == 0)
+    {
+      _cpp_init_tokenrun (&save->base_run, 4);
+      save->cur_run = &save->base_run;
+      save->cur_token = save->base_run.base;
+    }
+  else if (save->cur_token == save->cur_run->limit)
+    {
+      save->cur_run->next = XNEW (tokenrun);
+      save->cur_run->next->prev = save->cur_run;
+      _cpp_init_tokenrun (save->cur_run->next, 4);
+      save->cur_run = save->cur_run->next;
+      save->cur_token = save->cur_run->base;
+    }
+  *save->cur_token = *token;
+  save->cur_token->flags |= NO_EXPAND;
+  save->cur_token++;
+  save->count++;
+}
+
+/* Free memory associated with saved tokens in *SAVE.  */
+
+void
+_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save)
+{
+  if (save->count == 0)
+    return;
+  tokenrun *n;
+  for (tokenrun *t = &save->base_run; t; t = n)
+    {
+      n = t->next;
+      XDELETEVEC (t->base);
+      if (t != &save->base_run)
+       XDELETE (t);
+    }
+  save->count = 0;
+}
+
 /* Skip over balanced preprocessing tokens until END is found.
    If SAVE is non-NULL, remember the parsed tokens in it.  NESTED is
    false in the outermost invocation of the function and true
@@ -961,26 +1005,7 @@ skip_balanced_token_seq (cpp_reader *pfi
       if (save
          && (token->type != CPP_PADDING || save->count)
          && (token->type != end || nested))
-       {
-         if (save->count == 0)
-           {
-             _cpp_init_tokenrun (&save->base_run, 4);
-             save->cur_run = &save->base_run;
-             save->cur_token = save->base_run.base;
-           }
-         else if (save->cur_token == save->cur_run->limit)
-           {
-             save->cur_run->next = XNEW (tokenrun);
-             save->cur_run->next->prev = save->cur_run;
-             _cpp_init_tokenrun (save->cur_run->next, 4);
-             save->cur_run = save->cur_run->next;
-             save->cur_token = save->cur_run->base;
-           }
-         *save->cur_token = *token;
-         save->cur_token->flags |= NO_EXPAND;
-         save->cur_token++;
-         save->count++;
-       }
+       save_token_for_embed (save, token);
       if (token->type == end)
        return;
       switch (token->type)
@@ -1015,6 +1040,7 @@ skip_balanced_token_seq (cpp_reader *pfi
   EMBED_PARAM (PREFIX, "prefix")       \
   EMBED_PARAM (SUFFIX, "suffix")       \
   EMBED_PARAM (IF_EMPTY, "if_empty")   \
+  EMBED_PARAM (GNU_BASE64, "base64")   \
   EMBED_PARAM (GNU_OFFSET, "offset")
 
 enum embed_param_kind {
@@ -1058,12 +1084,33 @@ _cpp_parse_embed_params (cpp_reader *pfi
                  cpp_error (pfile, CPP_DL_ERROR, "expected ')'");
                  return false;
                }
-             return ret;
            }
-         else if (token->type == CPP_CLOSE_PAREN && params->has_embed)
-           return ret;
-         cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
-         return false;
+         else if (token->type != CPP_CLOSE_PAREN || !params->has_embed)
+           {
+             cpp_error (pfile, CPP_DL_ERROR, "expected parameter name");
+             return false;
+           }
+         if (params->base64.count
+             && (seen & ((1 << EMBED_PARAM_LIMIT)
+                         | (1 << EMBED_PARAM_GNU_OFFSET))) != 0)
+           {
+             ret = false;
+             if (!params->has_embed)
+               cpp_error_with_line (pfile, CPP_DL_ERROR,
+                                    params->base64.base_run.base->src_loc, 0,
+                                    "'gnu::base64' parameter conflicts with "
+                                    "'limit' or 'gnu::offset' parameters");
+           }
+         else if (params->base64.count == 0
+                  && CPP_OPTION (pfile, preprocessed))
+           {
+             ret = false;
+             if (!params->has_embed)
+               cpp_error_with_line (pfile, CPP_DL_ERROR, params->loc, 0,
+                                    "'gnu::base64' parameter required in "
+                                    "preprocessed source");
+           }
+         return ret;
        }
       param_name = NODE_NAME (token->val.node.spelling);
       param_name_len = NODE_LEN (token->val.node.spelling);
@@ -1188,6 +1235,53 @@ _cpp_parse_embed_params (cpp_reader *pfi
            }
          token = _cpp_get_token_no_padding (pfile);
        }
+      else if (param_kind == EMBED_PARAM_GNU_BASE64)
+       {
+         token = _cpp_get_token_no_padding (pfile);
+         while (token->type == CPP_OTHER
+                && CPP_OPTION (pfile, preprocessed)
+                && !CPP_OPTION (pfile, directives_only)
+                && token->val.str.len == 1
+                && token->val.str.text[0] == '\\')
+           {
+             /* Allow backslash newline inside of gnu::base64 argument
+                for -fpreprocessed, so that it doesn't have to be
+                megabytes long line.  */
+             pfile->state.in_directive = 0;
+             token = _cpp_get_token_no_padding (pfile);
+             pfile->state.in_directive = 3;
+           }
+         if (token->type == CPP_STRING)
+           {
+             do
+               {
+                 save_token_for_embed (&params->base64, token);
+                 token = _cpp_get_token_no_padding (pfile);
+                 while (token->type == CPP_OTHER
+                        && CPP_OPTION (pfile, preprocessed)
+                        && !CPP_OPTION (pfile, directives_only)
+                        && token->val.str.len == 1
+                        && token->val.str.text[0] == '\\')
+                   {
+                     pfile->state.in_directive = 0;
+                     token = _cpp_get_token_no_padding (pfile);
+                     pfile->state.in_directive = 3;
+                   }
+               }
+             while (token->type == CPP_STRING);
+             if (token->type != CPP_CLOSE_PAREN)
+               cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+                                    "expected ')'");
+           }
+         else
+           {
+             cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
+                                  "expected character string literal");
+             if (token->type != CPP_CLOSE_PAREN)
+               token = _cpp_get_token_no_padding (pfile);
+           }
+         token = _cpp_get_token_no_padding (pfile);
+       }
       else if (token->type == CPP_OPEN_PAREN)
        {
          cpp_embed_params_tokens *save = NULL;
@@ -1268,26 +1362,10 @@ do_embed (cpp_reader *pfile)
   if (ok)
     _cpp_stack_embed (pfile, fname, angle_brackets, &params);
 
-  for (int i = 0; i < 3; ++i)
-    {
-      cpp_embed_params_tokens *p;
-      if (i == 0)
-       p = &params.prefix;
-      else if (i == 1)
-       p = &params.suffix;
-      else
-       p = &params.if_empty;
-      if (p->count == 0)
-       continue;
-      tokenrun *n;
-      for (tokenrun *t = &p->base_run; t; t = n)
-       {
-         n = t->next;
-         XDELETEVEC (t->base);
-         if (t != &p->base_run)
-           XDELETE (t);
-       }
-    }
+  _cpp_free_embed_params_tokens (&params.prefix);
+  _cpp_free_embed_params_tokens (&params.suffix);
+  _cpp_free_embed_params_tokens (&params.if_empty);
+  _cpp_free_embed_params_tokens (&params.base64);
 
  done:
   XDELETEVEC (fname);
--- libcpp/files.cc.jj  2024-08-15 11:26:00.727026251 +0200
+++ libcpp/files.cc     2024-08-15 11:35:50.561664852 +0200
@@ -1221,6 +1221,320 @@ cpp_probe_header_unit (cpp_reader *pfile
   return nullptr;
 }
 
+/* Helper function for _cpp_stack_embed.  Finish #embed/__has_embed processing
+   after a file is found and data loaded into buffer.  */
+
+static int
+finish_embed (cpp_reader *pfile, _cpp_file *file,
+             struct cpp_embed_params *params)
+{
+  const uchar *buffer = file->buffer;
+  size_t limit = file->limit;
+  if (params->offset - file->offset > limit)
+    limit = 0;
+  else
+    {
+      buffer += params->offset - file->offset;
+      limit -= params->offset - file->offset;
+    }
+  if (params->limit < limit)
+    limit = params->limit;
+
+  /* For sizes larger than say 64 bytes, this is just a temporary
+     solution, we should emit a single new token which the FEs will
+     handle as an optimization.  */
+  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
+  if (limit > max / 2
+      || (limit
+         ? (params->prefix.count > max
+            || params->suffix.count > max
+            || (limit * 2 - 1 + params->prefix.count
+                + params->suffix.count > max))
+         : params->if_empty.count > max))
+    {
+      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                   "%s is too large", file->path);
+      return 0;
+    }
+
+  size_t len = 0;
+  for (size_t i = 0; i < limit; ++i)
+    {
+      if (buffer[i] < 10)
+       len += 2;
+      else if (buffer[i] < 100)
+       len += 3;
+#if UCHAR_MAX == 255
+      else
+       len += 4;
+#else
+      else if (buffer[i] < 1000)
+       len += 4;
+      else
+       {
+         char buf[64];
+         len += sprintf (buf, "%d", buffer[i]) + 1;
+       }
+#endif
+      if (len > INTTYPE_MAXIMUM (ssize_t))
+       {
+         cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                       "%s is too large", file->path);
+         return 0;
+       }
+    }
+  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
+  _cpp_buff *tok_buff = NULL;
+  cpp_token *toks = NULL, *tok = &pfile->directive_result;
+  size_t count = 0;
+  if (limit)
+    count = (params->prefix.count + limit * 2 - 1
+            + params->suffix.count) - 1;
+  else if (params->if_empty.count)
+    count = params->if_empty.count - 1;
+  if (count)
+    {
+      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
+      toks = (cpp_token *) tok_buff->base;
+    }
+  cpp_embed_params_tokens *prefix
+    = limit ? &params->prefix : &params->if_empty;
+  if (prefix->count)
+    {
+      *tok = *prefix->base_run.base;
+      tok = toks;
+      tokenrun *cur_run = &prefix->base_run;
+      while (cur_run)
+       {
+         size_t cnt = (cur_run->next ? cur_run->limit
+                       : prefix->cur_token) - cur_run->base;
+         cpp_token *t = cur_run->base;
+         if (cur_run == &prefix->base_run)
+           {
+             t++;
+             cnt--;
+           }
+         memcpy (tok, t, cnt * sizeof (cpp_token));
+         tok += cnt;
+         cur_run = cur_run->next;
+       }
+    }
+  for (size_t i = 0; i < limit; ++i)
+    {
+      tok->src_loc = params->loc;
+      tok->type = CPP_NUMBER;
+      tok->flags = NO_EXPAND;
+      if (i == 0)
+       tok->flags |= PREV_WHITE;
+      tok->val.str.text = s;
+      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
+      s += tok->val.str.len + 1;
+      if (tok == &pfile->directive_result)
+       tok = toks;
+      else
+       tok++;
+      if (i < limit - 1)
+       {
+         tok->src_loc = params->loc;
+         tok->type = CPP_COMMA;
+         tok->flags = NO_EXPAND;
+         tok++;
+       }
+    }
+  if (limit && params->suffix.count)
+    {
+      tokenrun *cur_run = &params->suffix.base_run;
+      cpp_token *orig_tok = tok;
+      while (cur_run)
+       {
+         size_t cnt = (cur_run->next ? cur_run->limit
+                       : params->suffix.cur_token) - cur_run->base;
+         cpp_token *t = cur_run->base;
+         memcpy (tok, t, cnt * sizeof (cpp_token));
+         tok += cnt;
+         cur_run = cur_run->next;
+       }
+      orig_tok->flags |= PREV_WHITE;
+    }
+  pfile->directive_result.flags |= PREV_WHITE;
+  if (count)
+    {
+      _cpp_push_token_context (pfile, NULL, toks, count);
+      pfile->context->buff = tok_buff;
+    }
+  return limit ? 1 : 2;
+}
+
+/* Helper function for initialization of base64_dec table.
+   Can't rely on ASCII compatibility, so check each letter
+   separately.  */
+
+constexpr signed char
+base64_dec_fn (unsigned char c)
+{
+  return (c == 'A' ? 0 : c == 'B' ? 1 : c == 'C' ? 2 : c == 'D' ? 3
+         : c == 'E' ? 4 : c == 'F' ? 5 : c == 'G' ? 6 : c == 'H' ? 7
+         : c == 'I' ? 8 : c == 'J' ? 9 : c == 'K' ? 10 : c == 'L' ? 11
+         : c == 'M' ? 12 : c == 'N' ? 13 : c == 'O' ? 14 : c == 'P' ? 15
+         : c == 'Q' ? 16 : c == 'R' ? 17 : c == 'S' ? 18 : c == 'T' ? 19
+         : c == 'U' ? 20 : c == 'V' ? 21 : c == 'W' ? 22 : c == 'X' ? 23
+         : c == 'Y' ? 24 : c == 'Z' ? 25
+         : c == 'a' ? 26 : c == 'b' ? 27 : c == 'c' ? 28 : c == 'd' ? 29
+         : c == 'e' ? 30 : c == 'f' ? 31 : c == 'g' ? 32 : c == 'h' ? 33
+         : c == 'i' ? 34 : c == 'j' ? 35 : c == 'k' ? 36 : c == 'l' ? 37
+         : c == 'm' ? 38 : c == 'n' ? 39 : c == 'o' ? 40 : c == 'p' ? 41
+         : c == 'q' ? 42 : c == 'r' ? 43 : c == 's' ? 44 : c == 't' ? 45
+         : c == 'u' ? 46 : c == 'v' ? 47 : c == 'w' ? 48 : c == 'x' ? 49
+         : c == 'y' ? 50 : c == 'z' ? 51
+         : c == '0' ? 52 : c == '1' ? 53 : c == '2' ? 54 : c == '3' ? 55
+         : c == '4' ? 56 : c == '5' ? 57 : c == '6' ? 58 : c == '7' ? 59
+         : c == '8' ? 60 : c == '9' ? 61 : c == '+' ? 62 : c == '/' ? 63
+         : -1);
+}
+
+/* base64 decoding table.  */
+
+static constexpr signed char base64_dec[] = {
+#define B64D0(x) base64_dec_fn (x)
+#define B64D1(x) B64D0 (x), B64D0 (x + 1), B64D0 (x + 2), B64D0 (x + 3)
+#define B64D2(x) B64D1 (x), B64D1 (x + 4), B64D1 (x + 8), B64D1 (x + 12)
+#define B64D3(x) B64D2 (x), B64D2 (x + 16), B64D2 (x + 32), B64D2 (x + 48)
+  B64D3 (0), B64D3 (64), B64D3 (128), B64D3 (192)
+};
+
+/* Helper function for _cpp_stack_embed.  Handle #embed/__has_embed with
+   gnu::base64 parameter.  */
+
+static int
+finish_base64_embed (cpp_reader *pfile, const char *fname, bool angle,
+                    struct cpp_embed_params *params)
+{
+  size_t len, end, i, j, base64_len = 0, cnt;
+  uchar *buf = NULL, *q, pbuf[4], qbuf[3];
+  const uchar *base64_str;
+  if (angle || strcmp (fname, "."))
+    {
+      if (!params->has_embed)
+       cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                     "'gnu::base64' parameter can be only used with \".\"");
+      return 0;
+    }
+  tokenrun *cur_run = &params->base64.base_run;
+  cpp_token *tend, *tok;
+  while (cur_run)
+    {
+      tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
+      for (tok = cur_run->base; tok < tend; ++tok)
+       {
+         if (tok->val.str.len < 2
+             || tok->val.str.text[0] != '"'
+             || tok->val.str.text[tok->val.str.len - 1] != '"')
+           {
+           fail:
+             cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
+                           "'gnu::base64' argument not valid base64 "
+                           "encoded string");
+             free (buf);
+             return 0;
+           }
+         if (tok->val.str.len - 2 > (~(size_t) 0) - base64_len)
+           goto fail;
+         base64_len += tok->val.str.len - 2;
+       }
+      cur_run = cur_run->next;
+    }
+  if ((base64_len & 3) != 0)
+    goto fail;
+  len = base64_len / 4 * 3;
+  end = len;
+
+  if (params->has_embed)
+    q = qbuf;
+  else
+    {
+      buf = XNEWVEC (uchar, len ? len : 1);
+      q = buf;
+    }
+  cur_run = &params->base64.base_run;
+  tend = cur_run->next ? cur_run->limit : params->base64.cur_token;
+  tok = cur_run->base;
+  base64_str = tok->val.str.text + 1;
+  cnt = tok->val.str.len - 2;
+  ++tok;
+  for (i = 0; i < end; i += 3)
+    {
+      for (j = 0; j < 4; ++j)
+       {
+         while (cnt == 0)
+           {
+             if (tok == tend)
+               {
+                 cur_run = cur_run->next;
+                 tend = (cur_run->next ? cur_run->limit
+                         : params->base64.cur_token);
+                 tok = cur_run->base;
+               }
+             base64_str = tok->val.str.text + 1;
+             cnt = tok->val.str.len - 2;
+             ++tok;
+           }
+         pbuf[j] = *base64_str;
+         base64_str++;
+         --cnt;
+       }
+      if (pbuf[3] == '=' && i + 3 >= end)
+       {
+         end = len - 3;
+         --len;
+         if (pbuf[2] == '=')
+           --len;
+         break;
+       }
+      int a = base64_dec[pbuf[0]];
+      int b = base64_dec[pbuf[1]];
+      int c = base64_dec[pbuf[2]];
+      int d = base64_dec[pbuf[3]];
+      if (a == -1 || b == -1 || c == -1 || d == -1)
+       goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      q[1] = (b << 4) | (c >> 2);
+      q[2] = (c << 6) | d;
+      if (!params->has_embed)
+       q += 3;
+    }
+  if (len != end)
+    {
+      int a = base64_dec[pbuf[0]];
+      int b = base64_dec[pbuf[1]];
+      if (a == -1 || b == -1)
+       goto fail;
+      q[0] = (a << 2) | (b >> 4);
+      if (len - end == 2)
+       {
+         int c = base64_dec[pbuf[2]];
+         if (c == -1)
+           goto fail;
+         q[1] = (b << 4) | (c >> 2);
+         if ((c & 3) != 0)
+           goto fail;
+       }
+      else if ((b & 15) != 0)
+       goto fail;
+    }
+  if (params->has_embed)
+    return len ? 1 : 2;
+  _cpp_file *file = make_cpp_file (NULL, "");
+  file->embed = 1;
+  file->next_file = pfile->all_files;
+  pfile->all_files = file;
+  params->limit = -1;
+  params->offset = 0;
+  file->limit = len;
+  file->buffer = buf;
+  file->path = xstrdup ("<base64>");
+  return finish_embed (pfile, file, params);
+}
+
 /* Try to load FNAME with #embed/__has_embed parameters PARAMS.
    If !PARAMS->has_embed, return new token in pfile->directive_result
    (first token) and rest in a pushed non-macro context.
@@ -1231,6 +1545,8 @@ int
 _cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle,
                  struct cpp_embed_params *params)
 {
+  if (params->base64.count)
+    return finish_base64_embed (pfile, fname, angle, params);
   cpp_dir *dir = search_path_head (pfile, fname, angle, IT_EMBED,
                                   params->has_embed);
   if (!dir)
@@ -1450,141 +1766,7 @@ _cpp_stack_embed (cpp_reader *pfile, con
       return limit && params->limit ? 1 : 2;
     }
 
-  const uchar *buffer = file->buffer;
-  size_t limit = file->limit;
-  if (params->offset - file->offset > limit)
-    limit = 0;
-  else
-    {
-      buffer += params->offset - file->offset;
-      limit -= params->offset - file->offset;
-    }
-  if (params->limit < limit)
-    limit = params->limit;
-
-  /* For sizes larger than say 64 bytes, this is just a temporary
-     solution, we should emit a single new token which the FEs will
-     handle as an optimization.  */
-  size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token);
-  if (limit > max / 2
-      || (limit
-         ? (params->prefix.count > max
-            || params->suffix.count > max
-            || (limit * 2 + params->prefix.count
-                + params->suffix.count > max))
-         : params->if_empty.count > max))
-    {
-      cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-                   "%s is too large", file->path);
-      return 0;
-    }
-
-  size_t len = 0;
-  for (size_t i = 0; i < limit; ++i)
-    {
-      if (buffer[i] < 10)
-       len += 2;
-      else if (buffer[i] < 100)
-       len += 3;
-#if UCHAR_MAX == 255
-      else
-       len += 4;
-#else
-      else if (buffer[i] < 1000)
-       len += 4;
-      else
-       {
-         char buf[64];
-         len += sprintf (buf, "%d", buffer[i]) + 1;
-       }
-#endif
-      if (len > INTTYPE_MAXIMUM (ssize_t))
-       {
-         cpp_error_at (pfile, CPP_DL_ERROR, params->loc,
-                       "%s is too large", file->path);
-         return 0;
-       }
-    }
-  uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL;
-  _cpp_buff *tok_buff = NULL;
-  cpp_token *toks = NULL, *tok = &pfile->directive_result;
-  size_t count = 0;
-  if (limit)
-    count = (params->prefix.count + limit * 2 - 1
-            + params->suffix.count) - 1;
-  else if (params->if_empty.count)
-    count = params->if_empty.count - 1;
-  if (count)
-    {
-      tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token));
-      toks = (cpp_token *) tok_buff->base;
-    }
-  cpp_embed_params_tokens *prefix
-    = limit ? &params->prefix : &params->if_empty;
-  if (prefix->count)
-    {
-      *tok = *prefix->base_run.base;
-      tok = toks;
-      tokenrun *cur_run = &prefix->base_run;
-      while (cur_run)
-       {
-         size_t cnt = (cur_run->next ? cur_run->limit
-                       : prefix->cur_token) - cur_run->base;
-         cpp_token *t = cur_run->base;
-         if (cur_run == &prefix->base_run)
-           {
-             t++;
-             cnt--;
-           }
-         memcpy (tok, t, cnt * sizeof (cpp_token));
-         tok += cnt;
-         cur_run = cur_run->next;
-       }
-    }
-  for (size_t i = 0; i < limit; ++i)
-    {
-      tok->src_loc = params->loc;
-      tok->type = CPP_NUMBER;
-      tok->flags = NO_EXPAND;
-      if (i == 0)
-       tok->flags |= PREV_WHITE;
-      tok->val.str.text = s;
-      tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]);
-      s += tok->val.str.len + 1;
-      if (tok == &pfile->directive_result)
-       tok = toks;
-      else
-       tok++;
-      if (i < limit - 1)
-       {
-         tok->src_loc = params->loc;
-         tok->type = CPP_COMMA;
-         tok->flags = NO_EXPAND;
-         tok++;
-       }
-    }
-  if (limit && params->suffix.count)
-    {
-      tokenrun *cur_run = &params->suffix.base_run;
-      cpp_token *orig_tok = tok;
-      while (cur_run)
-       {
-         size_t cnt = (cur_run->next ? cur_run->limit
-                       : params->suffix.cur_token) - cur_run->base;
-         cpp_token *t = cur_run->base;
-         memcpy (tok, t, cnt * sizeof (cpp_token));
-         tok += cnt;
-         cur_run = cur_run->next;
-       }
-      orig_tok->flags |= PREV_WHITE;
-    }
-  pfile->directive_result.flags |= PREV_WHITE;
-  if (count)
-    {
-      _cpp_push_token_context (pfile, NULL, toks, count);
-      pfile->context->buff = tok_buff;
-    }
-  return limit ? 1 : 2;
+  return finish_embed (pfile, file, params);
 }
 
 /* Retrofit the just-entered main file asif it was an include.  This
--- libcpp/macro.cc.jj  2024-08-15 10:29:44.532063800 +0200
+++ libcpp/macro.cc     2024-08-15 11:35:50.562664840 +0200
@@ -505,6 +505,8 @@ builtin_has_embed (cpp_reader *pfile)
       if (ok && !pfile->state.skip_eval)
        result = _cpp_stack_embed (pfile, fname, bracket, &params);
 
+      _cpp_free_embed_params_tokens (&params.base64);
+
       XDELETEVEC (fname);
     }
   else if (paren)
--- gcc/doc/cpp.texi.jj 2024-08-15 11:26:00.728026239 +0200
+++ gcc/doc/cpp.texi    2024-08-15 11:35:50.562664840 +0200
@@ -3967,7 +3967,8 @@ with currently supported standard parame
 @code{suffix} and @code{if_empty}, or implementation defined parameters
 specified by a unique vendor prefix followed by @code{::} followed by
 name of the parameter.  GCC uses the @code{gnu} prefix for vendor
-parameters and currently supports the @code{gnu::offset} parameter.
+parameters and currently supports the @code{gnu::offset} and
+@code{gnu::base64} parameters.
 
 The @code{limit} parameter argument is a constant expression which
 specifies the maximum number of bytes included by the directive,
@@ -3981,6 +3982,17 @@ The @code{gnu::offset} parameter argumen
 which specifies how many bytes to skip from the start of the resource.
 @code{limit} is then counted from that position.
 
+The @code{gnu::base64} parameter argument is a possibly concatenated
+character string literal with base64 encoded data.  See
+@uref{https://datatracker.ietf.org/doc/html/rfc4648#section-4}.  There
+should be no newlines in the string literal and, because this parameter
+is meant mainly for use by the preprocessor itself, there is no support
+for any escape sequences in the string literal argument.  If @code{gnu::base64}
+parameter is specified, the @code{limit} and @code{gnu::offset} parameters
+should not be specified and the filename should always be @code{"."}.
+Instead of reading a file the directive will decode the base64 encoded
+data and use that as the data to include.
+
 The @code{#embed} directive is not supported in the Traditional Mode
 (@pxref{Traditional Mode}).
 
--- gcc/testsuite/c-c++-common/cpp/embed-17.c.jj        2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-17.c   2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,116 @@
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#if __has_embed ("." gnu::base64 ("")) != __STDC_EMBED_EMPTY__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." prefix(-) suffix (-) if_empty (-) __gnu__::__base64__ 
("SA==")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGU=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::__base64__ ("SGVs")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." __gnu__::base64 ("SGVsbG8=")) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+/* M. Tulli Ciceronis De Finibus Bonorum et Malorum.  Liber Primus.  */
+/* echo "Tm9u....bnQu" | fmt -s -w 76 | base64 -d to decode.  */
+#define BONORUM_ET_MALORUM \
+"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOtc3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9zdGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVpYsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2MgZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjDqW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVlIG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDDqXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOpbnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFtIG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3IsIHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGksIGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBuZWdlbnQu"
+#if __has_embed ("." gnu::base64 (BONORUM_ET_MALORUM)) != __STDC_EMBED_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("foo" gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<foo> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed (<.> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") limit(5)) != __STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#if __has_embed ("." gnu::base64 ("SGU=") gnu::offset(2)) != 
__STDC_EMBED_NOT_FOUND__
+#error "__has_embed fail"
+#endif
+
+#embed "." gnu::base64 ("") if_empty (int a = 42;) prefix(+ +) suffix (+ +)
+#embed "." __gnu__::__base64__ ("SA==") prefix (int b = ) suffix (;) if_empty 
(+ +)
+const unsigned char c[] = {
+  #embed "." gnu::base64("SGU=")
+};
+const unsigned char d[] = {
+  #embed "." gnu::base64 ("SGVs")
+};
+const unsigned char e[] = {
+  #embed "." gnu::base64 ("SGVsbG8=")
+};
+const unsigned char f[] = {
+#ifdef __cplusplus
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix (' ', )
+#else
+  #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix ([1] = ) suffix(, [0] = ' 
')
+#endif
+};
+#if __has_embed ("." 
gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg"
 \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" 
\
+"YSBhbGlxdWEuCg==")) == __STDC_EMBED_FOUND__
+const unsigned char g[] = {
+#embed "." gnu::base64("" \
+"T" "G9" "yZW" \
+"0gaX" \
+"BzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg" \
+"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" 
\
+"YSBhbGlxdWEuCg==")
+};
+#endif
+
+#ifdef __cplusplus
+#define C "C"
+#else
+#define C
+#endif
+extern C void abort (void);
+extern C int memcmp (const void *, const void *, __SIZE_TYPE__);
+
+int
+main ()
+{
+  if (a != 42 || b != 'H')
+    abort ();
+  if (sizeof (c) != 2 || c[0] != 'H' || c[1] != 'e')
+    abort ();
+  if (sizeof (d) != 3 || d[0] != 'H' || d[1] != 'e' || d[2] != 'l')
+    abort ();
+  if (sizeof (e) != 5 || memcmp (e, "Hello", 5))
+    abort ();
+  if (sizeof (f) != 1 + 747 || memcmp (f, " Non eram néscius, Brute",
+                                      sizeof (" Non eram néscius, Brute") - 1))
+    abort ();
+  const char ge[]
+    = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod 
tempor incididunt ut labore et dolore magna aliqua.";
+  if (sizeof (g) != sizeof (ge)
+      || memcmp (g, ge, sizeof (ge) - 1)
+      || g[sizeof (ge) - 1] != '\n')
+    abort ();
+}
--- gcc/testsuite/c-c++-common/cpp/embed-18.c.jj        2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-18.c   2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,33 @@
+/* { dg-do preprocess } */
+/* { dg-options "" } */
+
+#embed "." gnu::base64("") __gnu__::__base64__("") /* { dg-error "duplicate embed parameter 'gnu::base64'" } */
+#embed __FILE__ gnu::base64 prefix() suffix() /* { dg-error "expected '\\\('" } */
+#embed __FILE__ gnu::base64(1) prefix() suffix() /* { dg-error "expected character string literal" } */
+#embed __FILE__ gnu::base64() prefix() suffix() /* { dg-error "expected character string literal" } */
+#embed "." prefix() suffix() gnu::base64("" /* { dg-error "expected '\\\)'" } */
+#embed "." gnu::base64("a") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("----") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("a===") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#embed "embed-18.c" gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed <embed-18.c> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed <.> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */
+#embed "." gnu::base64("SA==") limit(3) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */
+#embed "." gnu::base64("SA==") gnu::offset(1) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */
+#if 1 + __has_embed ("." gnu::base64("") __gnu__::__base64__("")) /* { dg-error "duplicate embed parameter 'gnu::base64'" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__ prefix() suffix()) /* { dg-error "expected '\\\('" } */
+#endif
+#if 1 + __has_embed (__FILE__ __gnu__::__base64__(1) prefix() suffix()) /* { dg-error "expected character string literal" } */
+#endif
+#if 1 + __has_embed (__FILE__ gnu::base64() prefix() suffix()) /* { dg-error "expected character string literal" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("----")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("a===")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
+#if 1 + __has_embed ("." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */
+#endif
--- gcc/testsuite/c-c++-common/cpp/embed-19.c.jj        2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/c-c++-common/cpp/embed-19.c   2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,5 @@
+/* { dg-do run } */
+/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir -save-temps -fdirectives-only" } */
+/* { dg-additional-options "-std=gnu99" { target c } } */
+
+#include "embed-1.c"
--- gcc/testsuite/gcc.dg/cpp/embed-6.c.jj       2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/gcc.dg/cpp/embed-6.c  2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-fpreprocessed" } */
+
+const unsigned char c[] = {
+#embed "embed-6.c" limit (64)  /* { dg-error "'gnu::base64' parameter required in preprocessed source" } */
+};
--- gcc/testsuite/gcc.dg/cpp/embed-7.c.jj       2024-08-15 11:35:50.563664827 +0200
+++ gcc/testsuite/gcc.dg/cpp/embed-7.c  2024-08-15 11:35:50.563664827 +0200
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-fpreprocessed -fdirectives-only" } */
+
+const unsigned char c[] = {	/* With -fdirectives-only a plain #embed is accepted: no error expected.  */
+#embed "embed-7.c" limit (64)
+};


        Jakub

Reply via email to