Hi! Here is a new version of the gnu::base64 parameter support; the only change is that it now uses the EMBED_PARAMS registry of parameters.
2024-08-15 Jakub Jelinek <ja...@redhat.com> libcpp/ * internal.h (struct cpp_embed_params): Add base64 member. (_cpp_free_embed_params_tokens): Declare. * directives.cc (DIRECTIVE_TABLE): Add IN_I flag to T_EMBED. (save_token_for_embed, _cpp_free_embed_params_tokens): New functions. (EMBED_PARAMS): Add gnu::base64 entry. (_cpp_parse_embed_params): Parse gnu::base64 parameter. If -fpreprocessed without -fdirectives-only, require #embed to have gnu::base64 parameter. Diagnose conflict between gnu::base64 and limit or gnu::offset parameters. (do_embed): Use _cpp_free_embed_params_tokens. * files.cc (finish_embed, base64_dec_fn): New functions. (base64_dec): New array. (B64D0, B64D1, B64D2, B64D3): Define. (finish_base64_embed): New function. (_cpp_stack_embed): Use finish_embed. Handle params->base64 using finish_base64_embed. * macro.cc (builtin_has_embed): Call _cpp_free_embed_params_tokens. gcc/ * doc/cpp.texi (Binary Resource Inclusion): Document gnu::base64 parameter. gcc/testsuite/ * c-c++-common/cpp/embed-17.c: New test. * c-c++-common/cpp/embed-18.c: New test. * c-c++-common/cpp/embed-19.c: New test. * gcc.dg/cpp/embed-6.c: New test. * gcc.dg/cpp/embed-7.c: New test. --- libcpp/internal.h.jj 2024-08-15 11:26:00.726026264 +0200 +++ libcpp/internal.h 2024-08-15 11:35:50.559664877 +0200 @@ -631,7 +631,7 @@ struct cpp_embed_params location_t loc; bool has_embed; cpp_num_part limit, offset; - cpp_embed_params_tokens prefix, suffix, if_empty; + cpp_embed_params_tokens prefix, suffix, if_empty, base64; }; /* Character classes. Based on the more primitive macros in safe-ctype.h. 
@@ -805,6 +805,7 @@ extern void _cpp_restore_pragma_names (c extern int _cpp_do__Pragma (cpp_reader *, location_t); extern void _cpp_init_directives (cpp_reader *); extern void _cpp_init_internal_pragmas (cpp_reader *); +extern void _cpp_free_embed_params_tokens (cpp_embed_params_tokens *); extern bool _cpp_parse_embed_params (cpp_reader *, struct cpp_embed_params *); extern void _cpp_do_file_change (cpp_reader *, enum lc_reason, const char *, linenum_type, unsigned int); --- libcpp/directives.cc.jj 2024-08-15 11:39:49.476685559 +0200 +++ libcpp/directives.cc 2024-08-15 11:49:30.107446324 +0200 @@ -159,7 +159,7 @@ static void cpp_pop_definition (cpp_read D(error, T_ERROR, STDC89, 0) \ D(pragma, T_PRAGMA, STDC89, IN_I) \ D(warning, T_WARNING, STDC23, 0) \ - D(embed, T_EMBED, STDC23, INCL | EXPAND) \ + D(embed, T_EMBED, STDC23, IN_I | INCL | EXPAND) \ D(include_next, T_INCLUDE_NEXT, EXTENSION, INCL | EXPAND) \ D(ident, T_IDENT, EXTENSION, IN_I) \ D(import, T_IMPORT, EXTENSION, INCL | EXPAND) /* ObjC */ \ @@ -932,6 +932,50 @@ do_include_next (cpp_reader *pfile) do_include_common (pfile, type); } +/* Helper function for skip_balanced_token_seq and _cpp_parse_embed_params. + Save one token *TOKEN into *SAVE. */ + +static void +save_token_for_embed (cpp_embed_params_tokens *save, const cpp_token *token) +{ + if (save->count == 0) + { + _cpp_init_tokenrun (&save->base_run, 4); + save->cur_run = &save->base_run; + save->cur_token = save->base_run.base; + } + else if (save->cur_token == save->cur_run->limit) + { + save->cur_run->next = XNEW (tokenrun); + save->cur_run->next->prev = save->cur_run; + _cpp_init_tokenrun (save->cur_run->next, 4); + save->cur_run = save->cur_run->next; + save->cur_token = save->cur_run->base; + } + *save->cur_token = *token; + save->cur_token->flags |= NO_EXPAND; + save->cur_token++; + save->count++; +} + +/* Free memory associated with saved tokens in *SAVE. 
*/ + +void +_cpp_free_embed_params_tokens (cpp_embed_params_tokens *save) +{ + if (save->count == 0) + return; + tokenrun *n; + for (tokenrun *t = &save->base_run; t; t = n) + { + n = t->next; + XDELETEVEC (t->base); + if (t != &save->base_run) + XDELETE (t); + } + save->count = 0; +} + /* Skip over balanced preprocessing tokens until END is found. If SAVE is non-NULL, remember the parsed tokens in it. NESTED is false in the outermost invocation of the function and true @@ -961,26 +1005,7 @@ skip_balanced_token_seq (cpp_reader *pfi if (save && (token->type != CPP_PADDING || save->count) && (token->type != end || nested)) - { - if (save->count == 0) - { - _cpp_init_tokenrun (&save->base_run, 4); - save->cur_run = &save->base_run; - save->cur_token = save->base_run.base; - } - else if (save->cur_token == save->cur_run->limit) - { - save->cur_run->next = XNEW (tokenrun); - save->cur_run->next->prev = save->cur_run; - _cpp_init_tokenrun (save->cur_run->next, 4); - save->cur_run = save->cur_run->next; - save->cur_token = save->cur_run->base; - } - *save->cur_token = *token; - save->cur_token->flags |= NO_EXPAND; - save->cur_token++; - save->count++; - } + save_token_for_embed (save, token); if (token->type == end) return; switch (token->type) @@ -1015,6 +1040,7 @@ skip_balanced_token_seq (cpp_reader *pfi EMBED_PARAM (PREFIX, "prefix") \ EMBED_PARAM (SUFFIX, "suffix") \ EMBED_PARAM (IF_EMPTY, "if_empty") \ + EMBED_PARAM (GNU_BASE64, "base64") \ EMBED_PARAM (GNU_OFFSET, "offset") enum embed_param_kind { @@ -1058,12 +1084,33 @@ _cpp_parse_embed_params (cpp_reader *pfi cpp_error (pfile, CPP_DL_ERROR, "expected ')'"); return false; } - return ret; } - else if (token->type == CPP_CLOSE_PAREN && params->has_embed) - return ret; - cpp_error (pfile, CPP_DL_ERROR, "expected parameter name"); - return false; + else if (token->type != CPP_CLOSE_PAREN || !params->has_embed) + { + cpp_error (pfile, CPP_DL_ERROR, "expected parameter name"); + return false; + } + if 
(params->base64.count + && (seen & ((1 << EMBED_PARAM_LIMIT) + | (1 << EMBED_PARAM_GNU_OFFSET))) != 0) + { + ret = false; + if (!params->has_embed) + cpp_error_with_line (pfile, CPP_DL_ERROR, + params->base64.base_run.base->src_loc, 0, + "'gnu::base64' parameter conflicts with " + "'limit' or 'gnu::offset' parameters"); + } + else if (params->base64.count == 0 + && CPP_OPTION (pfile, preprocessed)) + { + ret = false; + if (!params->has_embed) + cpp_error_with_line (pfile, CPP_DL_ERROR, params->loc, 0, + "'gnu::base64' parameter required in " + "preprocessed source"); + } + return ret; } param_name = NODE_NAME (token->val.node.spelling); param_name_len = NODE_LEN (token->val.node.spelling); @@ -1188,6 +1235,53 @@ _cpp_parse_embed_params (cpp_reader *pfi } token = _cpp_get_token_no_padding (pfile); } + else if (param_kind == EMBED_PARAM_GNU_BASE64) + { + token = _cpp_get_token_no_padding (pfile); + while (token->type == CPP_OTHER + && CPP_OPTION (pfile, preprocessed) + && !CPP_OPTION (pfile, directives_only) + && token->val.str.len == 1 + && token->val.str.text[0] == '\\') + { + /* Allow backslash newline inside of gnu::base64 argument + for -fpreprocessed, so that it doesn't have to be + megabytes long line. 
*/ + pfile->state.in_directive = 0; + token = _cpp_get_token_no_padding (pfile); + pfile->state.in_directive = 3; + } + if (token->type == CPP_STRING) + { + do + { + save_token_for_embed (&params->base64, token); + token = _cpp_get_token_no_padding (pfile); + while (token->type == CPP_OTHER + && CPP_OPTION (pfile, preprocessed) + && !CPP_OPTION (pfile, directives_only) + && token->val.str.len == 1 + && token->val.str.text[0] == '\\') + { + pfile->state.in_directive = 0; + token = _cpp_get_token_no_padding (pfile); + pfile->state.in_directive = 3; + } + } + while (token->type == CPP_STRING); + if (token->type != CPP_CLOSE_PAREN) + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, + "expected ')'"); + } + else + { + cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0, + "expected character string literal"); + if (token->type != CPP_CLOSE_PAREN) + token = _cpp_get_token_no_padding (pfile); + } + token = _cpp_get_token_no_padding (pfile); + } else if (token->type == CPP_OPEN_PAREN) { cpp_embed_params_tokens *save = NULL; @@ -1268,26 +1362,10 @@ do_embed (cpp_reader *pfile) if (ok) _cpp_stack_embed (pfile, fname, angle_brackets, &params); - for (int i = 0; i < 3; ++i) - { - cpp_embed_params_tokens *p; - if (i == 0) - p = &params.prefix; - else if (i == 1) - p = &params.suffix; - else - p = &params.if_empty; - if (p->count == 0) - continue; - tokenrun *n; - for (tokenrun *t = &p->base_run; t; t = n) - { - n = t->next; - XDELETEVEC (t->base); - if (t != &p->base_run) - XDELETE (t); - } - } + _cpp_free_embed_params_tokens (&params.prefix); + _cpp_free_embed_params_tokens (&params.suffix); + _cpp_free_embed_params_tokens (&params.if_empty); + _cpp_free_embed_params_tokens (&params.base64); done: XDELETEVEC (fname); --- libcpp/files.cc.jj 2024-08-15 11:26:00.727026251 +0200 +++ libcpp/files.cc 2024-08-15 11:35:50.561664852 +0200 @@ -1221,6 +1221,320 @@ cpp_probe_header_unit (cpp_reader *pfile return nullptr; } +/* Helper function for _cpp_stack_embed.
Finish #embed/__has_embed processing + after a file is found and data loaded into buffer. */ + +static int +finish_embed (cpp_reader *pfile, _cpp_file *file, + struct cpp_embed_params *params) +{ + const uchar *buffer = file->buffer; + size_t limit = file->limit; + if (params->offset - file->offset > limit) + limit = 0; + else + { + buffer += params->offset - file->offset; + limit -= params->offset - file->offset; + } + if (params->limit < limit) + limit = params->limit; + + /* For sizes larger than say 64 bytes, this is just a temporary + solution, we should emit a single new token which the FEs will + handle as an optimization. */ + size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token); + if (limit > max / 2 + || (limit + ? (params->prefix.count > max + || params->suffix.count > max + || (limit * 2 - 1 + params->prefix.count + + params->suffix.count > max)) + : params->if_empty.count > max)) + { + cpp_error_at (pfile, CPP_DL_ERROR, params->loc, + "%s is too large", file->path); + return 0; + } + + size_t len = 0; + for (size_t i = 0; i < limit; ++i) + { + if (buffer[i] < 10) + len += 2; + else if (buffer[i] < 100) + len += 3; +#if UCHAR_MAX == 255 + else + len += 4; +#else + else if (buffer[i] < 1000) + len += 4; + else + { + char buf[64]; + len += sprintf (buf, "%d", buffer[i]) + 1; + } +#endif + if (len > INTTYPE_MAXIMUM (ssize_t)) + { + cpp_error_at (pfile, CPP_DL_ERROR, params->loc, + "%s is too large", file->path); + return 0; + } + } + uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL; + _cpp_buff *tok_buff = NULL; + cpp_token *toks = NULL, *tok = &pfile->directive_result; + size_t count = 0; + if (limit) + count = (params->prefix.count + limit * 2 - 1 + + params->suffix.count) - 1; + else if (params->if_empty.count) + count = params->if_empty.count - 1; + if (count) + { + tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token)); + toks = (cpp_token *) tok_buff->base; + } + cpp_embed_params_tokens *prefix + = limit ? 
&params->prefix : &params->if_empty; + if (prefix->count) + { + *tok = *prefix->base_run.base; + tok = toks; + tokenrun *cur_run = &prefix->base_run; + while (cur_run) + { + size_t cnt = (cur_run->next ? cur_run->limit + : prefix->cur_token) - cur_run->base; + cpp_token *t = cur_run->base; + if (cur_run == &prefix->base_run) + { + t++; + cnt--; + } + memcpy (tok, t, cnt * sizeof (cpp_token)); + tok += cnt; + cur_run = cur_run->next; + } + } + for (size_t i = 0; i < limit; ++i) + { + tok->src_loc = params->loc; + tok->type = CPP_NUMBER; + tok->flags = NO_EXPAND; + if (i == 0) + tok->flags |= PREV_WHITE; + tok->val.str.text = s; + tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]); + s += tok->val.str.len + 1; + if (tok == &pfile->directive_result) + tok = toks; + else + tok++; + if (i < limit - 1) + { + tok->src_loc = params->loc; + tok->type = CPP_COMMA; + tok->flags = NO_EXPAND; + tok++; + } + } + if (limit && params->suffix.count) + { + tokenrun *cur_run = &params->suffix.base_run; + cpp_token *orig_tok = tok; + while (cur_run) + { + size_t cnt = (cur_run->next ? cur_run->limit + : params->suffix.cur_token) - cur_run->base; + cpp_token *t = cur_run->base; + memcpy (tok, t, cnt * sizeof (cpp_token)); + tok += cnt; + cur_run = cur_run->next; + } + orig_tok->flags |= PREV_WHITE; + } + pfile->directive_result.flags |= PREV_WHITE; + if (count) + { + _cpp_push_token_context (pfile, NULL, toks, count); + pfile->context->buff = tok_buff; + } + return limit ? 1 : 2; +} + +/* Helper function for initialization of base64_dec table. + Can't rely on ASCII compatibility, so check each letter + separately. */ + +constexpr signed char +base64_dec_fn (unsigned char c) +{ + return (c == 'A' ? 0 : c == 'B' ? 1 : c == 'C' ? 2 : c == 'D' ? 3 + : c == 'E' ? 4 : c == 'F' ? 5 : c == 'G' ? 6 : c == 'H' ? 7 + : c == 'I' ? 8 : c == 'J' ? 9 : c == 'K' ? 10 : c == 'L' ? 11 + : c == 'M' ? 12 : c == 'N' ? 13 : c == 'O' ? 14 : c == 'P' ? 15 + : c == 'Q' ? 16 : c == 'R' ? 17 : c == 'S' ?
18 : c == 'T' ? 19 + : c == 'U' ? 20 : c == 'V' ? 21 : c == 'W' ? 22 : c == 'X' ? 23 + : c == 'Y' ? 24 : c == 'Z' ? 25 + : c == 'a' ? 26 : c == 'b' ? 27 : c == 'c' ? 28 : c == 'd' ? 29 + : c == 'e' ? 30 : c == 'f' ? 31 : c == 'g' ? 32 : c == 'h' ? 33 + : c == 'i' ? 34 : c == 'j' ? 35 : c == 'k' ? 36 : c == 'l' ? 37 + : c == 'm' ? 38 : c == 'n' ? 39 : c == 'o' ? 40 : c == 'p' ? 41 + : c == 'q' ? 42 : c == 'r' ? 43 : c == 's' ? 44 : c == 't' ? 45 + : c == 'u' ? 46 : c == 'v' ? 47 : c == 'w' ? 48 : c == 'x' ? 49 + : c == 'y' ? 50 : c == 'z' ? 51 + : c == '0' ? 52 : c == '1' ? 53 : c == '2' ? 54 : c == '3' ? 55 + : c == '4' ? 56 : c == '5' ? 57 : c == '6' ? 58 : c == '7' ? 59 + : c == '8' ? 60 : c == '9' ? 61 : c == '+' ? 62 : c == '/' ? 63 + : -1); +} + +/* base64 decoding table. */ + +static constexpr signed char base64_dec[] = { +#define B64D0(x) base64_dec_fn (x) +#define B64D1(x) B64D0 (x), B64D0 (x + 1), B64D0 (x + 2), B64D0 (x + 3) +#define B64D2(x) B64D1 (x), B64D1 (x + 4), B64D1 (x + 8), B64D1 (x + 12) +#define B64D3(x) B64D2 (x), B64D2 (x + 16), B64D2 (x + 32), B64D2 (x + 48) + B64D3 (0), B64D3 (64), B64D3 (128), B64D3 (192) +}; + +/* Helper function for _cpp_stack_embed. Handle #embed/__has_embed with + gnu::base64 parameter. */ + +static int +finish_base64_embed (cpp_reader *pfile, const char *fname, bool angle, + struct cpp_embed_params *params) +{ + size_t len, end, i, j, base64_len = 0, cnt; + uchar *buf = NULL, *q, pbuf[4], qbuf[3]; + const uchar *base64_str; + if (angle || strcmp (fname, ".")) + { + if (!params->has_embed) + cpp_error_at (pfile, CPP_DL_ERROR, params->loc, + "'gnu::base64' parameter can be only used with \".\""); + return 0; + } + tokenrun *cur_run = &params->base64.base_run; + cpp_token *tend, *tok; + while (cur_run) + { + tend = cur_run->next ?
cur_run->limit : params->base64.cur_token; + for (tok = cur_run->base; tok < tend; ++tok) + { + if (tok->val.str.len < 2 + || tok->val.str.text[0] != '"' + || tok->val.str.text[tok->val.str.len - 1] != '"') + { + fail: + cpp_error_at (pfile, CPP_DL_ERROR, params->loc, + "'gnu::base64' argument not valid base64 " + "encoded string"); + free (buf); + return 0; + } + if (tok->val.str.len - 2 > (~(size_t) 0) - base64_len) + goto fail; + base64_len += tok->val.str.len - 2; + } + cur_run = cur_run->next; + } + if ((base64_len & 3) != 0) + goto fail; + len = base64_len / 4 * 3; + end = len; + + if (params->has_embed) + q = qbuf; + else + { + buf = XNEWVEC (uchar, len ? len : 1); + q = buf; + } + cur_run = &params->base64.base_run; + tend = cur_run->next ? cur_run->limit : params->base64.cur_token; + tok = cur_run->base; + base64_str = tok->val.str.text + 1; + cnt = tok->val.str.len - 2; + ++tok; + for (i = 0; i < end; i += 3) + { + for (j = 0; j < 4; ++j) + { + while (cnt == 0) + { + if (tok == tend) + { + cur_run = cur_run->next; + tend = (cur_run->next ?
cur_run->limit + : params->base64.cur_token); + tok = cur_run->base; + } + base64_str = tok->val.str.text + 1; + cnt = tok->val.str.len - 2; + ++tok; + } + pbuf[j] = *base64_str; + base64_str++; + --cnt; + } + if (pbuf[3] == '=' && i + 3 >= end) + { + end = len - 3; + --len; + if (pbuf[2] == '=') + --len; + break; + } + int a = base64_dec[pbuf[0]]; + int b = base64_dec[pbuf[1]]; + int c = base64_dec[pbuf[2]]; + int d = base64_dec[pbuf[3]]; + if (a == -1 || b == -1 || c == -1 || d == -1) + goto fail; + q[0] = (a << 2) | (b >> 4); + q[1] = (b << 4) | (c >> 2); + q[2] = (c << 6) | d; + if (!params->has_embed) + q += 3; + } + if (len != end) + { + int a = base64_dec[pbuf[0]]; + int b = base64_dec[pbuf[1]]; + if (a == -1 || b == -1) + goto fail; + q[0] = (a << 2) | (b >> 4); + if (len - end == 2) + { + int c = base64_dec[pbuf[2]]; + if (c == -1) + goto fail; + q[1] = (b << 4) | (c >> 2); + if ((c & 3) != 0) + goto fail; + } + else if ((b & 15) != 0) + goto fail; + } + if (params->has_embed) + return len ? 1 : 2; + _cpp_file *file = make_cpp_file (NULL, ""); + file->embed = 1; + file->next_file = pfile->all_files; + pfile->all_files = file; + params->limit = -1; + params->offset = 0; + file->limit = len; + file->buffer = buf; + file->path = xstrdup ("<base64>"); + return finish_embed (pfile, file, params); +} + /* Try to load FNAME with #embed/__has_embed parameters PARAMS. If !PARAMS->has_embed, return new token in pfile->directive_result (first token) and rest in a pushed non-macro context. @@ -1231,6 +1545,8 @@ int _cpp_stack_embed (cpp_reader *pfile, const char *fname, bool angle, struct cpp_embed_params *params) { + if (params->base64.count) + return finish_base64_embed (pfile, fname, angle, params); cpp_dir *dir = search_path_head (pfile, fname, angle, IT_EMBED, params->has_embed); if (!dir) @@ -1450,141 +1766,7 @@ _cpp_stack_embed (cpp_reader *pfile, con return limit && params->limit ? 
1 : 2; } - const uchar *buffer = file->buffer; - size_t limit = file->limit; - if (params->offset - file->offset > limit) - limit = 0; - else - { - buffer += params->offset - file->offset; - limit -= params->offset - file->offset; - } - if (params->limit < limit) - limit = params->limit; - - /* For sizes larger than say 64 bytes, this is just a temporary - solution, we should emit a single new token which the FEs will - handle as an optimization. */ - size_t max = INTTYPE_MAXIMUM (size_t) / sizeof (cpp_token); - if (limit > max / 2 - || (limit - ? (params->prefix.count > max - || params->suffix.count > max - || (limit * 2 + params->prefix.count - + params->suffix.count > max)) - : params->if_empty.count > max)) - { - cpp_error_at (pfile, CPP_DL_ERROR, params->loc, - "%s is too large", file->path); - return 0; - } - - size_t len = 0; - for (size_t i = 0; i < limit; ++i) - { - if (buffer[i] < 10) - len += 2; - else if (buffer[i] < 100) - len += 3; -#if UCHAR_MAX == 255 - else - len += 4; -#else - else if (buffer[i] < 1000) - len += 4; - else - { - char buf[64]; - len += sprintf (buf, "%d", buffer[i]) + 1; - } -#endif - if (len > INTTYPE_MAXIMUM (ssize_t)) - { - cpp_error_at (pfile, CPP_DL_ERROR, params->loc, - "%s is too large", file->path); - return 0; - } - } - uchar *s = len ? _cpp_unaligned_alloc (pfile, len) : NULL; - _cpp_buff *tok_buff = NULL; - cpp_token *toks = NULL, *tok = &pfile->directive_result; - size_t count = 0; - if (limit) - count = (params->prefix.count + limit * 2 - 1 - + params->suffix.count) - 1; - else if (params->if_empty.count) - count = params->if_empty.count - 1; - if (count) - { - tok_buff = _cpp_get_buff (pfile, count * sizeof (cpp_token)); - toks = (cpp_token *) tok_buff->base; - } - cpp_embed_params_tokens *prefix - = limit ? &params->prefix : &params->if_empty; - if (prefix->count) - { - *tok = *prefix->base_run.base; - tok = toks; - tokenrun *cur_run = &prefix->base_run; - while (cur_run) - { - size_t cnt = (cur_run->next ?
cur_run->limit - : prefix->cur_token) - cur_run->base; - cpp_token *t = cur_run->base; - if (cur_run == &prefix->base_run) - { - t++; - cnt--; - } - memcpy (tok, t, cnt * sizeof (cpp_token)); - tok += cnt; - cur_run = cur_run->next; - } - } - for (size_t i = 0; i < limit; ++i) - { - tok->src_loc = params->loc; - tok->type = CPP_NUMBER; - tok->flags = NO_EXPAND; - if (i == 0) - tok->flags |= PREV_WHITE; - tok->val.str.text = s; - tok->val.str.len = sprintf ((char *) s, "%d", buffer[i]); - s += tok->val.str.len + 1; - if (tok == &pfile->directive_result) - tok = toks; - else - tok++; - if (i < limit - 1) - { - tok->src_loc = params->loc; - tok->type = CPP_COMMA; - tok->flags = NO_EXPAND; - tok++; - } - } - if (limit && params->suffix.count) - { - tokenrun *cur_run = &params->suffix.base_run; - cpp_token *orig_tok = tok; - while (cur_run) - { - size_t cnt = (cur_run->next ? cur_run->limit - : params->suffix.cur_token) - cur_run->base; - cpp_token *t = cur_run->base; - memcpy (tok, t, cnt * sizeof (cpp_token)); - tok += cnt; - cur_run = cur_run->next; - } - orig_tok->flags |= PREV_WHITE; - } - pfile->directive_result.flags |= PREV_WHITE; - if (count) - { - _cpp_push_token_context (pfile, NULL, toks, count); - pfile->context->buff = tok_buff; - } - return limit ? 1 : 2; + return finish_embed (pfile, file, params); } /* Retrofit the just-entered main file asif it was an include.
This --- libcpp/macro.cc.jj 2024-08-15 10:29:44.532063800 +0200 +++ libcpp/macro.cc 2024-08-15 11:35:50.562664840 +0200 @@ -505,6 +505,8 @@ builtin_has_embed (cpp_reader *pfile) if (ok && !pfile->state.skip_eval) result = _cpp_stack_embed (pfile, fname, bracket, &params); + _cpp_free_embed_params_tokens (&params.base64); + XDELETEVEC (fname); } else if (paren) --- gcc/doc/cpp.texi.jj 2024-08-15 11:26:00.728026239 +0200 +++ gcc/doc/cpp.texi 2024-08-15 11:35:50.562664840 +0200 @@ -3967,7 +3967,8 @@ with currently supported standard parame @code{suffix} and @code{if_empty}, or implementation defined parameters specified by a unique vendor prefix followed by @code{::} followed by name of the parameter. GCC uses the @code{gnu} prefix for vendor -parameters and currently supports the @code{gnu::offset} parameter. +parameters and currently supports the @code{gnu::offset} and +@code{gnu::base64} parameters. The @code{limit} parameter argument is a constant expression which specifies the maximum number of bytes included by the directive, @@ -3981,6 +3982,17 @@ The @code{gnu::offset} parameter argumen which specifies how many bytes to skip from the start of the resource. @code{limit} is then counted from that position. +The @code{gnu::base64} parameter argument is a possibly concatenated +character string literal with base64 encoded data. See +@uref{https://datatracker.ietf.org/doc/html/rfc4648#section-4}. There +should be no newlines in the string literal and because this parameter +is meant mainly for use by the preprocessor itself, there is no support +for any escape sequences in the string literal argument. If @code{gnu::base64} +parameter is specified, the @code{limit} and @code{gnu::offset} parameters +should not be specified and the filename should always be @code{"."}. +Instead of reading a file the directive will decode the base64 encoded +data and use that as the data to include.
+ The @code{#embed} directive is not supported in the Traditional Mode (@pxref{Traditional Mode}). --- gcc/testsuite/c-c++-common/cpp/embed-17.c.jj 2024-08-15 11:35:50.563664827 +0200 +++ gcc/testsuite/c-c++-common/cpp/embed-17.c 2024-08-15 11:35:50.563664827 +0200 @@ -0,0 +1,116 @@ +/* { dg-do run } */ +/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir" } */ +/* { dg-additional-options "-std=gnu99" { target c } } */ + +#if __has_embed ("." gnu::base64 ("")) != __STDC_EMBED_EMPTY__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." gnu::base64 ("SA==")) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." prefix(-) suffix (-) if_empty (-) __gnu__::__base64__ ("SA==")) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." gnu::__base64__ ("SGU=")) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." gnu::__base64__ ("SGVs")) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." __gnu__::base64 ("SGVsbG8=")) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +/* M. Tulli Ciceronis De Finibus Bonorum et Malorum. Liber Primus. */ +/* echo "Tm9u....bnQu" | fmt -s -w 76 | base64 -d to decode. 
*/ +#define BONORUM_ET_MALORUM \ +"Tm9uIGVyYW0gbsOpc2NpdXMsIEJydXRlLCBjdW0sIHF1w6Ygc3VtbWlzIGluZ8OpbmlpcyBleHF1aXNpdMOhcXVlIGRvY3Ryw61uYSBwaGlsw7Nzb3BoaSBHcsOmY28gc2VybcOzbmUgdHJhY3RhdsOtc3NlbnQsIGVhIExhdMOtbmlzIGzDrXR0ZXJpcyBtYW5kYXLDqW11cywgZm9yZSB1dCBoaWMgbm9zdGVyIGxhYm9yIGluIHbDoXJpYXMgcmVwcmVoZW5zacOzbmVzIGluY8O6cnJlcmV0LiBuYW0gcXVpYsO6c2RhbSwgZXQgaWlzIHF1aWRlbSBub24gw6FkbW9kdW0gaW5kw7NjdGlzLCB0b3R1bSBob2MgZMOtc3BsaWNldCBwaGlsb3NvcGjDoXJpLiBxdWlkYW0gYXV0ZW0gbm9uIHRhbSBpZCByZXByZWjDqW5kdW50LCBzaSByZW3DrXNzaXVzIGFnw6F0dXIsIHNlZCB0YW50dW0gc3TDumRpdW0gdGFtcXVlIG11bHRhbSDDs3BlcmFtIHBvbsOpbmRhbSBpbiBlbyBub24gYXJiaXRyw6FudHVyLiBlcnVudCDDqXRpYW0sIGV0IGlpIHF1aWRlbSBlcnVkw610aSBHcsOmY2lzIGzDrXR0ZXJpcywgY29udGVtbsOpbnRlcyBMYXTDrW5hcywgcXVpIHNlIGRpY2FudCBpbiBHcsOmY2lzIGxlZ8OpbmRpcyDDs3BlcmFtIG1hbGxlIGNvbnPDum1lcmUuIHBvc3Ryw6ltbyDDoWxpcXVvcyBmdXTDunJvcyBzw7pzcGljb3IsIHF1aSBtZSBhZCDDoWxpYXMgbMOtdHRlcmFzIHZvY2VudCwgZ2VudXMgaG9jIHNjcmliw6luZGksIGV0c2kgc2l0IGVsw6lnYW5zLCBwZXJzw7Nuw6YgdGFtZW4gZXQgZGlnbml0w6F0aXMgZXNzZSBuZWdlbnQu" +#if __has_embed ("." gnu::base64 (BONORUM_ET_MALORUM)) != __STDC_EMBED_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("foo" gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed (<foo> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed (<.> gnu::base64 ("SGU=")) != __STDC_EMBED_NOT_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." gnu::base64 ("SGU=") limit(5)) != __STDC_EMBED_NOT_FOUND__ +#error "__has_embed fail" +#endif + +#if __has_embed ("." gnu::base64 ("SGU=") gnu::offset(2)) != __STDC_EMBED_NOT_FOUND__ +#error "__has_embed fail" +#endif + +#embed "." gnu::base64 ("") if_empty (int a = 42;) prefix(+ +) suffix (+ +) +#embed "." __gnu__::__base64__ ("SA==") prefix (int b = ) suffix (;) if_empty (+ +) +const unsigned char c[] = { + #embed "." gnu::base64("SGU=") +}; +const unsigned char d[] = { + #embed "." 
gnu::base64 ("SGVs") +}; +const unsigned char e[] = { + #embed "." gnu::base64 ("SGVsbG8=") +}; +const unsigned char f[] = { +#ifdef __cplusplus + #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix (' ', ) +#else + #embed "." gnu::base64 (BONORUM_ET_MALORUM) prefix ([1] = ) suffix(, [0] = ' ') +#endif +}; +#if __has_embed ("." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg" \ +"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" \ +"YSBhbGlxdWEuCg==")) == __STDC_EMBED_FOUND__ +const unsigned char g[] = { +#embed "." gnu::base64("" \ +"T" "G9" "yZW" \ +"0gaX" \ +"BzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg" \ +"c2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu" \ +"YSBhbGlxdWEuCg==") +}; +#endif + +#ifdef __cplusplus +#define C "C" +#else +#define C +#endif +extern C void abort (void); +extern C int memcmp (const void *, const void *, __SIZE_TYPE__); + +int +main () +{ + if (a != 42 || b != 'H') + abort (); + if (sizeof (c) != 2 || c[0] != 'H' || c[1] != 'e') + abort (); + if (sizeof (d) != 3 || d[0] != 'H' || d[1] != 'e' || d[2] != 'l') + abort (); + if (sizeof (e) != 5 || memcmp (e, "Hello", 5)) + abort (); + if (sizeof (f) != 1 + 747 || memcmp (f, " Non eram néscius, Brute", + sizeof (" Non eram néscius, Brute") - 1)) + abort (); + const char ge[] + = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."; + if (sizeof (g) != sizeof (ge) + || memcmp (g, ge, sizeof (ge) - 1) + || g[sizeof (ge) - 1] != '\n') + abort (); +} --- gcc/testsuite/c-c++-common/cpp/embed-18.c.jj 2024-08-15 11:35:50.563664827 +0200 +++ gcc/testsuite/c-c++-common/cpp/embed-18.c 2024-08-15 11:35:50.563664827 +0200 @@ -0,0 +1,33 @@ +/* { dg-do preprocess } */ +/* { dg-options "" } */ + +#embed "." 
gnu::base64("") __gnu__::__base64__("") /* { dg-error "duplicate embed parameter 'gnu::base64'" } */ +#embed __FILE__ gnu::base64 prefix() suffix() /* { dg-error "expected '\\\('" } */ +#embed __FILE__ gnu::base64(1) prefix() suffix() /* { dg-error "expected character string literal" } */ +#embed __FILE__ gnu::base64() prefix() suffix() /* { dg-error "expected character string literal" } */ +#embed "." prefix() suffix() gnu::base64("" /* { dg-error "expected '\\\)'" } */ +#embed "." gnu::base64("a") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#embed "." gnu::base64("----") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#embed "." gnu::base64("a===") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#embed "." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==") /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#embed "embed-18.c" gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */ +#embed <embed-18.c> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */ +#embed <.> gnu::base64("SA==") /* { dg-error "'gnu::base64' parameter can be only used with \\\".\\\"" } */ +#embed "." gnu::base64("SA==") limit(3) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */ +#embed "." gnu::base64("SA==") gnu::offset(1) /* { dg-error "'gnu::base64' parameter conflicts with 'limit' or 'gnu::offset' parameters" } */ +#if 1 + __has_embed ("." 
gnu::base64("") __gnu__::__base64__("")) /* { dg-error "duplicate embed parameter 'gnu::base64'" } */ +#endif +#if 1 + __has_embed (__FILE__ __gnu__::__base64__ prefix() suffix()) /* { dg-error "expected '\\\('" } */ +#endif +#if 1 + __has_embed (__FILE__ __gnu__::__base64__(1) prefix() suffix()) /* { dg-error "expected character string literal" } */ +#endif +#if 1 + __has_embed (__FILE__ gnu::base64() prefix() suffix()) /* { dg-error "expected character string literal" } */ +#endif +#if 1 + __has_embed ("." gnu::base64("a")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#endif +#if 1 + __has_embed ("." gnu::base64("----")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#endif +#if 1 + __has_embed ("." gnu::base64("a===")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#endif +#if 1 + __has_embed ("." gnu::base64("TG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNlY3RldHVyIGFkaXBpc2NpbmcgZWxpdCwg\nc2VkIGRvIGVpdXNtb2QgdGVtcG9yIGluY2lkaWR1bnQgdXQgbGFib3JlIGV0IGRvbG9yZSBtYWdu\nYSBhbGlxdWEuCg==")) /* { dg-error "'gnu::base64' argument not valid base64 encoded string" } */ +#endif --- gcc/testsuite/c-c++-common/cpp/embed-19.c.jj 2024-08-15 11:35:50.563664827 +0200 +++ gcc/testsuite/c-c++-common/cpp/embed-19.c 2024-08-15 11:35:50.563664827 +0200 @@ -0,0 +1,5 @@ +/* { dg-do run } */ +/* { dg-options "--embed-dir=${srcdir}/c-c++-common/cpp/embed-dir -save-temps -fdirectives-only" } */ +/* { dg-additional-options "-std=gnu99" { target c } } */ + +#include "embed-1.c" --- gcc/testsuite/gcc.dg/cpp/embed-6.c.jj 2024-08-15 11:35:50.563664827 +0200 +++ gcc/testsuite/gcc.dg/cpp/embed-6.c 2024-08-15 11:35:50.563664827 +0200 @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-fpreprocessed" } */ + +const unsigned char c[] = { +#embed "embed-6.c" limit (64) /* { dg-error "'gnu::base64' parameter required in preprocessed source" } */ +}; --- gcc/testsuite/gcc.dg/cpp/embed-7.c.jj 2024-08-15 
11:35:50.563664827 +0200 +++ gcc/testsuite/gcc.dg/cpp/embed-7.c 2024-08-15 11:35:50.563664827 +0200 @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-fpreprocessed -fdirectives-only" } */ + +const unsigned char c[] = { +#embed "embed-7.c" limit (64) /* { dg-error "'gnu::base64' parameter required in preprocessed source" } */ +}; Jakub