Importation of header-units requires the tokenizer to recognize such
imports during tokenization so that their macros become available. The
C++ parser currently tokenizes (and will continue to tokenize) the
entire file before beginning C++ parsing.
This patch implements an explicit coroutine to manage that recognition.
It is used both for C++ parsing proper and for preprocessing only. When
a module-significant control line is observed, we call into the module
machinery to handle it. Usually, we'll also call again later when
parsing that declaration.
--
Nathan Sidwell
diff --git c/gcc/cp/lex.c w/gcc/cp/lex.c
index 8a69bc4f170..013cbadf625 100644
--- c/gcc/cp/lex.c
+++ w/gcc/cp/lex.c
@@ -32,6 +32,7 @@ along with GCC; see the file COPYING3. If not see
#include "c-family/c-objc.h"
#include "gcc-rich-location.h"
#include "cp-name-hint.h"
+#include "langhooks.h"
static int interface_strcmp (const char *);
static void init_cp_pragma (void);
@@ -378,7 +381,206 @@ interface_strcmp (const char* s)
return 1;
}
-
+/* We've just read a cpp-token, figure out our next state. Hey, this
+ is a hand-coded co-routine!
+
+ A token_coro is fed one token per call to resume (). It waits in
+ the idle state for an RID__EXPORT, RID__IMPORT or RID__MODULE
+ keyword, then follows the tokens of that directive up to its
+ CPP_PRAGMA_EOL, building the named module with get_module (). At
+ the end of the directive the result is handed to
+ preprocess_module (). */
+
+struct token_coro
+{
+ enum state
+ {
+ idle, /* Not in a directive; watching for its first keyword. */
+ module_first, /* Seen import/module; expecting the start of the name. */
+ module_cont, /* Part-way through a name. */
+ module_end, /* Done with the name; skipping to CPP_PRAGMA_EOL. */
+ };
+
+ enum state state : 8; /* Where we are in the current directive. */
+ bool is_import : 1; /* Directive keyword was RID__IMPORT. */
+ bool got_export : 1; /* RID__EXPORT seen ahead of the directive keyword. */
+ bool got_colon : 1; /* A colon has been seen in the current name. */
+ bool want_dot : 1; /* A name component was just consumed, so only a dot
+ or colon can continue the name. */
+
+ location_t token_loc; /* Location of the import/module keyword. */
+ cpp_reader *reader; /* Passed through to preprocess_module (). */
+ module_state *module; /* First non-null preprocess_module () result. */
+ module_state *import; /* Name being built for the current directive. */
+
+ token_coro (cpp_reader *reader) /* Start idle with nothing recorded. */
+ : state (idle), is_import (false),
+ got_export (false), got_colon (false), want_dot (false),
+ token_loc (UNKNOWN_LOCATION),
+ reader (reader), module (NULL), import (NULL)
+ {
+ };
+
+ /* Process the next token. Note we cannot see CPP_EOF inside a
+ pragma -- a CPP_PRAGMA_EOL always happens.
+
+ TYPE is the token's cpp type; KEYWORD is only examined when TYPE is
+ CPP_KEYWORD. VALUE is only used for CPP_NAME and CPP_HEADER_NAME
+ tokens, where it is passed to get_module (). LOC is recorded as
+ the directive's location when it belongs to the import/module
+ keyword. Returns lang_hooks::PT_begin_pragma for the first keyword
+ of a module directive and 0 for every other token. */
+ uintptr_t resume (int type, int keyword, tree value, location_t loc)
+ {
+ unsigned res = 0;
+
+ switch (state)
+ {
+ case idle:
+ if (type == CPP_KEYWORD)
+ switch (keyword)
+ {
+ default:
+ break;
+
+ case RID__EXPORT:
+ got_export = true; /* State stays idle, so the import/module keyword
+ is still recognized on a later call. */
+ res = lang_hooks::PT_begin_pragma;
+ break;
+
+ case RID__IMPORT:
+ is_import = true;
+ /* FALLTHRU */
+ case RID__MODULE:
+ state = module_first;
+ want_dot = false;
+ got_colon = false;
+ token_loc = loc;
+ import = NULL;
+ if (!got_export) /* Else RID__EXPORT already returned PT_begin_pragma. */
+ res = lang_hooks::PT_begin_pragma;
+ break;
+ }
+ break;
+
+ case module_first:
+ if (is_import && type == CPP_HEADER_NAME)
+ {
+ /* A header name. The preprocessor will have already
+ done include searching and canonicalization. */
+ state = module_end; /* Nothing after the header name is examined. */
+ goto header_unit; /* Label is in the CPP_NAME case of module_cont. */
+ }
+
+ if (type == CPP_PADDING || type == CPP_COMMENT)
+ break; /* Insignificant; remain in module_first. */
+
+ state = module_cont;
+ if (type == CPP_COLON && module)
+ {
+ got_colon = true;
+ import = module; /* Leading colon: continue from the recorded module.
+ NOTE(review): presumably names a partition of
+ that module -- confirm against get_module (). */
+ break;
+ }
+ /* FALLTHROUGH */
+
+ case module_cont:
+ switch (type)
+ {
+ case CPP_PADDING:
+ case CPP_COMMENT:
+ break;
+
+ default:
+ /* If we ever need to pay attention to attributes for
+ header modules, more logic will be needed. */
+ state = module_end;
+ break;
+
+ case CPP_COLON:
+ if (got_colon) /* At most one colon per name. */
+ state = module_end;
+ got_colon = true;
+ /* FALLTHROUGH */
+ case CPP_DOT:
+ if (!want_dot) /* Separator with no name component before it. */
+ state = module_end;
+ want_dot = false;
+ break;
+
+ case CPP_PRAGMA_EOL:
+ goto module_end; /* The label inside case module_end, below. */
+
+ case CPP_NAME:
+ if (want_dot)
+ {
+ /* Got name instead of [.:]. */
+ state = module_end;
+ break;
+ }
+ header_unit:
+ import = get_module (value, import, got_colon); /* NOTE(review): presumably
+ extends the name built so far with VALUE -- confirm against
+ get_module (). */
+ want_dot = true;
+ break;
+ }
+ break;
+
+ case module_end:
+ if (type == CPP_PRAGMA_EOL)
+ {
+ module_end:;
+ /* End of the directive, handle the name. Only the first
+ module_state returned by preprocess_module () is kept in MODULE;
+ later directives pass module != NULL. */
+ if (import)
+ if (module_state *m
+ = preprocess_module (import, token_loc, module != NULL,
+ is_import, got_export, reader))
+ if (!module)
+ module = m;
+
+ is_import = got_export = false; /* Per-directive flags; the rest are
+ reset when the next keyword is seen. */
+ state = idle;
+ }
+ break;
+ }
+
+ return res;
+ }
+};
+
+/* Create or destroy the token coroutine for PFILE.  DATA_ is the
+   handle returned by an earlier call, or zero.  A non-zero handle is
+   torn down, after preprocessed_module () has been called, and zero
+   is returned.  A zero handle yields a new coroutine when
+   modules_p () holds, and is returned unchanged otherwise.  */
+
+uintptr_t
+module_token_cdtor (cpp_reader *pfile, uintptr_t data_)
+{
+  token_coro *live = reinterpret_cast <token_coro *> (data_);
+
+  if (live)
+    {
+      /* Teardown.  */
+      preprocessed_module (pfile);
+      delete live;
+      return 0;
+    }
+
+  /* Setup, but only when modules are in use.  */
+  if (!modules_p ())
+    return data_;
+
+  return reinterpret_cast <uintptr_t> (new token_coro (pfile));
+}
+
+/* Feed one already-classified token to the coroutine behind DATA_
+   and return whatever its resume () method returns.  DATA_ is
+   reinterpreted as a token_coro pointer and dereferenced
+   unconditionally, so it must be non-zero.  */
+
+uintptr_t
+module_token_lang (int type, int keyword, tree value, location_t loc,
+                   uintptr_t data_)
+{
+  return reinterpret_cast <token_coro *> (data_)->resume (type, keyword,
+                                                          value, loc);
+}
+
+/* Entry point taking a raw cpp_token.  A null TOK requests coroutine
+   setup or teardown via module_token_cdtor ().  Otherwise TOK is
+   classified -- a CPP_NAME whose identifier is a keyword becomes
+   CPP_KEYWORD with its RID code, a CPP_HEADER_NAME is turned into a
+   string tree -- and the result is passed to module_token_lang ().  */
+
+uintptr_t
+module_token_pre (cpp_reader *pfile, const cpp_token *tok, uintptr_t data_)
+{
+  if (!tok)
+    return module_token_cdtor (pfile, data_);
+
+  int kind = tok->type;
+  int rid = RID_MAX;
+  tree payload = NULL_TREE;
+
+  switch (tok->type)
+    {
+    case CPP_NAME:
+      payload = HT_IDENT_TO_GCC_IDENT (HT_NODE (tok->val.node.node));
+      if (IDENTIFIER_KEYWORD_P (payload))
+        {
+          /* Report keywords as such, with their RID code.  */
+          kind = CPP_KEYWORD;
+          rid = C_RID_CODE (payload);
+        }
+      break;
+
+    case CPP_HEADER_NAME:
+      payload = build_string (tok->val.str.len,
+                              (const char *)tok->val.str.text);
+      break;
+
+    default:
+      break;
+    }
+
+  return module_token_lang (kind, rid, payload, tok->src_loc, data_);
+}
/* Parse a #pragma whose sole argument is a string constant.
If OPT is true, the argument is optional. */
diff --git c/gcc/cp/cp-tree.h w/gcc/cp/cp-tree.h
index fdb8ee57f0b..e8e4d0af2d8 100644
--- c/gcc/cp/cp-tree.h
+++ w/gcc/cp/cp-tree.h
@@ -6742,6 +6940,10 @@ extern void set_identifier_kind (tree, cp_identifier_kind);
extern bool cxx_init (void);
extern void cxx_finish (void);
extern bool in_main_input_context (void);
+extern uintptr_t module_token_pre (cpp_reader *, const cpp_token *, uintptr_t);
+extern uintptr_t module_token_cdtor (cpp_reader *, uintptr_t);
+extern uintptr_t module_token_lang (int type, int keyword, tree value,
+ location_t, uintptr_t);
/* in method.c */
extern void init_method (void);