Eric Blake <ebb9 <at> byu.net> writes: > | > |> ~ I'm also considering adding a placeholder input block that always results > |> in CHAR_EOF, so that isp is guaranteed to be non-null and we have one less > |> branch in this loop. > | > | Tried this already (see attached patch). It does not provide a noticeable > | speedup: 41.0 sec instead of 41.1, that's less than 0.5%. You get more > | speedup (about 1%) by use of __builtin_expect (also attached). > > __builtin_expect is gcc specific, while a dummy isp block is not. I'll > probably make the change to a dummy block anyway. But adding > __builtin_expect into m4 source code may have other benefits; I'll > consider it.
I'm rebasing the argv_ref branch with the dummy isp block as follows; my testing on cygwin with assertions enabled got the best-of-3 user times on 'autoconf -f' on coreutils from 17.428 to 17.145, or 1.6% improvement (better than your measured 0.5%, but we used different environments in our testing). I've also toyed with the idea of using char*start/char*end management in chains rather than char*start/size_t len. Either way, there are three pieces of information with one calculated from the other two (start, end, len). The difference is that in next_char(), a len-based approach must twiddle two variables (advance start and decrement len), while an end-based approach only twiddles one (advance start, but end remains constant). But that patch would be a bit more invasive to implement, and I'm not convinced it buys time yet, particularly if I get the buf/consume interface working. diff --git a/src/input.c b/src/input.c index 443e053..d9d3551 100644 --- a/src/input.c +++ b/src/input.c @@ -74,7 +74,8 @@ enum input_type { INPUT_STRING, /* String resulting from macro expansion. */ INPUT_FILE, /* File from command line or include. */ - INPUT_CHAIN /* FIFO chain of separate strings, builtins, and $@ refs. */ + INPUT_CHAIN, /* FIFO chain of separate strings, builtins, and $@ refs. */ + INPUT_EOF /* Placeholder at bottom of input stack. */ }; typedef enum input_type input_type; @@ -136,15 +137,19 @@ static struct obstack *current_input; /* Bottom of token_stack, for obstack_free. */ static void *token_bottom; -/* Pointer to top of current_input. */ +/* Pointer to top of current_input, never NULL. */ static input_block *isp; -/* Pointer to top of wrapup_stack. */ +/* Pointer to top of wrapup_stack, never NULL. */ static input_block *wsp; -/* Aux. for handling split push_string (). */ +/* Auxiliary for handling split push_string (), NULL if not pushing + text for rescanning. */ static input_block *next; +/* Marker at the end of the input stack. */ +static input_block input_eof = { NULL, INPUT_EOF, "", 0 }; + /* Flag for next_char () to increment current_line. */ static bool start_of_input_line; @@ -321,7 +326,7 @@ push_string_init (void) { /* Free any memory occupied by completely parsed strings. */ assert (next == NULL); - while (isp && pop_input (false)); + while (pop_input (false)); /* Reserve the next location on the obstack. */ next = (input_block *) obstack_alloc (current_input, sizeof *next); @@ -545,7 +550,7 @@ push_wrapup_init (void) token_chain *chain; assert (obstack_object_size (wrapup_stack) == 0); - if (wsp) + if (wsp != &input_eof) { i = wsp; if (i->type == INPUT_STRING) @@ -745,6 +750,9 @@ pop_input (bool cleanup) output_current_line = -1; break; + case INPUT_EOF: + return false; + default: assert (!"pop_input"); abort (); @@ -771,7 +779,7 @@ pop_wrapup (void) obstack_free (current_input, NULL); free (current_input); - if (wsp == NULL) + if (wsp == &input_eof) { /* End of the program. Free all memory even though we are about to exit, since it makes leak detection easier. */ @@ -790,7 +798,7 @@ pop_wrapup (void) obstack_init (wrapup_stack); isp = wsp; - wsp = NULL; + wsp = &input_eof; input_change = true; return true; @@ -881,9 +889,7 @@ peek_input (bool allow_argv) while (1) { - if (block == NULL) - return CHAR_EOF; - + assert (block); switch (block->type) { case INPUT_STRING: @@ -950,6 +956,9 @@ peek_input (bool allow_argv) } break; + case INPUT_EOF: + return CHAR_EOF; + default: assert (!"peek_input"); abort (); @@ -972,7 +981,7 @@ peek_input (bool allow_argv) `-------------------------------------------------------------------*/ #define next_char(AQ) \ - (isp && isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ + (isp->type == INPUT_STRING && isp->u.u_s.len && !input_change \ ? (isp->u.u_s.len--, to_uchar (*isp->u.u_s.str++)) \ : next_char_1 (AQ)) @@ -984,13 +993,7 @@ next_char_1 (bool allow_quote) while (1) { - if (isp == NULL) - { - current_file = ""; - current_line = 0; - return CHAR_EOF; - } - + assert (isp); if (input_change) { current_file = isp->file; @@ -1085,6 +1088,9 @@ next_char_1 (bool allow_quote) } break; + case INPUT_EOF: + return CHAR_EOF; + default: assert (!"next_char_1"); abort (); @@ -1360,8 +1366,8 @@ input_init (void) obstack_init (&token_stack); token_bottom = obstack_finish (&token_stack); - isp = NULL; - wsp = NULL; + isp = &input_eof; + wsp = &input_eof; next = NULL; start_of_input_line = false; _______________________________________________ M4-discuss mailing list M4-discuss@gnu.org http://lists.gnu.org/mailman/listinfo/m4-discuss