Module Name:    src
Committed By:   rillig
Date:           Tue May 16 13:26:26 UTC 2023

Modified Files:
        src/usr.bin/indent: indent.c indent.h io.c lexi.c pr_comment.c

Log Message:
indent: directly access the input buffer

No functional change.

To generate a diff of this commit:
cvs rdiff -u -r1.289 -r1.290 src/usr.bin/indent/indent.c
cvs rdiff -u -r1.145 -r1.146 src/usr.bin/indent/indent.h
cvs rdiff -u -r1.174 -r1.175 src/usr.bin/indent/io.c
cvs rdiff -u -r1.196 -r1.197 src/usr.bin/indent/lexi.c
cvs rdiff -u -r1.143 -r1.144 src/usr.bin/indent/pr_comment.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/usr.bin/indent/indent.c diff -u src/usr.bin/indent/indent.c:1.289 src/usr.bin/indent/indent.c:1.290 --- src/usr.bin/indent/indent.c:1.289 Tue May 16 11:32:01 2023 +++ src/usr.bin/indent/indent.c Tue May 16 13:26:26 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.c,v 1.289 2023/05/16 11:32:01 rillig Exp $ */ +/* $NetBSD: indent.c,v 1.290 2023/05/16 13:26:26 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: indent.c,v 1.289 2023/05/16 11:32:01 rillig Exp $"); +__RCSID("$NetBSD: indent.c,v 1.290 2023/05/16 13:26:26 rillig Exp $"); #include <sys/param.h> #include <err.h> @@ -291,7 +291,7 @@ main_prepare_parsing(void) inp_read_line(); int ind = 0; - for (const char *p = inp_p();; p++) { + for (const char *p = inp.st;; p++) { if (*p == ' ') ind++; else if (*p == '\t') @@ -886,10 +886,10 @@ read_preprocessing_line(void) buf_add_char(&lab, '#'); - while (ch_isblank(inp_peek())) - buf_add_char(&lab, inp_next()); + while (ch_isblank(inp.st[0])) + buf_add_char(&lab, *inp.st++); - while (inp_peek() != '\n' || (state == COMM && !had_eof)) { + while (inp.st[0] != '\n' || (state == COMM && !had_eof)) { buf_add_char(&lab, inp_next()); switch (lab.mem[lab.len - 1]) { case '\\': @@ -897,9 +897,9 @@ read_preprocessing_line(void) buf_add_char(&lab, inp_next()); break; case '/': - if (inp_peek() == '*' && state == PLAIN) { + if (inp.st[0] == '*' && state == PLAIN) { state = COMM; - buf_add_char(&lab, inp_next()); + buf_add_char(&lab, *inp.st++); } break; case '"': @@ -915,9 +915,9 @@ read_preprocessing_line(void) state = CHR; break; case '*': - if (inp_peek() == '/' && state == COMM) { + if (inp.st[0] == '/' && state == COMM) { state = PLAIN; - buf_add_char(&lab, inp_next()); + buf_add_char(&lab, *inp.st++); } break; } Index: src/usr.bin/indent/indent.h diff -u src/usr.bin/indent/indent.h:1.145 src/usr.bin/indent/indent.h:1.146 --- src/usr.bin/indent/indent.h:1.145 Tue May 16 11:32:01 2023 +++ 
src/usr.bin/indent/indent.h Tue May 16 13:26:26 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.h,v 1.145 2023/05/16 11:32:01 rillig Exp $ */ +/* $NetBSD: indent.h,v 1.146 2023/05/16 13:26:26 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD @@ -133,6 +133,15 @@ struct buffer { extern FILE *input; extern FILE *output; +/* + * The current line from the input file, used by the lexer to generate tokens. + * To read from the line, start at inp.st and continue up to and including the + * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will + * make the next line available, invalidating any pointers into the previous + * line. + */ +extern struct buffer inp; + extern struct buffer token; /* the current token to be processed, is * typically copied to the buffer 'code', or * in some cases to 'lab'. */ @@ -409,10 +418,6 @@ int compute_code_indent(void); int compute_label_indent(void); int ind_add(int, const char *, size_t); -const char *inp_p(void); -const char *inp_line_start(void); -char inp_peek(void); -char inp_lookahead(size_t); void inp_skip(void); char inp_next(void); void clear_indent_off_text(void); Index: src/usr.bin/indent/io.c diff -u src/usr.bin/indent/io.c:1.174 src/usr.bin/indent/io.c:1.175 --- src/usr.bin/indent/io.c:1.174 Tue May 16 11:32:01 2023 +++ src/usr.bin/indent/io.c Tue May 16 13:26:26 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: io.c,v 1.174 2023/05/16 11:32:01 rillig Exp $ */ +/* $NetBSD: io.c,v 1.175 2023/05/16 13:26:26 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,47 +38,19 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: io.c,v 1.174 2023/05/16 11:32:01 rillig Exp $"); +__RCSID("$NetBSD: io.c,v 1.175 2023/05/16 13:26:26 rillig Exp $"); #include <stdio.h> #include <string.h> #include "indent.h" -/* - * The current line, ready to be split into tokens, terminated with '\n'. The - * current read position is inp.s, and the invariant inp.s < inp.e holds. 
- */ -static struct buffer inp; +struct buffer inp; static struct buffer indent_off_text; static int paren_indent; -const char * -inp_p(void) -{ - return inp.st; -} - -const char * -inp_line_start(void) -{ - return inp.mem; -} - -char -inp_peek(void) -{ - return *inp.st; -} - -char -inp_lookahead(size_t i) -{ - return inp.st[i]; -} - void inp_skip(void) { @@ -90,7 +62,7 @@ inp_skip(void) char inp_next(void) { - char ch = inp_peek(); + char ch = inp.st[0]; inp_skip(); return ch; } Index: src/usr.bin/indent/lexi.c diff -u src/usr.bin/indent/lexi.c:1.196 src/usr.bin/indent/lexi.c:1.197 --- src/usr.bin/indent/lexi.c:1.196 Tue May 16 12:46:43 2023 +++ src/usr.bin/indent/lexi.c Tue May 16 13:26:26 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: lexi.c,v 1.196 2023/05/16 12:46:43 rillig Exp $ */ +/* $NetBSD: lexi.c,v 1.197 2023/05/16 13:26:26 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: lexi.c,v 1.196 2023/05/16 12:46:43 rillig Exp $"); +__RCSID("$NetBSD: lexi.c,v 1.197 2023/05/16 13:26:26 rillig Exp $"); #include <stdlib.h> #include <string.h> @@ -178,9 +178,9 @@ static void lex_number(void) { for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) { - unsigned char ch = (unsigned char)inp_peek(); - if (ch == '\\' && inp_lookahead(1) == '\n') { - inp_skip(); + unsigned char ch = (unsigned char)inp.st[0]; + if (ch == '\\' && inp.st[1] == '\n') { + inp.st++; inp_skip(); line_no++; continue; @@ -218,10 +218,10 @@ static void lex_word(void) { for (;;) { - if (is_identifier_part(inp_peek())) - token_add_char(inp_next()); - else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') { - inp_skip(); + if (is_identifier_part(inp.st[0])) + token_add_char(*inp.st++); + else if (inp.st[0] == '\\' && inp.st[1] == '\n') { + inp.st++; inp_skip(); line_no++; } else @@ -233,17 +233,17 @@ static void lex_char_or_string(void) { for (char delim = token.mem[token.len - 1];;) { - if (inp_peek() == '\n') { + if (inp.st[0] == 
'\n') { diag(1, "Unterminated literal"); return; } - token_add_char(inp_next()); + token_add_char(*inp.st++); if (token.mem[token.len - 1] == delim) return; if (token.mem[token.len - 1] == '\\') { - if (inp_peek() == '\n') + if (inp.st[0] == '\n') ++line_no; token_add_char(inp_next()); } @@ -260,10 +260,10 @@ probably_typename(void) return false; if (ps.in_stmt_or_decl) /* XXX: this condition looks incorrect */ return false; - if (inp_peek() == '*' && inp_lookahead(1) != '=') + if (inp.st[0] == '*' && inp.st[1] != '=') goto maybe; /* XXX: is_identifier_start */ - if (ch_isalpha(inp_peek())) + if (ch_isalpha(inp.st[0])) goto maybe; return false; maybe: @@ -316,7 +316,7 @@ static bool probably_looking_at_definition(void) { int paren_level = 0; - for (const char *p = inp_p(); *p != '\n'; p++) { + for (const char *p = inp.st; *p != '\n'; p++) { if (*p == '(') paren_level++; if (*p == ')' && --paren_level == 0) { @@ -353,15 +353,15 @@ probably_looking_at_definition(void) static lexer_symbol lexi_alnum(void) { - if (ch_isdigit(inp_peek()) || - (inp_peek() == '.' && ch_isdigit(inp_lookahead(1)))) { + if (ch_isdigit(inp.st[0]) || + (inp.st[0] == '.' 
&& ch_isdigit(inp.st[1]))) { lex_number(); - } else if (is_identifier_start(inp_peek())) { + } else if (is_identifier_start(inp.st[0])) { lex_word(); if (token.len == 1 && token.st[0] == 'L' && - (inp_peek() == '"' || inp_peek() == '\'')) { - token_add_char(inp_next()); + (inp.st[0] == '"' || inp.st[0] == '\'')) { + token_add_char(*inp.st++); lex_char_or_string(); ps.next_unary = false; return lsym_word; @@ -369,8 +369,8 @@ lexi_alnum(void) } else return lsym_eof; /* just as a placeholder */ - while (ch_isblank(inp_peek())) - inp_skip(); + while (ch_isblank(inp.st[0])) + inp.st++; ps.next_unary = ps.prev_token == lsym_tag || ps.prev_token == lsym_typedef; @@ -415,7 +415,7 @@ found_typename: } } - if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 && + if (inp.st[0] == '(' && ps.tos <= 1 && ps.ind_level == 0 && !ps.in_func_def_params && !ps.block_init) { if (ps.nparen == 0 && probably_looking_at_definition()) { @@ -447,14 +447,14 @@ is_asterisk_unary(void) static void lex_asterisk_unary(void) { - while (inp_peek() == '*' || ch_isspace(inp_peek())) { - if (inp_peek() == '*') + while (inp.st[0] == '*' || ch_isspace(inp.st[0])) { + if (inp.st[0] == '*') token_add_char('*'); inp_skip(); } if (ps.in_decl) { - for (const char *tp = inp_p(); *tp != '\n';) { + for (const char *tp = inp.st; *tp != '\n';) { if (ch_isspace(*tp)) tp++; else if (is_identifier_start(*tp)) { @@ -491,7 +491,7 @@ skip_string(const char **pp, const char static void lex_indent_comment(void) { - const char *p = inp_line_start(); + const char *p = inp.mem; skip_blank(&p); if (!skip_string(&p, "/*")) @@ -528,11 +528,11 @@ lexi(void) ps.next_col_1 = false; for (;;) { - if (ch_isblank(inp_peek())) { + if (ch_isblank(inp.st[0])) { ps.curr_col_1 = false; - inp_skip(); - } else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') { - inp_skip(); + inp.st++; + } else if (inp.st[0] == '\\' && inp.st[1] == '\n') { + inp.st++; inp_skip(); line_no++; } else @@ -592,19 +592,19 @@ lexi(void) lsym = 
ps.next_unary ? lsym_unary_op : lsym_binary_op; next_unary = true; - if (inp_peek() == token.mem[token.len - 1]) { /* '++' or '--' */ - token_add_char(inp_next()); + if (inp.st[0] == token.mem[token.len - 1]) { /* '++' or '--' */ + token_add_char(*inp.st++); if (ps.prev_token == lsym_word || ps.prev_token == lsym_rparen_or_rbracket) { lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op; next_unary = false; } - } else if (inp_peek() == '=') { /* '+=' or '-=' */ - token_add_char(inp_next()); + } else if (inp.st[0] == '=') { /* '+=' or '-=' */ + token_add_char(*inp.st++); - } else if (inp_peek() == '>') { /* '->' */ - token_add_char(inp_next()); + } else if (inp.st[0] == '>') { /* '->' */ + token_add_char(*inp.st++); lsym = lsym_unary_op; next_unary = false; ps.want_blank = false; @@ -614,8 +614,8 @@ lexi(void) case '=': if (ps.init_or_struct) ps.block_init = true; - if (inp_peek() == '=') - token_add_char(inp_next()); + if (inp.st[0] == '=') + token_add_char(*inp.st++); lsym = lsym_binary_op; next_unary = true; break; @@ -623,10 +623,10 @@ lexi(void) case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ - if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=') - token_add_char(inp_next()); - if (inp_peek() == '=') - token_add_char(inp_next()); + if (inp.st[0] == '>' || inp.st[0] == '<' || inp.st[0] == '=') + token_add_char(*inp.st++); + if (inp.st[0] == '=') + token_add_char(*inp.st++); lsym = ps.next_unary ? 
lsym_unary_op : lsym_binary_op; next_unary = true; break; @@ -637,8 +637,8 @@ lexi(void) lsym = lsym_unary_op; next_unary = true; } else { - if (inp_peek() == '=') - token_add_char(inp_next()); + if (inp.st[0] == '=') + token_add_char(*inp.st++); lsym = lsym_binary_op; next_unary = true; } @@ -646,20 +646,20 @@ lexi(void) default: if (token.mem[token.len - 1] == '/' - && (inp_peek() == '*' || inp_peek() == '/')) { + && (inp.st[0] == '*' || inp.st[0] == '/')) { enum indent_enabled prev = indent_enabled; lex_indent_comment(); if (prev == indent_on && indent_enabled == indent_off) clear_indent_off_text(); - token_add_char(inp_next()); + token_add_char(*inp.st++); lsym = lsym_comment; next_unary = ps.next_unary; break; } /* handle '||', '&&', etc., and also things as in 'int *****i' */ - while (token.mem[token.len - 1] == inp_peek() || inp_peek() == '=') - token_add_char(inp_next()); + while (inp.st[0] == token.mem[token.len - 1] || inp.st[0] == '=') + token_add_char(*inp.st++); lsym = ps.next_unary ? 
lsym_unary_op : lsym_binary_op; next_unary = true; Index: src/usr.bin/indent/pr_comment.c diff -u src/usr.bin/indent/pr_comment.c:1.143 src/usr.bin/indent/pr_comment.c:1.144 --- src/usr.bin/indent/pr_comment.c:1.143 Tue May 16 11:32:01 2023 +++ src/usr.bin/indent/pr_comment.c Tue May 16 13:26:26 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: pr_comment.c,v 1.143 2023/05/16 11:32:01 rillig Exp $ */ +/* $NetBSD: pr_comment.c,v 1.144 2023/05/16 13:26:26 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: pr_comment.c,v 1.143 2023/05/16 11:32:01 rillig Exp $"); +__RCSID("$NetBSD: pr_comment.c,v 1.144 2023/05/16 13:26:26 rillig Exp $"); #include <string.h> @@ -61,7 +61,7 @@ com_add_delim(void) static bool fits_in_one_line(int com_ind, int max_line_length) { - for (const char *start = inp_p(), *p = start; *p != '\n'; p++) { + for (const char *start = inp.st, *p = start; *p != '\n'; p++) { if (p[0] == '*' && p[1] == '/') { int len = ind_add(com_ind + 3, start, (size_t)(p - start)); len += p == start || ch_isblank(p[-1]) ? 2 : 3; @@ -85,9 +85,9 @@ analyze_comment(bool *p_may_wrap, bool * ind = 0; } else { - if (inp_peek() == '-' || inp_peek() == '*' || + if (inp.st[0] == '-' || inp.st[0] == '*' || token.mem[token.len - 1] == '/' || - (inp_peek() == '\n' && !opt.format_block_comments)) { + (inp.st[0] == '\n' && !opt.format_block_comments)) { may_wrap = false; delim = false; } @@ -122,20 +122,20 @@ analyze_comment(bool *p_may_wrap, bool * * Find out how much indentation there was originally, because that * much will have to be ignored by output_complete_line. 
*/ - size_t len = (size_t)(inp_p() - 2 - inp_line_start()); - ps.n_comment_delta = -ind_add(0, inp_line_start(), len); + size_t len = (size_t)(inp.st - 2 - inp.mem); + ps.n_comment_delta = -ind_add(0, inp.mem, len); } else { ps.n_comment_delta = 0; - if (!(inp_peek() == '\t' && !ch_isblank(inp_lookahead(1)))) - while (ch_isblank(inp_peek())) - inp_skip(); + if (!(inp.st[0] == '\t' && !ch_isblank(inp.st[1]))) + while (ch_isblank(inp.st[0])) + inp.st++; } ps.comment_delta = 0; com_add_char('/'); com_add_char(token.mem[token.len - 1]); /* either '*' or '/' */ - if (may_wrap && !ch_isblank(inp_peek())) + if (may_wrap && !ch_isblank(inp.st[0])) com_add_char(' '); if (delim && fits_in_one_line(ind, line_length)) @@ -163,7 +163,7 @@ copy_comment_wrap(int line_length, bool ssize_t last_blank = -1; /* index of the last blank in com.mem */ for (;;) { - switch (inp_peek()) { + switch (inp.st[0]) { case '\n': if (had_eof) { diag(1, "Unterminated comment"); @@ -194,21 +194,21 @@ copy_comment_wrap(int line_length, bool do { /* flush any blanks and/or tabs at start of * next line */ inp_skip(); - if (inp_peek() == '*' && skip_asterisk) { + if (inp.st[0] == '*' && skip_asterisk) { skip_asterisk = false; - inp_skip(); - if (inp_peek() == '/') + inp.st++; + if (inp.st[0] == '/') goto end_of_comment; } - } while (ch_isblank(inp_peek())); + } while (ch_isblank(inp.st[0])); break; /* end of case for newline */ case '*': - inp_skip(); - if (inp_peek() == '/') { + inp.st++; + if (inp.st[0] == '/') { end_of_comment: - inp_skip(); + inp.st++; if (delim) { if (com.len > 3) @@ -244,7 +244,7 @@ copy_comment_wrap(int line_length, bool last_blank = (ssize_t)com.len; com_add_char(ch); now_len++; - if (memchr("*\n\r\b\t", inp_peek(), 6) != NULL) + if (memchr("*\n\r\b\t", inp.st[0], 6) != NULL) break; if (now_len >= line_length && last_blank != -1) break; @@ -284,7 +284,7 @@ static void copy_comment_nowrap(void) { for (;;) { - if (inp_peek() == '\n') { + if (inp.st[0] == '\n') { if 
(token.mem[token.len - 1] == '/') return; @@ -302,7 +302,7 @@ copy_comment_nowrap(void) continue; } - com_add_char(inp_next()); + com_add_char(*inp.st++); if (com.mem[com.len - 2] == '*' && com.mem[com.len - 1] == '/' && token.mem[token.len - 1] == '*') return;