indent

Roland Illig Tue, 16 May 2023 06:26:32 -0700

Module Name:    src
Committed By:   rillig
Date:           Tue May 16 13:26:26 UTC 2023


Modified Files:
        src/usr.bin/indent: indent.c indent.h io.c lexi.c pr_comment.c

Log Message:
indent: directly access the input buffer

No functional change.


To generate a diff of this commit:
cvs rdiff -u -r1.289 -r1.290 src/usr.bin/indent/indent.c
cvs rdiff -u -r1.145 -r1.146 src/usr.bin/indent/indent.h
cvs rdiff -u -r1.174 -r1.175 src/usr.bin/indent/io.c
cvs rdiff -u -r1.196 -r1.197 src/usr.bin/indent/lexi.c
cvs rdiff -u -r1.143 -r1.144 src/usr.bin/indent/pr_comment.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/indent/indent.c
diff -u src/usr.bin/indent/indent.c:1.289 src/usr.bin/indent/indent.c:1.290
--- src/usr.bin/indent/indent.c:1.289	Tue May 16 11:32:01 2023
+++ src/usr.bin/indent/indent.c	Tue May 16 13:26:26 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.c,v 1.289 2023/05/16 11:32:01 rillig Exp $	*/
+/*	$NetBSD: indent.c,v 1.290 2023/05/16 13:26:26 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: indent.c,v 1.289 2023/05/16 11:32:01 rillig Exp $");
+__RCSID("$NetBSD: indent.c,v 1.290 2023/05/16 13:26:26 rillig Exp $");
 
 #include <sys/param.h>
 #include <err.h>
@@ -291,7 +291,7 @@ main_prepare_parsing(void)
     inp_read_line();
 
     int ind = 0;
-    for (const char *p = inp_p();; p++) {
+    for (const char *p = inp.st;; p++) {
 	if (*p == ' ')
 	    ind++;
 	else if (*p == '\t')
@@ -886,10 +886,10 @@ read_preprocessing_line(void)
 
     buf_add_char(&lab, '#');
 
-    while (ch_isblank(inp_peek()))
-	buf_add_char(&lab, inp_next());
+    while (ch_isblank(inp.st[0]))
+	buf_add_char(&lab, *inp.st++);
 
-    while (inp_peek() != '\n' || (state == COMM && !had_eof)) {
+    while (inp.st[0] != '\n' || (state == COMM && !had_eof)) {
 	buf_add_char(&lab, inp_next());
 	switch (lab.mem[lab.len - 1]) {
 	case '\\':
@@ -897,9 +897,9 @@ read_preprocessing_line(void)
 		buf_add_char(&lab, inp_next());
 	    break;
 	case '/':
-	    if (inp_peek() == '*' && state == PLAIN) {
+	    if (inp.st[0] == '*' && state == PLAIN) {
 		state = COMM;
-		buf_add_char(&lab, inp_next());
+		buf_add_char(&lab, *inp.st++);
 	    }
 	    break;
 	case '"':
@@ -915,9 +915,9 @@ read_preprocessing_line(void)
 		state = CHR;
 	    break;
 	case '*':
-	    if (inp_peek() == '/' && state == COMM) {
+	    if (inp.st[0] == '/' && state == COMM) {
 		state = PLAIN;
-		buf_add_char(&lab, inp_next());
+		buf_add_char(&lab, *inp.st++);
 	    }
 	    break;
 	}

Index: src/usr.bin/indent/indent.h
diff -u src/usr.bin/indent/indent.h:1.145 src/usr.bin/indent/indent.h:1.146
--- src/usr.bin/indent/indent.h:1.145	Tue May 16 11:32:01 2023
+++ src/usr.bin/indent/indent.h	Tue May 16 13:26:26 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: indent.h,v 1.145 2023/05/16 11:32:01 rillig Exp $	*/
+/*	$NetBSD: indent.h,v 1.146 2023/05/16 13:26:26 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
@@ -133,6 +133,15 @@ struct buffer {
 extern FILE *input;
 extern FILE *output;
 
+/*
+ * The current line from the input file, used by the lexer to generate tokens.
+ * To read from the line, start at inp.st and continue up to and including the
+ * next '\n'. To read beyond the '\n', call inp_skip or inp_next, which will
+ * make the next line available, invalidating any pointers into the previous
+ * line.
+ */
+extern struct buffer inp;
+
 extern struct buffer token;	/* the current token to be processed, is
 				 * typically copied to the buffer 'code', or
 				 * in some cases to 'lab'. */
@@ -409,10 +418,6 @@ int compute_code_indent(void);
 int compute_label_indent(void);
 int ind_add(int, const char *, size_t);
 
-const char *inp_p(void);
-const char *inp_line_start(void);
-char inp_peek(void);
-char inp_lookahead(size_t);
 void inp_skip(void);
 char inp_next(void);
 void clear_indent_off_text(void);

Index: src/usr.bin/indent/io.c
diff -u src/usr.bin/indent/io.c:1.174 src/usr.bin/indent/io.c:1.175
--- src/usr.bin/indent/io.c:1.174	Tue May 16 11:32:01 2023
+++ src/usr.bin/indent/io.c	Tue May 16 13:26:26 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: io.c,v 1.174 2023/05/16 11:32:01 rillig Exp $	*/
+/*	$NetBSD: io.c,v 1.175 2023/05/16 13:26:26 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -38,47 +38,19 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: io.c,v 1.174 2023/05/16 11:32:01 rillig Exp $");
+__RCSID("$NetBSD: io.c,v 1.175 2023/05/16 13:26:26 rillig Exp $");
 
 #include <stdio.h>
 #include <string.h>
 
 #include "indent.h"
 
-/*
- * The current line, ready to be split into tokens, terminated with '\n'. The
- * current read position is inp.s, and the invariant inp.s < inp.e holds.
- */
-static struct buffer inp;
+struct buffer inp;
 static struct buffer indent_off_text;
 
 static int paren_indent;
 
 
-const char *
-inp_p(void)
-{
-    return inp.st;
-}
-
-const char *
-inp_line_start(void)
-{
-    return inp.mem;
-}
-
-char
-inp_peek(void)
-{
-    return *inp.st;
-}
-
-char
-inp_lookahead(size_t i)
-{
-    return inp.st[i];
-}
-
 void
 inp_skip(void)
 {
@@ -90,7 +62,7 @@ inp_skip(void)
 char
 inp_next(void)
 {
-    char ch = inp_peek();
+    char ch = inp.st[0];
     inp_skip();
     return ch;
 }

Index: src/usr.bin/indent/lexi.c
diff -u src/usr.bin/indent/lexi.c:1.196 src/usr.bin/indent/lexi.c:1.197
--- src/usr.bin/indent/lexi.c:1.196	Tue May 16 12:46:43 2023
+++ src/usr.bin/indent/lexi.c	Tue May 16 13:26:26 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: lexi.c,v 1.196 2023/05/16 12:46:43 rillig Exp $	*/
+/*	$NetBSD: lexi.c,v 1.197 2023/05/16 13:26:26 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: lexi.c,v 1.196 2023/05/16 12:46:43 rillig Exp $");
+__RCSID("$NetBSD: lexi.c,v 1.197 2023/05/16 13:26:26 rillig Exp $");
 
 #include <stdlib.h>
 #include <string.h>
@@ -178,9 +178,9 @@ static void
 lex_number(void)
 {
     for (unsigned char s = 'A'; s != 'f' && s != 'i' && s != 'u';) {
-	unsigned char ch = (unsigned char)inp_peek();
-	if (ch == '\\' && inp_lookahead(1) == '\n') {
-	    inp_skip();
+	unsigned char ch = (unsigned char)inp.st[0];
+	if (ch == '\\' && inp.st[1] == '\n') {
+	    inp.st++;
 	    inp_skip();
 	    line_no++;
 	    continue;
@@ -218,10 +218,10 @@ static void
 lex_word(void)
 {
     for (;;) {
-	if (is_identifier_part(inp_peek()))
-	    token_add_char(inp_next());
-	else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') {
-	    inp_skip();
+	if (is_identifier_part(inp.st[0]))
+	    token_add_char(*inp.st++);
+	else if (inp.st[0] == '\\' && inp.st[1] == '\n') {
+	    inp.st++;
 	    inp_skip();
 	    line_no++;
 	} else
@@ -233,17 +233,17 @@ static void
 lex_char_or_string(void)
 {
     for (char delim = token.mem[token.len - 1];;) {
-	if (inp_peek() == '\n') {
+	if (inp.st[0] == '\n') {
 	    diag(1, "Unterminated literal");
 	    return;
 	}
 
-	token_add_char(inp_next());
+	token_add_char(*inp.st++);
 	if (token.mem[token.len - 1] == delim)
 	    return;
 
 	if (token.mem[token.len - 1] == '\\') {
-	    if (inp_peek() == '\n')
+	    if (inp.st[0] == '\n')
 		++line_no;
 	    token_add_char(inp_next());
 	}
@@ -260,10 +260,10 @@ probably_typename(void)
 	return false;
     if (ps.in_stmt_or_decl)	/* XXX: this condition looks incorrect */
 	return false;
-    if (inp_peek() == '*' && inp_lookahead(1) != '=')
+    if (inp.st[0] == '*' && inp.st[1] != '=')
 	goto maybe;
     /* XXX: is_identifier_start */
-    if (ch_isalpha(inp_peek()))
+    if (ch_isalpha(inp.st[0]))
 	goto maybe;
     return false;
 maybe:
@@ -316,7 +316,7 @@ static bool
 probably_looking_at_definition(void)
 {
     int paren_level = 0;
-    for (const char *p = inp_p(); *p != '\n'; p++) {
+    for (const char *p = inp.st; *p != '\n'; p++) {
 	if (*p == '(')
 	    paren_level++;
 	if (*p == ')' && --paren_level == 0) {
@@ -353,15 +353,15 @@ probably_looking_at_definition(void)
 static lexer_symbol
 lexi_alnum(void)
 {
-    if (ch_isdigit(inp_peek()) ||
-	    (inp_peek() == '.' && ch_isdigit(inp_lookahead(1)))) {
+    if (ch_isdigit(inp.st[0]) ||
+	    (inp.st[0] == '.' && ch_isdigit(inp.st[1]))) {
 	lex_number();
-    } else if (is_identifier_start(inp_peek())) {
+    } else if (is_identifier_start(inp.st[0])) {
 	lex_word();
 
 	if (token.len == 1 && token.st[0] == 'L' &&
-		(inp_peek() == '"' || inp_peek() == '\'')) {
-	    token_add_char(inp_next());
+		(inp.st[0] == '"' || inp.st[0] == '\'')) {
+	    token_add_char(*inp.st++);
 	    lex_char_or_string();
 	    ps.next_unary = false;
 	    return lsym_word;
@@ -369,8 +369,8 @@ lexi_alnum(void)
     } else
 	return lsym_eof;	/* just as a placeholder */
 
-    while (ch_isblank(inp_peek()))
-	inp_skip();
+    while (ch_isblank(inp.st[0]))
+	inp.st++;
 
     ps.next_unary = ps.prev_token == lsym_tag
 	|| ps.prev_token == lsym_typedef;
@@ -415,7 +415,7 @@ found_typename:
 	}
     }
 
-    if (inp_peek() == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
+    if (inp.st[0] == '(' && ps.tos <= 1 && ps.ind_level == 0 &&
 	    !ps.in_func_def_params && !ps.block_init) {
 
 	if (ps.nparen == 0 && probably_looking_at_definition()) {
@@ -447,14 +447,14 @@ is_asterisk_unary(void)
 static void
 lex_asterisk_unary(void)
 {
-    while (inp_peek() == '*' || ch_isspace(inp_peek())) {
-	if (inp_peek() == '*')
+    while (inp.st[0] == '*' || ch_isspace(inp.st[0])) {
+	if (inp.st[0] == '*')
 	    token_add_char('*');
 	inp_skip();
     }
 
     if (ps.in_decl) {
-	for (const char *tp = inp_p(); *tp != '\n';) {
+	for (const char *tp = inp.st; *tp != '\n';) {
 	    if (ch_isspace(*tp))
 		tp++;
 	    else if (is_identifier_start(*tp)) {
@@ -491,7 +491,7 @@ skip_string(const char **pp, const char 
 static void
 lex_indent_comment(void)
 {
-    const char *p = inp_line_start();
+    const char *p = inp.mem;
 
     skip_blank(&p);
     if (!skip_string(&p, "/*"))
@@ -528,11 +528,11 @@ lexi(void)
     ps.next_col_1 = false;
 
     for (;;) {
-	if (ch_isblank(inp_peek())) {
+	if (ch_isblank(inp.st[0])) {
 	    ps.curr_col_1 = false;
-	    inp_skip();
-	} else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') {
-	    inp_skip();
+	    inp.st++;
+	} else if (inp.st[0] == '\\' && inp.st[1] == '\n') {
+	    inp.st++;
 	    inp_skip();
 	    line_no++;
 	} else
@@ -592,19 +592,19 @@ lexi(void)
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
 	next_unary = true;
 
-	if (inp_peek() == token.mem[token.len - 1]) {	/* '++' or '--' */
-	    token_add_char(inp_next());
+	if (inp.st[0] == token.mem[token.len - 1]) {	/* '++' or '--' */
+	    token_add_char(*inp.st++);
 	    if (ps.prev_token == lsym_word ||
 		    ps.prev_token == lsym_rparen_or_rbracket) {
 		lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op;
 		next_unary = false;
 	    }
 
-	} else if (inp_peek() == '=') {	/* '+=' or '-=' */
-	    token_add_char(inp_next());
+	} else if (inp.st[0] == '=') {	/* '+=' or '-=' */
+	    token_add_char(*inp.st++);
 
-	} else if (inp_peek() == '>') {	/* '->' */
-	    token_add_char(inp_next());
+	} else if (inp.st[0] == '>') {	/* '->' */
+	    token_add_char(*inp.st++);
 	    lsym = lsym_unary_op;
 	    next_unary = false;
 	    ps.want_blank = false;
@@ -614,8 +614,8 @@ lexi(void)
     case '=':
 	if (ps.init_or_struct)
 	    ps.block_init = true;
-	if (inp_peek() == '=')
-	    token_add_char(inp_next());
+	if (inp.st[0] == '=')
+	    token_add_char(*inp.st++);
 	lsym = lsym_binary_op;
 	next_unary = true;
 	break;
@@ -623,10 +623,10 @@ lexi(void)
     case '>':
     case '<':
     case '!':			/* ops like <, <<, <=, !=, etc */
-	if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=')
-	    token_add_char(inp_next());
-	if (inp_peek() == '=')
-	    token_add_char(inp_next());
+	if (inp.st[0] == '>' || inp.st[0] == '<' || inp.st[0] == '=')
+	    token_add_char(*inp.st++);
+	if (inp.st[0] == '=')
+	    token_add_char(*inp.st++);
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
 	next_unary = true;
 	break;
@@ -637,8 +637,8 @@ lexi(void)
 	    lsym = lsym_unary_op;
 	    next_unary = true;
 	} else {
-	    if (inp_peek() == '=')
-		token_add_char(inp_next());
+	    if (inp.st[0] == '=')
+		token_add_char(*inp.st++);
 	    lsym = lsym_binary_op;
 	    next_unary = true;
 	}
@@ -646,20 +646,20 @@ lexi(void)
 
     default:
 	if (token.mem[token.len - 1] == '/'
-		&& (inp_peek() == '*' || inp_peek() == '/')) {
+		&& (inp.st[0] == '*' || inp.st[0] == '/')) {
 	    enum indent_enabled prev = indent_enabled;
 	    lex_indent_comment();
 	    if (prev == indent_on && indent_enabled == indent_off)
 		clear_indent_off_text();
-	    token_add_char(inp_next());
+	    token_add_char(*inp.st++);
 	    lsym = lsym_comment;
 	    next_unary = ps.next_unary;
 	    break;
 	}
 
 	/* handle '||', '&&', etc., and also things as in 'int *****i' */
-	while (token.mem[token.len - 1] == inp_peek() || inp_peek() == '=')
-	    token_add_char(inp_next());
+	while (inp.st[0] == token.mem[token.len - 1] || inp.st[0] == '=')
+	    token_add_char(*inp.st++);
 
 	lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op;
 	next_unary = true;

Index: src/usr.bin/indent/pr_comment.c
diff -u src/usr.bin/indent/pr_comment.c:1.143 src/usr.bin/indent/pr_comment.c:1.144
--- src/usr.bin/indent/pr_comment.c:1.143	Tue May 16 11:32:01 2023
+++ src/usr.bin/indent/pr_comment.c	Tue May 16 13:26:26 2023
@@ -1,4 +1,4 @@
-/*	$NetBSD: pr_comment.c,v 1.143 2023/05/16 11:32:01 rillig Exp $	*/
+/*	$NetBSD: pr_comment.c,v 1.144 2023/05/16 13:26:26 rillig Exp $	*/
 
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
@@ -38,7 +38,7 @@
  */
 
 #include <sys/cdefs.h>
-__RCSID("$NetBSD: pr_comment.c,v 1.143 2023/05/16 11:32:01 rillig Exp $");
+__RCSID("$NetBSD: pr_comment.c,v 1.144 2023/05/16 13:26:26 rillig Exp $");
 
 #include <string.h>
 
@@ -61,7 +61,7 @@ com_add_delim(void)
 static bool
 fits_in_one_line(int com_ind, int max_line_length)
 {
-    for (const char *start = inp_p(), *p = start; *p != '\n'; p++) {
+    for (const char *start = inp.st, *p = start; *p != '\n'; p++) {
 	if (p[0] == '*' && p[1] == '/') {
 	    int len = ind_add(com_ind + 3, start, (size_t)(p - start));
 	    len += p == start || ch_isblank(p[-1]) ? 2 : 3;
@@ -85,9 +85,9 @@ analyze_comment(bool *p_may_wrap, bool *
 	ind = 0;
 
     } else {
-	if (inp_peek() == '-' || inp_peek() == '*' ||
+	if (inp.st[0] == '-' || inp.st[0] == '*' ||
 		token.mem[token.len - 1] == '/' ||
-		(inp_peek() == '\n' && !opt.format_block_comments)) {
+		(inp.st[0] == '\n' && !opt.format_block_comments)) {
 	    may_wrap = false;
 	    delim = false;
 	}
@@ -122,20 +122,20 @@ analyze_comment(bool *p_may_wrap, bool *
 	 * Find out how much indentation there was originally, because that
 	 * much will have to be ignored by output_complete_line.
 	 */
-	size_t len = (size_t)(inp_p() - 2 - inp_line_start());
-	ps.n_comment_delta = -ind_add(0, inp_line_start(), len);
+	size_t len = (size_t)(inp.st - 2 - inp.mem);
+	ps.n_comment_delta = -ind_add(0, inp.mem, len);
     } else {
 	ps.n_comment_delta = 0;
-	if (!(inp_peek() == '\t' && !ch_isblank(inp_lookahead(1))))
-	    while (ch_isblank(inp_peek()))
-		inp_skip();
+	if (!(inp.st[0] == '\t' && !ch_isblank(inp.st[1])))
+	    while (ch_isblank(inp.st[0]))
+		inp.st++;
     }
 
     ps.comment_delta = 0;
     com_add_char('/');
     com_add_char(token.mem[token.len - 1]);	/* either '*' or '/' */
 
-    if (may_wrap && !ch_isblank(inp_peek()))
+    if (may_wrap && !ch_isblank(inp.st[0]))
 	com_add_char(' ');
 
     if (delim && fits_in_one_line(ind, line_length))
@@ -163,7 +163,7 @@ copy_comment_wrap(int line_length, bool 
     ssize_t last_blank = -1;	/* index of the last blank in com.mem */
 
     for (;;) {
-	switch (inp_peek()) {
+	switch (inp.st[0]) {
 	case '\n':
 	    if (had_eof) {
 		diag(1, "Unterminated comment");
@@ -194,21 +194,21 @@ copy_comment_wrap(int line_length, bool 
 	    do {		/* flush any blanks and/or tabs at start of
 				 * next line */
 		inp_skip();
-		if (inp_peek() == '*' && skip_asterisk) {
+		if (inp.st[0] == '*' && skip_asterisk) {
 		    skip_asterisk = false;
-		    inp_skip();
-		    if (inp_peek() == '/')
+		    inp.st++;
+		    if (inp.st[0] == '/')
 			goto end_of_comment;
 		}
-	    } while (ch_isblank(inp_peek()));
+	    } while (ch_isblank(inp.st[0]));
 
 	    break;		/* end of case for newline */
 
 	case '*':
-	    inp_skip();
-	    if (inp_peek() == '/') {
+	    inp.st++;
+	    if (inp.st[0] == '/') {
 	end_of_comment:
-		inp_skip();
+		inp.st++;
 
 		if (delim) {
 		    if (com.len > 3)
@@ -244,7 +244,7 @@ copy_comment_wrap(int line_length, bool 
 		    last_blank = (ssize_t)com.len;
 		com_add_char(ch);
 		now_len++;
-		if (memchr("*\n\r\b\t", inp_peek(), 6) != NULL)
+		if (memchr("*\n\r\b\t", inp.st[0], 6) != NULL)
 		    break;
 		if (now_len >= line_length && last_blank != -1)
 		    break;
@@ -284,7 +284,7 @@ static void
 copy_comment_nowrap(void)
 {
     for (;;) {
-	if (inp_peek() == '\n') {
+	if (inp.st[0] == '\n') {
 	    if (token.mem[token.len - 1] == '/')
 		return;
 
@@ -302,7 +302,7 @@ copy_comment_nowrap(void)
 	    continue;
 	}
 
-	com_add_char(inp_next());
+	com_add_char(*inp.st++);
 	if (com.mem[com.len - 2] == '*' && com.mem[com.len - 1] == '/'
 		&& token.mem[token.len - 1] == '*')
 	    return;

CVS commit: src/usr.bin/indent

Reply via email to