Module Name: src Committed By: rillig Date: Mon May 15 07:28:45 UTC 2023
Modified Files: src/tests/usr.bin/indent: lsym_comment.c src/usr.bin/indent: debug.c indent.c indent.h io.c lexi.c parse.c pr_comment.c Log Message: indent: clean up memory and buffer management Remove the need to explicitly initialize the buffers. To avoid subtracting null pointers or comparing them using '<', migrate the buffers from the (start, end) form to the (start, len) form. This form also avoids inconsistencies in whether 'buf.e == buf.s' or 'buf.s == buf.e' is used. Make buffer.st const, to avoid accidental modification of the buffer's content. Replace '*buf.e++ = ch' with buf_add_char, to avoid having to keep track of how much unwritten space is left in the buffer. Remove all safety margins, that is, no more unchecked access to buf.st[-1] or appending using '*buf.e++'. Fix line number counting in lex_word for words that contain line breaks. No other functional change. To generate a diff of this commit: cvs rdiff -u -r1.9 -r1.10 src/tests/usr.bin/indent/lsym_comment.c cvs rdiff -u -r1.4 -r1.5 src/usr.bin/indent/debug.c cvs rdiff -u -r1.267 -r1.268 src/usr.bin/indent/indent.c cvs rdiff -u -r1.132 -r1.133 src/usr.bin/indent/indent.h cvs rdiff -u -r1.165 -r1.166 src/usr.bin/indent/io.c cvs rdiff -u -r1.184 -r1.185 src/usr.bin/indent/lexi.c cvs rdiff -u -r1.55 -r1.56 src/usr.bin/indent/parse.c cvs rdiff -u -r1.139 -r1.140 src/usr.bin/indent/pr_comment.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/tests/usr.bin/indent/lsym_comment.c diff -u src/tests/usr.bin/indent/lsym_comment.c:1.9 src/tests/usr.bin/indent/lsym_comment.c:1.10 --- src/tests/usr.bin/indent/lsym_comment.c:1.9 Sun May 14 22:26:37 2023 +++ src/tests/usr.bin/indent/lsym_comment.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: lsym_comment.c,v 1.9 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: lsym_comment.c,v 1.10 2023/05/15 07:28:45 rillig Exp $ */ /* * Tests for the token lsym_comment, which starts a comment. @@ -1031,18 +1031,6 @@ line 4 /* - * Cover the code for expanding the comment buffer. As of 2021-11-07, the - * default buffer size is 200. To actually fill the comment buffer, there must - * be a single line of a comment that is longer than 200 bytes. - */ -//indent input -/*-_____10________20________30________40________50________60________70________80________90_______100_______110_______120_______130_______140_______150_______160_______170_______180_______190_______200 */ -//indent end - -//indent run-equals-input - - -/* * Since 2019-04-04 and before pr_comment.c 1.123 from 2021-11-25, the * function analyze_comment wrongly joined the two comments. */ Index: src/usr.bin/indent/debug.c diff -u src/usr.bin/indent/debug.c:1.4 src/usr.bin/indent/debug.c:1.5 --- src/usr.bin/indent/debug.c:1.4 Sat May 13 15:34:22 2023 +++ src/usr.bin/indent/debug.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: debug.c,v 1.4 2023/05/13 15:34:22 rillig Exp $ */ +/* $NetBSD: debug.c,v 1.5 2023/05/15 07:28:45 rillig Exp $ */ /*- * Copyright (c) 2023 The NetBSD Foundation, Inc. 
@@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: debug.c,v 1.4 2023/05/13 15:34:22 rillig Exp $"); +__RCSID("$NetBSD: debug.c,v 1.5 2023/05/15 07:28:45 rillig Exp $"); #include "indent.h" @@ -108,26 +108,26 @@ static bool debug_full_parser_state = tr static void debug_print_buf(const char *name, const struct buffer *buf) { - if (buf->s < buf->e) { + if (buf->len > 0) { debug_printf("%s ", name); - debug_vis_range("\"", buf->s, buf->e, "\"\n"); + debug_vis_range("\"", buf->st, buf->len, "\"\n"); } } void debug_buffers(void) { - if (lab.e != lab.s) { + if (lab.len > 0) { debug_printf(" label "); - debug_vis_range("\"", lab.s, lab.e, "\""); + debug_vis_range("\"", lab.st, lab.len, "\""); } - if (code.e != code.s) { + if (code.len > 0) { debug_printf(" code "); - debug_vis_range("\"", code.s, code.e, "\""); + debug_vis_range("\"", code.st, code.len, "\""); } - if (com.e < com.s) { + if (com.len > 0) { debug_printf(" comment "); - debug_vis_range("\"", com.s, com.e, "\""); + debug_vis_range("\"", com.st, com.len, "\""); } } @@ -192,7 +192,7 @@ debug_parser_state(lexer_symbol lsym) debug_println(""); debug_printf("line %d: %s", line_no, lsym_name[lsym]); - debug_vis_range(" \"", token.s, token.e, "\"\n"); + debug_vis_range(" \"", token.st, token.len, "\"\n"); debug_print_buf("label", &lab); debug_print_buf("code", &code); Index: src/usr.bin/indent/indent.c diff -u src/usr.bin/indent/indent.c:1.267 src/usr.bin/indent/indent.c:1.268 --- src/usr.bin/indent/indent.c:1.267 Sun May 14 22:26:37 2023 +++ src/usr.bin/indent/indent.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.c,v 1.267 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: indent.c,v 1.268 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: indent.c,v 1.267 2023/05/14 22:26:37 rillig Exp $"); +__RCSID("$NetBSD: indent.c,v 1.268 2023/05/15 07:28:45 rillig Exp $"); #include <sys/param.h> #include 
<err.h> @@ -101,66 +101,34 @@ static char bakfile[MAXPATHLEN] = ""; static void -buf_init(struct buffer *buf) -{ - size_t size = 200; - buf->mem = xmalloc(size); - buf->limit = buf->mem + size - 5 /* safety margin */; - buf->s = buf->mem + 1; /* allow accessing buf->e[-1] */ - buf->e = buf->s; - buf->mem[0] = ' '; -} - -static size_t -buf_len(const struct buffer *buf) -{ - return (size_t)(buf->e - buf->s); -} - -void buf_expand(struct buffer *buf, size_t add_size) { - size_t new_size = (size_t)(buf->limit - buf->s) + 400 + add_size; - size_t len = buf_len(buf); - buf->mem = xrealloc(buf->mem, new_size); - buf->limit = buf->mem + new_size - 5; - buf->s = buf->mem + 1; - buf->e = buf->s + len; -} - -static void -buf_reserve(struct buffer *buf, size_t n) -{ - if (n >= (size_t)(buf->limit - buf->e)) - buf_expand(buf, n); + buf->cap = buf->cap + add_size + 400; + buf->mem = xrealloc(buf->mem, buf->cap); + buf->st = buf->mem; } void buf_add_char(struct buffer *buf, char ch) { - buf_reserve(buf, 1); - *buf->e++ = ch; + if (buf->len == buf->cap) + buf_expand(buf, 1); + buf->mem[buf->len++] = ch; } void -buf_add_range(struct buffer *buf, const char *s, const char *e) +buf_add_chars(struct buffer *buf, const char *s, size_t len) { - size_t len = (size_t)(e - s); - buf_reserve(buf, len); - memcpy(buf->e, s, len); - buf->e += len; + if (len > buf->cap - buf->len) + buf_expand(buf, len); + memcpy(buf->mem + buf->len, s, len); + buf->len += len; } static void buf_add_buf(struct buffer *buf, const struct buffer *add) { - buf_add_range(buf, add->s, add->e); -} - -static void -buf_reset(struct buffer *buf) -{ - buf->e = buf->s; + buf_add_chars(buf, add->st, add->len); } void @@ -180,13 +148,13 @@ diag(int level, const char *msg, ...) } /* - * Compute the indentation from starting at 'ind' and adding the text from - * 'start' to 'end'. + * Compute the indentation from starting at 'ind' and adding the text starting + * at 's'. 
*/ int -ind_add(int ind, const char *start, const char *end) +ind_add(int ind, const char *s, size_t len) { - for (const char *p = start; p != end; ++p) { + for (const char *p = s; len > 0; p++, len--) { if (*p == '\n' || *p == '\f') ind = 0; else if (*p == '\t') @@ -202,14 +170,6 @@ ind_add(int ind, const char *start, cons static void main_init_globals(void) { - inp_init(); - - buf_init(&token); - - buf_init(&lab); - buf_init(&code); - buf_init(&com); - ps.s_sym[0] = psym_stmt_list; ps.prev_token = lsym_semicolon; ps.next_col_1 = true; @@ -345,9 +305,9 @@ static void code_add_decl_indent(int decl_ind, bool tabs_to_var) { int base_ind = ps.ind_level * opt.indent_size; - int ind = base_ind + (int)buf_len(&code); + int ind = base_ind + (int)code.len; int target_ind = base_ind + decl_ind; - const char *orig_code_e = code.e; + size_t orig_code_len = code.len; if (tabs_to_var) for (int next; (next = next_tab(ind)) <= target_ind; ind = next) @@ -356,7 +316,7 @@ code_add_decl_indent(int decl_ind, bool for (; ind < target_ind; ind++) buf_add_char(&code, ' '); - if (code.e == orig_code_e && ps.want_blank) { + if (code.len == orig_code_len && ps.want_blank) { buf_add_char(&code, ' '); ps.want_blank = false; } @@ -365,7 +325,7 @@ code_add_decl_indent(int decl_ind, bool static int process_eof(void) { - if (lab.s != lab.e || code.s != code.e || com.s != com.e) + if (lab.len > 0 || code.len > 0 || com.len > 0) output_line(); if (ps.tos > 1) /* check for balanced braces */ @@ -394,11 +354,11 @@ maybe_break_line(lexer_symbol lsym) static void move_com_to_code(void) { - if (lab.e != lab.s || code.e != code.s) + if (lab.len > 0 || code.len > 0) buf_add_char(&code, ' '); buf_add_buf(&code, &com); buf_add_char(&code, ' '); - buf_reset(&com); + com.len = 0; ps.want_blank = false; } @@ -414,7 +374,7 @@ process_newline(void) { if (ps.prev_token == lsym_comma && ps.nparen == 0 && !ps.block_init && !opt.break_after_comma && break_comma && - com.s == com.e) + com.len == 0) goto 
stay_in_line; output_line(); @@ -426,7 +386,7 @@ stay_in_line: static bool is_function_pointer_declaration(void) { - return token.s[0] == '(' + return token.st[0] == '(' && ps.in_decl && !ps.block_init && !ps.decl_indent_done @@ -465,11 +425,11 @@ process_lparen_or_lbracket(void) code_add_decl_indent(ps.decl_ind, ps.tabs_to_var); ps.decl_indent_done = true; } else if (want_blank_before_lparen()) - *code.e++ = ' '; + buf_add_char(&code, ' '); ps.want_blank = false; - *code.e++ = token.s[0]; + buf_add_char(&code, token.st[0]); - ps.paren[ps.nparen - 1].indent = (short)ind_add(0, code.s, code.e); + ps.paren[ps.nparen - 1].indent = (short)ind_add(0, code.st, code.len); debug_println("paren_indents[%d] is now %d", ps.nparen - 1, ps.paren[ps.nparen - 1].indent); @@ -479,7 +439,7 @@ process_lparen_or_lbracket(void) debug_println("paren_indents[0] is now %d", ps.paren[0].indent); } - if (ps.init_or_struct && *token.s == '(' && ps.tos <= 2) { + if (ps.init_or_struct && *token.st == '(' && ps.tos <= 2) { /* * this is a kluge to make sure that declarations will be aligned * right if proc decl has an explicit type on it, i.e. "int a(x) {..." 
@@ -496,7 +456,7 @@ static void process_rparen_or_rbracket(void) { if (ps.nparen == 0) { - diag(0, "Extra '%c'", *token.s); + diag(0, "Extra '%c'", *token.st); goto unbalanced; /* TODO: better exit immediately */ } @@ -512,11 +472,11 @@ process_rparen_or_rbracket(void) if (ps.nparen > 0) ps.nparen--; - if (code.e == code.s) /* if the paren starts the line */ + if (code.len == 0) /* if the paren starts the line */ ps.line_start_nparen = ps.nparen; /* then indent it */ unbalanced: - *code.e++ = token.s[0]; + buf_add_char(&code, token.st[0]); if (ps.spaced_expr_psym != psym_0 && ps.nparen == 0) { ps.force_nl = true; @@ -532,8 +492,8 @@ want_blank_before_unary_op(void) { if (ps.want_blank) return true; - if (token.s[0] == '+' || token.s[0] == '-') - return code.e > code.s && code.e[-1] == token.s[0]; + if (token.st[0] == '+' || token.st[0] == '-') + return code.len > 0 && code.mem[code.len - 1] == token.st[0]; return false; } @@ -543,11 +503,10 @@ process_unary_op(void) if (!ps.decl_indent_done && ps.in_decl && !ps.block_init && !ps.is_function_definition && ps.line_start_nparen == 0) { /* pointer declarations */ - code_add_decl_indent(ps.decl_ind - (int)buf_len(&token), - ps.tabs_to_var); + code_add_decl_indent(ps.decl_ind - (int)token.len, ps.tabs_to_var); ps.decl_indent_done = true; } else if (want_blank_before_unary_op()) - *code.e++ = ' '; + buf_add_char(&code, ' '); buf_add_buf(&code, &token); ps.want_blank = false; @@ -556,7 +515,7 @@ process_unary_op(void) static void process_binary_op(void) { - if (buf_len(&code) > 0) + if (code.len > 0) buf_add_char(&code, ' '); buf_add_buf(&code, &token); ps.want_blank = true; @@ -565,8 +524,7 @@ process_binary_op(void) static void process_postfix_op(void) { - *code.e++ = token.s[0]; - *code.e++ = token.s[1]; + buf_add_buf(&code, &token); ps.want_blank = true; } @@ -575,8 +533,8 @@ process_question(void) { ps.quest_level++; if (ps.want_blank) - *code.e++ = ' '; - *code.e++ = '?'; + buf_add_char(&code, ' '); + 
buf_add_char(&code, '?'); ps.want_blank = true; } @@ -586,21 +544,21 @@ process_colon(void) if (ps.quest_level > 0) { /* part of a '?:' operator */ ps.quest_level--; if (ps.want_blank) - *code.e++ = ' '; - *code.e++ = ':'; + buf_add_char(&code, ' '); + buf_add_char(&code, ':'); ps.want_blank = true; return; } if (ps.init_or_struct) { /* bit-field */ - *code.e++ = ':'; + buf_add_char(&code, ':'); ps.want_blank = false; return; } buf_add_buf(&lab, &code); /* 'case' or 'default' or named label */ buf_add_char(&lab, ':'); - buf_reset(&code); + code.len = 0; ps.in_stmt_or_decl = false; ps.is_case_label = ps.seen_case; @@ -622,7 +580,7 @@ process_semicolon(void) ps.block_init_level = 0; ps.declaration = ps.declaration == decl_begin ? decl_end : decl_no; - if (ps.in_decl && code.s == code.e && !ps.block_init && + if (ps.in_decl && code.len == 0 && !ps.block_init && !ps.decl_indent_done && ps.line_start_nparen == 0) { /* indent stray semicolons in declarations */ code_add_decl_indent(ps.decl_ind - 1, ps.tabs_to_var); @@ -645,7 +603,7 @@ process_semicolon(void) ps.spaced_expr_psym = psym_0; } } - *code.e++ = ';'; + buf_add_char(&code, ';'); ps.want_blank = true; ps.in_stmt_or_decl = ps.nparen > 0; @@ -667,7 +625,7 @@ process_lbrace(void) else ps.block_init_level++; - if (code.s != code.e && !ps.block_init) { + if (code.len > 0 && !ps.block_init) { if (!opt.brace_same_line) output_line(); else if (ps.in_func_def_params && !ps.init_or_struct) { @@ -689,7 +647,7 @@ process_lbrace(void) } } - if (code.s == code.e) + if (code.len == 0) ps.in_stmt_cont = false; /* don't indent the '{' itself */ if (ps.in_decl && ps.init_or_struct) { ps.di_stack[ps.decl_level] = ps.decl_ind; @@ -709,9 +667,9 @@ process_lbrace(void) ps.decl_ind = 0; parse(psym_lbrace); if (ps.want_blank) - *code.e++ = ' '; + buf_add_char(&code, ' '); ps.want_blank = false; - *code.e++ = '{'; + buf_add_char(&code, '{'); ps.declaration = decl_no; } @@ -727,13 +685,13 @@ process_rbrace(void) ps.declaration = decl_no; 
ps.block_init_level--; - if (code.s != code.e && !ps.block_init) { /* '}' must be first on line */ + if (code.len > 0 && !ps.block_init) { /* '}' must be first on line */ if (opt.verbose) diag(0, "Line broken"); output_line(); } - *code.e++ = '}'; + buf_add_char(&code, '}'); ps.want_blank = true; ps.in_stmt_or_decl = false; ps.in_stmt_cont = false; @@ -756,7 +714,7 @@ process_do(void) { ps.in_stmt_or_decl = false; - if (code.e != code.s) { /* make sure this starts a line */ + if (code.len > 0) { /* make sure this starts a line */ if (opt.verbose) diag(0, "Line broken"); output_line(); @@ -771,7 +729,7 @@ process_else(void) { ps.in_stmt_or_decl = false; - if (code.e > code.s && !(opt.cuddle_else && code.e[-1] == '}')) { + if (code.len > 0 && !(opt.cuddle_else && code.mem[code.len - 1] == '}')) { if (opt.verbose) diag(0, "Line broken"); output_line(); @@ -787,7 +745,7 @@ process_type(void) parse(psym_decl); /* let the parser worry about indentation */ if (ps.prev_token == lsym_rparen_or_rbracket && ps.tos <= 1) { - if (code.s != code.e) + if (code.len > 0) output_line(); } @@ -802,7 +760,7 @@ process_type(void) if (ps.decl_level <= 0) ps.declaration = decl_begin; - int len = (int)buf_len(&token) + 1; + int len = (int)token.len + 1; int ind = ps.ind_level == 0 || ps.decl_level > 0 ? 
opt.decl_indent /* global variable or local member */ : opt.local_decl_indent; /* local variable */ @@ -816,10 +774,10 @@ process_ident(lexer_symbol lsym) if (ps.in_decl) { if (lsym == lsym_funcname) { ps.in_decl = false; - if (opt.procnames_start_line && code.s != code.e) + if (opt.procnames_start_line && code.len > 0) output_line(); else if (ps.want_blank) - *code.e++ = ' '; + buf_add_char(&code, ' '); ps.want_blank = false; } else if (!ps.block_init && !ps.decl_indent_done && @@ -841,16 +799,16 @@ process_ident(lexer_symbol lsym) static void process_period(void) { - if (code.e > code.s && code.e[-1] == ',') - *code.e++ = ' '; - *code.e++ = '.'; + if (code.len > 0 && code.mem[code.len - 1] == ',') + buf_add_char(&code, ' '); + buf_add_char(&code, '.'); ps.want_blank = false; } static void process_comma(void) { - ps.want_blank = code.s != code.e; /* only put blank after comma if comma + ps.want_blank = code.len > 0; /* only put blank after comma if comma * does not start the line */ if (ps.in_decl && !ps.is_function_definition && !ps.block_init && @@ -860,14 +818,14 @@ process_comma(void) ps.decl_indent_done = true; } - *code.e++ = ','; + buf_add_char(&code, ','); if (ps.nparen == 0) { if (ps.block_init_level <= 0) ps.block_init = false; int typical_varname_length = 8; if (break_comma && (opt.break_after_comma || - ind_add(compute_code_indent(), code.s, code.e) + ind_add(compute_code_indent(), code.st, code.len) >= opt.max_line_length - typical_varname_length)) ps.force_nl = true; } @@ -887,17 +845,16 @@ read_preprocessing_line(void) buf_add_char(&lab, inp_next()); while (inp_peek() != '\n' || (state == COMM && !had_eof)) { - buf_reserve(&lab, 2); - *lab.e++ = inp_next(); - switch (lab.e[-1]) { + buf_add_char(&lab, inp_next()); + switch (lab.mem[lab.len - 1]) { case '\\': if (state != COMM) - *lab.e++ = inp_next(); + buf_add_char(&lab, inp_next()); break; case '/': if (inp_peek() == '*' && state == PLAIN) { state = COMM; - *lab.e++ = inp_next(); + 
buf_add_char(&lab, inp_next()); } break; case '"': @@ -915,14 +872,14 @@ read_preprocessing_line(void) case '*': if (inp_peek() == '/' && state == COMM) { state = PLAIN; - *lab.e++ = inp_next(); + buf_add_char(&lab, inp_next()); } break; } } - while (lab.e > lab.s && ch_isblank(lab.e[-1])) - lab.e--; + while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1])) + lab.len--; } typedef struct { @@ -948,19 +905,20 @@ substring_starts_with(substring ss, cons static void process_preprocessing(void) { - if (com.s != com.e || lab.s != lab.e || code.s != code.e) + if (lab.len > 0 || code.len > 0 || com.len > 0) output_line(); read_preprocessing_line(); ps.is_case_label = false; + const char *end = lab.mem + lab.len; substring dir; - dir.s = lab.s + 1; - while (dir.s < lab.e && ch_isblank(*dir.s)) + dir.s = lab.st + 1; + while (dir.s < end && ch_isblank(*dir.s)) dir.s++; dir.e = dir.s; - while (dir.e < lab.e && ch_isalpha(*dir.e)) + while (dir.e < end && ch_isalpha(*dir.e)) dir.e++; if (substring_starts_with(dir, "if")) { /* also ifdef, ifndef */ @@ -1022,12 +980,10 @@ main_loop(void) maybe_break_line(lsym); ps.in_stmt_or_decl = true; /* add an extra level of indentation; * turned off again by a ';' or '}' */ - if (com.s != com.e) + if (com.len > 0) move_com_to_code(); } - buf_reserve(&code, 3); /* space for 2 characters plus '\0' */ - switch (lsym) { case lsym_form_feed: @@ -1194,11 +1150,12 @@ debug_println(const char *fmt, ...) 
} void -debug_vis_range(const char *prefix, const char *s, const char *e, +debug_vis_range(const char *prefix, const char *s, size_t len, const char *suffix) { debug_printf("%s", prefix); - for (const char *p = s; p < e; p++) { + for (size_t i = 0; i < len; i++) { + const char *p = s + i; if (*p == '\\' || *p == '"') debug_printf("\\%c", *p); else if (isprint((unsigned char)*p)) @@ -1223,12 +1180,6 @@ nonnull(void *p) } void * -xmalloc(size_t size) -{ - return nonnull(malloc(size)); -} - -void * xrealloc(void *p, size_t new_size) { return nonnull(realloc(p, new_size)); Index: src/usr.bin/indent/indent.h diff -u src/usr.bin/indent/indent.h:1.132 src/usr.bin/indent/indent.h:1.133 --- src/usr.bin/indent/indent.h:1.132 Sun May 14 22:26:37 2023 +++ src/usr.bin/indent/indent.h Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: indent.h,v 1.132 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: indent.h,v 1.133 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD @@ -125,10 +125,10 @@ typedef enum parser_symbol { /* A range of characters, not null-terminated. */ struct buffer { - char *s; /* start of the usable text */ - char *e; /* end of the usable text */ - char *mem; /* start of the allocated memory */ - char *limit; /* end of the allocated memory */ + const char *st; /* start of the usable text */ + char *mem; + size_t len; /* length of the usable text, from 'mem' */ + size_t cap; }; extern FILE *input; @@ -366,7 +366,7 @@ extern struct parser_state { #ifdef debug void debug_printf(const char *, ...) __printflike(1, 2); void debug_println(const char *, ...) 
__printflike(1, 2); -void debug_vis_range(const char *, const char *, const char *, const char *); +void debug_vis_range(const char *, const char *, size_t, const char *); void debug_parser_state(lexer_symbol); void debug_parse_stack(const char *); void debug_buffers(void); @@ -385,9 +385,7 @@ extern const char *const psym_name[]; void register_typename(const char *); int compute_code_indent(void); int compute_label_indent(void); -int ind_add(int, const char *, const char *); - -void inp_init(void); +int ind_add(int, const char *, size_t); const char *inp_p(void); const char *inp_line_start(void); @@ -406,13 +404,11 @@ void process_comment(void); void set_option(const char *, const char *); void load_profiles(const char *); -void *xmalloc(size_t); void *xrealloc(void *, size_t); char *xstrdup(const char *); -void buf_expand(struct buffer *, size_t); void buf_add_char(struct buffer *, char); -void buf_add_range(struct buffer *, const char *, const char *); +void buf_add_chars(struct buffer *, const char *, size_t); static inline bool ch_isalnum(char ch) Index: src/usr.bin/indent/io.c diff -u src/usr.bin/indent/io.c:1.165 src/usr.bin/indent/io.c:1.166 --- src/usr.bin/indent/io.c:1.165 Sun May 14 22:26:37 2023 +++ src/usr.bin/indent/io.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: io.c,v 1.165 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: io.c,v 1.166 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: io.c,v 1.165 2023/05/14 22:26:37 rillig Exp $"); +__RCSID("$NetBSD: io.c,v 1.166 2023/05/15 07:28:45 rillig Exp $"); #include <assert.h> #include <stdio.h> @@ -55,20 +55,10 @@ static struct buffer inp; static int paren_indent; -void -inp_init(void) -{ - inp.mem = xmalloc(10); - inp.limit = inp.mem + 8; - inp.s = inp.mem; - inp.e = inp.mem; -} - const char * inp_p(void) { - assert(inp.s < inp.e); - return inp.s; + return inp.st; } const char * @@ -80,23 +70,20 @@ 
inp_line_start(void) char inp_peek(void) { - assert(inp.s < inp.e); - return *inp.s; + return *inp.st; } char inp_lookahead(size_t i) { - assert(i < (size_t)(inp.e - inp.s)); - return inp.s[i]; + return inp.st[i]; } void inp_skip(void) { - assert(inp.s < inp.e); - inp.s++; - if (inp.s >= inp.e) + inp.st++; + if ((size_t)(inp.st - inp.mem) >= inp.len) inp_read_line(); } @@ -109,38 +96,24 @@ inp_next(void) } static void -inp_add(char ch) -{ - if (inp.e >= inp.limit) { - size_t new_size = (size_t)(inp.limit - inp.mem) * 2 + 10; - size_t e_offset = (size_t)(inp.e - inp.mem); - inp.mem = xrealloc(inp.mem, new_size); - inp.s = inp.mem; - inp.e = inp.mem + e_offset; - inp.limit = inp.mem + new_size - 2; - } - *inp.e++ = ch; -} - -static void inp_read_next_line(FILE *f) { - inp.s = inp.mem; - inp.e = inp.mem; + inp.st = inp.mem; + inp.len = 0; for (;;) { int ch = getc(f); if (ch == EOF) { if (!inhibit_formatting) { - inp_add(' '); - inp_add('\n'); + buf_add_char(&inp, ' '); + buf_add_char(&inp, '\n'); } had_eof = true; break; } if (ch != '\0') - inp_add((char)ch); + buf_add_char(&inp, (char)ch); if (ch == '\n') break; } @@ -150,14 +123,14 @@ static void output_char(char ch) { fputc(ch, output); - debug_vis_range("output_char '", &ch, &ch + 1, "'\n"); + debug_vis_range("output_char '", &ch, 1, "'\n"); } static void -output_range(const char *s, const char *e) +output_range(const char *s, size_t len) { - fwrite(s, 1, (size_t)(e - s), output); - debug_vis_range("output_range \"", s, e, "\"\n"); + fwrite(s, 1, len, output); + debug_vis_range("output_range \"", s, len, "\"\n"); } static int @@ -188,12 +161,12 @@ output_line_label(void) { int ind; - while (lab.e > lab.s && ch_isblank(lab.e[-1])) - lab.e--; + while (lab.len > 0 && ch_isblank(lab.mem[lab.len - 1])) + lab.len--; ind = output_indent(0, compute_label_indent()); - output_range(lab.s, lab.e); - ind = ind_add(ind, lab.s, lab.e); + output_range(lab.st, lab.len); + ind = ind_add(ind, lab.st, lab.len); ps.is_case_label = 
false; return ind; @@ -215,15 +188,15 @@ output_line_code(int ind) } ind = output_indent(ind, target_ind); - output_range(code.s, code.e); - return ind_add(ind, code.s, code.e); + output_range(code.st, code.len); + return ind_add(ind, code.st, code.len); } static void output_line_comment(int ind) { int target_ind = ps.com_ind; - const char *p = com.s; + const char *p = com.st; target_ind += ps.comment_delta; @@ -248,11 +221,11 @@ output_line_comment(int ind) ind = 0; } - while (com.e > p && ch_isspace(com.e[-1])) - com.e--; + while (com.mem + com.len > p && ch_isspace(com.mem[com.len - 1])) + com.len--; (void)output_indent(ind, target_ind); - output_range(p, com.e); + output_range(p, com.len - (size_t)(p - com.mem)); ps.comment_delta = ps.n_comment_delta; } @@ -274,7 +247,7 @@ output_complete_line(char line_terminato if (ps.blank_line_after_decl && ps.declaration == decl_no) { ps.blank_line_after_decl = false; - if (lab.e != lab.s || code.e != code.s || com.e != com.s) + if (lab.len > 0 || code.len > 0 || com.len > 0) output_char('\n'); } @@ -289,11 +262,11 @@ output_complete_line(char line_terminato } int ind = 0; - if (lab.e != lab.s) + if (lab.len > 0) ind = output_line_label(); - if (code.e != code.s) + if (code.len > 0) ind = output_line_code(ind); - if (com.e != com.s) + if (com.len > 0) output_line_comment(ind); output_char(line_terminator); @@ -303,9 +276,9 @@ output_complete_line(char line_terminato ps.in_stmt_cont = ps.in_stmt_or_decl && !ps.in_decl; ps.decl_indent_done = false; - lab.e = lab.s; /* reset buffers */ - code.e = code.s; - com.e = com.s = com.mem + 1; + lab.len = 0; + code.len = 0; + com.len = 0; ps.ind_level = ps.ind_level_follow; ps.line_start_nparen = ps.nparen; @@ -335,11 +308,11 @@ static int compute_code_indent_lineup(int base_ind) { int ind = paren_indent; - int overflow = ind_add(ind, code.s, code.e) - opt.max_line_length; + int overflow = ind_add(ind, code.st, code.len) - opt.max_line_length; if (overflow < 0) return ind; - if 
(ind_add(base_ind, code.s, code.e) < opt.max_line_length) { + if (ind_add(base_ind, code.st, code.len) < opt.max_line_length) { ind -= overflow + 2; if (ind > base_ind) return ind; @@ -377,7 +350,7 @@ compute_label_indent(void) { if (ps.is_case_label) return (int)(case_ind * (float)opt.indent_size); - if (lab.s[0] == '#') + if (lab.st[0] == '#') return 0; return opt.indent_size * (ps.ind_level - 2); } @@ -426,7 +399,7 @@ parse_indent_comment(void) if (!skip_string(&p, "*/\n")) return; - if (com.s != com.e || lab.s != lab.e || code.s != code.e) + if (lab.len > 0 || code.len > 0 || com.len > 0) output_line(); inhibit_formatting = !on; @@ -440,5 +413,5 @@ inp_read_line(void) parse_indent_comment(); if (inhibit_formatting) - output_range(inp.s, inp.e); + output_range(inp.st, inp.len); } Index: src/usr.bin/indent/lexi.c diff -u src/usr.bin/indent/lexi.c:1.184 src/usr.bin/indent/lexi.c:1.185 --- src/usr.bin/indent/lexi.c:1.184 Sun May 14 22:26:37 2023 +++ src/usr.bin/indent/lexi.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: lexi.c,v 1.184 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: lexi.c,v 1.185 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: lexi.c,v 1.184 2023/05/14 22:26:37 rillig Exp $"); +__RCSID("$NetBSD: lexi.c,v 1.185 2023/05/15 07:28:45 rillig Exp $"); #include <stdlib.h> #include <string.h> @@ -167,17 +167,9 @@ static const unsigned char lex_number_ro }; static void -check_size_token(size_t desired_size) -{ - if (token.e + desired_size >= token.limit) - buf_expand(&token, desired_size); -} - -static void token_add_char(char ch) { - check_size_token(1); - *token.e++ = ch; + buf_add_char(&token, ch); } @@ -239,6 +231,7 @@ lex_word(void) else if (inp_peek() == '\\' && inp_lookahead(1) == '\n') { inp_skip(); inp_skip(); + line_no++; } else return; } @@ -247,17 +240,17 @@ lex_word(void) static void lex_char_or_string(void) { - for (char delim = 
token.e[-1];;) { + for (char delim = token.mem[token.len - 1];;) { if (inp_peek() == '\n') { diag(1, "Unterminated literal"); return; } token_add_char(inp_next()); - if (token.e[-1] == delim) + if (token.mem[token.len - 1] == delim) return; - if (token.e[-1] == '\\') { + if (token.mem[token.len - 1] == '\\') { if (inp_peek() == '\n') ++line_no; token_add_char(inp_next()); @@ -311,10 +304,10 @@ static bool is_typename(void) { if (opt.auto_typedefs && - token.e - token.s >= 2 && memcmp(token.e - 2, "_t", 2) == 0) + token.len >= 2 && memcmp(token.mem + token.len - 2, "_t", 2) == 0) return true; - return bsearch_typenames(token.s) >= 0; + return bsearch_typenames(token.st) >= 0; } static int @@ -374,14 +367,11 @@ lexi_alnum(void) } else if (is_identifier_start(inp_peek())) { lex_word(); - if (token.s[0] == 'L' && token.e - token.s == 1 && + if (token.len == 1 && token.st[0] == 'L' && (inp_peek() == '"' || inp_peek() == '\'')) { token_add_char(inp_next()); lex_char_or_string(); ps.next_unary = false; - - check_size_token(1); - return lsym_word; } } else @@ -396,8 +386,8 @@ lexi_alnum(void) return lsym_type_outside_parentheses; token_add_char('\0'); - token.e--; - const struct keyword *kw = bsearch(token.s, keywords, + token.len--; + const struct keyword *kw = bsearch(token.st, keywords, array_length(keywords), sizeof(keywords[0]), cmp_keyword_by_name); bool is_type = false; if (kw == NULL) { @@ -423,7 +413,7 @@ found_typename: } if (ps.prev_token != lsym_period && ps.prev_token != lsym_unary_op) { if (kw != NULL && kw->lsym == lsym_tag) { - if (token.s[0] == 'e' /* enum */) + if (token.st[0] == 'e' /* enum */) ps.in_enum = in_enum_enum; return lsym_tag; } @@ -491,7 +481,7 @@ lex_asterisk_unary(void) lexer_symbol lexi(void) { - token.e = token.s; + token.len = 0; ps.curr_col_1 = ps.next_col_1; ps.next_col_1 = false; @@ -506,13 +496,12 @@ lexi(void) /* Scan a non-alphanumeric token */ - check_size_token(3); /* for things like "<<=" */ - *token.e++ = inp_next(); + 
token_add_char(inp_next()); lexer_symbol lsym; bool next_unary; - switch (token.e[-1]) { + switch (token.mem[token.len - 1]) { /* INDENT OFF */ case '(': @@ -558,8 +547,8 @@ lexi(void) lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; next_unary = true; - if (inp_peek() == token.e[-1]) { /* '++' or '--' */ - *token.e++ = inp_next(); + if (inp_peek() == token.mem[token.len - 1]) { /* '++' or '--' */ + token_add_char(inp_next()); if (ps.prev_token == lsym_word || ps.prev_token == lsym_rparen_or_rbracket) { lsym = ps.next_unary ? lsym_unary_op : lsym_postfix_op; @@ -567,10 +556,10 @@ lexi(void) } } else if (inp_peek() == '=') { /* '+=' or '-=' */ - *token.e++ = inp_next(); + token_add_char(inp_next()); } else if (inp_peek() == '>') { /* '->' */ - *token.e++ = inp_next(); + token_add_char(inp_next()); lsym = lsym_unary_op; next_unary = false; ps.want_blank = false; @@ -581,7 +570,7 @@ lexi(void) if (ps.init_or_struct) ps.block_init = true; if (inp_peek() == '=') - *token.e++ = inp_next(); + token_add_char(inp_next()); lsym = lsym_binary_op; next_unary = true; break; @@ -590,9 +579,9 @@ lexi(void) case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (inp_peek() == '>' || inp_peek() == '<' || inp_peek() == '=') - *token.e++ = inp_next(); + token_add_char(inp_next()); if (inp_peek() == '=') - *token.e++ = inp_next(); + token_add_char(inp_next()); lsym = ps.next_unary ? 
lsym_unary_op : lsym_binary_op; next_unary = true; break; @@ -604,22 +593,23 @@ lexi(void) next_unary = true; } else { if (inp_peek() == '=') - *token.e++ = inp_next(); + token_add_char(inp_next()); lsym = lsym_binary_op; next_unary = true; } break; default: - if (token.e[-1] == '/' && (inp_peek() == '*' || inp_peek() == '/')) { - *token.e++ = inp_next(); + if (token.mem[token.len - 1] == '/' + && (inp_peek() == '*' || inp_peek() == '/')) { + token_add_char(inp_next()); lsym = lsym_comment; next_unary = ps.next_unary; break; } /* handle '||', '&&', etc., and also things as in 'int *****i' */ - while (token.e[-1] == inp_peek() || inp_peek() == '=') + while (token.mem[token.len - 1] == inp_peek() || inp_peek() == '=') token_add_char(inp_next()); lsym = ps.next_unary ? lsym_unary_op : lsym_binary_op; @@ -633,8 +623,6 @@ lexi(void) ps.next_unary = next_unary; - check_size_token(1); - return lexi_end(lsym); } Index: src/usr.bin/indent/parse.c diff -u src/usr.bin/indent/parse.c:1.55 src/usr.bin/indent/parse.c:1.56 --- src/usr.bin/indent/parse.c:1.55 Sun May 14 12:12:02 2023 +++ src/usr.bin/indent/parse.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: parse.c,v 1.55 2023/05/14 12:12:02 rillig Exp $ */ +/* $NetBSD: parse.c,v 1.56 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: parse.c,v 1.55 2023/05/14 12:12:02 rillig Exp $"); +__RCSID("$NetBSD: parse.c,v 1.56 2023/05/15 07:28:45 rillig Exp $"); #include <err.h> #include <stdio.h> @@ -111,7 +111,7 @@ parse(parser_symbol psym) ++ps.ind_level_follow; /* it is a random, isolated stmt group * or a declaration */ else { - if (code.s == code.e) { + if (code.len == 0) { /* it is a group as part of a while, for, etc. 
*/ --ps.ind_level; Index: src/usr.bin/indent/pr_comment.c diff -u src/usr.bin/indent/pr_comment.c:1.139 src/usr.bin/indent/pr_comment.c:1.140 --- src/usr.bin/indent/pr_comment.c:1.139 Sun May 14 22:26:37 2023 +++ src/usr.bin/indent/pr_comment.c Mon May 15 07:28:45 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: pr_comment.c,v 1.139 2023/05/14 22:26:37 rillig Exp $ */ +/* $NetBSD: pr_comment.c,v 1.140 2023/05/15 07:28:45 rillig Exp $ */ /*- * SPDX-License-Identifier: BSD-4-Clause @@ -38,7 +38,7 @@ */ #include <sys/cdefs.h> -__RCSID("$NetBSD: pr_comment.c,v 1.139 2023/05/14 22:26:37 rillig Exp $"); +__RCSID("$NetBSD: pr_comment.c,v 1.140 2023/05/15 07:28:45 rillig Exp $"); #include <string.h> @@ -47,9 +47,7 @@ __RCSID("$NetBSD: pr_comment.c,v 1.139 2 static void com_add_char(char ch) { - if (1 >= com.limit - com.e) - buf_expand(&com, 1); - *com.e++ = ch; + buf_add_char(&com, ch); } static void @@ -57,8 +55,7 @@ com_add_delim(void) { if (!opt.star_comment_cont) return; - const char *delim = " * "; - buf_add_range(&com, delim, delim + 3); + buf_add_chars(&com, " * ", 3); } static bool @@ -66,8 +63,8 @@ fits_in_one_line(int com_ind, int max_li { for (const char *start = inp_p(), *p = start; *p != '\n'; p++) { if (p[0] == '*' && p[1] == '/') { - int len = ind_add(com_ind + 3, start, p); - len += ch_isblank(p[-1]) ? 2 : 3; + int len = ind_add(com_ind + 3, start, (size_t)(p - start)); + len += p == start || ch_isblank(p[-1]) ? 2 : 3; return len <= max_line_length; } } @@ -89,15 +86,15 @@ analyze_comment(bool *p_may_wrap, bool * } else { if (inp_peek() == '-' || inp_peek() == '*' || - token.e[-1] == '/' || + token.mem[token.len - 1] == '/' || (inp_peek() == '\n' && !opt.format_block_comments)) { may_wrap = false; delim = false; } - if (com.e != com.s) + if (com.len > 0) output_line(); - if (lab.s == lab.e && code.s == code.e) { + if (lab.len == 0 && code.len == 0) { ind = (ps.ind_level - opt.unindent_displace) * opt.indent_size; if (ind <= 0) ind = opt.format_col1_comments ? 
0 : 1; @@ -105,9 +102,9 @@ analyze_comment(bool *p_may_wrap, bool * } else { delim = false; - int target_ind = code.s != code.e - ? ind_add(compute_code_indent(), code.s, code.e) - : ind_add(compute_label_indent(), lab.s, lab.e); + int target_ind = code.len > 0 + ? ind_add(compute_code_indent(), code.st, code.len) + : ind_add(compute_label_indent(), lab.st, lab.len); ind = ps.decl_on_line || ps.ind_level == 0 ? opt.decl_comment_column - 1 : opt.comment_column - 1; @@ -125,7 +122,8 @@ analyze_comment(bool *p_may_wrap, bool * * Find out how much indentation there was originally, because that * much will have to be ignored by output_complete_line. */ - ps.n_comment_delta = -ind_add(0, inp_line_start(), inp_p() - 2); + size_t len = (size_t)(inp_p() - 2 - inp_line_start()); + ps.n_comment_delta = -ind_add(0, inp_line_start(), len); } else { ps.n_comment_delta = 0; if (!(inp_peek() == '\t' && !ch_isblank(inp_lookahead(1)))) @@ -135,7 +133,7 @@ analyze_comment(bool *p_may_wrap, bool * ps.comment_delta = 0; com_add_char('/'); - com_add_char(token.e[-1]); /* either '*' or '/' */ + com_add_char(token.mem[token.len - 1]); /* either '*' or '/' */ if (may_wrap && !ch_isblank(inp_peek())) com_add_char(' '); @@ -184,9 +182,9 @@ copy_comment_wrap(int line_length, bool last_blank = -1; if (ps.next_col_1) { - if (com.s == com.e) + if (com.len == 0) com_add_char(' '); /* force empty line of output */ - if (com.e - com.s > 3) { + if (com.len > 3) { output_line(); com_add_delim(); } @@ -195,9 +193,9 @@ copy_comment_wrap(int line_length, bool } else { ps.next_col_1 = true; - if (!(com.e > com.s && ch_isblank(com.e[-1]))) + if (!(com.len > 0 && ch_isblank(com.mem[com.len - 1]))) com_add_char(' '); - last_blank = com.e - 1 - com.mem; + last_blank = (int)com.len - 1; } ++line_no; @@ -222,14 +220,14 @@ copy_comment_wrap(int line_length, bool inp_skip(); if (delim) { - if (com.e - com.s > 3) + if (com.len > 3) output_line(); else - com.e = com.s; + com.len = 0; com_add_char(' '); } - if 
(!(com.e > com.s && ch_isblank(com.e[-1]))) + if (!(com.len > 0 && ch_isblank(com.mem[com.len - 1]))) com_add_char(' '); com_add_char('*'); com_add_char('/'); @@ -241,11 +239,11 @@ copy_comment_wrap(int line_length, bool default: ; - int now_len = ind_add(ps.com_ind, com.s, com.e); + int now_len = ind_add(ps.com_ind, com.st, com.len); for (;;) { char ch = inp_next(); if (ch_isblank(ch)) - last_blank = com.e - com.mem; + last_blank = (ssize_t)com.len; com_add_char(ch); now_len++; if (memchr("*\n\r\b\t", inp_peek(), 6) != NULL) @@ -258,7 +256,7 @@ copy_comment_wrap(int line_length, bool if (now_len <= line_length) break; - if (ch_isspace(com.e[-1])) + if (ch_isspace(com.mem[com.len - 1])) break; if (last_blank == -1) { /* only a single word in this line */ @@ -268,13 +266,17 @@ copy_comment_wrap(int line_length, bool } const char *last_word_s = com.mem + last_blank + 1; - size_t last_word_len = (size_t)(com.e - last_word_s); - com.e = com.mem + last_blank; + size_t last_word_len = com.len - (size_t)(last_blank + 1); + com.len = (size_t)last_blank; output_line(); com_add_delim(); - memcpy(com.e, last_word_s, last_word_len); - com.e += last_word_len; + /* + * Assume that output_line and com_add_delim don't invalidate + * the "unused" part of the buffer beyond com.mem + com.len. 
+ */ + memmove(com.mem + com.len, last_word_s, last_word_len); + com.len += last_word_len; last_blank = -1; } } @@ -285,7 +287,7 @@ copy_comment_nowrap(void) { for (;;) { if (inp_peek() == '\n') { - if (token.e[-1] == '/') + if (token.mem[token.len - 1] == '/') return; if (had_eof) { @@ -294,7 +296,7 @@ copy_comment_nowrap(void) return; } - if (com.s == com.e) + if (com.len == 0) com_add_char(' '); /* force output of an empty line */ output_line(); ++line_no; @@ -303,7 +305,8 @@ copy_comment_nowrap(void) } com_add_char(inp_next()); - if (com.e[-2] == '*' && com.e[-1] == '/' && token.e[-1] == '*') + if (com.mem[com.len - 2] == '*' && com.mem[com.len - 1] == '/' + && token.mem[token.len - 1] == '*') return; } }