Module Name:	src
Committed By:	rillig
Date:		Sun Jan  8 22:46:00 UTC 2023

Modified Files:
	src/usr.bin/xlint/lint1: lex.c scan.l

Log Message:
lint: document how reading tokens from the input works

Rename inpc to read_byte, to make the name more expressive.  Since C99,
lint is no longer required to use overly short identifiers.

No functional change.

To generate a diff of this commit:
cvs rdiff -u -r1.134 -r1.135 src/usr.bin/xlint/lint1/lex.c
cvs rdiff -u -r1.136 -r1.137 src/usr.bin/xlint/lint1/scan.l

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files: Index: src/usr.bin/xlint/lint1/lex.c diff -u src/usr.bin/xlint/lint1/lex.c:1.134 src/usr.bin/xlint/lint1/lex.c:1.135 --- src/usr.bin/xlint/lint1/lex.c:1.134 Sat Oct 1 10:04:06 2022 +++ src/usr.bin/xlint/lint1/lex.c Sun Jan 8 22:46:00 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: lex.c,v 1.134 2022/10/01 10:04:06 rillig Exp $ */ +/* $NetBSD: lex.c,v 1.135 2023/01/08 22:46:00 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. @@ -38,7 +38,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: lex.c,v 1.134 2022/10/01 10:04:06 rillig Exp $"); +__RCSID("$NetBSD: lex.c,v 1.135 2023/01/08 22:46:00 rillig Exp $"); #endif #include <ctype.h> @@ -398,11 +398,13 @@ initscan(void) } /* - * Read a character and ensure that it is positive (except EOF). - * Increment line count(s) if necessary. + * When scanning the remainder of a long token (see lex_input), read a byte + * and return it as an unsigned char or as EOF. + * + * Increment the line counts if necessary. */ static int -inpc(void) +read_byte(void) { int c; @@ -846,7 +848,7 @@ get_escaped_char(int delim) int n, c, v; if (pbc == -1) { - c = inpc(); + c = read_byte(); } else { c = pbc; pbc = -1; @@ -868,7 +870,7 @@ get_escaped_char(int delim) case EOF: return -2; case '\\': - switch (c = inpc()) { + switch (c = read_byte()) { case '"': if (!allow_c90 && delim == '\'') /* \" inside character constants undef... */ @@ -913,7 +915,7 @@ get_escaped_char(int delim) v = 0; do { v = (v << 3) + (c - '0'); - c = inpc(); + c = read_byte(); } while (--n > 0 && '0' <= c && c <= '7'); pbc = c; if (v > TARG_UCHAR_MAX) { @@ -928,7 +930,7 @@ get_escaped_char(int delim) warning(82); v = 0; n = 0; - while (c = inpc(), isxdigit(c)) { + while (c = read_byte(), isxdigit(c)) { c = isdigit(c) ? 
c - '0' : toupper(c) - 'A' + 10; v = (v << 4) + c; @@ -1048,7 +1050,7 @@ lex_directive(const char *yytext) /* empty string means stdin */ if (fnl == 0) { fn = "{standard input}"; - fnl = 16; /* strlen (fn) */ + fnl = 16; /* strlen (fn) */ } curr_pos.p_file = record_filename(fn, fnl); /* @@ -1118,7 +1120,7 @@ lex_comment(void) eoc = false; /* Skip whitespace after the start of the comment */ - while (c = inpc(), isspace(c)) + while (c = read_byte(), isspace(c)) continue; /* Read the potential keyword to keywd */ @@ -1128,7 +1130,7 @@ lex_comment(void) if (islower(c) && l > 0 && ch_isupper(keywd[0])) break; keywd[l++] = (char)c; - c = inpc(); + c = read_byte(); } while (l > 0 && ch_isspace(keywd[l - 1])) l--; @@ -1144,14 +1146,14 @@ lex_comment(void) /* skip whitespace after the keyword */ while (isspace(c)) - c = inpc(); + c = read_byte(); /* read the argument, if the keyword accepts one and there is one */ l = 0; if (keywtab[i].arg) { while (isdigit(c) && l < sizeof(arg) - 1) { arg[l++] = (char)c; - c = inpc(); + c = read_byte(); } } arg[l] = '\0'; @@ -1159,9 +1161,9 @@ lex_comment(void) /* skip whitespace after the argument */ while (isspace(c)) - c = inpc(); + c = read_byte(); - if (c != '*' || (c = inpc()) != '/') { + if (c != '*' || (c = read_byte()) != '/') { if (keywtab[i].func != linted) /* extra characters in lint comment */ warning(257); @@ -1179,7 +1181,7 @@ lex_comment(void) skip_rest: while (!eoc) { lc = c; - if ((c = inpc()) == EOF) { + if ((c = read_byte()) == EOF) { /* unterminated comment */ error(256); break; @@ -1201,7 +1203,7 @@ lex_slash_slash_comment(void) /* %s does not support // comments */ gnuism(312, allow_c90 ? 
"C90" : "traditional C"); - while ((c = inpc()) != EOF && c != '\n') + while ((c = read_byte()) != EOF && c != '\n') continue; } Index: src/usr.bin/xlint/lint1/scan.l diff -u src/usr.bin/xlint/lint1/scan.l:1.136 src/usr.bin/xlint/lint1/scan.l:1.137 --- src/usr.bin/xlint/lint1/scan.l:1.136 Fri May 20 21:18:55 2022 +++ src/usr.bin/xlint/lint1/scan.l Sun Jan 8 22:46:00 2023 @@ -1,5 +1,5 @@ %{ -/* $NetBSD: scan.l,v 1.136 2022/05/20 21:18:55 rillig Exp $ */ +/* $NetBSD: scan.l,v 1.137 2023/01/08 22:46:00 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. @@ -35,7 +35,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: scan.l,v 1.136 2022/05/20 21:18:55 rillig Exp $"); +__RCSID("$NetBSD: scan.l,v 1.137 2023/01/08 22:46:00 rillig Exp $"); #endif #include "lint1.h" @@ -127,6 +127,12 @@ TL ([fFlL]?[i]?) %% +/* + * In the above list of regular expressions, the tokens for character + * constants, string literals and comments are incomplete; they only match + * a prefix. The remainder of these tokens is scanned by reading bytes + * directly from the input stream. + */ int lex_input(void) {