Module Name:	src
Committed By:	rillig
Date:		Sun Jan 22 17:04:30 UTC 2023
Modified Files:
	src/usr.bin/xlint/lint1: lex.c

Log Message:
lint: clean up the lexer

No functional change.

To generate a diff of this commit:
cvs rdiff -u -r1.145 -r1.146 src/usr.bin/xlint/lint1/lex.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/usr.bin/xlint/lint1/lex.c diff -u src/usr.bin/xlint/lint1/lex.c:1.145 src/usr.bin/xlint/lint1/lex.c:1.146 --- src/usr.bin/xlint/lint1/lex.c:1.145 Sun Jan 22 16:05:08 2023 +++ src/usr.bin/xlint/lint1/lex.c Sun Jan 22 17:04:30 2023 @@ -1,4 +1,4 @@ -/* $NetBSD: lex.c,v 1.145 2023/01/22 16:05:08 rillig Exp $ */ +/* $NetBSD: lex.c,v 1.146 2023/01/22 17:04:30 rillig Exp $ */ /* * Copyright (c) 1996 Christopher G. Demetriou. All Rights Reserved. @@ -38,7 +38,7 @@ #include <sys/cdefs.h> #if defined(__RCSID) -__RCSID("$NetBSD: lex.c,v 1.145 2023/01/22 16:05:08 rillig Exp $"); +__RCSID("$NetBSD: lex.c,v 1.146 2023/01/22 17:04:30 rillig Exp $"); #endif #include <ctype.h> @@ -70,7 +70,7 @@ bool in_system_header; /* * Valid values for 'since' are 78, 90, 99, 11. * - * As of 2022-04-30, lint treats 11 like 99, in order to provide good error + * The C11 keywords are added in C99 mode as well, to provide good error * messages instead of a simple parse error. If the keyword '_Generic' were * not defined, it would be interpreted as an implicit function call, leading * to a parse error. @@ -96,15 +96,15 @@ bool in_system_header; /* During initialization, these keywords are written to the symbol table. */ static const struct keyword { - const char *kw_name; /* keyword */ + const char *kw_name; int kw_token; /* token returned by yylex() */ - scl_t kw_scl; /* storage class if kw_token T_SCLASS */ - tspec_t kw_tspec; /* type spec. if kw_token + scl_t kw_scl; /* storage class if kw_token is T_SCLASS */ + tspec_t kw_tspec; /* type spec if kw_token is * T_TYPE or T_STRUCT_OR_UNION */ - tqual_t kw_tqual; /* type qual. 
if kw_token T_QUAL */ - bool kw_c90:1; /* C90 keyword */ - bool kw_c99_or_c11:1; /* C99 or C11 keyword */ - bool kw_gcc:1; /* GCC keyword */ + tqual_t kw_tqual; /* type qualifier if kw_token is T_QUAL */ + bool kw_c90:1; /* available in C90 mode */ + bool kw_c99_or_c11:1; /* available in C99 or C11 mode */ + bool kw_gcc:1; /* available in GCC mode */ bool kw_plain:1; /* 'name' */ bool kw_leading:1; /* '__name' */ bool kw_both:1; /* '__name__' */ @@ -112,8 +112,8 @@ static const struct keyword { kwdef_keyword( "_Alignas", T_ALIGNAS), kwdef_keyword( "_Alignof", T_ALIGNOF), kwdef_token( "alignof", T_ALIGNOF, 78,0,6), - kwdef_token( "_Atomic", T_ATOMIC, 11,0,1), kwdef_token( "asm", T_ASM, 78,1,7), + kwdef_token( "_Atomic", T_ATOMIC, 11,0,1), kwdef_token( "attribute", T_ATTRIBUTE, 78,1,6), kwdef_sclass( "auto", AUTO, 78,0,1), kwdef_type( "_Bool", BOOL, 99), @@ -178,11 +178,17 @@ static const struct keyword { #undef kwdef_keyword }; -/* Symbol table */ -static sym_t *symtab[HSHSIZ1]; +/* + * The symbol table containing all keywords, identifiers and labels. The hash + * entries are linked via sym_t.s_symtab_next. + */ +static sym_t *symtab[HSHSIZ1]; -/* type of next expected symbol */ -symt_t symtyp; +/* + * The kind of the next expected symbol, to distinguish the namespaces of + * members, labels, type tags and other identifiers. + */ +symt_t symtyp; static unsigned int @@ -272,7 +278,7 @@ struct syms { static void syms_add(struct syms *syms, const sym_t *sym) { - while (syms->len >= syms->cap) { + if (syms->len >= syms->cap) { syms->cap *= 2; syms->items = xrealloc(syms->items, syms->cap * sizeof(syms->items[0])); @@ -324,29 +330,28 @@ debug_symtab(void) static void add_keyword(const struct keyword *kw, bool leading, bool trailing) { - sym_t *sym; - char buf[256]; - const char *name; + const char *name; if (!leading && !trailing) { name = kw->kw_name; } else { + char buf[256]; (void)snprintf(buf, sizeof(buf), "%s%s%s", leading ? "__" : "", kw->kw_name, trailing ? 
"__" : ""); name = xstrdup(buf); } - sym = block_zero_alloc(sizeof(*sym)); + sym_t *sym = block_zero_alloc(sizeof(*sym)); sym->s_name = name; sym->s_keyword = kw; - sym->u.s_keyword.sk_token = kw->kw_token; - if (kw->kw_token == T_TYPE || kw->kw_token == T_STRUCT_OR_UNION) { + int tok = kw->kw_token; + sym->u.s_keyword.sk_token = tok; + if (tok == T_TYPE || tok == T_STRUCT_OR_UNION) sym->u.s_keyword.sk_tspec = kw->kw_tspec; - } else if (kw->kw_token == T_SCLASS) { + if (tok == T_SCLASS) sym->s_scl = kw->kw_scl; - } else if (kw->kw_token == T_QUAL) { + if (tok == T_QUAL) sym->u.s_keyword.sk_qualifier = kw->kw_tqual; - } symtab_add(sym); } @@ -374,17 +379,14 @@ is_keyword_known(const struct keyword *k return true; } -/* - * All keywords are written to the symbol table. This saves us looking - * in an extra table for each name we found. - */ +/* Write all keywords to the symbol table. */ void initscan(void) { - const struct keyword *kw, *end; - end = keywords + sizeof(keywords) / sizeof(keywords[0]); - for (kw = keywords; kw != end; kw++) { + size_t n = sizeof(keywords) / sizeof(keywords[0]); + for (size_t i = 0; i < n; i++) { + const struct keyword *kw = keywords + i; if (!is_keyword_known(kw)) continue; if (kw->kw_plain) @@ -432,17 +434,9 @@ lex_keyword(sym_t *sym) } /* - * Lex has found a letter followed by zero or more letters or digits. - * It looks for a symbol in the symbol table with the same name. This - * symbol must either be a keyword or a symbol of the type required by - * symtyp (label, member, tag, ...). - * - * If it is a keyword, the token is returned. In some cases it is described - * more deeply by data written to yylval. - * - * If it is a symbol, T_NAME is returned and the name is stored in yylval. - * If there is already a symbol of the same name and type in the symbol - * table, yylval.y_name->sb_sym points there. + * Look up the definition of a name in the symbol table. 
This symbol must + * either be a keyword or a symbol of the type required by symtyp (label, + * member, tag, ...). */ extern int lex_name(const char *yytext, size_t yyleng) @@ -470,10 +464,6 @@ lex_name(const char *yytext, size_t yyle } -/* - * Convert a string representing an integer into internal representation. - * Return T_CON, storing the numeric value in yylval, for yylex. - */ int lex_integer_constant(const char *yytext, size_t yyleng, int base) { @@ -526,7 +516,6 @@ lex_integer_constant(const char *yytext, typ = suffix_type[u_suffix][l_suffix]; errno = 0; - uq = (uint64_t)strtoull(cp, &eptr, base); lint_assert(eptr == cp + len); if (errno != 0) { @@ -643,13 +632,6 @@ convert_integer(int64_t q, tspec_t t, un : (int64_t)(q | ~vbits); } -/* - * Convert a string representing a floating point value into its numerical - * representation. Type and value are returned in yylval. - * - * XXX Currently it is not possible to convert constants of type - * long double which are greater than DBL_MAX. - */ int lex_floating_constant(const char *yytext, size_t yyleng) { @@ -682,14 +664,15 @@ lex_floating_constant(const char *yytext warning(98); } + /* TODO: Handle precision and exponents of 'long double'. */ errno = 0; d = strtod(cp, &eptr); if (eptr != cp + len) { switch (*eptr) { /* - * XXX: non-native non-current strtod() may not handle hex - * floats, ignore the rest if we find traces of hex float - * syntax... + * XXX: Non-native non-current strtod() may not + * handle hex floats, ignore the rest if we find + * traces of hex float syntax. */ case 'p': case 'P': @@ -851,8 +834,7 @@ read_escaped_backslash(int delim) * Read a character which is part of a character constant or of a string * and handle escapes. * - * The argument is the character which delimits the character constant or - * string. + * 'delim' is '\'' for character constants and '"' for string literals. 
* * Returns -1 if the end of the character constant or string is reached, * -2 if the EOF is reached, and the character otherwise. @@ -860,14 +842,13 @@ read_escaped_backslash(int delim) static int get_escaped_char(int delim) { - int c; - if (prev_byte == -1) { - c = read_byte(); - } else { - c = prev_byte; + int c = prev_byte; + if (c != -1) prev_byte = -1; - } + else + c = read_byte(); + if (c == delim) return -1; switch (c) { @@ -1096,7 +1077,7 @@ lex_directive(const char *yytext) void lex_comment(void) { - int c, lc; + int c; static const struct { const char *keywd; bool arg; @@ -1125,9 +1106,8 @@ lex_comment(void) char arg[32]; size_t l, i; int a; - bool eoc; - eoc = false; + bool seen_end_of_comment = false; /* Skip whitespace after the start of the comment */ while (c = read_byte(), isspace(c)) @@ -1173,37 +1153,27 @@ lex_comment(void) while (isspace(c)) c = read_byte(); - if (c != '*' || (c = read_byte()) != '/') { - if (keywtab[i].func != linted) - /* extra characters in lint comment */ - warning(257); - } else { - /* - * remember that we have already found the end of the - * comment - */ - eoc = true; - } + seen_end_of_comment = c == '*' && (c = read_byte()) == '/'; + if (!seen_end_of_comment && keywtab[i].func != linted) + /* extra characters in lint comment */ + warning(257); if (keywtab[i].func != NULL) - (*keywtab[i].func)(a); + keywtab[i].func(a); skip_rest: - while (!eoc) { - lc = c; + while (!seen_end_of_comment) { + int lc = c; if ((c = read_byte()) == EOF) { /* unterminated comment */ error(256); break; } if (lc == '*' && c == '/') - eoc = true; + seen_end_of_comment = true; } } -/* - * Handle // style comments - */ void lex_slash_slash_comment(void) { @@ -1233,11 +1203,6 @@ clear_warn_flags(void) constcond_flag = false; } -/* - * Strings are stored in a dynamically allocated buffer and passed - * in yylval.y_string to the parser. The parser or the routines called - * by the parser are responsible for freeing this buffer. 
- */ int lex_string(void) { @@ -1417,8 +1382,8 @@ getsym(sbuf_t *sb) } /* - * Construct a temporary symbol. The symbol name starts with a digit, making - * the name illegal. + * Construct a temporary symbol. The symbol name starts with a digit to avoid + * name clashes with other identifiers. */ sym_t * mktempsym(type_t *tp) @@ -1484,9 +1449,7 @@ rmsyms(sym_t *syms) } } -/* - * Put a symbol into the symbol table. - */ +/* Put a symbol into the symbol table. */ void inssym(int level, sym_t *sym) { @@ -1501,18 +1464,12 @@ inssym(int level, sym_t *sym) * that these symbols are preferred over symbols from the outer * blocks that happen to have the same name. */ - lint_assert(sym->s_symtab_next != NULL - ? sym->s_block_level >= sym->s_symtab_next->s_block_level - : true); + const sym_t *next = sym->s_symtab_next; + if (next != NULL) + lint_assert(sym->s_block_level >= next->s_block_level); } -/* - * Called at level 0 after syntax errors. - * - * Removes all symbols which are not declared at level 0 from the - * symbol table. Also frees all memory which is not associated with - * level 0. - */ +/* Called at level 0 after syntax errors. */ void clean_up_after_error(void) {