On 2/25/21 7:18 AM, Alessandro Di Federico wrote: > From: Paolo Montesel <bab...@rev.ng> > > Signed-off-by: Alessandro Di Federico <a...@rev.ng> > Signed-off-by: Paolo Montesel <bab...@rev.ng> > --- > target/hexagon/idef-parser/idef-parser.h | 245 +++++++ > target/hexagon/idef-parser/idef-parser.lex | 647 ++++++++++++++++++ > target/hexagon/meson.build | 4 + > tests/docker/dockerfiles/alpine.docker | 1 + > tests/docker/dockerfiles/centos7.docker | 1 + > tests/docker/dockerfiles/centos8.docker | 1 + > tests/docker/dockerfiles/debian10.docker | 1 + > .../dockerfiles/fedora-i386-cross.docker | 1 + > .../dockerfiles/fedora-win32-cross.docker | 1 + > .../dockerfiles/fedora-win64-cross.docker | 1 + > tests/docker/dockerfiles/fedora.docker | 1 + > tests/docker/dockerfiles/opensuse-leap.docker | 1 + > tests/docker/dockerfiles/ubuntu.docker | 1 + > tests/docker/dockerfiles/ubuntu1804.docker | 1 + > tests/docker/dockerfiles/ubuntu2004.docker | 3 +- > 15 files changed, 909 insertions(+), 1 deletion(-) > create mode 100644 target/hexagon/idef-parser/idef-parser.h > create mode 100644 target/hexagon/idef-parser/idef-parser.lex > > diff --git a/target/hexagon/idef-parser/idef-parser.h > b/target/hexagon/idef-parser/idef-parser.h > new file mode 100644 > index 0000000000..d08b9c80ea > --- /dev/null > +++ b/target/hexagon/idef-parser/idef-parser.h > @@ -0,0 +1,245 @@ > +/* > + * Copyright(c) 2019-2020 rev.ng Srls. All Rights Reserved. > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see > <http://www.gnu.org/licenses/>. > + */ > + > +#ifndef IDEF_PARSER_H > +#define IDEF_PARSER_H > + > +#include <inttypes.h> > +#include <stdio.h> > +#include <stdbool.h> > + > +#define TCGV_NAME_SIZE 7 > +#define MAX_WRITTEN_REGS 32 > +#define OFFSET_STR_LEN 32 > +#define ALLOC_LIST_LEN 32 > +#define ALLOC_NAME_SIZE 32 > +#define INIT_LIST_LEN 32 > +#define OUT_BUF_LEN (1024 * 1024)
Uh, right. Let's not be declaring statically sized 1MB buffers, thanks. > +#define SIGNATURE_BUF_LEN (128 * 1024) > +#define HEADER_BUF_LEN (128 * 1024) Nor 128k buffers. > +/* Variadic macros to wrap the buffer printing functions */ > +#define EMIT(c, ...) \ > + do { \ > + (c)->out_c += snprintf((c)->out_buffer + (c)->out_c, \ > + OUT_BUF_LEN - (c)->out_c, \ > + __VA_ARGS__); \ There's nothing here that can't be better done with glib GString, with no pre-determined buffer sizes. E.g. g_string_append_printf((c)->out_str, __VA_ARGS__) which seems simple enough to drop the macros entirely. > +/** > + * Semantic record of the EXTRACT token, identifying the cast operator > + */ > +typedef struct HexExtract { > + int bit_width; /**< Bit width of the extract operator > */ > + int storage_bit_width; /**< Actual bit width of the extract operator > */ > + bool is_unsigned; /**< Unsigned flag for the extract operator > */ > +} HexExtract; All extracts begin at the lsb? > +/** > + * Enum of the possible rvalue types, used in the HexValue.type field > + */ > +enum RvalueUnionTag {REGISTER, TEMP, IMMEDIATE, PREDICATE, VARID}; Typedef this. > + enum RvalueUnionTag type; /**< Type of the rvalue > */ so you can drop the enum here. > +enum OpType {ADD_OP, SUB_OP, MUL_OP, DIV_OP, ASL_OP, ASR_OP, LSR_OP, ANDB_OP, > + ORB_OP, XORB_OP, ANDL_OP, MINI_OP, MAXI_OP, MOD_OP}; Likewise. > +/** > + * Data structure including instruction specific information, to be cleared > + * out after the compilation of each instruction > + */ > +typedef struct Inst { > + char *name; /**< Name of the compiled instruction > */ > + char *code_begin; /**< Beginning of instruction input code > */ > + char *code_end; /**< End of instruction input code > */ > + int tmp_count; /**< Index of the last declared TCGv temp > */ > + int qemu_tmp_count; /**< Index of the last declared int temp > */ > + int if_count; /**< Index of the last declared if label > */ > + int error_count; /**< Number of generated errors > */ > + Var allocated[ALLOC_LIST_LEN]; /**< Allocated VARID automatic vars > */ > + int allocated_count; /**< Elements contained in allocated[] > */ > + HexValue init_list[INIT_LIST_LEN]; /**< List of initialized registers > */ > + int init_count; /**< Number of members of init_list > */ > +} Inst; > + > +/** > + * Data structure representing the whole translation context, which in a > + * reentrant flex/bison parser just like ours is passed between the scanner > + * and the parser, holding all the necessary information to perform the > + * parsing, this data structure survives between the compilation of different > + * instructions > + * > + */ > +typedef struct Context { > + void *scanner; /**< Reentrant parser state pointer > */ > + char *input_buffer; /**< Buffer containing the input code > */ > + char *out_buffer; /**< Buffer containing the output code > */ > + int out_c; /**< Characters emitted into out_buffer > */ > + char *signature_buffer; /**< Buffer containing the signatures code > */ > + int signature_c; /**< Characters emitted into sig..._buffer > */ > + char *header_buffer; /**< Buffer containing the output code > */ > + int header_c; /**< Characters emitted into header buffer > */ > + FILE *defines_file; /**< FILE * of the generated header > */ > + FILE *output_file; /**< FILE * of the C output file > */ > + FILE *enabled_file; /**< FILE * of the list of enabled inst > */ > + int total_insn; /**< Number of instructions in input file > */ > + int implemented_insn; /**< Instruction compiled without errors > */ > + Inst inst; /**< Parsing data of the current inst */ > +} Context; I have a notion that every "char *" should be "GString *". > +"(unsigned int)" { /* Skip c-style casts */ } Why is this not treated like > +"(size8"[us]"_t)" { yylval->cast.bit_width = 8; > + yylval->cast.is_unsigned = ((yytext[6]) == 'u'); > + return CAST; } > +"(size16"[us]"_t)" { yylval->cast.bit_width = 16; > + yylval->cast.is_unsigned = ((yytext[7]) == 'u'); > + return CAST; } > +"(int)" { yylval->cast.bit_width = 32; > + yylval->cast.is_unsigned = false; > + return CAST; } these? Certainly it should be placed nearby. > +"CANCEL" { return CANC; } Any reason not to spell out CANCEL? > +"fREAD_LC"{ZERO_ONE} { yylval->rvalue.type = REGISTER; > + yylval->rvalue.reg.type = CONTROL; > + yylval->rvalue.reg.id = LC0 + atoi(yytext + 8); Why atoi and not yytext[8] - '0'? While we're at it, {ZERO_ONE} seems unnecessary obfuscation over [01]. > +{DIGIT}+ { yylval->rvalue.type = IMMEDIATE; > + yylval->rvalue.bit_width = 64; > + yylval->rvalue.is_unsigned = false; > + yylval->rvalue.imm.type = VALUE; > + yylval->rvalue.imm.value = atoi(yytext); > + return IMM; } > +{DIGIT}+"LL" { yylval->rvalue.type = IMMEDIATE; > + yylval->rvalue.bit_width = 64; > + yylval->rvalue.is_unsigned = false; > + yylval->rvalue.imm.type = VALUE; > + yylval->rvalue.imm.value = atoi(yytext); I assume "LL" means it's supposed to be producing a 64-bit result, which atoi will not do. You need to be using strtoll. > +"0x"{HEX_DIGIT}+"ULL" { yylval->rvalue.type = IMMEDIATE; > + yylval->rvalue.bit_width = 64; > + yylval->rvalue.is_unsigned = true; > + yylval->rvalue.imm.type = VALUE; > + yylval->rvalue.imm.value = strtoul(yytext, NULL, > 16); And of course here, strtoull. > +{VAR_ID} { /* Variable name, we adopt the C names convention > */ > + yylval->rvalue.type = VARID; > + yylval->rvalue.var.name = strndup(yytext, > + > ALLOC_NAME_SIZE); > + /* Default types are int */ > + yylval->rvalue.bit_width = 32; > + if (yylval->rvalue.var.name == NULL) { This is the sort of check you need not be doing, by using the correct functions. In this case, g_string_new(yytext). Glib generally asserts on memory allocation failure -- you have to explicitly use a "*_try_*" function if you want to manage failure yourself. Which of course we don't. r~