# New Ticket Created by Jürgen Bömmels # Please include the string: [perl #21033] # in the subject line of all future correspondence about this issue. # <URL: http://rt.perl.org/rt2/Ticket/Display.html?id=21033 >
Hello, I just implemented macro expansion in imcc. This brings us one step closer to replacing assemble.pl. The following things are working: macro definition with parameters; macro expansion with parameters; parameter expansion; local labels; inclusion of files. The following bugs still exist: .constant is not yet implemented; string constants with embedded NUL characters segfault; compound keys are not working, but that's also the case in an unpatched imcc. The macro expansion is done by storing the necessary information about the state of the current buffer in a linked list and continuing the lexing with the expansion of the macro. After the lexing of the macro expansion is finished, the old buffer is restored (in the yywrap function) and lexing continues. The reading of the macro does not use yyparse but calls custom functions which recursively call yylex again. The local labels are implemented by storing an illegal label which is transformed into a regular unique label upon expansion. bye boe -- attachment 1 ------------------------------------------------------ url: http://rt.perl.org/rt2/attach/51310/39304/f51bc1/imcc.diff
Index: languages/imcc/imcc.l =================================================================== RCS file: /cvs/public/parrot/languages/imcc/imcc.l,v retrieving revision 1.26 diff -u -r1.26 imcc.l --- languages/imcc/imcc.l 11 Feb 2003 11:34:36 -0000 1.26 +++ languages/imcc/imcc.l 15 Feb 2003 00:46:12 -0000 @@ -16,10 +16,56 @@ #include "imc.h" #include "parser.h" -#define YY_NO_UNPUT +#define MAX_PARAM 16 + +struct params_t { + char *name[MAX_PARAM]; + int num_param; +}; + +struct macro_t { + char *name; + struct params_t params; + char *expansion; +}; + +/* XXX: boe: rework this hack to use a hash */ +struct macro_t macros[256]; +int num_macros = 0; + +char temp_buffer[4096]; + +struct macro_frame_t { + struct macro_frame_t *next; + YY_BUFFER_STATE buffer; + struct params_t *params; + struct params_t expansion; + int label; +}; + +struct macro_frame_t *frames = NULL; + +/* static function declariations */ +static struct macro_frame_t *new_frame (void); +//static void add_param_to_frame (const char *param, const char *expansion); +static void scan_string (struct macro_frame_t *frame, const char *expansion); +static void scan_file (struct macro_frame_t *frame, FILE *); +static void destroy_frame (struct macro_frame_t *frame); +static int yylex_skip (YYSTYPE *valp, void *interp, const char *skip); + +static int read_macro (YYSTYPE *valp, void *interp); +static int expand_macro (YYSTYPE *valp, void *interp, const char *name); +static void include_file (const char *file_name); + #define YY_DECL int yylex(YYSTYPE *valp, struct Parrot_Interp *interp) -int state; +#define YYCHOP() (yytext[--yyleng] = '\0') +#define DUP_AND_RET(valp, token) \ + do { \ + if (valp) (valp)->s = str_dup(yytext); \ + return token; \ + } while (0) + %} %option outfile="imclexer.c" @@ -32,22 +78,26 @@ SIGN [-+] FLOATNUM {SIGN}?{DIGIT}+{DOT}{DIGIT}*([eE]{SIGN}?{DIGIT}+)? 
LETTERDIGIT [a-zA-Z0-9_] +ID {LETTER}{LETTERDIGIT}* STRINGCONSTANT \"(\\\"|[^"\n]*)*\" CHARCONSTANT \'[^'\n]*\' RANKSPEC \[[,]*\] EOL \r?\n +WS [\t\f\r ] %x emit +%x macro + %% - /* for emacs: ' */ + /* for emacs "*/ if (expect_pasm == 1) { - expect_pasm = 2; + expect_pasm = 2; BEGIN(emit); - } - if (pasm_file && YYSTATE != emit) { - if (pasm_file == 1) { - BEGIN(emit); - return EMIT; + } + if (pasm_file && YYSTATE == INITIAL) { + if (pasm_file == 1) { + BEGIN(emit); + return EMIT; } return 0; } @@ -56,7 +106,7 @@ if (expect_pasm == 2) BEGIN(INITIAL); expect_pasm = 0; - line++; + if (frames) line++; return '\n'; } @@ -128,17 +178,67 @@ "!=" return(RELOP_NE); "**" return(POW); +<emit>".macro" { + return read_macro(valp, interp); + } + +<emit>".include" { + int c; + + c = yylex(valp, interp); + if (c != STRINGC) return c; + + YYCHOP(); + include_file(str_dup(yytext + 1)); + } + +<emit>{ID}"$:" { + char *label; + + if (valp) { + YYCHOP(); YYCHOP(); + + label = mem_sys_allocate(yyleng+10); + sprintf(label, "%s%d", yytext, frames->label); + + valp->s = label; + } + + return LABEL; + } + +<emit>{ID}"$" { + char *label; + + if (valp) { + YYCHOP(); + + label = mem_sys_allocate(yyleng+10); + sprintf(label, "%s%d", yytext, frames->label); + + valp->s = label; + } + + return IDENTIFIER; + } + <emit,INITIAL>"," return(COMMA); -<emit,INITIAL>{LETTER}{LETTERDIGIT}*":" { - yytext[yyleng-1] = 0; /* trim last ':' */ - valp->s = str_dup(yytext); - return(LABEL); +<emit,INITIAL>{ID}":" { + YYCHOP(); /* trim last ':' */ + DUP_AND_RET(valp,LABEL); } -<emit>{DOT}{LETTER}{LETTERDIGIT}* { - valp->s = str_dup(yytext+1); - return(MACRO); +<emit,INITIAL>{DOT}{LETTER}{LETTERDIGIT}* { + int type = get_pmc_num(interp, yytext+1); + + if (type) { + char *buf = malloc(16); + sprintf(buf, "%d", type); + valp->s = buf; + return INTC; + } + if (!expand_macro(valp, interp, yytext+1)) yyerror("unknown macro"); } <emit,INITIAL>{LETTER}{LETTERDIGIT}* { @@ -153,67 +253,90 @@ return(is_op(interp, valp->s) 
? PARROT_OP : IDENTIFIER); } -<emit,INITIAL>{FLOATNUM} { - valp->s = str_dup(yytext); - return(FLOATC); - } +<*>{FLOATNUM} DUP_AND_RET(valp, FLOATC); +<*>{SIGN}?{DIGIT}+ DUP_AND_RET(valp, INTC); +<*>{HEX} DUP_AND_RET(valp, INTC); +<*>{BIN} DUP_AND_RET(valp, INTC); -<emit,INITIAL>{SIGN}?{DIGIT}+ { - valp->s = str_dup(yytext); - return(INTC); - } -<emit,INITIAL>{HEX} { - valp->s = str_dup(yytext); - return(INTC); - } -<emit,INITIAL>{BIN} { - valp->s = str_dup(yytext); - return(INTC); +<*>{STRINGCONSTANT} { + valp->s = str_dup(yytext); + return(STRINGC); /* XXX delete quotes, -> emit, pbc */ } -<emit,INITIAL>{STRINGCONSTANT} { + +<*>{CHARCONSTANT} { valp->s = str_dup(yytext); /* XXX delete quotes, -> emit, pbc */ return(STRINGC); } -<emit,INITIAL>{CHARCONSTANT} { - valp->s = str_dup(yytext); - return(STRINGC); - } -<emit,INITIAL>\$I[0-9]+ { - valp->s = str_dup(yytext); - return(IREG); - } +<*>\$I[0-9]+ DUP_AND_RET(valp, IREG); +<*>\$N[0-9]+ DUP_AND_RET(valp, NREG); +<*>\$S[0-9]+ DUP_AND_RET(valp, SREG); +<*>\$P[0-9]+ DUP_AND_RET(valp, PREG); -<emit,INITIAL>\$N[0-9]+ { - valp->s = str_dup(yytext); - return(NREG); +<emit,INITIAL>{WS}+ /* skip */; + +<emit,INITIAL>. { + return yytext[0]; } -<emit,INITIAL>\$S[0-9]+ { - valp->s = str_dup(yytext); - return(SREG); +<emit><<EOF>> { + BEGIN (INITIAL); + if (pasm_file) { + pasm_file = 2; + return EOM; + } + return 0; } -<emit,INITIAL>\$P[0-9]+ { - valp->s = str_dup(yytext); - return(PREG); +<INITIAL><<EOF>> yyterminate(); + +<macro>".endm" DUP_AND_RET(valp, ENDM); + +<macro>{WS}*{EOL} { + line++; + DUP_AND_RET(valp, '\n'); } -<emit,INITIAL>[\t\f\r ]+ ; -<emit,INITIAL>. 
{ - return yytext[0]; +<macro>"$"{ID}":" return LABEL; +<macro>".local"{WS}+ { + char *label; + char *name = macros[num_macros].name; + + if (yylex(valp, interp) != LABEL) yyerror("LABEL expected"); + + if (valp) { + YYCHOP(); + + label = mem_sys_allocate(strlen(name) + yyleng + 15); + sprintf(label, "local__%s__%s__$:", name, yytext+1); + + valp->s = label; + } + + return LABEL; } -<emit><<EOF>> { - BEGIN (INITIAL); - if (pasm_file) { - pasm_file = 2; - return EOM; - } - return 0; +<macro>".$"{ID} { + char *label; + char *name = macros[num_macros].name; + + if (valp) { + label = mem_sys_allocate(strlen(name) + yyleng + 15); + sprintf(label, "local__%s__%s__$", name, yytext+2); + + valp->s = label; + } + + return IDENTIFIER; } -<<EOF>> yyterminate(); +<macro>^{WS}+ /* skip leading ws */; +<macro>{WS}+ DUP_AND_RET(valp, ' '); +<macro>{ID} DUP_AND_RET(valp, IDENTIFIER); +<macro>{DOT}{ID} DUP_AND_RET(valp, MACRO); +<macro>. DUP_AND_RET(valp, yytext[0]); +<macro><<EOF>> yyterminate(); + %% #ifdef yywrap @@ -225,6 +348,280 @@ yywrap returns 0 if scanning is to continue */ yy_delete_buffer(YY_CURRENT_BUFFER); + + /* pop old frames */ + if (frames) { + struct macro_frame_t *tmp; + tmp = frames; + frames = frames->next; + destroy_frame(tmp); + return 0; + } + return 1; } +static struct macro_frame_t * +new_frame (void) +{ + static int label = 0; + struct macro_frame_t *tmp; + + tmp = mem_sys_allocate_zeroed(sizeof(struct macro_frame_t)); + tmp->label = ++label; + + return tmp; +} + +static void +scan_string (struct macro_frame_t *frame, const char *expansion) +{ + frame->buffer = YY_CURRENT_BUFFER; + frame->next = frames; + frames = frame; + + yy_scan_string(expansion); +} + +static void +destroy_frame (struct macro_frame_t *frame) +{ + YY_BUFFER_STATE buffer; + int i; + + buffer = frame->buffer; + + for (i = 0; i < frame->expansion.num_param; i++) { + free(frame->expansion.name[i]); + } + + mem_sys_free(frame); + + yy_switch_to_buffer(buffer); +} + +static int +yylex_skip 
(YYSTYPE *valp, void *interp, const char *skip) +{ + int c; + const char *p; + + do { + c = yylex(NULL, interp); + p = skip; + while (*p && c != *p) p++; + } while (*p != '\0'); + + DUP_AND_RET(valp, c); +} + +static int +read_params (YYSTYPE *valp, void *interp, struct params_t *params, + int need_id) +{ + int c; + YYSTYPE val; + char *current = strdup(""); + int len = 0; + + params->num_param = 0; + c = yylex_skip(&val, interp, " \n"); + + while(c != ')') { + if (c == 0) yyerror ("Unexpected end of file"); + else if (c == ',') { + params->name[params->num_param++] = current; + current = strdup(""); + len = 0; + c = yylex_skip(&val, interp, " \n"); + } + else if (need_id && (*current || c != IDENTIFIER) && c != ' ') { + yyerror("macro parameter error"); + } + else { + if (!need_id || c != ' ') { + len += strlen(val.s); + current = realloc(current, len + 1); + strcat(current,val.s); + } + free(val.s); + c = yylex(&val,interp); + } + } + params->name[params->num_param++] = current; + + if (valp) *valp = val; + else free(val.s); + + return c; +} + +static int +read_macro (YYSTYPE *valp, void *interp) +{ + int c; + struct macro_t *m = macros + num_macros; + int start_cond; + + temp_buffer[0]='\0'; + + start_cond = YY_START; + BEGIN(macro); + + c = yylex(NULL, interp); + if (c != ' ') yyerror("macro error"); + + c = yylex(valp, interp); + if (c != IDENTIFIER) yyerror("macro error"); + + m->name = valp->s; + + /* white space is allowed between macro and opening paren) */ + c = yylex_skip(valp, interp, " "); + + if (c == '(') { + free(valp->s); + + c = read_params(NULL, interp, &m->params, 1); + if (c != ')') yyerror("macro parameter: \")\" expected"); + + c = yylex(valp, interp); + } + + while (c != ENDM) { + if (c == 0) yyerror("file ended before macro complete"); + + strcat(temp_buffer, valp->s); + free(valp->s); + + c = yylex(valp, interp); + } + free(valp->s); + + BEGIN(start_cond); + + macros[num_macros].expansion = str_dup(temp_buffer); + + num_macros++; + return 
MACRO; +} + +static char * +find_macro_param (const char *name) +{ + struct macro_frame_t *f; + int i; + + for (f = frames; f; f = f->next) { + if (f->params) { + for (i = 0; i < f->params->num_param; i++) { + if (strcmp(f->params->name[i], name) == 0) { + return f->expansion.name[i]; + } + } + } + } + + return NULL; +} + +static struct macro_t * +find_macro (const char* name) +{ + int i; + + for (i = 0; i < num_macros; i++) { + if (strcmp(name, macros[i].name) == 0) return macros + i; + } + + return NULL; +} + +static int +expand_macro (YYSTYPE *valp, void *interp, const char *name) +{ + int c; + struct macro_frame_t *frame; + struct macro_t *m; + const char *expansion; + int start_cond; + + frame = new_frame(); + + expansion = find_macro_param(name); + if (expansion) { + scan_string(frame, expansion); + return 1; + } + + m = find_macro(name); + frame->params = &m->params; + if (m) { + /* whitespace can be savely ignored */ + do { + c = input(); + } while (c == ' ' || c == '\t'); + + if (c != '(') { + unput(c); + if (m->params.num_param != 0) yyerror ("Missing macro parameter"); + scan_string(frame, m->expansion); + return 1; + } + + start_cond = YY_START; + BEGIN(macro); + + read_params (NULL, interp, &frame->expansion, 0); + + BEGIN(start_cond); + + if (frame->expansion.num_param == 0 && m->params.num_param == 1) { + frame->expansion.name[0] = str_dup(""); + frame->expansion.num_param = 1; + } + + if (frame->expansion.num_param != m->params.num_param) { + yyerror ("Wrong number of macro arguments"); + } + + scan_string(frame, m->expansion); + return 1; + } + + return 0; +} + +static void +include_file (const char *file_name) +{ + struct macro_frame_t *frame; + FILE *file; + + frame = new_frame(); + + file = fopen(file_name, "r"); + if (!file) yyerror ("file not found"); + + scan_file (frame, file); +} + +static void +scan_file (struct macro_frame_t *frame, FILE *file) +{ + frame->buffer = YY_CURRENT_BUFFER; + frame->next = frames; + frames = frame; + + 
yy_switch_to_buffer(yy_create_buffer(file, YY_BUF_SIZE)); +} + +/* + * Local variables: + * c-indentation-style: bsd + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * + * vim: expandtab shiftwidth=4: + */ Index: languages/imcc/imcc.y =================================================================== RCS file: /cvs/public/parrot/languages/imcc/imcc.y,v retrieving revision 1.44 diff -u -r1.44 imcc.y --- languages/imcc/imcc.y 11 Feb 2003 11:34:36 -0000 1.44 +++ languages/imcc/imcc.y 15 Feb 2003 00:46:13 -0000 @@ -169,6 +169,7 @@ return 0; } +#if 0 /* return the index of a PMC class */ static int get_pmc_num(struct Parrot_Interp *interpreter, char *pmc_type) @@ -191,6 +192,7 @@ r = mk_const(str_dup(buf), 'I'); return r; } +#endif static Instruction * multi_keyed(struct Parrot_Interp *interpreter,char *name, @@ -404,18 +406,18 @@ char * s; SymReg * sr; Instruction *i; } %token <t> CALL GOTO ARG IF UNLESS NEW END SAVEALL RESTOREALL %token <t> SUB NAMESPACE ENDNAMESPACE CLASS ENDCLASS SYM LOCAL CONST PARAM +%token <t> CONSTANT INC DEC -%token <t> INC DEC %token <t> SHIFT_LEFT SHIFT_RIGHT INTV FLOATV STRINGV DEFINED LOG_XOR %token <t> RELOP_EQ RELOP_NE RELOP_GT RELOP_GTE RELOP_LT RELOP_LTE %token <t> GLOBAL ADDR CLONE RESULT RETURN POW SHIFT_RIGHT_U LOG_AND LOG_OR %token <t> COMMA ESUB %token <s> LABEL %token <t> EMIT EOM -%token <s> IREG NREG SREG PREG IDENTIFIER STRINGC INTC FLOATC REG MACRO +%token <s> IREG NREG SREG PREG IDENTIFIER STRINGC INTC FLOATC REG MACRO ENDM %token <s> PARROT_OP %type <t> type %type <i> program sub sub_start emit @@ -425,7 +427,7 @@ %type <sr> target reg const var rc string %type <sr> key keylist _keylist newtype %type <sr> vars _vars var_or_i _var_or_i -%type <i> pasmcode pasmline pasm_inst +%type <i> pasmcode pasmline pasm_inst constant_def %type <sr> pasm_args lhs %token <sr> VAR @@ -448,16 +450,20 @@ ; pasmline: labels pasm_inst '\n' { $$ = 0; } + | MACRO '\n' { $$ = 0; } + | constant_def ; + pasm_inst: {clear_state();} 
PARROT_OP pasm_args { $$ = iANY(interp, $2,0,regs,1); free($2); } | /* none */ { $$ = 0;} - ; pasm_args: vars ; +constant_def: CONSTANT IDENTIFIER const { printf ("%s\n", $1); } + emit: EMIT { open_comp_unit(); } pasmcode @@ -586,8 +592,7 @@ ; newtype: - MACRO { $$ = macro(interp, $1); free($1); } - | const + const ; if_statement: @@ -638,7 +643,6 @@ IDENTIFIER { $$ = mk_address($1, U_add_once); } | PARROT_OP { $$ = mk_address($1, U_add_once); } | var - | MACRO { $$ = macro(interp, $1); free($1); } ; var: VAR Index: languages/imcc/imc.c =================================================================== RCS file: /cvs/public/parrot/languages/imcc/imc.c,v retrieving revision 1.32 diff -u -r1.32 imc.c --- languages/imcc/imc.c 8 Feb 2003 17:16:16 -0000 1.32 +++ languages/imcc/imc.c 15 Feb 2003 00:46:13 -0000 @@ -23,6 +23,17 @@ static IMCStack nodeStack; +/* return the index of a PMC class */ +int get_pmc_num(struct Parrot_Interp *interp, char *pmc_type) +{ + STRING * s = string_make(interp, pmc_type, + (UINTVAL) strlen(pmc_type), NULL, 0, NULL); + PMC * key = key_new_string(interp, s); + PMC * cnames = interp->Parrot_base_classname_hash; + + return cnames->vtable->get_integer_keyed(interp, cnames, key); +} + /* allocate is the main loop of the allocation algorithm */ void allocate(struct Parrot_Interp *interpreter) { int to_spill; Index: languages/imcc/imc.h =================================================================== RCS file: /cvs/public/parrot/languages/imcc/imc.h,v retrieving revision 1.25 diff -u -r1.25 imc.h --- languages/imcc/imc.h 8 Feb 2003 14:41:28 -0000 1.25 +++ languages/imcc/imc.h 15 Feb 2003 00:46:13 -0000 @@ -54,6 +54,7 @@ void restore_interference_graph(void); void free_reglist(void); int neighbours(int node); +int get_pmc_num(struct Parrot_Interp *interp, char *pmc_type); int check_op(struct Parrot_Interp *, char * fullname, char *op, SymReg *r[]); Index: languages/imcc/parser_util.c 
=================================================================== RCS file: /cvs/public/parrot/languages/imcc/parser_util.c,v retrieving revision 1.8 diff -u -r1.8 parser_util.c --- languages/imcc/parser_util.c 31 Jan 2003 10:54:08 -0000 1.8 +++ languages/imcc/parser_util.c 15 Feb 2003 00:46:20 -0000 @@ -29,10 +29,13 @@ iNEW(struct Parrot_Interp *interpreter, SymReg * r0, char * type, int emit) { char fmt[256]; - SymReg *pmc = macro(interpreter, type); + SymReg *pmc; + int pmc_num = get_pmc_num(interpreter, type); + sprintf(fmt, "%d", pmc_num); + pmc = mk_const(str_dup(fmt), 'I'); /* XXX check, if type exists, but aove keyed search * gives 0 for non existing PMCs */ - sprintf(fmt, "%%s, %d\t # .%s", atoi(pmc->name), type); + sprintf(fmt, "%%s, %d\t # .%s", pmc_num, type); r0->usage = U_NEW; if (!strcmp(type, "PerlArray") || !strcmp(type, "PerlHash")) r0->usage |= U_KEYED; Index: languages/imcc/symreg.c =================================================================== RCS file: /cvs/public/parrot/languages/imcc/symreg.c,v retrieving revision 1.14 diff -u -r1.14 symreg.c --- languages/imcc/symreg.c 7 Feb 2003 14:05:51 -0000 1.14 +++ languages/imcc/symreg.c 15 Feb 2003 00:46:21 -0000 @@ -150,9 +150,10 @@ r->type == VTADDRESS && r->lhs_use_count /* we use this for labes/subs */ ) { - if (uniq == U_add_uniq_label) + if (uniq == U_add_uniq_label) { fataly(1, "mk_address", line, - "Label '%s' already defined\n", name); + "Label '%s' already defined\n", name); + } else if (uniq == U_add_uniq_sub) fataly(1, "mk_address", line, "Subroutine '%s' already defined\n", name);