Attached is a patch that: * fixes some things so that PASM instructions are parsed correctly * makes minor updates to pir.pg
regards, kjs
Index: languages/PIR/lib/pasm_args.pg =================================================================== --- languages/PIR/lib/pasm_args.pg (revision 17016) +++ languages/PIR/lib/pasm_args.pg (working copy) @@ -1,4 +1,4 @@ -grammar PIRGrammar; +grammar PIR::Grammar; # This file contains helper rules to handle # the PASM instruction arguments. Many instructions Index: languages/PIR/lib/pasm_core.pg =================================================================== --- languages/PIR/lib/pasm_core.pg (revision 17016) +++ languages/PIR/lib/pasm_core.pg (working copy) @@ -7,59 +7,59 @@ # end | noop -| reserved <int_constant> -| load_bytecode <arg_string> +| reserved <arg_hack> #<int_constant> +| load_bytecode <arg_hack> #<arg_string> # # control flow # -| branch <arg_int> -| branch_cs <arg_string> -| bsr <arg_int> +| branch <arg_hack> #<arg_int> +| branch_cs <arg_hack> #<arg_string> +| bsr <arg_hack> #<arg_int> | ret | jsr <arg_int> | enternative # conditional branch -| if <var> \, <arg_int> -| unless <var> \, <arg_int> - # subroutine ops -| invokecc <arg_pmc_var> -| invoke <arg_pmc_var> \, <arg_pmc_var> -| yield -| tailcall <arg_pmc_var> -| returncc -| newclosure <arg_pmc_var> \, <arg_pmc_var> +| if <arg_hack> #<target> \, <arg_int> +| unless <arg_hack> #<target> \, <arg_int> + # subroutine ops <arg_hack> # +| invokecc <arg_hack> #<arg_pmc_target> +| invoke <arg_hack> #<arg_pmc_target> \, <arg_pmc_target> +| yield <arg_hack> # +| tailcall <arg_hack> #<arg_pmc_target> +| returncc <arg_hack> # +| newclosure <arg_hack> #<arg_pmc_target> \, <arg_pmc_target> # function args ops -| set_args <arg_set_args> -| get_results <arg_get_results> -| get_params <arg_get_params> -| set_returns <arg_set_returns> +| set_args <arg_hack> # <arg_set_args> +| get_results <arg_hack> # <arg_get_results> +| get_params <arg_hack> # <arg_get_params> +| set_returns <arg_hack> # <arg_set_returns> | result_info <arg_hack> # address manipulation -| set_addr <arg_hack> #<var> \, <int_constant> -| 
get_addr <arg_hack> #<var> \, <var> +| set_addr <arg_hack> #<target> \, <int_constant> +| get_addr <arg_hack> #<target> \, <target> # exception handling | push_eh <arg_hack> #<id> | clear_eh <arg_hack> # -| throw <arg_hack> #<var> -| rethrow <arg_hack> #<var> +| throw <arg_hack> #<target> +| rethrow <arg_hack> #<target> | die <arg_hack> #<int_constant> \, <int_constant> | exit <arg_hack> #<int_constant> | pushmark <arg_hack> #<int_constant> | popmark <arg_hack> #<int_constant> -| pushaction <arg_hack> #<var> +| pushaction <arg_hack> #<target> # interpreter ops | debug <arg_hack> #<int_constant> | bounds <arg_hack> #<int_constant> | profile <arg_hack> #<int_constant> | trace <arg_hack> #<int_constant> | gc_debug <arg_hack> #<int_constant> -| interpinfo <arg_hack> #<var> \, <int_constant> +| interpinfo <arg_hack> #<target> \, <int_constant> | warningson <arg_hack> #<int_constant> | warningsoff <arg_hack> #<int_constant> | errorson <arg_hack> #<int_constant> | errorsoff <arg_hack> #<int_constant> -| runinterp <arg_hack> #<var> \, <int_constant> -| getinterp <arg_hack> #<var> +| runinterp <arg_hack> #<target> \, <int_constant> +| getinterp <arg_hack> #<target> # DOD/GC | sweep <arg_hack> #<int_constant> | collect @@ -67,11 +67,11 @@ | sweepon | collectoff | collecton -| needs_destroy <arg_hack> #<var> +| needs_destroy <arg_hack> #<target> # NCI -| loadlib <arg_hack> #<var> \, <var> -| dlfunc <arg_hack> #<var> \, <var> \, <var> \, <var> -| dlvar <arg_hack> #<var> \, <var> \, <var> -| compreg <arg_hack> #<var> \, <var> -| new_callback <arg_hack> #<var> \, <var> \, <var> \, <var> +| loadlib <arg_hack> #<target> \, <target> +| dlfunc <arg_hack> #<target> \, <target> \, <target> \, <target> +| dlvar <arg_hack> #<target> \, <target> \, <target> +| compreg <arg_hack> #<target> \, <target> +| new_callback <arg_hack> #<target> \, <target> \, <target> \, <target> } Index: languages/PIR/lib/pasm_instr.pg =================================================================== --- 
languages/PIR/lib/pasm_instr.pg (revision 17016) +++ languages/PIR/lib/pasm_instr.pg (working copy) @@ -7,320 +7,322 @@ # PGE implements this. token pasm_instruction { - end -| noop -| reserved -| load_bytecode -| branch -| branch_cs -| bsr -| ret -| jsr <arg_int> -| enternative -| if +[ yield +| xor +| warningson +| warningsoff +| valid_type +| upcase +| unshift +| unregister +| unpin +| unless_null | unless -| invokecc -| invoke -| yield -| tailcall -| returncc +| typeof +| trans_encoding +| trans_charset +| trace +| titlecase +| time +| throw +| thaw +| tell +| tanh +| tan +| tailcallmethod +| tailcall +| sysinfo +| sweepon +| sweepoff +| sweep +| substr +| subclass +| sub +| stringinfo +| store_lex +| stat +| sqrt +| sprintf +| split +| spawnw +| socket +| sockaddr +| sleep +| sizeof +| sinh +| singleton +| sin +| shr +| shl +| shift +| setstdout +| setstderr +| sets_ind +| setref +| setprop +| setp_ind +| setn_ind +| seti_ind +| setattribute +| set_root_global +| set_returns +| set_hll_global +| set_global +| set_args +| set_addr +| set +| send +| seek +| sech +| sec +| savec +| saveall +| save +| runinterp +| rotate_up +| rot +| returncc +| rethrow +| ret +| result_info +| restoreall +| restore +| reserved +| repeat +| removeparent +| removedoes +| removeattribute +| register +| recv +| readline +| read +| pushmark +| pushaction +| push_eh +| push +| prophash +| profile +| printerr +| print +| print +| pow +| popmark +| pop +| poll +| pioctl +| pin +| peek +| ord +| or +| open +| null +| not +| noop | newclosure -| set_args -| get_results -| get_params -| set_returns -| result_info -| set_addr +| newclass +| new_callback +| new +| new +| neg +| needs_destroy +| ne_str +| ne_num +| ne_addr +| ne +| n_repeat +| n_not +| n_neg +| n_infix +| n_concat +| n_bnots +| n_bnot +| n_abs +| mul +| mod +| mmdvtregister +| mmdvtfind +| lt_str +| lt_num +| lt_addr +| lt +| lsr +| lookback +| log2 +| log10 +| localtime +| loadlib +| load_bytecode +| ln +| listen +| length +| 
le_str +| le_num +| le_addr +| le +| lcm +| jsr +| join +| istrue +| issame +| isnull +| isntsame +| isne +| islt +| isle +| isgt +| isge +| isfalse +| iseq +| isa +| is_cclass +| invokecc +| invoke +| interpinfo +| infix +| index +| inc +| if_null +| if +| hash +| gt_str +| gt_num +| gt_addr +| gt +| gmtime +| getstdout +| getstdin +| getstderr +| getprop +| getline +| getinterp +| getfile +| getfd +| getclass +| getattribute +| get_root_namespace +| get_root_global +| get_results +| get_repr +| get_params +| get_namespace +| get_mro +| get_hll_namespace +| get_hll_global +| get_global | get_addr -| push_eh -| clear_eh -| throw -| rethrow -| die -| exit -| pushmark -| popmark -| pushaction -| debug -| bounds -| profile -| trace -| gc_debug -| interpinfo -| warningson -| warningsoff -| errorson -| errorsoff -| runinterp -| getinterp -| sweep -| collect -| sweepoff -| sweepon -| collectoff -| collecton -| needs_destroy -| loadlib -| dlfunc -| dlvar -| compreg -| new_callback -| band -| bands -| bnot -| n_bnot -| bnots -| n_bnots -| bor -| bors -| shl -| shr -| lsr -| rot -| bxor -| bxors -| eq -| eq_str -| eq_num -| eq_addr -| ne -| ne_str -| ne_num -| ne_addr -| lt -| lt_str -| lt_num -| lt_addr -| le -| le_str -| le_num -| le_addr -| gt -| gt_str -| gt_num -| gt_addr -| ge -| ge_str -| ge_num -| ge_addr -| if_null -| unless_null -| cmp -| cmp_str -| cmp_num -| issame -| isntsame -| istrue -| isfalse -| isnull -| isge -| isgt -| isle -| islt -| iseq -| isne -| and -| not -| n_not -| or -| xor -| debug_init -| debug_load -| debug_break -| debug_print -| backtrace -| getline -| getfile -| close -| fdopen -| getfd -| getstdin -| getstdout -| getstderr -| setstdout -| setstderr -| pioctl -| open -| print -| printerr -| print -| read -| readline -| peek -| stat -| seek -| tell -| socket -| sockaddr -| connect -| recv -| send -| poll -| bind -| listen -| accept -| infix -| n_infix -| abs -| n_abs -| add -| cmod -| dec -| div -| fdiv -| ceil -| floor -| inc -| mod -| mul 
-| neg -| n_neg -| pow -| sub -| sqrt -| acos -| asec -| asin -| atan -| cos -| cosh -| exp -| ln -| log10 -| log2 -| sec -| sech -| sin -| sinh -| tan -| tanh -| gcd -| lcm -| fact -| callmethodcc -| callmethod -| tailcallmethod -| addmethod -| can -| does -| isa -| newclass -| subclass -| getclass -| singleton -| class -| classname -| addparent -| removeparent -| addattribute -| removeattribute -| getattribute -| setattribute -| classoffset -| adddoes -| removedoes -| new -| typeof -| find_type -| valid_type -| get_repr -| find_method -| defined -| exists -| delete -| elements -| push -| pop -| unshift -| shift -| setprop -| getprop -| delprop -| prophash -| freeze -| thaw -| mmdvtregister -| mmdvtfind -| register -| unregister -| hash -| get_mro -| clone -| exchange -| set -| assign -| setref -| deref -| setp_ind -| setn_ind -| sets_ind -| seti_ind -| null -| cleari -| clearn -| clearp -| clears -| saveall -| restoreall -| entrytype -| depth -| lookback -| save -| savec -| restore -| rotate_up -| ord -| chr -| chopn -| concat -| n_concat -| repeat -| n_repeat -| length -| bytelength -| pin -| unpin -| substr -| index -| sprintf -| new # ?? 
-| stringinfo -| upcase -| downcase -| titlecase -| join -| split -| charset -| charsetname -| find_charset -| trans_charset -| encoding -| find_encoding -| trans_encoding -| is_cclass -| find_cclass -| find_not_cclass -| escape -| compose -| spawnw -| err -| time -| gmtime -| localtime -| decodetime -| decodelocaltime -| sysinfo -| sleep -| sizeof -| store_lex -| find_lex -| get_namespace -| get_hll_namespace -| get_root_namespace -| get_global -| get_hll_global -| get_root_global -| set_global -| set_hll_global -| set_root_global +| ge_str +| ge_num +| ge_addr +| ge +| gcd +| gc_debug +| freeze +| floor +| find_type +| find_not_cclass +| find_name +| find_method +| find_lex +| find_encoding +| find_charset +| find_cclass +| fdopen +| fdiv +| fact +| exp +| exit +| exists +| exchange +| escape +| errorson +| errorsoff +| err +| eq_str +| eq_num +| eq_addr +| eq +| entrytype +| enternative +| end +| encoding +| elements +| downcase +| does +| dlvar +| dlfunc +| div +| die +| deref +| depth +| delprop +| delete +| defined +| decodetime +| decodelocaltime +| dec +| debug_print +| debug_load +| debug_init +| debug_break +| debug +| cosh +| cos +| connect +| concat +| compreg +| compose +| collecton +| collectoff +| collect +| cmp_str +| cmp_num +| cmp +| cmod +| close +| clone +| clears +| clearp +| clearn +| cleari +| clear_eh +| classoffset +| classname +| class +| chr +| chopn +| charsetname +| charset +| ceil +| can +| callmethodcc +| callmethod +| bytelength +| bxors +| bxor +| bsr +| branch_cs +| branch +| bounds +| bors +| bor +| bnots +| bnot +| bind +| bands +| band +| backtrace +| atan +| assign +| asin +| asec +| and +| addparent +| addmethod +| adddoes +| addattribute +| add +| acos +| accept +| abs +] \b } Index: languages/PIR/lib/pir.pg =================================================================== --- languages/PIR/lib/pir.pg (revision 17016) +++ languages/PIR/lib/pir.pg (working copy) @@ -2,8 +2,7 @@ # TO DO: # 1. fix Heredocs parsing -# 2. 
fix macro parsing -# 3. Test and fix things +# 2. Test and fix things token TOP { ^ <program> [ $ | <syntax_error: end of file expected> ] @@ -120,13 +119,15 @@ | <reg> | <syntax_error: parameter type or register expected> ] - <get_flags>? + <param_flags> [ <?nl> | <syntax_error: newline expected after parameter declaration> ] } +rule param_flags { + [ <get_flags> | <':unique_reg'> ]* +} - # # PIR instructions # @@ -151,14 +152,11 @@ } -# this is a token, because no spaces are allowed between -# the id and the colon. -token label { - <id> <':'> -} + rule pir_instr { <local_decl> + | <sym_decl> | <lexical_decl> | <const_def> | <globalconst_def> @@ -181,18 +179,34 @@ ## Locals and Lexicals ## + +=head2 Local declarations + +Local declarations can be done using C<.sym> or C<.local> in normal context. +In macro context however, only C<.sym> is allowed, and the C<.local> keyword +indicates a label declaration. + +=cut + rule local_decl { - [ <'.local'> | <'.sym'> ] + <'.local'> [ <type> | <syntax_error: type for local symbol expected> ] <local_id_list> } +rule sym_decl { + <'.sym'> + [ <type> | <syntax_error: type for local symbol expected> ] + <local_id_list> +} + + rule local_id_list { <local_id> [ <','> <local_id> ]* } rule local_id { - <id> <local_flag>? + [ <id> | <syntax_error: identifier expected> ] <local_flag>? } ## Maybe more future flags for local symbols @@ -261,8 +275,8 @@ } rule conditional_expr { - [ <'null'> <target> ] - | [ <simple_expr> [ <relational_operator> <simple_expr> ]? ] + <'null'> <target> + | <simple_expr> [ <relational_operator> <simple_expr> ]? 
} ## Jump statements @@ -358,8 +372,9 @@ <target> <'='> <short_sub_call> | <target> <'='> <target> <keylist> | <target> <'='> <expression> - | <target> <'='> <pasm_op_1> <simple_expr> - | <target> <'='> <pasm_op_2> <simple_expr> \, <simple_expr> + | <target> <'='> <pasm_instruction> \N* + #| <target> <'='> <pasm_op_1> <simple_expr> + #| <target> <'='> <pasm_op_2> <simple_expr> \, <simple_expr> | <target> <'='> <'new'> [ <int_constant> | <string_constant> | <macro_id> ] | <target> <'='> <'new'> <keylist> | <target> <'='> <'find_type'> [ <string_constant> | <string_reg> | <id> ] @@ -371,47 +386,33 @@ | <result_var_list> [ <'='> | <syntax_error: '=' expected> ] <short_sub_call> } -## Rewrite of assignment_stat -## -##rule assignment_stat { -## <target> <'='> <rhs> -##| <target> <op_assign> <simple_expr> -##| <target> <keylist> <'='> <simple_expr> -##| <'global'> <string_constant> <'='> <target> # deprecated? -##| <result_var_list> [ <'='> | <syntax_error: '=' expected> ] <short_sub_call> -##} -## -##rule rhs { -## <short_sub_call> -##| <expression> -##| <pasm_op_1> <target> -##| <pasm_op_2> <target> <','> <target> -##| <'new'> [ <int_constant> | <string_constant> | <macro_id> ] -##| <'find_type'> [ <string_constant> | <string_reg> | <id> ] -##| <heredoc_string> -##| <global'> <string_constant> # deprecated? -##} +## TODO :fix delete x[x] syntax in PASM # pasm ops that take 1 argument # -rule pasm_op_1 { - clone - | compreg - | defined - | assign - | addr - | istrue - | isfalse - | isnull - #| others -} - -# pasm ops that take 2 arguments +#rule pasm_op_1 { +# [ clone +# | compreg +# | defined +# | assign +# | addr +# | istrue +# | isfalse +# | isnull +# | getclass +# | find_type +# | length +# ] \b +#} # -rule pasm_op_2 { - issame - | isntsame -} +## pasm ops that take 2 arguments +## +#rule pasm_op_2 { +# [ issame +# | isntsame +# | subclass +# ] \b +#} rule heredoc { <'<<'> @@ -461,7 +462,9 @@ | [ <target> <'->'> ] ]? 
# optional invocant [ <target> | <string_constant> ] # method or sub name/id - <parenthesized_args> # sub args + <parenthesized_args> # sub args + <process_heredocs> + <clear_heredocs> } rule sub_invocation { @@ -500,11 +503,20 @@ [ <target> | <syntax_error: target for named argument expected> ] ]? | <target> + | <heredoc_id> ] <set_flags>? } +token heredoc_id { + <'<<'> <string_constant> <store_heredoc_label: $1> +} +rule heredoc_label { + .* ^^ <ident> $$ +} + + ## Argument passing ## rule arguments { @@ -633,10 +645,15 @@ [ <')'> | <syntax_error: ')' expected> ] } -#### TODO: FIX +# In order to be able to parse macro identifiers, before +# the macro body is parsed, some rules are redefined. +# After parsing the macro body, they are restored. +# regex macro_body { - .*? - <'.endm'> \h* \n + <init_macro_rules> + <labeled_pir_instr>* + <'.endm'> + <close_macro_rules> } @@ -713,6 +730,22 @@ # Tokens # +# this is a token, because no spaces are allowed between +# the id and the colon. +token normal_label { + <id> <':'> +} + + +token macro_label { + <'$'> <id> <':'> +} + +rule macro_label_decl { + <'.local'> [ <macro_label> | <syntax_error: $LABEL: expected> ] + | <normal_label> +} + token int_constant { <binary_constant> | <hex_constant> @@ -762,12 +795,19 @@ } -token target { - [ <id> | <reg> ] +token normal_target { + <id> | <reg> } +# in a macro, a target can also be a +# macro_id +# +token macro_target { + <id> | <reg> | <macro_id> +} + token id { - <!keyword> \w+ + <!keyword> \w+ } token macro_id {