Greetings! I think you have issues with your function, number, and ATOM rules. See below.
I have attached my complete, modified, grammar that successfully parses your input sample. On 11/14/2011 11:47 PM, Jarrod Roberson wrote: > I am trying to write a parser for the following syntax > > hypotenuse(a,b) -> > sqr(x) -> x * x, > sqr(sqr(b) + sqr(b)). > > print(hypotenuse(2,3)). > > Where , and . are my statement separator and statement eol respectively. > > I am having an impossible time trying to figure out how to specify the > function rule to allow me to nest functions inside of other functions > without running afoul of ambiguities warnings. > > 23:37:47] warning(200): funcy.g:10:11: Decision can match input such as > "ID" using multiple alternatives: 1, 2 > As a result, alternative(s) 2 were disabled for that input > [23:37:47] error(201): funcy.g:10:11: The following alternatives can never > be matched: 2 > > > I really want to be able to use the above syntax without having to pepper > the code with keywords like `func` or `var` etc. > > Here is my grammar, are there any ways to resolve these ambiguities with > predicates of some sort that I haven't been able to figure out? > > I have read up on Google about them, but I can't get them to work with the > parser rules to remove the ambiguities. > > grammar funcy; > > options { > output = AST; > language = Java; > } > program : (statement'.')* ; just a nit pick here - you really should include EOF in your topmost rule. > > statement : expression > | assignment > ; > > assignment : ID '->' expression > | ATOM '->' ( string | number ) > | function '->' statement ((','statement)=> ',' statement)* ; I think you are being too liberal here with your function signatures. you permit any expression to be a formal argument. are you intending to have patterns akin to either ML or Haskell? if not, change the definition of function in your assignment rule. I also think that this permits multi-expression body, something like: foo(a,b)-> a, b. e.g. a function body consisting of two (or more) expressions. 
Do you really want that -- you do if your expressions can have side-effects. Maybe the third alternative of the assignment rule should be something like (assuming you do not have side effects and watch out for I/O!): | ID '(' ID (',' ID)* ')' '->' (assignment ',')* expression ; This eliminates the need for a predicate. > > args : expression (',' expression)*; > > function : ID '(' args ')' ; > > string : UNICODE_STRING; > number : HEX_NUMBER > | (INTEGER '.' INTEGER)=> INTEGER '.' INTEGER I do not think you want to recognize floating point values in the parser. Any tokens you send to the HIDDEN $channel (or skip();) will be silently accepted before and after the '.' of the float. Change your INTEGER rule to this: fragment FLOAT: ; INTEGER : DIGIT+ ('.' DIGIT+ {$type=FLOAT;} )? ; and use FLOAT in the number rule. > | INTEGER; > > // expressions > > term : '(' expression ')' > | number > | string > | function > | ID > | ATOM > ; > > negation : '!'* term; > > unary : ('+'|'-')* negation; > > mult : unary (('*' | '/' | ('%'|'mod') ) unary)*; > > add : mult (('+' | '-') mult)*; > > relation : add (('=' | '!=' | '<' | '<=' | '>=' | '>') add)*; > expression : relation (('&&' | '||') relation)*; > > // LEXER ================================================================ > > HEX_NUMBER : '0x' HEX_DIGIT+; > > INTEGER : DIGIT+; > > UNICODE_STRING : '"' ( ESC | ~('\u0000'..'\u001f' | '\\' | '\"' ) )* '"' > ; > > WS : (' '|'\n'|'\r'|'\t')+ {$channel = HIDDEN;} ; // ignore whitespace > > fragment > ESC : '\\' ( UNI_ESC |'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\' ); > > fragment > UNI_ESC : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT; > > fragment > HEX_DIGIT : (DIGIT|'a'..'f'|'A'..'F') ; > > fragment > DIGIT : ('0'..'9'); > > ATOM : (('A'..'Z'|'_')+)=> ('A'..'Z'|'0'..'9'|'_')+; No need for a predicate: ATOM : ('A'..'Z')('A'..'Z'|'0'..'9'|'_')*; Note that this also removes the ambiguity as to whether the string "_" is an ATOM or an ID.
> > ID : ('a'..'z'|'_')('a'..'z'|'A'..'Z'|'0'..'9'|'_')*; > > COMMENT : '/*' .* '*/' {$channel = HIDDEN;}; >
// Combined lexer/parser grammar for a small function-definition language in
// which ',' separates the nested definitions of a function body from each
// other and from the final expression, and '.' terminates every top-level
// statement. The parser builds an AST of CommonTree nodes.
grammar Test;

options {
    output = AST;
    ASTLabelType = CommonTree;
}

@members {
    // test data - each string in the following array is parsed separately
    private static final String [] x = new String[] {
        "hypotenuse(a,b) ->\n" +
        " sqr(x) -> x * x,\n" +
        " sqr(sqr(b) + sqr(b)).\n" +
        "\n" +
        "print(hypotenuse(2,3)).\n",
    };

    // Test driver: for every entry of x, echoes the input, runs the lexer and
    // the parser starting at rule test, then prints the resulting tree as text
    // or a notice when the tree is null. The try/catch sits inside the loop,
    // so an exception on one input is reported via its stack trace and the
    // remaining inputs are still processed.
    public static void main(String [] args) {
        for( int i = 0; i < x.length; ++i ) {
            try {
                System.out.println("about to parse:`"+x[i]+"`");
                TestLexer lexer = new TestLexer(new ANTLRStringStream(x[i]));
                CommonTokenStream tokens = new CommonTokenStream(lexer);

                // Disabled debugging aid: dumps every token (index, type name,
                // text, hidden-channel marker) and then resets the lexer.
                // System.out.format("dump of the token stream:\%n");
                // int j = 0;
                // boolean looping = true;
                // while( looping ) {
                //     Token token = lexer.nextToken();
                //     int typ = token.getType();
                //     System.out.format("\%d: type = \%s, text = `\%s`\%s\%n",
                //         j++,
                //         typ==EOF?"EOF":tokenNames[typ],
                //         token.getText(),
                //         token.getChannel()==HIDDEN?" (HIDDEN)":"");
                //     looping = typ != EOF;
                // }
                // lexer.reset();
                // System.out.format("now performing the parse\n");

                TestParser parser = new TestParser(tokens);
                TestParser.test_return p_result = parser.test();
                CommonTree ast = p_result.tree;
                if( ast == null ) {
                    System.out.println("resultant tree: is NULL");
                } else {
                    System.out.println("resultant tree: " + ast.toStringTree());
                }
                System.out.println();
            } catch(Exception e) {
                e.printStackTrace();
            }
        }
    }
}

// Entry rule used by the test driver. Requiring EOF makes the parser account
// for the whole input; the '!' suffix keeps the EOF token out of the tree.
test : program EOF!
     ;

// A program is zero or more statements, each terminated by '.'.
program : (statement'.')* ;

statement : expression
          | assignment
          ;

// Three kinds of definition, all using '->':
//   1. ID '->' expression             binds a lower-case name to an expression
//   2. ATOM '->' string-or-number     binds an upper-case name to a literal
//   3. signature '->' body            defines a function; the body is zero or
//                                     more nested assignments, each followed by
//                                     ',', and then one final expression
assignment : ID '->' expression
           | ATOM '->' ( string | number )
           | function_signature '->' ( assignment ',' )* expression ;
// Previous form of the third alternative (it needed a syntactic predicate):
// | function_signature '->' statement ((','statement)=> ',' statement)* ;

// Definition side of a function: the formal parameters are plain IDs, and at
// least one is required.
function_signature : ID '(' ID (',' ID)* ')' ;

// Call side of a function: the arguments are arbitrary expressions, and at
// least one is required.
function_invocation : ID '(' expression (',' expression)* ')' ;

string : UNICODE_STRING;
number : HEX_NUMBER
       | FLOAT
       | INTEGER;

// expressions
//
// The rules below form a precedence chain. Reading upward from term, each rule
// wraps the one before it, so operators introduced further down the list bind
// more loosely: prefix '!' first, then prefix '+'/'-', then '*' '/' '%' 'mod',
// then '+' '-', then the comparison operators, and finally '&&' '||'.

// Note that function_invocation and the bare ID alternative both begin with an
// ID token.
term : '(' expression ')'
     | number
     | string
     | function_invocation
     | ID
     | ATOM
     ;

// Any number of '!' prefixes may precede a term.
negation : '!'* term;

// Any number of '+' or '-' prefixes may precede a negation.
unary : ('+'|'-')* negation;

// 'mod' is accepted as an alternative spelling alongside '%'.
mult : unary (('*' | '/' | ('%'|'mod') ) unary)*;

add : mult (('+' | '-') mult)*;

relation : add (('=' | '!=' | '<' | '<=' | '>=' | '>') add)*;

expression : relation (('&&' | '||') relation)*;

// LEXER ================================================================

// Hexadecimal literal: a lower-case '0x' prefix followed by one or more hex digits.
HEX_NUMBER : '0x' HEX_DIGIT+;

// FLOAT matches nothing by itself; the empty fragment exists only so that a
// FLOAT token type is defined. The action in INTEGER retypes the token to
// FLOAT whenever a '.' followed by digits has been consumed, so floating-point
// values are recognised entirely inside the lexer.
fragment FLOAT : ;

INTEGER : DIGIT+ ('.' DIGIT+ { $type = FLOAT; })? ;

// Double-quoted string. The body is made of escape sequences (see ESC) and any
// character other than the control characters U+0000..U+001F, a backslash, or
// a double quote.
UNICODE_STRING : '"' ( ESC | ~('\u0000'..'\u001f' | '\\' | '\"' ) )* '"'
               ;

WS : (' '|'\n'|'\r'|'\t')+ {$channel = HIDDEN;} ; // ignore whitespace

// Backslash escape: either a unicode escape or one of b t n f r " ' \ .
fragment
ESC : '\\' ( UNI_ESC |'b'|'t'|'n'|'f'|'r'|'\"'|'\''|'\\' );

// Unicode escape tail: 'u' followed by exactly four hex digits.
fragment
UNI_ESC : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;

fragment
HEX_DIGIT : (DIGIT|'a'..'f'|'A'..'F') ;

fragment
DIGIT : ('0'..'9');

// ATOM must start with an upper-case letter; ID must start with a lower-case
// letter or an underscore. The first characters never overlap, so a lone "_"
// can only be an ID.
ATOM : ('A'..'Z')('A'..'Z'|'0'..'9'|'_')*;

ID : ('a'..'z'|'_')('a'..'z'|'A'..'Z'|'0'..'9'|'_')*;

// C-style block comments are routed to the hidden channel, like whitespace.
COMMENT : '/*' .* '*/' {$channel = HIDDEN;};
List: http://www.antlr.org/mailman/listinfo/antlr-interest Unsubscribe: http://www.antlr.org/mailman/options/antlr-interest/your-email-address
-- You received this message because you are subscribed to the Google Groups "il-antlr-interest" group. To post to this group, send email to il-antlr-inter...@googlegroups.com. To unsubscribe from this group, send email to il-antlr-interest+unsubscr...@googlegroups.com. For more options, visit this group at http://groups.google.com/group/il-antlr-interest?hl=en.