[il-antlr-interest: 23066] [antlr-interest] Help with infix expression evaluator grammar

Christopher Schultz Thu, 09 Apr 2009 08:37:58 -0700

ANTLR folks,

I've been using a grammar for years to evaluate infix mathematical
expressions with function calls and things like that. It has performed
beautifully. I recently noticed that some (meaningless) junk at the end
of the expression was sometimes ignored.


For example, this expression should return 4:

2+2

It does return 4 (otherwise it would be a pretty lousy evaluator).
However, this expression /also/ returns 4:

2+2)

The evaluator is rigged to allow parens to group things so that (2+2)*2
yields 8 instead of 6 (which would be the case without the parens. It
appears that the trailing parens is being ignored which is nice in some
ways but not nice in others: you can end up with expressions whose
parens balance but are semantically incorrect, which is why I'm here.

Attached is the entire grammar (which uses ANTLR 2.7.3 to build and
execute). "expr" is the top-level production and also the function that
I call on my parser in order to obtain my own AST (as you can see from
the grammar).

At the end of the 'expr' production, you can see a comment (line 46)
that says "This (EOF!)? seems totally sketchy, but it works!".
Apparently, at the time I wrote the grammar, I knew something was amiss,
but wasn't sure how to fix it.

If I remove the ? from the (EOF!), making it required, my simple tests
(such as "2+2)") correctly fail to parse but other things (such as
"(2+2)") also fail to parse ("expecting EOF, found ')').

It seems like the most logical fix is to create a new top-level
production that just says:

 return=expr (EOF!)

...and remove the EOF entirely from my 'expr' production.

I'm pretty sure that would work, but I'm concerned that I may be missing
something more fundamental.

I'd appreciate some feedback on this issue, and I'd love it if anyone
had any comments on the grammar in general: if there are any obvious
goofs I've made or optimizations that would make sense.

At some point, I'd like to move-up to the later versions of ANTLR
(especially those that can direct-generate Java bytecode to parse these
things), so if there are any modifications I'd need to make to my
grammar for that purpose, I'd appreciate feeback, too.

Thanks,
-chris

header {
/**
 * This software is Copyright (C) 2008 Christopher Schultz
 *
 * Please see the COPYING file for more information.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
    package net.christopherschultz.evaluator.parser;

    import net.christopherschultz.evaluator.*;
}

class ExpressionParserImpl extends Parser;

options {
    buildAST = false;
    k=1;
    defaultErrorHandler = false;
}

/* We have expr and mexpr separate, with the addition and subtraction
   separate from multiplication in order to enforce order-of-operations. */
expr returns [Expression exp = null] throws EvaluationException
    {
        Expression right;
    }
    :
        exp=relexpr (
            ( AND^ right=relexpr) { exp = new BinaryOperatorExpression(exp, 
"&&", right); }
            | (OR^ right=relexpr)  { exp = new BinaryOperatorExpression(exp, 
"||", right); }
        )*
        // This (EOF)? seems totally sketchy, but it works!
        (EOF!)?
    ;

// Relational Expression
relexpr returns [Expression exp = null] throws EvaluationException
    {
        Expression right;
    }
    :
        exp=addexpr (
            (EQUALS^ right=addexpr) { exp = new BinaryOperatorExpression(exp, 
"=", right); }
            | (NOT_EQUALS^ right=addexpr) { exp = new 
BinaryOperatorExpression(exp, "!=", right); }
            | (GREATERTHAN^ right=addexpr) { exp = new 
BinaryOperatorExpression(exp, ">", right); }
            | (LESSTHAN^ right=addexpr) { exp = new 
BinaryOperatorExpression(exp, "<", right); }
            | (GREATERTHANEQUALTO^ right=addexpr) { exp = new 
BinaryOperatorExpression(exp, ">=", right); }
            | (LESSTHANEQUALTO^ right=addexpr) { exp = new 
BinaryOperatorExpression(exp, "<=", right); }
        )*
    ;

// Arithmetic Expression
addexpr returns [Expression exp = null] throws EvaluationException
    {
        Expression right;
    }
    :
        exp=multexpr (( PLUS^ right=multexpr { exp = new 
BinaryOperatorExpression(exp, "+", right); }
                      | MINUS^ right=multexpr { exp = new 
BinaryOperatorExpression(exp, "-", right); } ))*
    ;

// Multiplicative (or divisive) expression
multexpr returns [Expression exp = null] throws EvaluationException
    {
        Expression right;
    }
    :
        exp=unaryExpr (
            ( STAR^ right=unaryExpr { exp = new BinaryOperatorExpression(exp, 
"*", right); } )
            |
            ( SLASH right=unaryExpr { exp = new BinaryOperatorExpression(exp, 
"/", right); } )
            |
            ( PERCENT right=unaryExpr { exp = new BinaryOperatorExpression(exp, 
"%", right); } )
            
        )*
    ;

// (Logically) negated expression
unaryExpr returns [ Expression exp = null ] throws EvaluationException
    :
        (
         (BANG^ exp=atom { exp = new NegatedExpression(exp); })
         |
         (MINUS^ exp=atom { exp = new UnaryMinusExpression(exp); })
         |
         (PLUS!)? exp=atom
        )
    ;

functionsuffix returns [ java.util.List args = null ] throws EvaluationException
    :
        LPAREN! ( args=argumentlist )? RPAREN!
    ;

arraysuffix returns [ Expression index = null ] throws EvaluationException
    :
        LBRACKET! ( index=expr ) RBRACKET!
    ;

// Argument List
argumentlist returns [java.util.List list = new java.util.ArrayList()] throws 
EvaluationException
    {
        Expression arg;
    }
    :
        arg=argument { if(null != arg) { list.add(arg); } }
        (COMMA! arg=argument { if(null != arg) { list.add(arg); } })*
    ;

// Argument (i.e. anything)
argument returns [Expression exp = null] throws EvaluationException
    :
        exp=expr
    ;

// Indentifiers (i.e. symbols)
identifier returns [ Expression exp = null ] throws EvaluationException
    :
        id:IDENTIFIER
        {
            exp = new IdentifierExpression(id.getText());
        }
    ;

// Atoms
atom returns [Expression exp = null] throws EvaluationException
    {
        Expression index;
        java.util.List args;
    }
    :
        exp=identifier
        ( args=functionsuffix { exp=new FunctionCallExpression(exp, args); }
          |
          index=arraysuffix { exp=new ArrayReferenceExpression(exp, index); }
        )*
        |
        hex:HEX_LITERAL {   Object value;
                            String hexStr = hex.getText();
                            int length = hexStr.length();
                            // All these temporary objects, here, will
                            // allow us to create the correctly-sized
                            // object (byte, short, int, long) with the
                            // specified bit pattern.
//9223372036854775807

                            if(length > 10) // 8 hex + leading "0x"
                            {
                                value = Long.decode(hexStr);
                            }
                            else if(length > 6)
                            {
                                Long tl = Long.decode(hexStr);
                                if(tl.longValue() > Integer.MAX_VALUE)
                                    value = new Integer((int)(tl.longValue() - 
Integer.MAX_VALUE));
                                else
                                    value = new Integer(tl.intValue());
                            }
                            else if(length > 4)
                            {
                                Integer ti = Integer.decode(hexStr);
                                if(ti.intValue() > Short.MAX_VALUE)
                                    value = new Short((short)(ti.intValue() - 
Short.MAX_VALUE));
                                else
                                    value = new Short(ti.shortValue());
                            }
                            else
                            {
                                Short ts = Short.decode(hexStr);
                                if(ts.shortValue() > Byte.MAX_VALUE)
                                    value = new Byte((byte)(ts.shortValue() - 
Byte.MAX_VALUE));
                                else
                                    value = new Byte(ts.byteValue());
                            }
                            exp = new ConstantExpression(value);
                        }
        |
        oct:OCTAL_LITERAL { Object value;
                            String octalStr = oct.getText();
                            int length = octalStr.length();
                            if(length > 11) // 10 octal + leading "0"
                                value = Long.decode(octalStr);
                            else if(length > 6)
                                value = Integer.decode(octalStr);
                            else if(length > 3)
                                value = Short.decode(octalStr);
                            else
                                value = Byte.decode(octalStr);
                            exp = new ConstantExpression(value);
                        }
        |
/* TODO: allow larger precision than simply 32-bit integer. Long? BigInteger? */
/* TODO: How can we use a BigInteger to determine if the value fits into a 
certain type of primitive? Just make BigInteger objects out of 
Integer.MAX_VALUE, etc? */
        i:INT_LITERAL { exp = new ConstantExpression(new Integer(i.getText())); 
}
        |
        f:DECIMAL_LITERAL { exp = new ConstantExpression(new 
Double(f.getText())); }
        |
        s:STRING { exp = new ConstantExpression(s.getText()); }
        |
        LPAREN! exp=expr RPAREN!
    ;

class ExpressionLexerImpl extends Lexer;

options {
    k=2; // (2)needed for newline junk and relations like <, <= and >, >=
    charVocabulary = '\3'..'\377';
}

IDENTIFIER: WORDCHAR (WORDCHAR | DIGIT)*;

STRING
    :
        DOUBLEQUOTE!
        (CHAR_ESC | ~('\"' | '\\') )*
        DOUBLEQUOTE!
    ;

protected CHAR_ESC
    :
        '\\'
        ( 'n'  { $setText("\n"); }
        | 'r'  { $setText("\r"); }
        | 't'  { $setText("\t"); }
        | 'b'  { $setText("\b"); }
        | 'f'  { $setText("\f"); }
        | '\"' { $setText("\""); }
        | '\'' { $setText("\'"); }
        | '\\' { $setText("\\"); }
        )
    ;

NUMERIC_LITERAL:
        (ZERO (OCTAL_DIGIT)+) => OCTAL_LITERAL { $setType(OCTAL_LITERAL); }
        |
        (ZERO Xx) => HEX_LITERAL  { $setType(HEX_LITERAL); }
        |
        (DOT DIGITS) => DECIMAL_LITERAL { $setType(DECIMAL_LITERAL); }
        |
        (DIGITS DOT) => DECIMAL_LITERAL { $setType(DECIMAL_LITERAL); }
        |
        (DIGITS)    { $setType(INT_LITERAL); }
        ;

protected OCTAL_LITERAL: (ZERO (OCTAL_DIGIT)+) ;

protected OCTAL_DIGIT: '0'..'7' ;

protected HEX_LITERAL :  (ZERO Xx (HEX_DIGIT)+) ;

protected ZERO: '0' ;

protected Xx: ('x' | 'X') ;

protected HEX_DIGIT: ( 'a'..'z' | 'A'..'Z' | DIGIT ) ;

protected INT_LITERAL: DIGITS ;

protected DECIMAL_LITERAL: ( ( DIGITS )? DOT ( DIGITS )? ) ;

protected DIGITS : ( DIGIT )+ ;

protected SIGN: ( PLUS | MINUS ) ;

protected
WORDCHAR:   'a'..'z' | 'A'..'Z' | '_' ;

protected
DIGIT: '0'..'9';

protected
DOT: '.' ;

BANG: '!' ;

COMMA: ','
        ;

LBRACKET: '['
        ;

RBRACKET: ']'
        ;

LPAREN: '('
        ;

RPAREN: ')'
        ;

PLUS: '+'
        ;

MINUS: '-'
        ;

STAR: '*'
    ;

PERCENT: '%'
    ;

SLASH: '/'
    ;

AND: '&''&'
        ;

OR: '|''|'
        ;

EQUALS: '='
        ;

NOT_EQUALS
    :
        BANG EQUALS
    ;

GREATERTHAN: '>' ;

LESSTHAN: '<' ;

GREATERTHANEQUALTO: '>' '=' ;

LESSTHANEQUALTO: '<' '=' ;

SINGLEQUOTE: '\'' ;

DOUBLEQUOTE: '\"' ;

WS:
        (   ' '
        |   '\t'
        |   '\r' '\n' { newline(); }
        |   '\r'      { newline(); }
        |   '\n'      { newline(); }
        )
        {$setType(Token.SKIP);} //ignore this token
    ;

signature.asc
Description: OpenPGP digital signature

List: http://www.antlr.org/mailman/listinfo/antlr-interest
Unsubscribe: 
http://www.antlr.org/mailman/options/antlr-interest/your-email-address

[il-antlr-interest: 23066] [antlr-interest] Help with infix expression evaluator grammar

Reply via email to