2

I want to parse an SQL expression that contains the `AND` and `OR` keywords. The problem is that ANTLRWorks throws an exception for input that violates the grammar, but the generated Java code does not. I also found that the generated Java code behaves incorrectly: it builds only half of the AST and reports no error, because the execution path never reaches a point where an exception is thrown.

This is my .g grammar file:

 grammar ContainsExpr;

    options {
      language = Java;
      output=AST;
      ASTLabelType=CommonTree;
    }

    tokens {
        DIVIDE = '/' ;
        PLUS = '+' ;
        MINUS = '-' ;
        STAR = '*' ;
        MOD = '%' ;

        AMPERSAND = '&' ;
        TILDE = '~' ;
        BITWISEOR = '|' ;
        COMMA = ',';
        DOT = '.';
        LPAREN = '(' ;
        RPAREN = ')' ;

        EQUAL = '=';
        NOTEQUAL = '!=';
        LESSTHANOREQUALTO = '<=';
        LESSTHAN = '<';
        GREATERTHANOREQUALTO = '>=';
        GREATERTHAN = '>';

        AND = 'AND';
        OR = 'OR' ;
        TRUE = 'TRUE';
        FALSE = 'FALSE';
        KW_NEAR = 'NEAR';
        DOUBLE_QUOTE = '\"';
        SINGLE_QUOTE = '\'';

        TOK_NEAR;
        TOK_ITEMS;
        TOK_PARAMETER;
        TOK_WILDCARDS;
    }

    @header {
    package test1;
    }

    @members {

       // Overrides the recognizer's reportError hook: every RecognitionException
       // is formatted through displayRecognitionError with this parser's token names.
      public void reportError(RecognitionException e) {
        displayRecognitionError(this.getTokenNames(), e);
      }

     // Turns every formatted error message into an unchecked RuntimeException.
     // NOTE(review): presumably displayRecognitionError ends up calling this
     // method, so a reported error aborts the parse - confirm against the ANTLR runtime.
     @Override
      public void emitErrorMessage(String message) {
        throw new RuntimeException(message);
      }
    }

    @lexer::header {
    package test1;
    }

    @lexer::members {

       // Overrides the lexer's reportError hook: the exception is only formatted
       // through displayRecognitionError. No emitErrorMessage override is declared
       // in this section, so nothing here throws.
       // NOTE(review): presumably the message is just printed by the runtime's
       // default emitErrorMessage - confirm.
      public void reportError(RecognitionException e) {
        displayRecognitionError(this.getTokenNames(), e);
      }
    }

    @rulecatch {
        // Catch clause used for the generated rule methods; the same clause shows
        // up verbatim in the generated searchCondition code. It reports the error
        // and then rethrows it to the caller of the rule.
        catch (RecognitionException e) {
          reportError(e);
          throw e;
        }
    }

    // LITERALS
    fragment
    Letter
        : 'a'..'z' | 'A'..'Z'
        ;

    fragment
    Digit
        :
        '0'..'9'
        ;

    fragment
    Exponent
        :
        ('e' | 'E') ( PLUS|MINUS )? (Digit)+
        ;

    Number
        :
        (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
        ;

    fragment
    UnquotedString
        :  
          ( ~(SINGLE_QUOTE|DOUBLE_QUOTE|' '|'\t'|'\n'|LPAREN|RPAREN|COMMA))+
        ;

    fragment
    QuotedLiteral
        :
        DOUBLE_QUOTE ( ~(DOUBLE_QUOTE|'\\') | ('\\' .) )* DOUBLE_QUOTE 
        ;

    Parameter
        :
        UnquotedString | QuotedLiteral
        ;

    // Whitespace is routed to the hidden channel so the parser never sees it.
    // The loop is one-or-more: a token rule that can match the empty string
    // consumes no input and can stall the lexer.
    WS  :  (' '|'\r'|'\t'|'\n'|'\u000C')+ {$channel=HIDDEN;}
        ;

    eval
        :
        searchCondition 
        ;

    //AND has precedence over OR    
    searchCondition
        :
        andExpr (precedenceOrOperator^ andExpr)*
        ;

    andExpr
        :
        subCondition (precedenceAndOperator^ subCondition)*
        ;

    precedenceAndOperator
        :
        AND | AMPERSAND
        ;

    precedenceOrOperator
        :
        OR | BITWISEOR
        ;

    subCondition 
        : 
          atom
        | LPAREN searchCondition RPAREN
        ;

    atom 
        :
        subEressixpon      
        ;

    subEressixpon  
        :
          Parameter -> ^(TOK_PARAMETER Parameter) 
        ;

When I enter the incorrect expression `expr1 epxr2` (the AND is missing), the result of running the Java code is "expr1".

The generated Java code for the searchCondition rule is incorrect:

try {
            //  ( andExpr ( precedenceOrOperator ^ andExpr )* )
            //  andExpr ( precedenceOrOperator ^ andExpr )*
            {
            root_0 = (CommonTree)adaptor.nil();


            pushFollow(FOLLOW_andExpr_in_searchCondition714);
            andExpr2=andExpr();
            state._fsp--;

            adaptor.addChild(root_0, andExpr2.getTree());

            // ( precedenceOrOperator ^ andExpr )*
            // Zero-or-more loop: each pass peeks at one token and either parses
            // another "operator andExpr" pair or leaves the loop.
            loop1:
            while (true) {
                // alt1 == 2 selects the default branch of the switch below (leave
                // the loop); it is only changed when the lookahead is an OR operator.
                int alt1=2;
                int LA1_0 = input.LA(1);
                if ( (LA1_0==BITWISEOR||LA1_0==OR) ) {
                    alt1=1;
                }

                switch (alt1) {
                case 1 :
                    // precedenceOrOperator ^ andExpr
                    {
                    pushFollow(FOLLOW_precedenceOrOperator_in_searchCondition717);
                    precedenceOrOperator3=precedenceOrOperator();
                    state._fsp--;

                    // The operator subtree becomes the new root; the tree built so
                    // far turns into its first child.
                    root_0 = (CommonTree)adaptor.becomeRoot(precedenceOrOperator3.getTree(), root_0);
                    pushFollow(FOLLOW_andExpr_in_searchCondition720);
                    andExpr4=andExpr();
                    state._fsp--;

                    adaptor.addChild(root_0, andExpr4.getTree());

                    }
                    break;

                default :
                    // Any other lookahead token simply ends the loop: nothing is
                    // matched and nothing is thrown in this branch.
                    break loop1;
                }
            }

            }

            retval.stop = input.LT(-1);

            retval.tree = (CommonTree)adaptor.rulePostProcessing(root_0);
            adaptor.setTokenBoundaries(retval.tree, retval.start, retval.stop);

        }

            catch (RecognitionException e) {
              reportError(e);
              throw e;
            }

        finally {
            // do for sure before leaving
        }

In the while loop, a point where an exception can be thrown is only reached when LA1_0 is an OR keyword; for any other token the loop simply exits, so no exception is thrown.

sol
  • 225
  • 4
  • 17
  • Please post the full grammar. The lexer rules are missing here (e.g. AMPERSAND) – Rainer Jan 25 '16 at 14:43
  • Please mark my answer as the solving one (if so) or post updates what´s still an issue for you – Rainer Jan 28 '16 at 08:55
  • @Rainer The problem is can not get exception by generated java code, but antlrworks and eclipse plugin of anltr3 both can throw exception when interpreter incorrectly inputs. – sol Jan 28 '16 at 09:03
  • I think there were some options you can provide in your grammar to modify the behaviour of lexer and parser http://www.antlr2.org/doc/options.html – Rainer Jan 28 '16 at 09:11
  • Ah, one thing just came into my mind. If I remember correctly you may wanna have a look at the EOF token and add it at the end of the your top parser rule. It´s really just a light memory but there was sth related between EOF-token and exception handling – Rainer Jan 28 '16 at 09:15
  • @Rainer Thank you, i edit my grammar file again. Please help me to check it! – sol Jan 28 '16 at 09:25
  • @Rainer I edit `eval : searchCondition` to `eval : searchCondition EOF !`. but still invalid. – sol Jan 28 '16 at 09:30
  • Did my last answer solve your problem? – Rainer Jan 29 '16 at 09:10

2 Answers2

2

It has been a while since I worked with antlr but you may have a look here.

How Get error messages of antlr parsing?

You can get the error messages from the parser.

Hope this helps. Cheers

Community
  • 1
  • 1
Rainer
  • 761
  • 5
  • 20
1

OK, I could confirm your problem in the first place, but after a little tweaking it worked.

I changed 3 things:

  1. put the lexer rules AFTER the parser rules
  2. Added the EOF symbol on your top level parser rule
  3. only override the needed two methods "reportError" in parser and lexer

Have fun and keep asking^^

grammar ContainsExpr;

    options {
      language = Java;
      output=AST;
      ASTLabelType=CommonTree;
    }

    tokens {
        DIVIDE = '/' ;
        PLUS = '+' ;
        MINUS = '-' ;
        STAR = '*' ;
        MOD = '%' ;

        AMPERSAND = '&' ;
        TILDE = '~' ;
        BITWISEOR = '|' ;
        COMMA = ',';
        DOT = '.';
        LPAREN = '(' ;
        RPAREN = ')' ;

        EQUAL = '=';
        NOTEQUAL = '!=';
        LESSTHANOREQUALTO = '<=';
        LESSTHAN = '<';
        GREATERTHANOREQUALTO = '>=';
        GREATERTHAN = '>';

        AND = 'AND';
        OR = 'OR' ;
        TRUE = 'TRUE';
        FALSE = 'FALSE';
        KW_NEAR = 'NEAR';
        DOUBLE_QUOTE = '\"';
        SINGLE_QUOTE = '\'';

        TOK_NEAR;
        TOK_ITEMS;
        TOK_PARAMETER;
        TOK_WILDCARDS;
    }

    @header {
    package test1;
    }

    @lexer::header {
    package test1;
    }

    @parser::members {
      /**
       * Aborts parsing on the first syntax error instead of recovering from it.
       *
       * @param e the recognition error raised by the parser
       * @throws RuntimeException always; carries the formatted error text and
       *         keeps the original exception as its cause
       */
      @Override
      public void reportError(RecognitionException e) {
        // RecognitionException has no detail message (getMessage() yields null),
        // so the text is built with the recognizer's own formatting methods.
        String detail = getErrorHeader(e) + " " + getErrorMessage(e, getTokenNames());
        throw new RuntimeException("I quit!\n" + detail, e);
      }
    }

    @lexer::members {
      /**
       * Aborts lexing on the first error instead of skipping the offending input.
       *
       * @param e the recognition error raised by the lexer
       * @throws RuntimeException always; carries the formatted error text and
       *         keeps the original exception as its cause
       */
      @Override
      public void reportError(RecognitionException e) {
        // RecognitionException has no detail message (getMessage() yields null),
        // so the text is built with the recognizer's own formatting methods.
        String detail = getErrorHeader(e) + " " + getErrorMessage(e, getTokenNames());
        throw new RuntimeException("I quit!\n" + detail, e);
      }
    }

    // Entry rule. EOF forces the whole input to be consumed, so leftover tokens
    // such as a second operand without AND/OR are reported as errors. The "!"
    // suffix keeps the EOF token out of the generated AST.
    eval
        :
        searchCondition EOF!
        ;

    //AND has precedence over OR    
    searchCondition
        :
        andExpr (precedenceOrOperator^ andExpr)*
        ;

    andExpr
        :
        subCondition (precedenceAndOperator^ subCondition)*
        ;

    precedenceAndOperator
        :
        AND | AMPERSAND
        ;

    precedenceOrOperator
        :
        OR | BITWISEOR
        ;

    subCondition 
        : 
          atom
        | LPAREN searchCondition RPAREN
        ;

    atom 
        :
        subEressixpon      
        ;

    subEressixpon  
        :
          Parameter -> ^(TOK_PARAMETER Parameter) 
        ;

    // LITERALS
    fragment
    Letter
        : 'a'..'z' | 'A'..'Z'
        ;

    fragment
    Digit
        :
        '0'..'9'
        ;

    fragment
    Exponent
        :
        ('e' | 'E') ( PLUS|MINUS )? (Digit)+
        ;

    Number
        :
        (Digit)+ ( DOT (Digit)* (Exponent)? | Exponent)?
        ;

    fragment
    UnquotedString
        :  
          ( ~(SINGLE_QUOTE|DOUBLE_QUOTE|' '|'\t'|'\n'|LPAREN|RPAREN|COMMA))+
        ;

    fragment
    QuotedLiteral
        :
        DOUBLE_QUOTE ( ~(DOUBLE_QUOTE|'\\') | ('\\' .) )* DOUBLE_QUOTE 
        ;

    Parameter
        :
        UnquotedString | QuotedLiteral
        ;

    // Whitespace is routed to the hidden channel so the parser never sees it.
    // The loop is one-or-more: a token rule that can match the empty string
    // consumes no input and can stall the lexer.
    WS  :  (' '|'\r'|'\t'|'\n'|'\u000C')+ {$channel=HIDDEN;}
        ;

Here is my little test case

package test1;

import junit.framework.TestCase;

import org.antlr.runtime.ANTLRStringStream;
import org.antlr.runtime.CommonTokenStream;
import org.antlr.runtime.TokenStream;
import org.junit.Assert;

public class Test extends TestCase {

    /**
     * Feeds the parser two operands with no operator between them and expects
     * parsing to abort with an exception, which is printed to standard output.
     */
    public void test() {
        // Two operands, no AND/OR in between.
        final String malformedQuery = "expr1 epxr2";

        ContainsExprLexer lexer = new ContainsExprLexer(new ANTLRStringStream(malformedQuery));
        TokenStream tokenStream = new CommonTokenStream(lexer);
        ContainsExprParser parser = new ContainsExprParser(tokenStream);

        try {
            parser.eval();
        } catch (Exception expected) {
            // The parse was rejected, which is what this test wants to see.
            System.out.println(expected);
            return;
        }
        // Only reached when eval() accepted the malformed input.
        Assert.fail("Should throw Exception");
    }
}

With the result

java.lang.RuntimeException: I quit!
null

UPDATE

I found myself a part of my answer here ANTLR not throwing errors on invalid input

Community
  • 1
  • 1
Rainer
  • 761
  • 5
  • 20
  • hi, you should print stack trace of exception. The `null` is no pointer exception. The `RecognitionException` have not `getMessage()` method. – sol Jan 29 '16 at 13:33
  • Well, in a real application I would of course use a logging framework to handle the exception. This was just a proof of concept. Please mark as solved if so. thanks I just copied the exception block from the other answer I linked to. You should just wrap the exception in the new RuntimeException("...",e) – Rainer Jan 29 '16 at 13:36