1

Using Antlr 3.5 rule:

ID
 : ':'? ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '0'..'9' | '_')*
 ;

does not match: standard

Could anyone explain why?

Below is the complete grammar. You can use rule colname to test against "standard".

grammar Sql;

options {
  language = Java;
  output = AST;
}

@rulecatch {catch (RecognitionException e) { throw e;}}

@header {
  package com.something.antlr.sql;
  import java.util.ArrayList;
}

@lexer::header {
  package com.something.antlr.sql;
}

@members {
 private List<Table> tables = new ArrayList<Table>();
 private List<Column> columns = new ArrayList<Column>();

 private StringBuilder joinClauses = new StringBuilder();

 private String withClauseAsString;
 private String tablesAsString;
 private String columnsAsString;
 private String aggregate;
 private String whereClauseAsString;
 private String orderByAsString;
 private String groupByAsString;
 private String limitAsString;

 public List<Table> getTables() {
  return tables;
 }

 public String getColumnsAsString() {
    return columnsAsString;
 }

 public List<Column> getColumns() {
    return columns;
 }

 public String getTablesAsString() {
    return tablesAsString;
 }

 public String getJoinClauseAsString() {
    return joinClauses.toString();
 }

 public String getWhereClauseAsString() {
    return whereClauseAsString;
 }

 public String getOrderByAsString() {
    return orderByAsString;
 }

 public String getAggregate() {
    return aggregate;
 }

 public String getGroupByAsString() {
    return groupByAsString;
 }

 public String getWithClauseAsString() {
    return withClauseAsString;
 }

 public String getLimitAsString() {
    return limitAsString;
 }


}


// Matches exactly one sql followed by end of input (EOF).
// The rule-level catch clause rethrows any RecognitionException to the caller.
sql_statement
  : sql EOF;
   catch [RecognitionException e]{ throw e;}

// A SELECT statement with optional WITH, DISTINCT, TOP, column list, FROM,
// joins, WHERE, ORDER BY, GROUP BY, LIMIT and UNION parts.
//
// While matching, the embedded actions record the source text of each part in
// the parser fields declared in the members section (withClauseAsString,
// columnsAsString, tablesAsString, joinClauses, whereClauseAsString,
// orderByAsString, groupByAsString, limitAsString). A field stays null when
// the corresponding optional part is absent, except columnsAsString, which is
// always assigned.
sql
:
  (WITH with_clause)?
    {withClauseAsString = $with_clause.text;}
  SELECT DISTINCT? (TOP LPAREN? t=number_or_param RPAREN?)? columns?
      {
        // Rebuild the select-list prefix (distinct / top n) followed by the columns.
        StringBuilder sb = new StringBuilder();

        if ($DISTINCT.text != null) {
          sb.append(" distinct ");
        }

        if ($TOP.text != null) {
          sb.append(" top ");
        }

        if ($t.text != null) {
          sb.append(" ");
          if ($LPAREN.text != null) {
            sb.append("(");
          }
          sb.append($t.text);
          if ($RPAREN.text != null) {
            sb.append(")");
          }
          sb.append(" ");
        }

        // The column list is optional; skip it when absent so that the
        // literal string "null" is never appended.
        if ($columns.text != null) {
          sb.append($columns.text);
        }
        columnsAsString = sb.toString();
      }
      (
      FROM tables*
           {tablesAsString = $tables.text;}

    // Each join is recorded inside the loop so that every join clause is
    // collected, not only the last one. Two alternatives are used so that a
    // join without a join_type never picks up the join_type of an earlier join.
    (
      jt=join_type JOIN jc=join_clause
        {
          joinClauses.append("\n ");
          joinClauses.append($jt.text + " " + $jc.text);
        }
      |
      JOIN jc2=join_clause
        {
          joinClauses.append("\n ");
          joinClauses.append($jc2.text);
        }
    )*

    (WHERE where_clause)?
          {whereClauseAsString = $where_clause.text;}

    (ORDER_BY order_by_clause)?
          {orderByAsString = $order_by_clause.text;}

    (GROUP_BY group_by_clause)?
          {groupByAsString = $group_by_clause.text;}
    (LIMIT t2=number_or_param)?
          {
            if ($t2.text != null) {
              limitAsString = " limit " + $t2.text;
            }
          }

    (UNION ALL? alt_sql)?
    )?
  ;


with_ident
:
  ID
;

with_clause
:
  with_ident (LPAREN columns RPAREN)? AS alt_sql
;


alt_sql
:
  LPAREN?
  (
  SELECT DISTINCT? (TOP number_or_param)? columns?
    FROM tables*
    (join_type? JOIN join_clause)*
    (WHERE where_clause)?
    (ORDER_BY order_by_clause)?
    (GROUP_BY group_by_clause)?
    (LIMIT NUMBER)?
    (UNION ALL? alt_sql)?
  )
  RPAREN?
;

exists_select
:
  SELECT NUMBER FROM tables (WHERE where_clause)?
;

in_select
:
  SELECT colname FROM tables (WHERE where_clause)?
;

colname_or_operand
:
 colname | LPAREN ID RPAREN | RANGE | CONSTANT |  TICK '*' TICK | alt_sql
;

case_test
:
  (ID '.')? function (CALC function)* operator (ID | NUMBER)
;

colname
:
  //CASE WHEN case_test ( (AND | OR) case_test)* THEN (NUMBER | ID) ELSE (NUMBER | ID) END alias
  CASE (WHEN where_logical THEN (ID | NUMBER | LITERAL))+ ELSE (ID | NUMBER | LITERAL) END alias
  |
  col = ID alias?
    {
    Column c = new Column($col.text, "");
    columns.add(c);
    }
  |
  tbl=ID '.' col=ID ('::' ID)? alias?
    {
    Column c = new Column($tbl.text + "." + $col.text, $alias.text);
    columns.add(c);
    }
  |
  (ID '.')? '*'
  |
  (ID '.')? func_name = function alias?
    {
    Column c = new Column($func_name.text, $alias.text);
    columns.add(c);
    }
  |
  col = (ID '::' ID)
    {
    Column c = new Column($col.text, "");
    columns.add(c);
    }
  |
  ('?' | NUMBER | LITERAL) (('::' | AS) ID)?
    {
    Column c = new Column("?", "");
    columns.add(c);
  }
;

columns
:
  (colname (CONCAT colname)* | alt_sql alias?) (',' (colname (CONCAT colname)* | alt_sql alias?))*
;

tablename
:
  ID alias?
    {
    Table tbl = new Table($ID.text, $alias.text);
    tables.add(tbl);
    }
  |
  (ID '.')? function alias?
  |
  t = TABLE LPAREN func = ID LPAREN arguments RPAREN RPAREN alias?
    {
    Table tbl = new Table($t.text + "(" + $func.text + "(" + $arguments.text + ")" + ")", $alias.text);
    tables.add(tbl);
    }
;

arguments
:
  argument (',' argument)*
;

argument
:
  colname
;

tables
  : tablename (',' tablename)*;


join_sql
:
  LPAREN
  SELECT DISTINCT? columns
  FROM tables*
  (WHERE where_clause) RPAREN alias
  ON where_expression
;


join_type
:
  ('left' 'outer'? | 'LEFT' 'OUTER'? | 'Left' 'Outer'?) | ('right' 'outer'? | 'RIGHT' 'OUTER'?  | 'Right' 'Outer'?)
;

join_clause
  :  (alt_sql alias | tablename alias? | colname) ON where_clause
  ;

alias
:
  AS? ID
;


exists_clause
:
  EXISTS LPAREN exists_select RPAREN
;

in_clause
:
  LPAREN (LPAREN in_select RPAREN alias? | in_values) (',' (LPAREN in_select RPAREN alias? | in_values))* RPAREN
;

in_values
:
  (NUMBER | LITERAL) (',' (NUMBER | LITERAL))*
;

where_expression
:
  LPAREN* ((colname operator (colname_or_operand | in_clause)) | exists_clause?) RPAREN*
;

where_logical
:
  LPAREN* where_expression ((AND | OR) where_expression)* RPAREN*
;

where_clause
:
  where_logical ((AND | OR) where_logical)*
;

number_or_param
:
  NUMBER | '?'
;

order_by
:
  (ID | NUMBER) (ASC | DESC)?
;

order_by_clause
  : order_by (',' order_by)*;

group_by_clause
:
  ID ('.' ID)? (',' ID ('.' ID)?)*
;

BETWEEN
:
 'between' | 'BETWEEN' | 'Between'
;


operator
  : '=' | '<' | '>' | '<=' | '>=' | '<>' | 'is' | 'Is' | 'IS' | BETWEEN |  IN |  'like' | '&lt;=' | '&gt;=' | '&lt;&gt;';

CALC
:
  '+' | '-' | '/'
;

function
:
  ID LPAREN DISTINCT? arguments? RPAREN ('::' ID)?
;

// An ID enclosed in a pair of TICK characters.
CONSTANT
:
  TICK ID TICK
;

// Any run of characters enclosed in a pair of TICK characters.
// NOTE(review): overlaps with CONSTANT for input such as a ticked identifier;
// which of the two tokens the lexer produces there should be confirmed.
LITERAL
:
  TICK .* TICK
;

TABLE : ('t' | 'T') ('a' | 'A') ('b' | 'B') ('l' | 'L') ('e' | 'E');
WITH : 'with' | 'WITH' | 'With';
SELECT : 'select' | 'SELECT' | 'Select';
DISTINCT : ('d' | 'D') ('i' | 'I') ('s' | 'S') ('t' | 'T') ('i' | 'I') ('n' | 'N') ('c' | 'C') ('t' | 'T');
TOP : 'top' | 'TOP' | 'Top';
CASE : ('case' | 'Case' | 'CASE');
WHEN: ('when' | 'When' | 'WHEN');
THEN : 'then' | 'Then' | 'THEN';
ELSE : 'else' | 'Else' | 'ELSE';
END : 'end' | 'END' | 'End';
FROM : 'from' | 'FROM' | 'From';
EXISTS : 'exists' | 'EXISTS' | 'Exists';
WHERE : 'where' | 'WHERE'| 'Where';
// The JOIN keyword in three spellings.
// NOTE(review): alternation binds looser than sequence, so the optional
// left/right prefix applies only to the lowercase 'join' alternative (and with
// no whitespace in between); 'Join' and 'JOIN' are matched bare. Confirm
// whether the prefix is intended at all, given the join_type parser rule.
JOIN : ('left' | 'LEFT' | 'Left' | 'right' | 'RIGHT' | 'Right')? 'join' | 'Join' | 'JOIN';
ON : 'on' | 'ON' | 'On';
AND : ('a' | 'A') ('n' | 'N') ('d' | 'D');
AS : ('a' | 'A') ('s' | 'S');
OR : ('o' | 'O') ('r' | 'R');
IN : 'in' | 'IN' | 'In';
ORDER_BY : ('o' | 'O') ('r' | 'R') ('d' | 'D') ('e' | 'E') ('r' | 'R') ' ' ('b' | 'B') ('y' | 'Y');
ASC : ('a' | 'A') ('s' | 'S') ('c' | 'C');
DESC : ('d' | 'D') ('e' | 'E') ('s' | 'S') ('c' | 'C');
// The "group by" keyword pair, separated by exactly one space.
// Matched case-insensitively, in the same way as ORDER_BY, so that
// "GROUP BY" and "Group By" are recognized as well as "group by".
GROUP_BY : ('g' | 'G') ('r' | 'R') ('o' | 'O') ('u' | 'U') ('p' | 'P') ' ' ('b' | 'B') ('y' | 'Y');
UNION : ('u' | 'U') ('n' | 'N') ('i' | 'I') ('o' | 'O') ('n' | 'N');
ALL :  ('a' | 'A') ('l' | 'L') ('l' | 'L');
LIMIT : 'limit' | 'LIMIT' | 'Limit';
CONCAT : '||';
TICK: '\u0027';

LPAREN
: '('
;

RPAREN
: ')'
;

// Lexer rule: (ID or '?'), then AND, then (ID or '?'), produced as one token.
// NOTE(review): because this is a lexer rule, no whitespace is required between
// the three parts, so a plain word such as "standard" can be split up as
// ID("st") AND("and") ID("ard") rather than being tokenized as a single ID.
// Consider expressing this as a parser rule instead.
RANGE
:
  (ID | '?') AND (ID | '?')
;

NUMBER
  : ('0'..'9')+;

// Identifier: an optional leading ':', then a letter or underscore, then any
// number of letters, digits or underscores.
ID
 : ':'?('a'..'z' | 'A'..'Z' | '_')('a'..'z' | 'A'..'Z' | '0'..'9' | '_')*;

// One or more space, tab, newline, carriage-return or form-feed characters.
// The action routes these tokens to the HIDDEN channel.
WHITESPACE
 : (' ' | '\t' | '\n' | '\r' | '\f')+  {$channel = HIDDEN;};
Bart Kiers
  • 166,582
  • 36
  • 299
  • 288
John Mikic
  • 640
  • 6
  • 11
  • it *does* match it, but some other rule in your grammar might match it too. – Bart Kiers Feb 06 '13 at 22:43
  • The problem here is that the rule: like_id : ID ; does not match "standard" when I run it in ANTLRWorks 1.5 Interpreter using this rule in isolation. How can it match another rule in such a case? – John Mikic Feb 07 '13 at 15:00
  • Sorry, I can't comment on something I can't see. Could you add your grammar? If you're reluctant to post the whole thing, be sure to post enough of it so that I, or someone else, can actually reproduce it. – Bart Kiers Feb 07 '13 at 15:55
  • Bart, I appreciate your trying to help! However, I just realized that the ANTLRWorks Interpreter cannot run in isolation, i.e. it cannot check single rule without taking the whole grammar into account. – John Mikic Feb 07 '13 at 18:43
  • The interpreter cannot, but the debugger can parse any rule you specify (be it a parser- or lexer rule). However, if you try `like_id : ID ;` in the debugger and it fails, it means there is another lexer rule that matches `"standard"` before `ID` can match it. This is because the lexer operates independently from the parser. It does not matter that the parser is trying to match an `ID`, the lexer will go its own way in creating tokens. See: http://stackoverflow.com/questions/9251048/antlr-v3-error-with-parser-lexer-rules – Bart Kiers Feb 07 '13 at 18:55
  • I cannot run Debugger because of "Connection Error - Cannot launch the debuggerTab. Time-out waiting to connect to the remote parser". – John Mikic Feb 07 '13 at 19:24
  • The port the debugger is trying to connect to is occupied by something. Choosing a different port in the settings will likely resolve this. But check out my answer as well. – Bart Kiers Feb 07 '13 at 19:34
  • You should also place all lexer rules after all parser rules. Specifically, you should place the `BETWEEN` rule after the `operator` rule. – Sam Harwell Feb 19 '13 at 17:15

1 Answer

1

The lexer has an issue with "standard" because you also defined a RANGE rule:

RANGE
 : (ID | '?') AND (ID | '?')
 ;

that could tokenize "standard" as:

  • ID ("st")
  • AND ("and")
  • ID ("ard")

Note that such a rule shouldn't be a lexer rule, because it doesn't mandate that there be any spaces between those tokens. It should be a parser rule:

range
 : (ID | '?') AND (ID | '?')
 ;

After making that change, I'm sure "standard" will be tokenized as an ID token.

Bart Kiers
  • 166,582
  • 36
  • 299
  • 288