Looking for some validation on the approach to using ANTLR 4 to parse SQL joins.
I have generated a lexer, parser and visitor from the following grammar.
https://github.com/antlr/grammars-v4/tree/master/tsql
I can then create a parse tree (in this example for a view) and I can kick off a walk of tree using a listener I have implemented.
ICharStream stream = CharStreams.fromstring(view);
ITokenSource lexer = new TSqlLexer(stream);
ITokenStream tokens = new CommonTokenStream(lexer);
TSqlParser parser = new TSqlParser(tokens);
parser.BuildParseTree = true;
IParseTree tree = parser.create_view();
TSqlListener listener = new TSqlListener();
ParseTreeWalker.Default.Walk(listener, tree);
My question is. Is my method of extracting the tokens for the joins the 'correct' and most efficient way of doing this.
My implementation is below and i based on listening for a walk of the tree at the join conditions. I need to capture table aliases and relate them to columns so I need to be in the same context when I'm walking the tree. Hence I'm manually descending in a single method.
public void EnterTable_sources([NotNull] TSqlParser.Table_sourcesContext context)
{
var table_sources = context.table_source().ToList();
foreach (var table_source in table_sources)
{
var item = table_source.table_source_item_joined();
if (item != null)
{
//first aliases
var source_item = item.table_source_item();
if (source_item != null)
{
TableAlias tableAlias = new TableAlias();
var table_name = source_item.table_name_with_hint();
if (table_name != null)
{
var fullTableName = table_name.GetText();
if (fullTableName.Contains('.'))
{
var nameParts = fullTableName.Split('.').ToList();
for (int i = 0; i <nameParts.Count; i++)
{
tableAlias.AddParts(nameParts);
}
}
else
{
tableAlias.AddParts(fullTableName);
}
}
var table_alias = source_item.as_table_alias();
if (table_alias != null)
{
tableAlias.Alias = table_alias.GetText();
}
JoinAnalysis.Aliases.Add(tableAlias);
}
var join_parts = item.join_part();
foreach (var join_part in join_parts)
{
var table_source_joins = join_part.table_source();
if (table_source_joins != null)
{
//The join table and alias
var table_source_item_joined = table_source_joins.table_source_item_joined();
if (table_source_item_joined != null)
{
var joinAlias = new TableAlias();
var table_source_item = table_source_item_joined.table_source_item();
var table_name = table_source_item.table_name_with_hint();
if (table_name != null)
{
var fullTableName = table_name.GetText();
if (fullTableName.Contains('.'))
{
var nameParts = fullTableName.Split('.').ToList();
joinAlias.AddParts(nameParts);
}
else
{
joinAlias.AddParts(fullTableName);
}
}
if (table_source_item != null)
{
var table_alias = table_source_item.as_table_alias();
if (table_alias != null)
{
joinAlias.Alias = table_alias.GetText();
}
}
if (joinAlias.Alias != null)
{
JoinAnalysis.Aliases.Add(joinAlias);
}
}
}
var search_condition = join_part.search_condition();
if (search_condition != null)
{
//The join conditions
var conditions = search_condition.search_condition_and();
if (conditions != null)
{
foreach (var condition in conditions)
{
if (condition != null)
{
foreach (var search_condition_not in condition.search_condition_not())
{
JoinCondition joinCondition = new JoinCondition();
joinCondition.LineNumber = search_condition_not.Start.Line;
var conditionText = search_condition_not.GetText();
joinCondition.JoinConditionText = conditionText;
var splitCondition = conditionText.Split("=");
if (splitCondition.Length == 2)
{
joinCondition.LeftPart = splitCondition[0];
joinCondition.RightPart = splitCondition[1];
}
JoinAnalysis.JoinConditions.Add(joinCondition);
}
}
}
}
}
}
}
}
}
Is there a better way to do this using all of the other listener methods that have been generated without manually descending into child nodes? I there some magic that I'm missing that holds context between nodes as they are walked?