-1

I would like to build T-SQL parser which will be building new queries for different DBMS with specific rules. First rule which would like to implement is to add to every column name ' at the beginning and end of column name.

I get code from below link SQL, all column names are in brackets, C#

Used Code:

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Microsoft.SqlServer.TransactSql.ScriptDom;

namespace ConsoleApplication8
{
    public class QueryParser
    {
        public IEnumerable<string> Parse(string sqlSelect)
        {
            TSql100Parser parser = new TSql100Parser(false);
            TextReader rd = new StringReader(sqlSelect);
            IList<ParseError> errors;
            var columns = new List<string>();

            var fragments = parser.Parse(rd, out errors);
            var columnVisitor = new SQLVisitor();
            fragments.Accept(columnVisitor);
            columns = new List<string>(columnVisitor.Columns);

            return columns;
        }
    }


    internal class SQLVisitor : TSqlFragmentVisitor
    {
        private List<string> columns = new List<string>();

        private string GetNodeTokenText(TSqlFragment fragment) 
        { 
            StringBuilder tokenText = new StringBuilder(); 
            for (int counter = fragment.FirstTokenIndex; counter <= fragment.LastTokenIndex; counter++) 
            { 
                tokenText.Append(fragment.ScriptTokenStream[counter].Text); 
            }

            return tokenText.ToString(); 
        }


        public override void ExplicitVisit(ColumnReferenceExpression node)
        {
            columns.Add(GetNodeTokenText(node));
        }

        public IEnumerable<string>  Columns {
            get { return columns; }
        }
    } 

    public class Program
    {

        private static void Main(string[] args)
        {
            QueryParser queryParser = new QueryParser();
            var columns = queryParser.Parse("SELECT A,[B],C,[D],E FROM T  WHERE isnumeric(col3) = 1 Order by Id desc");
            foreach (var column in columns)
            {
                Console.WriteLine(column);
            }
        }
    }
}

But I do not know how to build a new script which the only difference between old and new one will be column names with '.

So the result of below SQL:

SELECT A,[B],C,[D],E FROM T  WHERE isnumeric(col3) = 1 Order by [Id] desc

should be

SELECT A,'B',C,'D',E FROM T WHERE isnumeric(col3) = 1 Order by 'Id' desc
adam.g
  • 101
  • 3
  • FYI, [don't use `ISNUMERIC`](https://wp.larnu.uk/stop-using-isnumeric-its-probably-wrong/), it's a *terrible* function; it doesn't tell you is the column can be converted to a `numeric` but if the value can be converted to *any* of the numerical data types (and even then, it's not always correct). Switch to using `TRY_CONVERT`/`TRY_CAST`. – Thom A Jan 18 '23 at 11:36
  • Good luck with that. I don't follow what this will accomplish, I would have assmued you would need to add the appropriate delimiters for the target DBMS. – Stu Jan 18 '23 at 11:37
  • Column names with single quotes will just return an unamed column containing a string with the column name. Is that what you actually need? Double quotes is standard SQL. And ISNUMERIC isn't standard SQL anyway – Martin Smith Jan 18 '23 at 12:32
  • 1
    But anyway TSQL ScriptDom is for generating TSQL not arbitrary SQL grammars – Martin Smith Jan 18 '23 at 12:38
  • 1
    You'll need to extract the text from the tokens that precede and follow each column to build the new SQL statement. – Dan Guzman Jan 18 '23 at 12:44
  • @DanGuzman could you help with that ? – adam.g Jan 18 '23 at 13:24
  • 1
    @adam.g, add the actual code to your question, not as link. I don't have time to write an answer now but maybe later if someone else doesn't answer first. – Dan Guzman Jan 18 '23 at 13:29
  • Isn't it better to use hibernate or jooq or sql alchemy or EF. Building server-agnostic sql is very complicated and someone else already done this work for you – siggemannen Jan 18 '23 at 13:53

1 Answers1

1

Below is a refactored version of the code that replaces square bracket enclosures in column identifier references with single quotes and returns the transformed script. This will also replace enclosures in multi-part identifiers.

I implemented the visitor with the QueryParser class, inheriting from TSqlConcreteFragmentVisitor instead of TSqlFragmentVisitor. It's best to use the concrete visitor as the base class unless you need to visit fragments as abstract base types.

using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Microsoft.SqlServer.TransactSql.ScriptDom;

namespace ConsoleApplication8
{
    public class QueryParser : TSqlConcreteFragmentVisitor
    {

        //column identifiers keyed by FirstTokenIndex
        private Dictionary<int, Identifier> columnIdentifiers = new Dictionary<int, Identifier>();
        private TSqlFragment tsqlScriptFragment;

        public string Parse(string sqlSelect)
        {

            var parser = new TSql160Parser(false);
            var rd = new StringReader(sqlSelect);
            IList<ParseError> errors;
            tsqlScriptFragment = parser.Parse(rd, out errors);
            if(errors.Count > 0)
            {
                throw new ArgumentException($"Error(s) parsing SQL script. {errors.Count} errors found.");
            }

            tsqlScriptFragment.Accept(this);

            return getTransformedSqlScript();

        }

        //this is not used in this example but retained if you need it for other use cases        
        private string getNodeTokenText(TSqlFragment fragment)
        {
            StringBuilder tokenText = new StringBuilder();
            for (int counter = fragment.FirstTokenIndex; counter <= fragment.LastTokenIndex; counter++)
            {
                tokenText.Append(fragment.ScriptTokenStream[counter].Text);
            }

            return tokenText.ToString();
        }

        //add identifiers in ColumnReferenceExpression to dictionary upon visit
        public override void ExplicitVisit(ColumnReferenceExpression node)
        {

            foreach(var identifier in node.MultiPartIdentifier.Identifiers)
            {
                this.columnIdentifiers.Add(identifier.FirstTokenIndex, identifier);
            }

        }

        private string getTransformedSqlScript()
        {

            var transformedScript = new StringBuilder();

            for (int i = 0; i < tsqlScriptFragment.ScriptTokenStream.Count; ++i)
            {

                if (columnIdentifiers.ContainsKey(i))
                {
                    //replace square braket enclosures with single quotes, if needed
                    var columnIdentifier = columnIdentifiers[i];
                    var newcolumnIdentifier = columnIdentifier.QuoteType == QuoteType.SquareBracket ? $"'{columnIdentifier.Value}'" : columnIdentifier.Value;
                    transformedScript.Append(newcolumnIdentifier);
                }
                else
                {
                    //keep original script text
                    transformedScript.Append(tsqlScriptFragment.ScriptTokenStream[i].Text);
                }

            }

            return transformedScript.ToString();
        }

    }

    public class Program
    {
        private static void Main(string[] args)
        {

            var queryParser = new QueryParser();
            var transformedScript = queryParser.Parse("SELECT A,[B],T.C,T.[D],E FROM T  WHERE isnumeric(col3) = 1 Order by [Id] desc");

            Console.WriteLine(transformedScript);

        }
    }
}
Dan Guzman
  • 43,250
  • 3
  • 46
  • 71