An algorithm based on recursive parser calls. There are several modes: comment parsing (3 subtypes), quoted-text parsing, and normal. Normal mode can switch to any other mode, each of which in turn can only switch back to normal. Thus, e.g., quote chars within comments and any comment chars within quoted text are ignored. The chars to be searched for depend on the current mode. The source is parsed chunk by chunk: once the target chars are found, the mode is switched accordingly, the current chunk is finished, and the next one begins with the next recursive call. The call stack stores the intermediate results. After the source ends, the backward process starts: each called parser concatenates and returns its chunk, so that finally the complete code is retrieved.
Here is the code:
Option Explicit
' Entry point: loads the SQL source file, strips its comments with Parse
' and stores the cleaned text in the result file.
Sub RemoveComments()
    Const SOURCE_PATH As String = "C:\Users\DELL\Desktop\tmp\source.sql"
    Const RESULT_PATH As String = "C:\Users\DELL\Desktop\tmp\result.sql"

    Dim strInput As String      ' text as read from the source file
    Dim strCleaned As String    ' text with comments removed, filled in by Parse

    ' Second argument is the text format: -2 - System default, -1 - Unicode, 0 - ASCII
    strInput = ReadTextFile(SOURCE_PATH, 0)

    ' Mode 0 starts the parser in normal (non-comment, non-quoted) mode.
    Parse strInput, strCleaned, 0

    ' Third argument is the text format, same codes as for reading.
    WriteTextFile strCleaned, RESULT_PATH, 0
End Sub
' Removes SQL comments from strSrc and returns the remaining text in strRes.
'
' The text is consumed chunk by chunk. Each call handles one chunk in the
' given mode, then calls itself for the remainder with the next mode and
' finally prepends the text it decided to keep to the result of that call.
'
' Parameters:
'   strSrc     - text still to be processed (not modified).
'   strRes     - receives the processed text.
'   lngMode    - mode in which the beginning of strSrc is interpreted:
'                0 - normal text
'                1 - multiline comment /* ... */
'                2 - whole line comment (only blanks/tabs before "--")
'                3 - trailing single line comment ("--" after other text)
'                4 - text within single quotes
'                The argument is only read; it is not changed by this Sub.
'   blnMidLine - optional, used by the recursive calls: True when strSrc
'                starts in the middle of a source line (right after a
'                closing quote or "*/"). Callers normally omit it.
'
' Raises error 5 when lngMode is not one of the values above (and strSrc is
' not empty).
'
' Notes:
'   - An unclosed multiline comment and a "--" comment on the last line are
'     removed up to the end of the text.
'   - An unclosed quoted literal is kept unchanged up to the end of the text.
'   - Line ends may be CR+LF, LF or CR.
'   - One recursive call is made per chunk, so the recursion depth grows with
'     the number of comments and quoted literals; very large inputs may
'     exhaust the VBA call stack.
Sub Parse(strSrc As String, strRes As String, lngMode As Long, Optional ByVal blnMidLine As Boolean = False)
    Static objRegExp As Object
    Dim objMatches As Object
    Dim objMatch As Object
    Dim strBeg As String            ' part of the current chunk that is kept
    Dim strRet As String            ' processed remainder returned by the recursive call
    Dim lngPos As Long              ' 1-based position where the remainder starts
    Dim lngNextMode As Long         ' mode for the remainder
    Dim blnNextMidLine As Boolean   ' True when the remainder starts mid-line

    If objRegExp Is Nothing Then ' initialize regexp once
        Set objRegExp = CreateObject("VBScript.RegExp")
        With objRegExp
            .Global = False
            .MultiLine = True
            .IgnoreCase = True
            ' submatches: 0 = "/*", 1 = whole line "--", 2 = trailing "--", 3 = quote
            .Pattern = "(\/\*)|(^[ \t]*--)|(--)|(\')"
        End With
    End If

    strRes = ""
    If strSrc = "" Then Exit Sub ' source completed

    strBeg = ""                 ' nothing is kept by default
    lngNextMode = 0             ' every special mode returns to normal by default
    blnNextMidLine = False

    Select Case lngMode
        Case 0 ' processing normal
            Set objMatches = objRegExp.Execute(strSrc)
            If objMatches.Count = 0 Then
                strRes = strSrc
                Exit Sub ' source completed
            End If
            Set objMatch = objMatches(0)
            lngPos = objMatch.FirstIndex ' 0-based, i.e. number of chars before the match
            If objMatch.SubMatches(0) <> "" Then
                lngNextMode = 1 ' start multiline comment
            ElseIf objMatch.SubMatches(1) <> "" Then
                If blnMidLine And lngPos = 0 Then
                    ' "^" matched only because the chunk begins here; the source line
                    ' has text before it, so this is a trailing comment. Keep the
                    ' blanks in front of "--" and leave the line break in place.
                    lngNextMode = 3
                    lngPos = objMatch.Length - 2
                Else
                    lngNextMode = 2 ' start whole line comment
                End If
            ElseIf objMatch.SubMatches(2) <> "" Then
                lngNextMode = 3 ' start singleline comment
            Else
                lngNextMode = 4 ' start text in quotes
                lngPos = lngPos + 1 ' keep found quote char
            End If
            strBeg = Left$(strSrc, lngPos)
            lngPos = lngPos + 1

        Case 1 ' processing multiline comment
            If Left$(strSrc, 2) = "/*" Then
                ' search behind the opener, otherwise "/*/" would look closed
                lngPos = InStr(3, strSrc, "*/")
            Else
                lngPos = InStr(strSrc, "*/")
            End If
            If lngPos = 0 Then Exit Sub ' source completed, comment unclosed
            lngPos = lngPos + 2 ' skip comment closing chars
            blnNextMidLine = True ' text after "*/" continues the same line

        Case 2 ' processing whole line comment
            lngPos = ParseLineBreakPos(strSrc)
            If lngPos = 0 Then Exit Sub ' source completed
            ' the line break belongs to the removed line: skip it as well
            If Mid$(strSrc, lngPos, 2) = vbCrLf Then
                lngPos = lngPos + 2
            Else
                lngPos = lngPos + 1
            End If

        Case 3 ' processing singleline comment
            lngPos = ParseLineBreakPos(strSrc) ' line break itself is kept
            If lngPos = 0 Then Exit Sub ' source completed

        Case 4 ' processing text within quotes
            lngPos = InStr(strSrc, "'")
            If lngPos = 0 Then
                strRes = strSrc ' literal unclosed: it is not a comment, keep it
                Exit Sub
            End If
            If Mid$(strSrc, lngPos, 2) = "''" Then ' escaped quote char ''
                lngNextMode = 4 ' still within quotes
                strBeg = Left$(strSrc, lngPos + 1) ' keep chunk with escaped quote char
                lngPos = lngPos + 2
            Else
                strBeg = Left$(strSrc, lngPos) ' keep chunk with closing quote char
                lngPos = lngPos + 1
                blnNextMidLine = True ' text after the literal continues the same line
            End If

        Case Else
            Err.Raise 5, "Parse", "Unknown parser mode: " & lngMode
    End Select

    Parse Mid$(strSrc, lngPos), strRet, lngNextMode, blnNextMidLine ' recursive parser call
    strRes = strBeg & strRet ' concatenate kept chunk with processed remainder
End Sub

' Returns the 1-based position of the first line break char (CR or LF) in
' strText, or 0 when there is none.
Private Function ParseLineBreakPos(strText As String) As Long
    Dim lngCr As Long
    Dim lngLf As Long

    lngCr = InStr(strText, vbCr)
    lngLf = InStr(strText, vbLf)
    If lngCr = 0 Then
        ParseLineBreakPos = lngLf
    ElseIf lngLf = 0 Or lngCr < lngLf Then
        ParseLineBreakPos = lngCr
    Else
        ParseLineBreakPos = lngLf
    End If
End Function