I have a list of more than 100 messages, and I need to identify a particular message and its occurrences.
For this purpose, I have created a regex lookup that matches these messages by placing * in the variable sections of each message, as below:
CheckConvexity: Zone=*, Surface=* is non-convex. ...vertex * to vertex * to vertex * ...vertex *=[*] ...vertex *=[*] ...vertex *=[*]
Later I substitute a regex quantifier for each * to make it work, but matching 1000 messages against 1000 lookup regex strings takes too much time:
CheckConvexity:\ Zone=.*,\ Surface=.*\ is\ non-convex\.\ \
\.\.\.vertex\ .*\ to\ vertex\ .*\ to\ vertex\ .*\ \ \.\.\.vertex\
.*=\[.*\]\ \ \.\.\.vertex\ .*=\[.*\]\ \ \.\.\.vertex\ .*=\[.*\]
Running example: https://regex101.com/r/pX9jvD/1
Later I replaced .* with [^ ]*, but that only matches a variable section up to the first space, so it fails for many of the messages. With this change the pattern matching was much faster and the messages were parsed at lightning speed.
Suppose my message is as below. What should the regex pattern be for the variable sections of the string, so that it matches any character without impacting the performance of the pattern matching?
"CheckConvexity: Zone=CEILING VOID ZONE STORY 1, Surface=CEILING VOID STAFF BEDROOM:SLAB 2.2 is non-convex. ...vertex 2 to vertex 3 to vertex 4 ...vertex 2=[-9.59,21.59,3.90] ...vertex 3=[-6.73,24.45,3.90] ...vertex 4=[-8.13,25.85,3.90]"
Below is an example code snippet that tries the match in different ways:
/// <summary>
/// Compares how the regex fragment substituted for each <c>*</c> wildcard of a message
/// template affects whether a sample message matches and how long the match takes.
/// </summary>
public class CalculateRegexProcessingTime
{
    /// <summary>
    /// Escapes a wildcard message template, then runs the greedy, space-bounded and lazy
    /// wildcard variants against one sample message, printing the resulting pattern, the
    /// match result and the elapsed match time for each variant.
    /// </summary>
    public void GetRegexTimeCaculations()
    {
        string patternString = "CheckConvexity: Zone=*, Surface=* is non-convex. ...vertex * to vertex * to vertex * ...vertex *=[*] ...vertex *=[*] ...vertex *=[*]";

        // After escaping, every '*' placeholder appears as "\*" and every space as "\ ",
        // which is what keeps the spaces significant under IgnorePatternWhitespace.
        patternString = Regex.Escape(patternString);

        // Regex.Escape does not escape ']', so escape the one closing each "[*]" group
        // explicitly. Replace is a no-op when the sequence is absent, so no Contains guard.
        patternString = patternString.Replace("\\*]", "\\*\\]");

        string sampleMsgToMatch = "CheckConvexity: Zone=CEILING VOID ZONE STORY 1, Surface=CEILING VOID STAFF BEDROOM:SLAB 2.2 is non-convex. ...vertex 2 to vertex 3 to vertex 4 ...vertex 2=[-9.59,21.59,3.90] ...vertex 3=[-6.73,24.45,3.90] ...vertex 4=[-8.13,25.85,3.90]";

        MatchRegExGreedyPattern(patternString, sampleMsgToMatch);
        MatchRegExWithBoundaryConditon(patternString, sampleMsgToMatch);
        MatchRegExWithLazyPattern(patternString, sampleMsgToMatch);
    }

    /// <summary>
    /// Matches with every wildcard replaced by the greedy <c>.*</c>.
    /// </summary>
    /// <param name="patternString">Escaped template in which each wildcard is the sequence <c>\*</c>.</param>
    /// <param name="sampleMsgToMatch">Message to test against the pattern.</param>
    private void MatchRegExGreedyPattern(string patternString, string sampleMsgToMatch)
    {
        ReportMatch("Greedy method", patternString, ".*", sampleMsgToMatch);
    }

    /// <summary>
    /// Matches with every wildcard replaced by <c>[^ ]*</c>, which stops at the first space.
    /// The sample message has spaces inside its variable sections (for example the zone
    /// name), so this variant reports no match for it.
    /// </summary>
    /// <param name="patternString">Escaped template in which each wildcard is the sequence <c>\*</c>.</param>
    /// <param name="sampleMsgToMatch">Message to test against the pattern.</param>
    private void MatchRegExWithBoundaryConditon(string patternString, string sampleMsgToMatch)
    {
        ReportMatch("Matching RegEx with Boundary Conditon", patternString, "[^ ]*", sampleMsgToMatch);
    }

    /// <summary>
    /// Matches with every wildcard replaced by the lazy <c>.*?</c>, which consumes as few
    /// characters as possible before the literal text that follows the wildcard.
    /// </summary>
    /// <param name="patternString">Escaped template in which each wildcard is the sequence <c>\*</c>.</param>
    /// <param name="sampleMsgToMatch">Message to test against the pattern.</param>
    private void MatchRegExWithLazyPattern(string patternString, string sampleMsgToMatch)
    {
        // The previous replacement "[^$]*" was not lazy: inside a character class '$' is a
        // literal, so it was a greedy "anything except a dollar sign" match.
        ReportMatch("Matching RegEx with Lazy pattern matching", patternString, ".*?", sampleMsgToMatch);
    }

    /// <summary>
    /// Substitutes <paramref name="wildcardReplacement"/> for every escaped wildcard, runs the
    /// match and prints the title, the final pattern, the match result and the elapsed time.
    /// </summary>
    /// <param name="title">Heading printed before the results.</param>
    /// <param name="patternString">Escaped template in which each wildcard is the sequence <c>\*</c>.</param>
    /// <param name="wildcardReplacement">Regex fragment that replaces each wildcard.</param>
    /// <param name="sampleMsgToMatch">Message to test against the pattern.</param>
    private static void ReportMatch(string title, string patternString, string wildcardReplacement, string sampleMsgToMatch)
    {
        Console.WriteLine(title);

        string finalPattern = patternString.Replace("\\*", wildcardReplacement);
        Console.WriteLine("Pattern:{0}", finalPattern);

        // Time only the Regex.IsMatch call so that string building and console output
        // do not end up in the reported figure.
        Stopwatch watch = Stopwatch.StartNew();
        bool matched = Regex.IsMatch(sampleMsgToMatch, finalPattern, RegexOptions.IgnorePatternWhitespace);
        watch.Stop();

        Console.WriteLine("Matched:{0}", matched);
        Console.WriteLine("Parse Time:{0} sec", watch.Elapsed.TotalSeconds);
    }
}