I'm very very new to c#. I have currently written this piece of code that will essentially loop through paragraphs in a word document and search for any regex matches within a list of regexes. It starts off very fast but then becomes painfully slow after about 1000 paragraphs in. Does anyone know how I can optimise the below code to stop being so slow after looping many times?
Thanks in advance,
static List<MatchDetails> GetRegexMatchesContent(Word.Document document, List<string> regexes)
{
var matchDetails = new List<MatchDetails>(2000);
// For each paragraph in word document, run regex searches.
for (int i = 0; i < document.Paragraphs.Count; i++)
{
Console.WriteLine($"Processed Paragraph {i} of {document.Paragraphs.Count}");
var rng = document.Paragraphs[i + 1].Range;
// Loop through each regex input.
foreach (string regex in regexes)
{
// Match regexes on text.
MatchCollection matches = Regex.Matches(rng.Text, regex, RegexOptions.IgnoreCase);
foreach (Match match in matches)
{
foreach (Capture capture in match.Captures)
{
{
if (capture.Value != "")
{
// Retrieve regex match information and save to MatchDetails List.
string matchText = capture.Value;
int matchPageNo = rng.Information[Word.WdInformation.wdActiveEndPageNumber];
int matchLineNo = rng.Information[Word.WdInformation.wdFirstCharacterLineNumber];
int matchCharNo = capture.Index;
MatchDetails matchRow = new MatchDetails { matchText = matchText, documentSection = "Body Text", pageNo = matchPageNo, lineNo = matchLineNo, charNo = matchCharNo };
matchDetails.Add(matchRow);
}
}
}
}
}
}
return matchDetails;
}