So I have a large volume of HTML text and I want to extract all of the data that is between every occurrence of <p>
and </p>
I have code that can locate the first occurrence of it and extract the first occurrence but can't seem to loop it.
I have tried for looping for the amount of times <p>
will come up in the entire text.
I have tried looping it and deleting one occurrence and the text between(<p>
and </p>
) but that did not seem to work either
var startTag = $"<p>";
var endTag = $"</p>";
int count = 0;
string ImpureCText = "<p>hello this is the first part</p>fgbtfhsgs <p> this is the second part</p> <p> this is the third part</p>";
int index1 = ImpureCText.IndexOf(startTag);
int index2 = ImpureCText.IndexOf(endTag);
foreach (Match match in Regex.Matches(ImpureCText, startTag))
{
count++;
}
Console.WriteLine("'{0}'" + " Found " + "{1}" + " Times", startTag, count);
for (int i = 0; i < count; i++)
{
//Do code stuff
string delete = ImpureCText.Remove(ImpureCText.IndexOf("<p>"), ImpureCText.IndexOf("</p>"));
Console.WriteLine(delete);
}
Console.ReadKey();