As mentioned in the comment, you should use an HTML parser. However, if you want to give it a try in plain code, you could split the string, determine whether each piece of the split text is a title or an article, and then add the result to a list.
However, for this approach you have to keep the following in mind:
NOTE: This code assumes the string (i.e. your document's content) contains the same number of titles and articles.
Here's the code I've made - hosted on dotnetfiddle.com as well:
// Sample document: every <h1> heading is followed by the text of its article.
string sample = "<h1>Title 1</h1>" + "Article text" + "<h1>Title 2</h1>" + "Article text" + "<h1>Title 3</h1>" + "Article text";
// Split on the closing tag, so every piece has the shape "[article text]<h1>[title]".
// Credit (string.Split with a multi-character delimiter): https://stackoverflow.com/a/1254596/12511801
string[] arr = sample.Split(new string[]{"</h1>"}, StringSplitOptions.None);
// Titles and articles are kept in two parallel lists: titles[i] belongs to articles[i].
// The loop below keeps both lists the same length for any input.
List<string> titles = new List<string>();
List<string> articles = new List<string>();
// True while the most recently stored title has not received its article yet.
bool awaitingArticle = false;
foreach (string s in arr)
{
    // Split each piece once: headingParts[0] is the text in front of "<h1>",
    // headingParts[1] (when present) is the title that follows it.
    string[] headingParts = s.Split(new string[]{"<h1>"}, StringSplitOptions.None);
    string beforeHeading = headingParts[0];
    if (awaitingArticle)
    {
        // The text in front of the next heading is the article of the pending title.
        // It may be empty when two headings follow each other directly.
        articles.Add(beforeHeading);
        awaitingArticle = false;
    }
    else if (beforeHeading.Length > 0)
    {
        // Text that no heading introduces (e.g. before the first <h1>): store it with
        // an empty title so the following title/article pairs stay aligned.
        titles.Add(string.Empty);
        articles.Add(beforeHeading);
    }
    if (headingParts.Length > 1)
    {
        titles.Add(headingParts[1]);
        awaitingArticle = true;
    }
}
if (awaitingArticle)
{
    // The document ended right after a title: give that title an empty article.
    articles.Add(string.Empty);
}
// ------------
// Combine the two parallel lists into a list of [title line, article line] pairs.
// Credit (list of lists): https://stackoverflow.com/a/12628275/12511801
List<List<string>> myList = new List<List<string>>();
for (int i = 0; i < titles.Count; i++)
{
    // A title may have no matching article (for example, two headings in a row);
    // fall back to an empty article instead of indexing past the end of the list.
    string articleText = i < articles.Count ? articles[i] : string.Empty;
    myList.Add(new List<string>{"Title: " + titles[i], "Article: " + articleText});
}
// Write every stored line to the console, pair by pair, in insertion order.
for (int pairIndex = 0; pairIndex < myList.Count; pairIndex++)
{
    // Each inner list holds the lines of one pair; print them one per line.
    myList[pairIndex].ForEach(Console.WriteLine);
}
Result:
Title: Title 1
Article: Article text
Title: Title 2
Article: Article text
Title: Title 3
Article: Article text