I've got an ASP.NET Core project that requires me to read the response from a website and extract a certain word.
What I've tried was to replace the tags with white space, and remove the tags. Unfortunately, I'm not getting any where with this. What is a better approach?
I want to extract Toyota
from these html tags
<tr>
<td class="text-muted">Car Model</td>
<td><strong>Toyota 2015</strong></td>
</tr>
I've tried:
var documentSource = streamReader.ReadToEnd();
//removes html content
Regex remove = new Regex(@"<[^>].+?>");
var strippedSource = remove.Replace(documentSource.Replace("\n", ""), "");
//convert to array
string[] siteContextArray = strippedSource.Split(',');
//matching string
var match = new Regex("Car Model ([^2015]*)");
List<Model> modelList = new List<Model>();
Model model = new Model();
foreach (var item in siteContextArray)
{
var wordMatch = match.Match(item);
if (wordMatch.Success)
{
model.Add(
new Model
{
CarModel = wordMatch.Groups[1].Value
}
);
}
}
return modelList;