I am trying to increase the speed of my webscraping app by making my for each loop a parallel for each.
public List<MovieTVInformation> ViewMovies()
{
List<MovieTVInformation> AllFoundMovies = new List<MovieTVInformation>(100);
HtmlWeb website = new HtmlWeb();
HtmlDocument doc = website.Load("http://www.imdb.com/chart/moviemeter");
var MovieNames = doc.DocumentNode.SelectNodes("//*[@id='main']/div/span/div/div/div[3]/table/tbody/tr/td[2]").ToList();
var ImageLocation = doc.DocumentNode.SelectNodes("//*[@id='main']/div/span/div/div/div[3]/table/tbody/tr/td[1]/a").ToList();
var IMDBLinks = doc.DocumentNode.SelectNodes("//*[@id='main']/div/span/div/div/div[3]/table/tbody/tr/td[2]/a").ToList();
Parallel.ForEach(MovieNames, (name, state, index) =>
{
if (index > 0 && index < 99)
{
AllFoundMovies.Add(new MovieTVInformation());
var TempName = name.InnerText;
TempName = AdjustName(TempName, Convert.ToInt32(index));
AllFoundMovies[Convert.ToInt32(index)].Name = TempName;
}
});
return AllFoundMovies;
}
My issue is that the index goes out of bounds every time giving a negative error index error. I have added an IF statement to see if that would stop it going out of bounds (as the list only has 100 entries) and still no success.
Could anyone let me know what i could be doing wrong,
thanks