I am trying to download url content using the following method.
public static async Task<string> getURL(string link)
{
string result = "";
using (HttpClient client = new HttpClient())
using (HttpResponseMessage response = await client.GetAsync(link))
using (HttpContent content = response.Content)
{
result = await content.ReadAsStringAsync();
}
return result;
}
Previously I was directly loading the url through HtmlWeb.Load() method of HtmlAgilitypack. but it takes a lot of time, and i cannot put the code in a parallel for or foreach loop (there are a number of exceptions thrown, and the program ends after a few hundred iterations. I tried even 3 parallel threads, with no improvement). After searching on the internet, i found that writing own url download method might be a good idea. But I am not sure how can I pass one url and get url content from above method, which i copied from here Any ideas? Edit: Caller method is as follows
public static void Download(string link)
{
HtmlWeb htmlWeb = new HtmlWeb();
htmlWeb.OverrideEncoding = Encoding.UTF8;
HtmlAgilityPack.HtmlDocument document = new HtmlDocument();
document = htmlWeb.Load(getURL(link));
if(document != null)
{
if(document.DocumentNode.SelectSingleNode("//div[@class='urdu_results']") != null)
Console.WriteLine(link);
Console.WriteLine(count--);
{
if(document.DocumentNode.SelectNodes(".//div[@class='u']") != null && document.DocumentNode.SelectNodes(".//div[@class='r']") != null)
{
var uNodes = document.DocumentNode.SelectNodes(".//div[@class='u']");
var rNodes = document.DocumentNode.SelectNodes(".//div[@class='r']");
if(uNodes.Count == rNodes.Count)
{
for(int i=0;i<uNodes.Count;i++)
{
string u = uNodes[i].InnerText.Trim();
string r = rNodes[i].InnerText.Trim();
string word = u+"\t"+r;
if(!words.Contains(word))
{
File.AppendAllText(output, word+Environment.NewLine);
Console.WriteLine(r);
words.Add(word);
}
}
}
}
}
}
}