I have created a program that downloads links (from a web page) into a htm file. What I am hoping to do is test each one of those links within the htm file and output any links that are broken. Unfortunately not all of the downloaded links start with "http://" so I tried to avoid this problem by using an if statement. How can I read all links into an Array and then loop through that array with async web requests and responses.
private async void button4_Click(object sender, EventArgs e)
{
string text = System.IO.File.ReadAllText(@"C:\\Users\\Conal_Curran\\OneDrive\\C#\\MyProjects\\Web Crawler\\URLTester\\OP.htm");
List<string> stringlist = new List<string>();
stringlist.Add(text);
if (!text.StartsWith("http://"))
{
foreach (string line in stringlist)
{
var request = WebRequest.Create(text);
var response = (HttpWebResponse)await Task.Factory
.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, null);
Debug.Assert(response.StatusCode == HttpStatusCode.OK);
if (response == null)
{
BrokenLinks.Text = text;
}
else
{
BrokenLinks.Text = "All URLS Are OK";
}
}
}
Regex to parse the html file:
string text = System.IO.File.ReadAllText(@"C:\\Users\\Conal_Curran\\OneDrive\\C#\\MyProjects\\Web Crawler\\URLTester\\OP.htm");
string regex = "href=\"(.*)\"";
Match match = Regex.Match(text, regex);
if (match.Success)
{
string link = match.Groups[1].Value;
Console.WriteLine(link);
MessageBox.Show("Going over URLS now Please stand by.");
var request = WebRequest.Create(link);
var response = (HttpWebResponse)await Task.Factory
.FromAsync<WebResponse>(request.BeginGetResponse, request.EndGetResponse, null);
Debug.Assert(response.StatusCode == HttpStatusCode.OK);
if (response == null)
{
BrokenLinks.Text = text;
label2.ForeColor = System.Drawing.Color.Red;
}
else
{
BrokenLinks.Text = "All URLS Are OK";
label2.ForeColor = System.Drawing.Color.Green;
}
}