Something dosent work good with the text() function why ? For example if im going to www.walla.co.il in internet explorer and view the source i see that the first url address there is http://www.w3.org
But in textBox2 i see the first url address as: http://www.yad1.yad2.co.il
And i want to parse all the url's from the site and show them in textBox2 In textBox1 i just show thew hole content.
This is the code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
namespace WebCrawler
{
public partial class Form1 : Form
{
StreamWriter sw;
string url = "http://www.walla.co.il";
StringBuilder sb;
HttpWebRequest req;
HttpWebResponse res;
public Form1()
{
InitializeComponent();
sw = new StreamWriter(@"d:\text.txt");
req = (HttpWebRequest)WebRequest.Create(url);
sb = new StringBuilder();
WebRequestGetExample("http://www.walla.co.il");
text();
}
private void Form1_Load(object sender, EventArgs e)
{
}
public string WebRequestGetExample(string url)
{
//validateUrl(url);
req.Method = "Get";
res = (HttpWebResponse)req.GetResponse();
// Display the status.
Console.WriteLine(res.StatusDescription);
// Get the stream containing content returned by the server.
Stream dataStream = res.GetResponseStream();
// Open the stream using a StreamReader for easy access.
StreamReader reader = new StreamReader(dataStream);
// Read the content.
string t = reader.ReadToEnd();
// Display the content.
textBox1.Text = t;
// Cleanup the streams and the response.
reader.Close();
dataStream.Close();
res.Close();
return t;
}
/* public void text()
{
string val;
StringBuilder sb = new StringBuilder();
Regex hrefs = new Regex("<a href.*?>");
Regex http = new Regex("http:.*?>");
foreach (Match m in hrefs.Matches(textBox1.Text))
{
//sb.Append(m.ToString());
if (http.IsMatch(m.ToString()))
{
val = http.Match(m.ToString()).ToString();
int end = val.IndexOf('"');
if (end > 0)
sb.Append(val.Substring(0, end));
//sb.Append("<br>");
}
else
{
val = http.Match(m.ToString()).ToString();
sb.Append(val);
//sb.Append(m.ToString().Substring(1, m.ToString().Length - 1) + "<br>");
}
textBox2.Text = sb.ToString();
}
}*/
public void text()
{
StringBuilder sb = new StringBuilder();
Regex hrefs = new Regex("<a href.*?>");
Regex http = new Regex("http:.*?>");
foreach (Match m in hrefs.Matches(textBox1.Text))
{
//sb.Append(m.ToString());
if (http.IsMatch(m.ToString()))
{
string val = http.Match(m.ToString()).ToString();
int end = val.IndexOf('"');
sb.Append(end > 0 ? val.Substring(0, end) : val);
//if (end > 0)
// sb.Append(val.Substring(0, end));
//else
// sb.Append(val);
//sb.Append("<br>");
}
else
{
//sb.Append(m.ToString().Substring(1, m.ToString().Length - 1) + "<br>");
}
textBox2.Text = sb.ToString();
sw.WriteLine(sb.ToString());
}
sw.Close();
}
}
}