0

i'm trying the load a page to may asp.net web form and extract only the text from it and display the extracted text in an Areatext

like this:

and my code is:

<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>

<!DOCTYPE html>

<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title></title>
    <style type="text/css">
        #form1 {
            height: 500px;
            width: 1199px;
        }
        .auto-style1 {}
        #TextArea1 {
            height: 288px;
            width: 1157px;
        }
    </style>
</head>
<body>

    <form id="form1" runat="server">
        <asp:Button ID="Button1" runat="server"  Text="Clike me" 
                    OnClick="Button1_Click" OnClientClick="aspnetForm.target ='_blank';"        
                    Width="160px" CssClass="auto-style1" Height="32px" />
        <br />
        <br />
        <asp:RadioButtonList ID="RadioButtonList1" runat="server">
            <asp:ListItem>CNN</asp:ListItem>
            <asp:ListItem>BBC</asp:ListItem>
            <asp:ListItem>FOX</asp:ListItem>
        </asp:RadioButtonList>
        <br />
        <br />
        <textarea id="TextArea1" name="S1" runat="server" ></textarea></form>
</body>
</html>

and

    using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.IO;
using System.Drawing;
using System.Threading;
using System.Windows.Forms;

public partial class _Default : System.Web.UI.Page
{
    Uri url = null;
    WebBrowser wb = new WebBrowser();

    protected void Button1_Click(object sender, EventArgs e)
    {

        wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DisplayText);

        if (RadioButtonList1.Text == "CNN")
        {
            url = new Uri("http://www.edition.cnn.com/");
            wb.Url = url;
            //Response.Redirect(url);
        }
        else if (RadioButtonList1.Text == "BBC")
        {
            url = new Uri("http://www.bbc.com/");
            wb.Url = url;
        }
        else
        {
            url = new Uri("http://www.foxnews.com/");
          wb.Url = url;
        }

    }

    private void DisplayText(object sender, WebBrowserDocumentCompletedEventArgs e)
    {

        WebBrowser wb = (WebBrowser)sender;

        wb.Document.ExecCommand("SelectAll", false, null);

        wb.Document.ExecCommand("Copy", false, null);

        TextArea1.Value = Clipboard.GetText();

    }


    protected void Page_Load(object sender, EventArgs e)
    {

    }

}

but i have this error in line

 WebBrowser wb = new WebBrowser();

ActiveX control '8856f961-340a-11d0-a96b-00c04fd705a2' cannot be instantiated because the current thread is not in a single-threaded apartment.

so what i'm doing wrong pleas help and many thanks in advance

Fadi
  • 2,320
  • 8
  • 38
  • 77

3 Answers3

0

I have never attempted to use WebBrowser in an object-reference, but I know that this being a web-Form means you will be receiving post backs, and if you re-instantiate the Browser reference each time, it isn't going to operate like the Page object. I would just use the Page object, you can collect any controls and methods needed, while also utilizing Request/Response namespaces. I would also match on the radiobuttonlist control like the code below:

 protected void Page_Load(object sender, EventArgs e)
    {
        if (Page.IsPostBack) 
        {
             string url;
             RadioButtonList rdl = new RadioButtonList();
             url = rdl.SelectedItem.Text; 
        }  
    }

Of course you'd just grab the .SelectedItem.Text from your markup-based RadioButtonList, instead of building one.

I checked, and it also seems like the WebBrowser object is under System.Windows.Forms. From my experience, you never want to use that Library in Web Forms (bad experiences with MsgBox).

I'd refactor using the sample above and just

Response.Redirect(url);

Hope that helps!

taig.Nate
  • 46
  • 3
0

You may want to consider using an approach based upon a different automation control such as WatiN (c.f. c# asp.net use Windows Forms WebBrowser) or something like the HTML Agility Pack (c.f. Best method for Website Automation?)

Community
  • 1
  • 1
JimMSDN
  • 484
  • 4
  • 16
0

You can use html agility pack. Here is a sample code, taken from here:

var root = doc.DocumentNode;
var sb = new StringBuilder();
foreach (var node in root.DescendantNodesAndSelf())
{
    if (!node.HasChildNodes)
    {
        string text = node.InnerText;
        if (!string.IsNullOrEmpty(text))
            sb.AppendLine(text.Trim());
    }
}

An example code to show how to download the web page, you can try the following code (taken from here):

HtmlDocument doc = new HtmlDocument();
 doc.Load("file.htm");
 foreach(HtmlNode link in doc.DocumentElement.SelectNodes("//a[@href"])
 {
    HtmlAttribute att = link["href"];
    att.Value = FixLink(att);
 }
 doc.Save("file.htm");
Community
  • 1
  • 1
deostroll
  • 11,661
  • 21
  • 90
  • 161