I am writing a web crawler that uses WebBrowser to render the JavaScript in web pages and then output the resulting HTML for scraping. The problem I'm running into is that the virtual browser windows that are opened don't appear to be closing, because after a few minutes of crawling I get this error:
Unhandled Exception: System.Runtime.InteropServices.COMException: The current process has used all of its system allowance of handles for Window Manager objects
I am writing this code in an MVC web project:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Threading;
namespace Abot.Demo
{
// Threaded version
public class HeadlessBrowser
{
private static string GeneratedSource { get; set; }
private static string URL { get; set; }
public static string GetGeneratedHTML(string url)
{
string result = null;
ThreadStart pumpMessages = () =>
{
EventHandler idleHandler = null;
idleHandler = (s, e) =>
{
Application.Idle -= idleHandler;
WebBrowser wb = new WebBrowser();
wb.DocumentCompleted += (s2, e2) =>
{
result = wb.Document.Body.InnerHtml;
wb.Dispose();
Application.Exit();
};
try
{
wb.Navigate(url);
}
catch(Exception ex)
{
Console.WriteLine(ex.ToString());
}
};
Application.Idle += idleHandler;
Application.Run();
};
if (Thread.CurrentThread.GetApartmentState() == ApartmentState.STA)
pumpMessages();
else
{
Thread t = new Thread(pumpMessages);
t.SetApartmentState(ApartmentState.STA);
t.Start();
t.Join();
}
return result;
}
private static void WebBrowserThread()
{
WebBrowser wb = new WebBrowser();
try
{
wb.Navigate(URL);
}
catch(Exception exc)
{
Console.WriteLine(exc);
}
wb.DocumentCompleted +=
new WebBrowserDocumentCompletedEventHandler(
wb_DocumentCompleted);
while (wb.ReadyState != WebBrowserReadyState.Complete);
//Added this line, because the final HTML takes a while to show up
GeneratedSource = wb.Document.Body.InnerHtml;
wb.Dispose();
wb.Stop();
}
private static void wb_DocumentCompleted(object sender,
WebBrowserDocumentCompletedEventArgs e)
{
WebBrowser wb = (WebBrowser)sender;
GeneratedSource = wb.Document.Body.InnerHtml;
}
}
}