I am currently reading in the HTML source from a list of URLs that uses JavaScript to load a specific span with a dynamic hyperlink that I need to extract. Everything works fine except for two small bugs that occur but can be dealt with during debugging:
When arriving at the
DocumentCompleted
event sometimes theDocument.Body
is nullWhen
t.Join()
is called sometimes the program will hang for a long period of time.public class WebProcessor { private string GeneratedSource { get; set; } private string URL { get; set; } public string GetGeneratedHTML(string url) { URL = url; Thread t = new Thread(new ThreadStart(WebBrowserThread)); t.SetApartmentState(ApartmentState.STA); t.Start(); t.Join(); return GeneratedSource; //When GetGeneratedHTML() is called more than once there is a chance the program //will hang indefinitely maybe even deadlock?? } private void WebBrowserThread() { WebBrowser wb = new WebBrowser(); wb.Navigate(URL); wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler( wb_DocumentCompleted); while (wb.ReadyState != WebBrowserReadyState.Complete) Application.DoEvents(); wb.Dispose(); } private void wb_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e) { if(((WebBrowser)sender).Document.Body != null) { GeneratedSource = ((WebBrowser)sender).Document.Body.InnerHtml; } else { //Handle when Document isn't fully loaded } } }