2

This is a question about a slight variation on Noseratio's code in this question: [link]How to cancel Task await after a timeout period

I am able to build his code exactly as-is to create a Console application that returns the URL and OuterHtml of each of the three web pages specified in the code. However, when I put the same code in a WinForms application, the only output I get is

URL:
http://example.com

That means that the code does not display the OuterHtml of the first page, and it does not display the URL or OuterHtml of either of the other two pages. What I would like to know is what about WinForms breaks the code and how to get around it.

I expected that since the code still creates a new STA, which creates a new thread, it should not matter that I am using a WinForm. I am using VS2013 Ultimate, .NET 4.5.1.

The WinForm consists of a single button. Clicking it is handled by private void button1_Click(object sender, EventArgs e), which has a body identical to Main in Noseratio's code.

using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using System.Windows.Forms;
using Microsoft.Win32;

namespace WebScraperAsync005
{
    public partial class Form1 : Form
    {

        // main logic
        static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
        {
            using (var apartment = new MessageLoopApartment())
            {
                // create WebBrowser inside MessageLoopApartment
                var webBrowser = apartment.Invoke(() => new WebBrowser());
                try
                {
                    foreach (var url in urls)
                    {
                        Console.WriteLine("URL:\n" + url);

                        // cancel in 30s or when the main token is signalled
                        var navigationCts =  CancellationTokenSource.CreateLinkedTokenSource(token);

                          navigationCts.CancelAfter((int)TimeSpan.FromSeconds(30).TotalMilliseconds);
                        var navigationToken = navigationCts.Token;

                        // run the navigation task inside MessageLoopApartment
                        string html = await apartment.Run(() =>
                        webBrowser.NavigateAsync(url, navigationToken), navigationToken);

                        Console.WriteLine("HTML:\n" + html);
                    }
                }
                finally
                {
                    // dispose of WebBrowser inside MessageLoopApartment
                    apartment.Invoke(() => webBrowser.Dispose());
                }
            }
        }





        public Form1()
        {
            InitializeComponent();
        }

        private void button1_Click(object sender, EventArgs e)
        {
            try
            {
                WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

                var cts = new   CancellationTokenSource((int)TimeSpan.FromMinutes(3).TotalMilliseconds);

                var task = ScrapSitesAsync(
                    new[] { "http://example.com", "http://example.org",   "http://example.net" },
                    cts.Token);

                task.Wait();

                Console.WriteLine("Press Enter to exit...");
                Console.ReadLine();
            }
            catch (Exception ex)
            {
                while (ex is AggregateException && ex.InnerException != null)
                    ex = ex.InnerException;
                Console.WriteLine(ex.Message);
                Environment.Exit(-1);
            }
        }
    }



    /// <summary>
    /// WebBrowserExt - WebBrowser extensions
    /// by Noseratio - https://stackoverflow.com/a/22262976/1768303
    /// </summary>
    public static class WebBrowserExt
    {
        const int POLL_DELAY = 500;

        // navigate and download 
        public static async Task<string> NavigateAsync(this WebBrowser webBrowser,     string url, CancellationToken token)
        {
            // navigate and await DocumentCompleted
            var tcs = new TaskCompletionSource<bool>();
            WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

            using (token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: true))
            {
                webBrowser.DocumentCompleted += handler;
                try
                {
                    webBrowser.Navigate(url);
                    await tcs.Task; // wait for DocumentCompleted
                }
                finally
                {
                    webBrowser.DocumentCompleted -= handler;
                }
            }

            // get the root element
            var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

            // poll the current HTML for changes asynchronosly
            var html = documentElement.OuterHtml;
            while (true)
            {
                // wait asynchronously, this will throw if cancellation requested
                await Task.Delay(POLL_DELAY, token);

                // continue polling if the WebBrowser is still busy
                if (webBrowser.IsBusy)
                    continue;

                var htmlNow = documentElement.OuterHtml;
                if (html == htmlNow)
                    break; // no changes detected, end the poll loop

                html = htmlNow;
            }

            // consider the page fully rendered 
            token.ThrowIfCancellationRequested();
            return html;
        }

        // enable HTML5 (assuming we're running IE10+)
        // more info: https://stackoverflow.com/a/18333982/1768303
        public static void SetFeatureBrowserEmulation()
        {
            if (System.ComponentModel.LicenseManager.UsageMode !=     System.ComponentModel.LicenseUsageMode.Runtime)
                return;
            var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileNa    me);
            Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
                appName, 10000, RegistryValueKind.DWord);
        }
    }

    /// <summary>
    /// MessageLoopApartment
    /// STA thread with message pump for serial execution of tasks
    /// by Noseratio - https://stackoverflow.com/a/22262976/1768303
    /// </summary>
    public class MessageLoopApartment : IDisposable
    {
        Thread _thread; // the STA thread

        TaskScheduler _taskScheduler; // the STA thread's task scheduler

        public TaskScheduler TaskScheduler { get { return _taskScheduler; } }

        /// <summary>MessageLoopApartment constructor</summary>
        public MessageLoopApartment()
        {
            var tcs = new TaskCompletionSource<TaskScheduler>();

            // start an STA thread and gets a task scheduler
            _thread = new Thread(startArg =>
            {
                EventHandler idleHandler = null;

                idleHandler = (s, e) =>
                {
                    // handle Application.Idle just once
                    Application.Idle -= idleHandler;
                    // return the task scheduler
                    tcs.SetResult(TaskScheduler.FromCurrentSynchronizationContext());
                };

                // handle Application.Idle just once
                // to make sure we're inside the message loop
                // and SynchronizationContext has been correctly installed
                Application.Idle += idleHandler;
                Application.Run();
            });

            _thread.SetApartmentState(ApartmentState.STA);
            _thread.IsBackground = true;
            _thread.Start();
            _taskScheduler = tcs.Task.Result;
        }

        /// <summary>shutdown the STA thread</summary>
        public void Dispose()
        {
            if (_taskScheduler != null)
            {
                var taskScheduler = _taskScheduler;
                _taskScheduler = null;

                // execute Application.ExitThread() on the STA thread
                Task.Factory.StartNew(
                    () => Application.ExitThread(),
                    CancellationToken.None,
                    TaskCreationOptions.None,
                    taskScheduler).Wait();

                _thread.Join();
                _thread = null;
            }
        }

        /// <summary>Task.Factory.StartNew wrappers</summary>
        public void Invoke(Action action)
        {
            Task.Factory.StartNew(action,
                CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Wait();
        }

        public TResult Invoke<TResult>(Func<TResult> action)
        {
            return Task.Factory.StartNew(action,
                CancellationToken.None, TaskCreationOptions.None, _taskScheduler).Result;
        }

        public Task Run(Action action, CancellationToken token)
        {
            return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler);
        }

        public Task<TResult> Run<TResult>(Func<TResult> action, CancellationToken token)
        {
            return Task.Factory.StartNew(action, token, TaskCreationOptions.None,     _taskScheduler);
        }

        public Task Run(Func<Task> action, CancellationToken token)
        {
            return Task.Factory.StartNew(action, token, TaskCreationOptions.None, _taskScheduler).Unwrap();
        }

        public Task<TResult> Run<TResult>(Func<Task<TResult>> action, CancellationToken token)
        {
            return Task.Factory.StartNew(action, token, TaskCreationOptions.None,     _taskScheduler).Unwrap();
        }
    }


}
Community
  • 1
  • 1
Jacob Quisenberry
  • 1,131
  • 3
  • 20
  • 48
  • http://stackoverflow.com/questions/16715620/how-should-i-properly-invoke-a-webbrowser-using-multiplethreads or http://stackoverflow.com/questions/18428616/webbrowser-navigating-function-doesnt-work-and-handlers-are-not-called – L.B Jun 14 '14 at 20:24

1 Answers1

3

The code you linked was designed for use inside a console app or Windows service. When you use it from a WinForms app, you're experiencing a deadlock caused by task.Wait() here:

var task = ScrapSitesAsync(
    new[] { "http://example.com", "http://example.org", "http://example.net" },
    cts.Token);

task.Wait();

To understand the nature of this deadlock, refer to Stephen Cleary's "Don't Block on Async Code".

The easiest workaround (but not the right one) might be to add ConfigureAwait(false) here:

// run the navigation task inside MessageLoopApartment
string html = await apartment.Run(() =>
    webBrowser.NavigateAsync(url, navigationToken), navigationToken).ConfigureAwait(false);

However, the right fix would be to get rid of MessageLoopApartment altogether, it's redundant for a WinForms app which already had its own message loop. Then, use await task instead of task.Wait().

This way, the code actually gets simpler:

namespace WebScraperAsync005
{
    public partial class Form1 : Form
    {

        // main logic
        static async Task ScrapSitesAsync(string[] urls, CancellationToken token)
        {
            using(var webBrowser = new WebBrowser())
            {
                foreach (var url in urls)
                {
                    Console.WriteLine("URL:\n" + url);

                    // cancel in 30s or when the main token is signalled
                    var navigationCts = CancellationTokenSource.CreateLinkedTokenSource(token);

                    navigationCts.CancelAfter((int)TimeSpan.FromSeconds(30).TotalMilliseconds);
                    var navigationToken = navigationCts.Token;

                    // run the navigation task inside MessageLoopApartment
                    string html = await webBrowser.NavigateAsync(url, navigationToken);

                    Console.WriteLine("HTML:\n" + html);
                }
            }
        }

        public Form1()
        {
            InitializeComponent();
        }

        private async void button1_Click(object sender, EventArgs e)
        {
            this.button1.Enabled = false;
            try
            {
                WebBrowserExt.SetFeatureBrowserEmulation(); // enable HTML5

                var cts = new CancellationTokenSource((int)TimeSpan.FromMinutes(3).TotalMilliseconds);

                await ScrapSitesAsync(
                    new[] { "http://example.com", "http://example.org", "http://example.net" },
                    cts.Token);

                MessageBox.Show("Completed.");
            }
            catch (Exception ex)
            {
                while (ex is AggregateException && ex.InnerException != null)
                    ex = ex.InnerException;
                MessageBox.Show(ex.Message);
            }
            this.button1.Enabled = true;
        }
    }

    /// <summary>
    /// WebBrowserExt - WebBrowser extensions
    /// by Noseratio - http://stackoverflow.com/a/22262976/1768303
    /// </summary>
    public static class WebBrowserExt
    {
        const int POLL_DELAY = 500;

        // navigate and download 
        public static async Task<string> NavigateAsync(this WebBrowser webBrowser, string url, CancellationToken token)
        {
            // navigate and await DocumentCompleted
            var tcs = new TaskCompletionSource<bool>();
            WebBrowserDocumentCompletedEventHandler handler = (s, arg) =>
            tcs.TrySetResult(true);

            using (token.Register(() => tcs.TrySetCanceled(), useSynchronizationContext: true))
            {
                webBrowser.DocumentCompleted += handler;
                try
                {
                    webBrowser.Navigate(url);
                    await tcs.Task; // wait for DocumentCompleted
                }
                finally
                {
                    webBrowser.DocumentCompleted -= handler;
                }
            }

            // get the root element
            var documentElement = webBrowser.Document.GetElementsByTagName("html")[0];

            // poll the current HTML for changes asynchronosly
            var html = documentElement.OuterHtml;
            while (true)
            {
                // wait asynchronously, this will throw if cancellation requested
                await Task.Delay(POLL_DELAY, token);

                // continue polling if the WebBrowser is still busy
                if (webBrowser.IsBusy)
                    continue;

                var htmlNow = documentElement.OuterHtml;
                if (html == htmlNow)
                    break; // no changes detected, end the poll loop

                html = htmlNow;
            }

            // consider the page fully rendered 
            token.ThrowIfCancellationRequested();
            return html;
        }

        // enable HTML5 (assuming we're running IE10+)
        // more info: http://stackoverflow.com/a/18333982/1768303
        public static void SetFeatureBrowserEmulation()
        {
            if (System.ComponentModel.LicenseManager.UsageMode !=     System.ComponentModel.LicenseUsageMode.Runtime)
                return;
            var appName = System.IO.Path.GetFileName(System.Diagnostics.Process.GetCurrentProcess().MainModule.FileName);
            Registry.SetValue(@"HKEY_CURRENT_USER\Software\Microsoft\Internet Explorer\Main\FeatureControl\FEATURE_BROWSER_EMULATION",
                appName, 10000, RegistryValueKind.DWord);
        }
    }
}
noseratio
  • 59,932
  • 34
  • 208
  • 486
  • 1
    this code worked as I hoped. The article you linked to is a good primer on deadlocks in async code. I am sure to refer to it again as I work with Tasks more frequently. – Jacob Quisenberry Jun 16 '14 at 02:41