0
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Windows.Forms;
using System.Net;
using System.IO;
using HtmlAgilityPack;
using mshtml;
using System.Text.RegularExpressions;

namespace Extract_Images
{
    public partial class Form1 : Form
    {
        private string[] linkstoextract;
        private int numberoflinks;
        private int currentLinkNumber = 0;
        private string mainlink;
        private WebClient client;
        private WebBrowser webBrowser1;
        private string htmlCode;
        private bool pagesorimages = false;

        public Form1()
        {
            InitializeComponent();

            webBrowser1 = new WebBrowser();
            webBrowser1.ScriptErrorsSuppressed = true;
            webBrowser1.DocumentCompleted += webBrowser1_DocumentCompleted;
            label1.Text = "Number of links: ";
            mainlink = "http://www.test.com/";
            numberoflinks = 13;
            backgroundWorker1.RunWorkerAsync();

        }

        private void ProcessNextLink()
        {
            if (currentLinkNumber < numberoflinks)
            {
                currentLinkNumber++;
                string linktonav = mainlink + "index"+currentLinkNumber.ToString() + ".html";
                pagesorimages = false;
                backgroundWorker1.ReportProgress(0,currentLinkNumber);
                webBrowser1.Navigate(linktonav);

            }
        }

        int count = 0;
        void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            mshtml.HTMLDocument objHtmlDoc = (mshtml.HTMLDocument)webBrowser1.Document.DomDocument;
            string pageSource = objHtmlDoc.documentElement.innerHTML;

            List<string> links = new List<string>();

            string[] hrefs = this.webBrowser1.Document.Links.Cast<HtmlElement>()
             .Select(a => a.GetAttribute("href")).Where(h => h.Contains(".jpg")).ToArray();

            foreach(string a in hrefs)
            {
                using (WebClient client = new WebClient())
                {
                    client.DownloadFile(a, @"C:\Images\file" + count + ".jpg");
                }
                count ++;

                pagesorimages = true;
                backgroundWorker1.ReportProgress(0, count);
            }
            //ProcessNextLink();
        }

        private void Form1_Load(object sender, EventArgs e)
        {

        }

        private void backgroundWorker1_DoWork(object sender, DoWorkEventArgs e)
        {           
                ProcessNextLink();
        }

        private void backgroundWorker1_ProgressChanged(object sender, ProgressChangedEventArgs e)
        {
            if (pagesorimages == false)
            {
                label1.Text = e.UserState.ToString();
            }
            if (pagesorimages == true)
            {
                label2.Text = e.UserState.ToString();
            }
        }

        private void backgroundWorker1_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
        {

        }
    }
}

The exception is on the second ReportProgress:

backgroundWorker1.ReportProgress(0, count);

This operation has already had OperationCompleted called on it and further calls are illegal

What i want to do is to report first the current page number in this case it's 1 to label1. And then to report the number of downloaded images in this page to label2.

Then to move to the next page with the method ProcessNextLink(); and again report the page number it should be 2 and then to report the number of images downloaded in page 2.

But i'm getting this exception already on the first page.

It was working fine without the backgroundworker in the event webBrowser1_DocumentCompleted i called ProcessNextLink(); in the bottom and it was working fine. But with the backgroundworker it's not working.

TheLost Lostit
  • 505
  • 6
  • 28
  • The code is quite wrong. Biggest issue is that the BGW doesn't do any real work, ProcessNextLink() takes at most a handful of *microseconds*. The DocumentCompleted event fires on the UI thread, not the worker thread. Where it inevitably dies on the ReportProgress() call, first thing it sees wrong is that the BGW is not running anymore. It isn't very clear why you use a worker thread at all, the only thing that might bog down your UI is the DownloadFile() call. Consider DownloadFileAsync() instead. Or don't put the WebBrowser on the form at all, not that pretty to look at. – Hans Passant Aug 27 '16 at 08:52
  • @HansPassant using the DownloadFileAsync helping a bit. Still when it's making the navigation to the page it's freezing the program for 2-3 seconds and then when downloading i see the mouse cursour working each image download. The webBrowser1 is not on the form. It's working the whole thing but not smooth. – TheLost Lostit Aug 27 '16 at 12:08
  • 1
    http://stackoverflow.com/a/4271581/17034 – Hans Passant Aug 27 '16 at 12:13

0 Answers0