0

I'm making a tool for downloading images from the internet concurrently using a List<Uri> and the WebClient class. Here is the relevant code:

The new WebClient that I am using:

/// <summary>
/// A <see cref="WebClient"/> whose underlying requests never time out.
/// NOTE(review): <see cref="WebRequest.Timeout"/> only affects synchronous
/// calls such as DownloadData; the *Async methods ignore it — confirm this
/// is the intended behavior before relying on it.
/// </summary>
public class PatientWebClient : WebClient
{
    protected override WebRequest GetWebRequest(Uri uri)
    {
        // Let the base class build the request, then disable its timeout.
        var request = base.GetWebRequest(uri);
        request.Timeout = Timeout.Infinite;
        return request;
    }
}

and the download methods:

    /// <summary>
    /// Kicks off an asynchronous download for every image in _images that is
    /// not already present in _destinationFolder. Completion (saving the file
    /// and disposing the client) is handled by DownloadCompleted.
    /// </summary>
    public static void DownloadFiles()
    {
        // Walk the list from wherever the shared cursor left off.
        // NOTE(review): _count is never reset, so a second call only
        // processes images appended since the last call — confirm intended.
        while (_count < _images.Count())
        {
            Uri imageUri = _images[_count];
            string filename = imageUri.Segments.Last();

            // Only allocate a client for files we actually need; the original
            // created (and leaked) a subscribed client even for skipped files.
            if (!File.Exists(_destinationFolder + @"\" + filename))
            {
                var client = new PatientWebClient();
                client.DownloadDataCompleted += DownloadCompleted;
                try
                {
                    // Fire-and-forget; the Uri is passed as user state so the
                    // completion handler can derive the save filename.
                    client.DownloadDataAsync(imageUri, imageUri);
                }
                catch (Exception ex)
                {
                    // DownloadDataAsync can throw synchronously (bad scheme,
                    // busy client); release the client since no completion
                    // callback will ever fire for it.
                    Console.WriteLine(ex.ToString());
                    client.Dispose();
                }
            }
            ++_count;
        }
    }

    /// <summary>
    /// Completion handler for DownloadDataAsync: saves the downloaded bytes
    /// under the file name taken from the request Uri (passed as user state),
    /// then tears down the WebClient that raised the event.
    /// </summary>
    private static void DownloadCompleted(object sender, DownloadDataCompletedEventArgs e)
    {
        var client = (WebClient)sender;
        try
        {
            if (e.Error != null)
            {
                // Surface failures instead of silently dropping them — the
                // original ignored e.Error, which is why files went missing
                // until the download button was clicked again.
                Console.WriteLine(e.Error.ToString());
                return;
            }

            Uri uri = (Uri)e.UserState;
            string saveFilename = uri.Segments.Last();

            // Default to .jpg when the URL lacks a known image extension;
            // ordinal ignore-case also accepts ".JPG" etc.
            if (!saveFilename.EndsWith(".jpg", StringComparison.OrdinalIgnoreCase) &&
                !saveFilename.EndsWith(".png", StringComparison.OrdinalIgnoreCase) &&
                !saveFilename.EndsWith(".gif", StringComparison.OrdinalIgnoreCase))
            {
                saveFilename += ".jpg";
            }

            File.WriteAllBytes(_destinationFolder + @"\" + saveFilename, e.Result);

            // Completion handlers run on worker threads; increment atomically.
            Interlocked.Increment(ref _downloadedCounter);
        }
        finally
        {
            // Always release the client — the original leaked every client
            // whose download failed — and unsubscribe to avoid keeping the
            // handler's target alive.
            client.DownloadDataCompleted -= DownloadCompleted;
            client.Dispose();
        }
    }

The issue is that not all of the images from the list _images are being downloaded. If I click the download button a second time more will be downloaded and it actually takes a few clicks to bring everything down. Are the WebClients timing out, and if so is there a way to get them to automatically retry the download? If not, what is the proper way to go about resolving this issue?

abatishchev
  • 98,240
  • 88
  • 296
  • 433
  • Try extending WebClient with a timeout — WebClient doesn't support timeouts out of the box. http://stackoverflow.com/questions/1789627/how-to-change-the-timeout-on-a-net-webclient-object . You also ignore the case where e.Error != null — what happens there? –  Jun 30 '16 at 00:27
  • @Stanley could you clarify your question? – shiitake_the_mushroom Jun 30 '16 at 00:35
  • Add some logging or `Console.WriteLine` statements so you know what is actually happening. Write out your counters, your file names, your url's and (as @Stanley suggests) and Error from the event args. Also, try adding a `fileString.Flush()` to the end of each `DownloadCompleted` call. – mdisibio Jun 30 '16 at 00:46
  • @mdisibio did you mean `fileStream.Flush()`? – shiitake_the_mushroom Jun 30 '16 at 01:21
  • synchronize _destinationfolder and _downloadcounter –  Jun 30 '16 at 01:23
  • @Stanley What do you mean by synchronize? – shiitake_the_mushroom Jun 30 '16 at 01:31
  • using lock, because both variables are accessed from multiple threads –  Jun 30 '16 at 01:39
  • Another note: the timeout in MyWebClient also does not work with DownloadDataAsync — only with DownloadData. WebClient is not really good with timeouts. –  Jun 30 '16 at 01:43
  • @shiitake_the_mushroom Yes. Yes I did. – mdisibio Jun 30 '16 at 15:34

2 Answers2

1

I mean something like this: set a timeout on the WebClient and catch the error:

  internal class Program
  {
    private static void Main(string[] args)
    {
      // Two sample targets; substitute the real image list here.
      Uri[] targets = {new Uri("http://www.google.com"), new Uri("http://www.yahoo.com")};

      // Download in parallel; each worker creates and disposes its own client.
      Parallel.ForEach(targets, target =>
      {
        using (var client = new MyWebClient())
        {
          try
          {
            byte[] payload = client.DownloadData(target);
            // Success, do something with your data
          }
          catch (Exception ex)
          {
            // Something is wrong...
            Console.WriteLine(ex.ToString());
          }
        }
      });
    }
  }

  /// <summary>
  /// A <see cref="WebClient"/> with a fixed 5-second request timeout.
  /// </summary>
  public class MyWebClient : WebClient
  {
    protected override WebRequest GetWebRequest(Uri uri)
    {
      // Build the request via the base class, then shorten its timeout.
      var request = base.GetWebRequest(uri);
      request.Timeout = 5000; // 5 seconds timeout
      return request;
    }
  }
  • 1
    The problem with the original post wasn't the timeout. The problem was kicking off an asynchronous download task without waiting for it to finish. Stanley's Parallel.ForEach approach is one way to solve that problem elegantly and efficiently. – Eugene Shvets Jun 30 '16 at 00:49
  • @EugeneShvets-MSFT I'm testing his suggested code's behavior now. Will update. – shiitake_the_mushroom Jun 30 '16 at 01:03
  • @EugeneShvets-MSFT This approach causes only about 30 files to be downloaded, while I was getting about 300 with my method. I'm also not getting any errors so I'm not sure what's going on. – shiitake_the_mushroom Jun 30 '16 at 01:19
  • The problem with your old code: after starting the download (client.DownloadDataAsync(_images[_count], _images[_count]);), the WebClient is disposed without waiting for it to finish. –  Jun 30 '16 at 01:20
  • @Stanley I've updated the original post and I'm currently testing it. The `WebClient` now is not disposed of until after the file is downloaded. Your thoughts would be greatly appreciated. – shiitake_the_mushroom Jun 30 '16 at 01:28
  • In your own code, add check in DownloadCompleted if(e.Error != null) –  Jun 30 '16 at 01:56
1

If you still want to use that pattern, this one has no timeout; you would have to implement that yourself using a timer:

  internal class Program
  {
    // Count of completed downloads (success or failure), guarded by _syncObj.
    private static int _downloadCounter;
    private static readonly object _syncObj = new object();

    private static void Main(string[] args)
    {
      Uri[] targets = {new Uri("http://www.google.com"), new Uri("http://www.yahoo.com")};
      foreach (var target in targets)
      {
        // One client per download; it is torn down in the completion handler.
        var client = new WebClient();
        client.DownloadDataCompleted += OnWebClientDownloadDataCompleted;
        client.DownloadDataAsync(target);
      }
      // Keep the process alive while the asynchronous downloads run.
      Thread.Sleep(Timeout.Infinite);
    }

    private static void OnWebClientDownloadDataCompleted(object sender, DownloadDataCompletedEventArgs e)
    {
      if (e.Error != null)
      {
        // Error
        Console.WriteLine(e.Error.ToString());
      }
      else
      {
        // OK
        Console.WriteLine(Encoding.UTF8.GetString(e.Result));
      }

      // Handlers fire on worker threads, so guard the shared counter.
      lock (_syncObj)
      {
        _downloadCounter++;
        Console.WriteLine("Counter = {0}", _downloadCounter);
      }

      // Unsubscribe and dispose the client that raised this event.
      var client = sender as WebClient;
      if (client != null)
      {
        client.DownloadDataCompleted -= OnWebClientDownloadDataCompleted;
        client.Dispose();
      }
    }
  }
  • For the `_downloadedCounter` I think I will use `Interlocked.Increment(ref _downloadedCounter);` but as of now I'm marking this as the answer and will be doing more testing. Thanks for all your help and advice. – shiitake_the_mushroom Jun 30 '16 at 02:06
  • Sometimes this will help: ServicePointManager.DefaultConnectionLimit = int.MaxValue; — it raises the maximum number of connections per server, which matters if you download a lot of files from one server. –  Jun 30 '16 at 02:09