3

I am trying to use HTML agility pack to get my program to read in a file and get all the image srcs from it. Heres what I got so far:

 private ArrayList GetImageLinks(String html,String link)
    {
        //link = url of webpage
        //html = a string of the html, just for testing will remove after

        HtmlAgilityPack.HtmlDocument htmlDoc = new HtmlAgilityPack.HtmlDocument();
        htmlDoc.OptionFixNestedTags = true;
        htmlDoc.Load(link);
        List<String> imgs = (from x in htmlDoc.DocumentNode.Descendants()
                             where x.Name.ToLower() == "img"
                             select x.Attributes["src"].Value).ToList<String>();

        Console.Out.WriteLine("Hey");
        ArrayList imageLinks = new ArrayList(imgs);


        foreach (String element in imageLinks)
        {
            Console.WriteLine(element);

        }

        return imageLinks;
    }

And this is the error im getting: System.ArgumentException: URI formats are not supported.

R00059159
  • 171
  • 1
  • 7
  • 13
  • 1
    Is that error on the Load line? [**If so then you should download the content and then load it there, like this answer.**](http://stackoverflow.com/a/10558196/342740) – Prix Nov 25 '13 at 05:08
  • namespace HtmlWeb can not be found? – R00059159 Nov 25 '13 at 05:24
  • https://github.com/jstedfast/HtmlAgilityPack/blob/master/HtmlAgilityPack/HtmlWeb.Xpath.cs – Prix Nov 25 '13 at 05:47

1 Answers1

6
HtmlDocument docHtml = new HtmlWeb().Load(url);
whoan
  • 8,143
  • 4
  • 39
  • 48
puru
  • 59
  • 1
  • 2