3

I am trying to use this example to get image URLs from http://www.nydailynews.com/cmlink/NYDN.Article.rss

but without success.

Could you help me find the correct ways to get all possible image URLs from an RSS feed item using the SyndicationItem class?

There is a draft solution here, but I guess there should be a more generic solution.

Thank you!

 List<RssFeedItem> rssItems = new List<RssFeedItem>();
                    Stream stream = e.Result;
                    XmlReader response = XmlReader.Create(stream);
                    SyndicationFeed feeds = SyndicationFeed.Load(response);
                    foreach (SyndicationItem f in feeds.Items)
                    {
                        RssFeedItem rssItem = new RssFeedItem();

                        rssItem.Description = f.Summary.Text;
foreach (SyndicationLink enclosure in f.Links.Where<SyndicationLink>(x => x.RelationshipType == "enclosure"))
                            {
                                Uri url = enclosure.Uri;
                                long length = enclosure.Length;
                                string mediaType = enclosure.MediaType;
                                rssItem.ImageLinks.Add(url.AbsolutePath);
                            }
}
Community
  • 1
  • 1
NoWar
  • 36,338
  • 80
  • 323
  • 498
  • There is an element do you want url from that part or you want url from ? – Habib May 10 '12 at 17:46
  • @Habib.OSU The main problem is that I am confused because there are a lot of ways to represent image URLs under RSS feed item. So I try to make some code to get images from different places of the feed item. Now I have to get it from – NoWar May 10 '12 at 17:49

4 Answers4

5

I found the solution.

foreach (SyndicationElementExtension extension in f.ElementExtensions)
{
    XElement element = extension.GetObject<XElement>();

    if (element.HasAttributes)
    {
        foreach (var attribute in element.Attributes())
        {
            string value = attribute.Value.ToLower();
            if (value.StartsWith("http://") && (value.EndsWith(".jpg") || value.EndsWith(".png") || value.EndsWith(".gif") ))
            {
                   rssItem.ImageLinks.Add(value); // Add here the image link to some array
             }
        }                                
    }                            
}
NoWar
  • 36,338
  • 80
  • 323
  • 498
2
XDocument xDoc = XDocument.Load("http://www.nydailynews.com/cmlink/NYDN.Article.rss");
XNamespace media = XNamespace.Get("http://search.yahoo.com/mrss/");

var images = xDoc.Descendants(media+"content")
    .Where(m=>m.Attribute("type").Value=="image/jpeg")
    .Select(m=>m.Attribute("url").Value)
    .ToArray();

--EDIT--

var images = feeds.Items
     .SelectMany(i => i.ElementExtensions
                       .Select(e => e.GetObject<XElement>().Attribute("url").Value)
                )
     .ToArray();
L.B
  • 114,136
  • 19
  • 178
  • 224
  • 1
    Thanks! But how could it be done via SyndicationItem? I am just trying to use the Microsoft class for it... – NoWar May 10 '12 at 17:51
2

Gets a list of image URLs from a string:

var text = "your text with image links";
Regex regx = new Regex("http://([\\w+?\\.\\w+])+([a-zA-Z0-9\\~\\!\\@\\#\\$\\%\\^\\&amp;\\*\\(\\)_\\-\\=\\+\\\\\\/\\?\\.\\:\\;\\'\\,]*)?.(?:jpg|bmp|gif|png)", RegexOptions.IgnoreCase);
MatchCollection mactches = regx.Matches(text);
L.Barral
  • 313
  • 3
  • 10
  • It's not a good idea to use regex in big files. Thus iteration over element that has been suggested in accepted answer is much better. – scriptmonster Nov 18 '13 at 12:25
0

Another way, which I used, is to parse the HTML code:

Public Function GetImagesUrlListromHtmlCode(strHtmlPageCode As String) As List(Of String)
        Dim regexPattern As String = "\bhttps?:[^)''" & Chr(34) & "]+\.(?:ase|art|bmp|blp|cd5|cit|cpt|cr2|cut|dds|dib|djvu|egt|exif|gif|gpl|grf|icns|ico|iff|jng|jpeg|jpg|jfif|jp2|jps|lbm|max|miff|mng|msp|nitf|ota|pbm|pc1|pc2|pc3|pcf|pcx|pdn|pgm|PI1|PI2|PI3|pict|pct|pnm|pns|ppm|psb|psd|pdd|psp|px|pxm|pxr|qfx|raw|rle|sct|sgi|rgb|int|bw|tga|tiff|tif|vtf|xbm|xcf|xpm|3dv|amf|ai|awg|cgm|cdr|cmx|dxf|e2d|egt|eps|fs|gbr|odg|svg|stl|vrml|x3d|sxd|v2d|vnd|wmf|emf|art|xar|png|webp|jxr|hdp|wdp|cur|ecw|iff|lbm|liff|nrrd|pam|pcx|pgf|sgi|rgb|rgba|bw|int|inta|sid|ras|sun|tga)(?![a-z])"
        Dim matchedImage As Match
        Dim intImageFound As Integer
        Dim GetImagesUrlListromHtmlCodeTemp As New List(Of String)
        Dim strMsgException As String = "Erreur dans le module GetImagesUrlListromHtmlCode"

        Try
            For Each matchedImage In Regex.Matches(strHtmlPageCode, regexPattern)
                GetImagesUrlListromHtmlCodeTemp.Add(Replace(Replace(matchedImage.Value, Chr(34), ""), "=", ""))
                intImageFound = intImageFound + 1
                
            Next

            Return GetImagesUrlListromHtmlCodeTemp.Distinct().ToList

        Catch ex As Exception            
            Return Nothing
            Exit Try

        End Try

End Function
8oris
  • 320
  • 2
  • 12