1

Let's say I have an RSS feed (in XML format) that looks roughly like this:

<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/"
    xmlns:wfw="http://wellformedweb.org/CommentAPI/" xmlns:dc="http://purl.org/dc/elements/1.1/"
    xmlns:atom="http://www.w3.org/2005/Atom" xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
    xmlns:slash="http://purl.org/rss/1.0/modules/slash/">

    <channel>
        <title>MyFeed</title>
        <atom:link href="http://samplefeed.com/feed/" rel="self"
            type="application/rss+xml" />
        <link>http://samplefeed.com</link>
        <description></description>
        <lastBuildDate>Fri, 22 Sep 2017 22:43:51 +0000</lastBuildDate>
        <language>en-US</language>
        <sy:updatePeriod>hourly</sy:updatePeriod>
        <sy:updateFrequency>1</sy:updateFrequency>
        <generator>http://wordpress.org/?v=4.3.12</generator>
        <item>
            <title>A Good Product</title>
            <link>http://samplefeed.com/a-good-product/</link>
            <comments>http://samplefeed.com/a-good-product/#comments</comments>
            <pubDate>Wed, 20 Sep 2017 22:22:45 +0000</pubDate>
            <dc:creator><![CDATA[John Smith]]></dc:creator>
            <category><![CDATA[Entertainment]]></category>

            <guid isPermaLink="false">http://samplefeed.com/?p=9116</guid>
            <description>
                <![CDATA[<p>![CDATA[<p>9/22</p>
                <p>4K TV Samsung<br />
                Price: $500.00<br />
                Location: Walmart</p>
                ]]>
            </description>
        </item>
        <!-- More items -->
    </channel>
</rss>  

I used varren's suggestion, but it returns only the last entry instead of all of them:

/**
 * Downloads the RSS feed, parses it into a Jackson tree with {@code XmlMapper}
 * and prints that tree as a JSON string.
 *
 * <p>NOTE(review): as reported in this post, {@code XmlMapper.readTree} with the
 * Jackson version used here keeps only the last of several sibling elements that
 * share a name (such as repeated {@code <item>} elements), so the printed JSON
 * contains a single item.
 *
 * @param args unused
 * @throws MalformedURLException declared for callers; the helper itself reports a bad URL by returning {@code null}
 * @throws IOException if the feed cannot be opened, read or parsed
 */
public static void main(String[] args) throws MalformedURLException, IOException {
    XmlMapper xmlMapper = new XmlMapper(); // <- This is line 21
    ObjectMapper jsonMapper = new ObjectMapper();
    // The helper defined in this snippet is getInputStreamForURLData; the call
    // previously used a name that is not defined here.
    try (InputStream xml = getInputStreamForURLData("http://samplefeed.com/feed")) {
        if (xml == null) {
            // The helper signals failure with null; fail with a clear message
            // instead of handing null to the parser.
            throw new IOException("Could not open feed: http://samplefeed.com/feed");
        }
        String json = jsonMapper.writeValueAsString(xmlMapper.readTree(xml));
        System.out.println(json);
    }
}

/**
 * Opens an HTTP connection to the given URL and returns the response body stream.
 *
 * <p>The request is sent with a {@code Mozilla/5.0} User-Agent header. The URL and
 * the HTTP response code are printed to standard output.
 *
 * @param Url absolute HTTP(S) URL to fetch
 * @return the response body stream, which the caller must close, or {@code null}
 *         if the URL is malformed or the request fails (the stack trace is printed)
 */
public static InputStream getInputStreamForURLData(String Url) {
    HttpURLConnection httpConnection = null;

    try {
        URL url = new URL(Url);
        System.out.println("URL: " + Url);
        httpConnection = (HttpURLConnection) url.openConnection();
        httpConnection.setRequestProperty("User-Agent", "Mozilla/5.0");
        // Without timeouts an unresponsive server blocks this call indefinitely.
        httpConnection.setConnectTimeout(10 * 1000);
        httpConnection.setReadTimeout(10 * 1000);

        int responseCode = httpConnection.getResponseCode();
        System.out.println("Response Code : " + responseCode);

        return httpConnection.getInputStream();
    }
    catch (IOException e) {
        // MalformedURLException is a subclass of IOException, so one handler
        // covers both failure modes.
        e.printStackTrace();
        if (httpConnection != null) {
            // Release the connection; no stream is handed to the caller on failure.
            httpConnection.disconnect();
        }
        return null;
    }
}

pom.xml:

    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-annotations</artifactId>
        <version>2.9.1</version>
    </dependency>

    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-core</artifactId>
        <version>2.9.1</version>
    </dependency>

    <dependency>
        <groupId>org.codehaus.jackson</groupId>
        <artifactId>jackson-core-asl</artifactId>
        <version>1.7.9</version>
    </dependency>

    <!-- Kept on the same version as jackson-core, jackson-annotations and
         jackson-dataformat-xml; it was 2.8.10, which mixed two Jackson 2.x lines. -->
    <dependency>
        <groupId>com.fasterxml.jackson.core</groupId>
        <artifactId>jackson-databind</artifactId>
        <version>2.9.1</version>
    </dependency>

    <dependency>
        <groupId>org.codehaus.jackson</groupId>
        <artifactId>jackson-mapper-asl</artifactId>
        <version>1.7.9</version>
    </dependency>

    <dependency>
        <groupId>org.codehaus.jackson</groupId>
        <artifactId>jackson-xc</artifactId>
        <version>1.7.9</version>
    </dependency>

    <dependency>
        <groupId>com.fasterxml.jackson.dataformat</groupId>
        <artifactId>jackson-dataformat-xml</artifactId>
        <version>2.9.1</version>
    </dependency>

The problem now is that it only returns the last item in the RSS feed instead of all of them. It does produce JSON, but only the last entry is displayed.

Why doesn't it display all of the entries?

Can anyone suggest a different approach (e.g. how to do this using Rome or straight DOM)?

PacificNW_Lover
  • 4,746
  • 31
  • 90
  • 144
  • Please check my solution, it prints the desired output i.e. `item` as an array of objects and not as a single object. – JRG Sep 27 '17 at 06:27

4 Answers4

1

Someone else also had problems converting xml to json using Jackson and got only the last element: Converting xml to json using jackson

So instead of Jackson, you could try JSON in Java:

<dependency>
    <groupId>org.json</groupId>
    <artifactId>json</artifactId>
    <version>20170516</version>
</dependency>

Example code

import java.io.*;
import java.net.*;
import org.json.JSONObject;
import org.json.XML;
...
/**
 * Fetches the sample feed as a string, converts the XML to a {@code JSONObject}
 * and prints it with an indent of 4.
 *
 * @param args unused
 * @throws Exception if the conversion fails
 */
public static void main(String[] args) throws Exception {
    final String feedXml = readUrlToString("http://www.feedforall.com/sample.xml");
    final JSONObject feedJson = XML.toJSONObject(feedXml);
    System.out.println(feedJson.toString(4));
}

/**
 * Performs an HTTP GET on the given URL and returns the response body as a string.
 *
 * <p>The body is read line by line and every line is terminated with {@code "\n"}.
 * The request is sent with a {@code Mozilla/5.0} User-Agent header.
 *
 * @param url absolute HTTP(S) URL to fetch
 * @return the response body, or {@code null} if the URL is malformed or the
 *         request fails (the stack trace is printed)
 */
public static String readUrlToString(String url) {
    try {
        HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
        conn.setRequestProperty("User-Agent", "Mozilla/5.0");
        conn.setRequestMethod("GET");
        // setDoOutput(true) is for requests that send a body; a GET has none,
        // so the flag is deliberately not set.
        conn.setConnectTimeout(2 * 1000);
        conn.setReadTimeout(2 * 1000);

        // Decode explicitly as UTF-8 rather than with the platform default charset.
        // NOTE(review): assumes the feed is UTF-8 encoded - confirm for feeds that
        // declare another encoding.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            StringBuilder builder = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                builder.append(line).append("\n");
            }
            return builder.toString();
        }
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

See also: Quickest way to convert XML to JSON in Java

janih
  • 2,214
  • 2
  • 18
  • 24
  • Added line `conn.setRequestProperty("User-Agent", "Mozilla/5.0");`. Should work now on sites that block Java as an user agent. – janih Sep 27 '17 at 05:37
  • 1
    Since, your initial code helped me (e.g. recommending the JSON for Java library and providing the maven dependency), I will award you the 100 bounty points. – PacificNW_Lover Sep 27 '17 at 09:00
0

You can use Jackson with its XML dependencies:

compile 'com.fasterxml.jackson.core:jackson-databind:2.9.1'
compile 'com.fasterxml.jackson.core:jackson-core:2.9.1'
compile 'com.fasterxml.jackson.dataformat:jackson-dataformat-xml:2.9.1' // <= that

A simple use case looks like this (you can customize almost everything with custom POJOs and annotations, or manually map XML attributes/prefixes to JSON values):

/**
 * Minimal example that reads {@code rss.xml} from the classpath, parses it into a
 * Jackson tree with {@code XmlMapper} and prints the tree as JSON.
 */
public class Main4 {
  /**
   * Runs the conversion and prints the JSON to standard output.
   *
   * @param args unused
   * @throws Exception if the resource is missing or cannot be parsed
   */
  public static void main(String[] args) throws Exception {
    XmlMapper xmlMapper = new XmlMapper();
    ObjectMapper jsonMapper = new ObjectMapper();
    // try-with-resources closes the classpath stream once parsing is done.
    try (InputStream xml = Main4.class.getClassLoader()
            .getResourceAsStream("rss.xml")) {
      if (xml == null) {
        // getResourceAsStream returns null when the resource is not found.
        throw new IllegalStateException("rss.xml not found on the classpath");
      }
      String json = jsonMapper.writeValueAsString(xmlMapper.readTree(xml));
      System.out.println(json);
    }
  }
}

Results in:

{
  "version": "2.0",
  "channel": {
    "title": "MyFeed",
    "link": "http://samplefeed.com",
    "description": "",
    "lastBuildDate": "Fri, 22 Sep 2017 22:43:51 +0000",
    "language": "en-US",
    "updatePeriod": "hourly",
    "updateFrequency": "1",
    "generator": "http://wordpress.org/?v=4.3.12",
    "item": {
      "title": "A Good Product",
      "link": "http://samplefeed.com/a-good-product/",
      "comments": "http://samplefeed.com/a-good-product/#comments",
      "pubDate": "Wed, 20 Sep 2017 22:22:45 +0000",
      "creator": "John Smith",
      "category": "Entertainment",
      "guid": {
        "isPermaLink": "false",
        "": "http://samplefeed.com/?p=9116"
      },
      "description": "\n                <p>![CDATA[<p>9/22</p>\n                <p>4K TV Samsung<br />\n                Price: $500.00<br />\n                Location: Walmart</p>\n                \n            "
    }
  }
}
varren
  • 14,551
  • 2
  • 41
  • 72
  • I can't use getResourceAsStream() - the RSS Feed is an external URL. I updated my post with the basic solution that you suggested. Its throwing an Exception. Please view the newly updated post. Thank you very much. – PacificNW_Lover Sep 24 '17 at 05:10
  • @PacificNW_Lover the error you are showing has nothing to do with ` new URL("samplefeed.com/feed").openStream();` it is about `jackson-core`/`jackson-dataformat-xml` version incompatibility. It's hard to tell what is going on without knowing your stack. can you show your gradle/maven dependencies? – varren Sep 24 '17 at 07:19
  • I updated most of my pom.xml files to 2.9.1 and now have a new issue. Please see revised post... Now it's giving me a HTTP 403. Do you know of a different mechanism to obtain the XML as a string from that external URL? – PacificNW_Lover Sep 24 '17 at 22:13
  • @PacificNW_Lover, actually i don't know why you have so many dependencies. for this task you only need `jackson-dataformat-xml`. Everything else usually come with spring by default. [more info](https://docs.spring.io/spring-boot/docs/current/reference/html/howto-spring-mvc.html#howto-write-a-json-rest-service) Sure there are lots of other options like this: http://www.cubicrace.com/2015/06/How-to-convert-XML-to-JSON-format.html or you can use `javax.xml.*`for xml and then use Gson or Jackson to convert it to json or you can use https://github.com/beckchr/staxon/wiki/Converting-XML-to-JSON – varren Sep 24 '17 at 22:45
  • @PacificNW_Lover `InputStream xml = new URL("some feed url").openStream();` works perfectly fine for me, there is nothing wrong with this approach – varren Sep 24 '17 at 22:49
  • The reason for the dependencies is because it’s a pre-existing app which I am modifying. Those dependencies were put into place by others. – PacificNW_Lover Sep 24 '17 at 22:50
  • Do you know of a different approach to obtain XML as a String from an external URL? – PacificNW_Lover Sep 24 '17 at 23:11
  • Got it working (am able to get past the HTTP 403 error, please see my modified post) but it only returns the last entry of the RSS feed into JSON format instead of all of them. Do you have any suggestions? Thanks for the great help! – PacificNW_Lover Sep 25 '17 at 09:34
0

Why doesn't it display all of entries?

XML has no distinction between the concepts of "objects" and "arrays", so the repeated `item` elements end up as a single JSON field and only one item is printed instead of all of them.

Here is the fix. Besides the dependencies you are already using, I am using two additional jars (judging by the imports below, Apache Commons IO and org.json), shown here:

enter image description here

Your Code:

import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

import org.apache.commons.io.IOUtils;
import org.json.JSONObject;
import org.json.XML;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;

/**
 * Downloads an RSS feed, converts it to JSON with {@code org.json.XML} and
 * pretty-prints the result through a Jackson {@code ObjectMapper}.
 */
public class TestClass {

    /**
     * Fetches the sample feed and prints it as indented JSON.
     *
     * @param args unused
     * @throws Exception if the feed cannot be read or converted
     */
    public static void main(String[] args) throws Exception {
        String xmlString;
        try (InputStream xml = getInputStreamFromUrlData("http://www.feedforall.com/sample.xml")) {
            if (xml == null) {
                // The helper signals failure with null.
                throw new IOException("Could not open feed: http://www.feedforall.com/sample.xml");
            }
            // Decode explicitly as UTF-8; the single-argument overload uses the
            // platform default charset.
            // NOTE(review): assumes the feed is UTF-8 encoded - confirm.
            xmlString = IOUtils.toString(xml, "UTF-8");
        }
        JSONObject jsonObject = XML.toJSONObject(xmlString);
        ObjectMapper objectMapper = new ObjectMapper();
        objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
        // Round-trip through Jackson only to get indented output.
        Object json = objectMapper.readValue(jsonObject.toString(), Object.class);
        String output = objectMapper.writeValueAsString(json);
        System.out.println(output);
    }

    /**
     * Opens an HTTP connection to the given URL and returns the response body stream.
     *
     * <p>The request is sent with a {@code Mozilla/5.0} User-Agent header. The URL
     * and the HTTP response code are printed to standard output.
     *
     * @param Url absolute HTTP(S) URL to fetch
     * @return the response body stream, which the caller must close, or {@code null}
     *         if the URL is malformed or the request fails (the stack trace is printed)
     */
    public static InputStream getInputStreamFromUrlData(String Url) {
        HttpURLConnection httpConnection = null;

        try {
            URL url = new URL(Url);
            System.out.println("URL: " + Url);
            httpConnection = (HttpURLConnection) url.openConnection();
            httpConnection.setRequestProperty("User-Agent", "Mozilla/5.0");
            // Without timeouts an unresponsive server blocks this call indefinitely.
            httpConnection.setConnectTimeout(10 * 1000);
            httpConnection.setReadTimeout(10 * 1000);

            int responseCode = httpConnection.getResponseCode();
            System.out.println("Response Code : " + responseCode);

            return httpConnection.getInputStream();
        } catch (IOException e) {
            // MalformedURLException is a subclass of IOException, so one handler
            // covers both failure modes.
            e.printStackTrace();
            if (httpConnection != null) {
                // Release the connection; no stream is handed to the caller on failure.
                httpConnection.disconnect();
            }
            return null;
        }
    }
}

Sample Run: (check item is an array of objects and not a single object)

URL: http://www.feedforall.com/sample.xml
Response Code : 200
{
  "rss" : {
    "channel" : {
      "image" : {
        "link" : "http://www.feedforall.com/industry-solutions.htm",
        "width" : 48,
        "description" : "FeedForAll Sample Feed",
        "title" : "FeedForAll Sample Feed",
        "url" : "http://www.feedforall.com/ffalogo48x48.gif",
        "height" : 48
      },
      "copyright" : "Copyright 2004 NotePage, Inc.",
      "item" : [ {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/restaurant.htm",
        "description" : "<b>FeedForAll </b>helps Restaurant's communicate with customers. Let your customers know the latest specials or events.<br>\r\n<br>\r\nRSS feed uses include:<br>\r\n<i><font color=\"#FF0000\">Daily Specials <br>\r\nEntertainment <br>\r\nCalendar of Events </i></font>",
        "title" : "RSS Solutions for Restaurants",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:11 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/schools.htm",
        "description" : "FeedForAll helps Educational Institutions communicate with students about school wide activities, events, and schedules.<br>\r\n<br>\r\nRSS feed uses include:<br>\r\n<i><font color=\"#0000FF\">Homework Assignments <br>\r\nSchool Cancellations <br>\r\nCalendar of Events <br>\r\nSports Scores <br>\r\nClubs/Organization Meetings <br>\r\nLunches Menus </i></font>",
        "title" : "RSS Solutions for Schools and Colleges",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:09 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/computer-service.htm",
        "description" : "FeedForAll helps Computer Service Companies communicate with clients about cyber security and related issues. <br>\r\n<br>\r\nUses include:<br>\r\n<i><font color=\"#0000FF\">Cyber Security Alerts <br>\r\nSpecials<br>\r\nJob Postings </i></font>",
        "title" : "RSS Solutions for Computer Service Companies",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:07 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/government.htm",
        "description" : "FeedForAll helps Governments communicate with the general public about positions on various issues, and keep the community aware of changes in important legislative issues. <b><i><br>\r\n</b></i><br>\r\nRSS uses Include:<br>\r\n<i><font color=\"#00FF00\">Legislative Calendar<br>\r\nVotes<br>\r\nBulletins</i></font>",
        "title" : "RSS Solutions for Governments",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:05 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/politics.htm",
        "description" : "FeedForAll helps Politicians communicate with the general public about positions on various issues, and keep the community notified of their schedule. <br>\r\n<br>\r\nUses Include:<br>\r\n<i><font color=\"#FF0000\">Blogs<br>\r\nSpeaking Engagements <br>\r\nStatements<br>\r\n </i></font>",
        "title" : "RSS Solutions for Politicians",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:03 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/weather.htm",
        "description" : "FeedForAll helps Meteorologists communicate with the general public about storm warnings and weather alerts, in specific regions. Using RSS meteorologists are able to quickly disseminate urgent and life threatening weather warnings. <br>\r\n<br>\r\nUses Include:<br>\r\n<i><font color=\"#0000FF\">Weather Alerts<br>\r\nPlotting Storms<br>\r\nSchool Cancellations </i></font>",
        "title" : "RSS Solutions for Meteorologists",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:09:01 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/real-estate.htm",
        "description" : "FeedForAll helps Realtors and Real Estate companies communicate with clients informing them of newly available properties, and open house announcements. RSS helps to reach a targeted audience and spread the word in an inexpensive, professional manner. <font color=\"#0000FF\"><br>\r\n</font><br>\r\nFeeds can be used for:<br>\r\n<i><font color=\"#FF0000\">Open House Dates<br>\r\nNew Properties For Sale<br>\r\nMortgage Rates</i></font>",
        "title" : "RSS Solutions for Realtors & Real Estate Firms",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:08:59 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/banks.htm",
        "description" : "FeedForAll helps <b>Banks, Credit Unions and Mortgage companies</b> communicate with the general public about rate changes in a prompt and professional manner. <br>\r\n<br>\r\nUses include:<br>\r\n<i><font color=\"#0000FF\">Mortgage Rates<br>\r\nForeign Exchange Rates <br>\r\nBank Rates<br>\r\nSpecials</i></font>",
        "title" : "RSS Solutions for Banks / Mortgage Companies",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:08:57 -0400"
      }, {
        "comments" : "http://www.feedforall.com/forum",
        "link" : "http://www.feedforall.com/law-enforcement.htm",
        "description" : "<b>FeedForAll</b> helps Law Enforcement Professionals communicate with the general public and other agencies in a prompt and efficient manner. Using RSS police are able to quickly disseminate urgent and life threatening information. <br>\r\n<br>\r\nUses include:<br>\r\n<i><font color=\"#0000FF\">Amber Alerts<br>\r\nSex Offender Community Notification <br>\r\nWeather Alerts <br>\r\nScheduling <br>\r\nSecurity Alerts <br>\r\nPolice Report <br>\r\nMeetings</i></font>",
        "title" : "RSS Solutions for Law Enforcement",
        "category" : {
          "domain" : "www.dmoz.com",
          "content" : "Computers/Software/Internet/Site Management/Content Management"
        },
        "pubDate" : "Tue, 19 Oct 2004 11:08:56 -0400"
      } ],
      "lastBuildDate" : "Tue, 19 Oct 2004 13:39:14 -0400",
      "link" : "http://www.feedforall.com/industry-solutions.htm",
      "description" : "RSS is a fascinating technology. The uses for RSS are expanding daily. Take a closer look at how various industries are using the benefits of RSS in their businesses.",
      "generator" : "FeedForAll Beta1 (0.0.1.8)",
      "language" : "en-us",
      "title" : "FeedForAll Sample Feed",
      "managingEditor" : "marketing@feedforall.com",
      "pubDate" : "Tue, 19 Oct 2004 13:38:55 -0400",
      "webMaster" : "webmaster@feedforall.com",
      "docs" : "http://blogs.law.harvard.edu/tech/rss",
      "category" : {
        "domain" : "www.dmoz.com",
        "content" : "Computers/Software/Internet/Site Management/Content Management"
      }
    },
    "version" : 2
  }
}
JRG
  • 4,037
  • 3
  • 23
  • 34
0

Got it working like this:

/**
 * Fetches the RSS feed, converts the XML to JSON with {@code org.json.XML} and
 * returns the JSON text.
 *
 * @return the feed as a JSON string
 * @throws IOException if the feed cannot be opened or read
 */
@RequestMapping(value = "/v2/convertIntoJson", 
                method = RequestMethod.GET, 
                produces = "application/json")
public String getRssFeedAsJson() throws IOException {
    InputStream xml = getInputStreamForURLData("http://www.samplefeed.com/feed/");
    if (xml == null) {
        // The helper signals failure with null; report it as an I/O failure
        // instead of failing on a null stream further down.
        throw new IOException("Could not open feed: http://www.samplefeed.com/feed/");
    }
    String xmlString;
    try {
        // Decode explicitly as UTF-8 rather than with the platform default charset.
        // NOTE(review): assumes the feed is UTF-8 encoded - confirm.
        xmlString = new String(IOUtils.toByteArray(xml), "UTF-8");
    } finally {
        xml.close();
    }
    // The JSON text is returned directly; encoding it to UTF-8 bytes and decoding
    // it again yields the same string.
    return XML.toJSONObject(xmlString).toString();
}

/**
 * Opens an HTTP connection to the given URL and returns the response body stream.
 *
 * <p>The request is sent with a {@code Mozilla/5.0} User-Agent header. The URL and
 * the HTTP response code are printed to standard output.
 *
 * @param Url absolute HTTP(S) URL to fetch
 * @return the response body stream, which the caller must close, or {@code null}
 *         if the URL is malformed or the request fails (the stack trace is printed)
 */
public static InputStream getInputStreamForURLData(String Url) {
    HttpURLConnection httpConnection = null;

    try {
        URL url = new URL(Url);
        System.out.println("URL::" + Url);
        httpConnection = (HttpURLConnection) url.openConnection();
        httpConnection.setRequestProperty("User-Agent", "Mozilla/5.0");
        // Without timeouts an unresponsive server blocks this call indefinitely.
        httpConnection.setConnectTimeout(10 * 1000);
        httpConnection.setReadTimeout(10 * 1000);

        int responseCode = httpConnection.getResponseCode();
        System.out.println("Response Code : " + responseCode);

        return httpConnection.getInputStream();
    }
    catch (IOException e) {
        // MalformedURLException is a subclass of IOException, so one handler
        // covers both failure modes.
        e.printStackTrace();
        if (httpConnection != null) {
            // Release the connection; no stream is handed to the caller on failure.
            httpConnection.disconnect();
        }
        return null;
    }
}
PacificNW_Lover
  • 4,746
  • 31
  • 90
  • 144