0

I have the following code but am struggling to save this to a csv using fs. Any assistance appreciated. I have been looking at this.

    var request = require('request');
var cheerio = require('cheerio');
var wait = require("wait.for");
var fs = require('fs');

function requestWaitForWrapper(url, callback) {
  request(url, function(error, response, html) {
    if (error)
      callback(error, response);
    else if (response.statusCode == 200)
      callback(null, html);
    else
      callback(new Error("Status not 200 OK"), response);
  });
}

function readBookInfo(baseUrl, s) {
  var html = wait.for(requestWaitForWrapper, baseUrl + '&s=' + s.toString());
  var $ = cheerio.load(html, {
    xmlMode: true
  });

  return {
    s: s,
    id: $('work').attr('id'),
    total: parseInt($('records').attr('total'))
  };
}

function readWorkInfo(id) {
  var html = wait.for(requestWaitForWrapper, 'http://api.trove.nla.gov.au/work/' + id.toString() + '?key=6k6oagt6ott4ohno&reclevel=full');
  var $ = cheerio.load(html, {
    xmlMode: true
  });

  return {
    title: $('title').text(),
    contributor: $('contributor').text(),
    description: $('abstract').text()
  }
}

function main() {
  var baseBookUrl = 'http://api.trove.nla.gov.au/result?key=6k6oagt6ott4ohno&zone=book&l-advformat=Thesis&sortby=dateDesc&q=+date%3A[2000+TO+2014]&l-availability=y&l-australian=y&n=1';
  var baseInfo = readBookInfo(baseBookUrl, 0);

  for (var s = 0; s < baseInfo.total; s++) {
    var bookInfo = readBookInfo(baseBookUrl, s);
    var workInfo = readWorkInfo(bookInfo.id);

    fs.writeFile("Users/name/echojs.csv", bookInfo.id + ";" + workInfo.contributor + ";" + workInfo.description, function (err) {
  if (err) throw err;
  console.log('Save Complete');
});

  }
}

wait.launchFiber(main);

EDIT AFTER COMMENT

Do you mean like this at the end:

  for (var s = 0; s < baseInfo.total; s++) {
    var bookInfo = readBookInfo(baseBookUrl, s);
    var workInfo = readWorkInfo(bookInfo.id);
    var combined = bookInfo.id+";"+workInfo.contributor+";"+workInfo.description;

    fs.writeFile("Users/name/echojs.csv", combined, function (err) {
  if (err) throw err;
  console.log('Save Complete');
});

  }
}

wait.launchFiber(main)

;

Thanks for bearing with me - node js is to me.

Community
  • 1
  • 1
user1222447
  • 113
  • 1
  • 1
  • 7
  • 1
    what specific error(s)/problem(s) are you running into? – go-oleg Aug 09 '14 at 00:59
  • You can't call `fs.writeFile` in a synchronous loop like that. I would recommend you to either compose a single string storing all your data before writing it, or to [use streams](http://nodejs.org/api/fs.html#fs_fs_createwritestream_path_options). – Leonid Beschastny Aug 09 '14 at 01:07
  • using `sync` versions of the functions should remove the asynchronous problems you see, e.g. [fs.appendFileSync](http://nodejs.org/api/fs.html#fs_fs_appendfilesync_filename_data_options), some clarifying article is available at http://disasterjs.blogspot.cz/2013/03/appending-text-to-file-in-nodejs.html Also this npm package https://github.com/jprichardson/node-fs-extra implements some `sync` file system functions like create directories etc. which you may find useful – xmojmr Sep 02 '14 at 06:30

1 Answers1

1

I don't know what exact errors you encounter, but here is what I think:

I believe your biggest problem is that you're using fs.writeFile, but it seems like you want to append the data to the file. You should use fs.appendFile for this - otherwise you will always override what you've written before, meaning only the last iteration of your loop is getting written.

Also note that fs.writeFile is asynchronous, meaning you are iterating through your loop before you know that data has been written to the file. You could just fs.writeFileSync (or rather fs.appendSync as mentioned above), which makes sure the file gets written - and only then continues to process the code. However, this also means writing/appending to the file blocks your thread.

An easy way to do this is by using a recursive function which waits for the callback to finish and then calls itself again:

function writeBooks(baseBookUrl, baseInfo, index) {
  if (!index) {
    index = 0;
  }

  var bookInfo = readBookInfo(baseBookUrl, s);
  var workInfo = readWorkInfo(bookInfo.id);
  var combined = bookInfo.id+";"+workInfo.contributor+";"+workInfo.description;

  fs.appendFile("Users/name/echojs.csv", combined, function(error) {
    if (error) {
      console.log("Whoops: " + error);
    }

    writeBooks(baseBookUrl, baseInfo, ++index);
  });
}

This means that other code can be processed during the creation of the file and the contents of the file will be written in order (because you know the previous info has already been written). You could also use something like the async library to do this.

brdigi
  • 135
  • 1
  • 6
  • Thank you for the comprehensive explanation. I have tried adding this to my above code and get `SyntaxError: Unexpected token }`. Also, should your solution above go at the end of my original code? – user1222447 Aug 10 '14 at 00:08
  • @user1222447 Whoops, fixed it - now syntax error free. It should be called in main after you're calling ```var baseInfo = readBookInfo(baseBookUrl, 0);```, as you need those 2 vars to call the function. However, it's important that you understand what it's doing so experiment with it. ;) – brdigi Aug 10 '14 at 09:41
  • I am now getting the following: `Error: Status not 200 OK at Request._callback (C:\Users\name\trial.js:13:16) at Request.self.callback (C:\Users\name\node_modules\request\request.js:123:2 2) at Request.EventEmitter.emit (events.js:98:17) at Request. (C:\Users\name\node_modules\request\request.js:1047:14 ) at Request.EventEmitter.emit (events.js:117:20) at IncomingMessage. (C:\Users\name\node_modules\request\request.js :998:12) at IncomingMessage.EventEmitter.emit (events.js:117:20) at _stream_readable.js:920:16 at process._tickCallback (node.js:415:13)`. – user1222447 Aug 10 '14 at 11:57
  • Is this because the website/server I am scraping wants to limit the calls I make? – user1222447 Aug 10 '14 at 12:22
  • Sorry, not sure what it means. Doesn't seem to be related to my code. Has your code worked before but didn't write the file? I was only answering to the fs-related part. – brdigi Aug 10 '14 at 12:55