2

I am trying to create a CSV. Since the data is huge, I wanted to keep appending the data into the file rather than first storing the records in an array and then dumping all the data at once into the CSV. So I wrote the following code, that simulates what I want to do. 50 random records are created and appended to the file each second. The code works fine, but the problem is the final CSV. It looks like the following:

"id","value""value","id"
3226,"3653aab688be4934""value","id"
4070,"9de2be11958fa207""value","id"
2061,"b754b9164146d37f""value","id"
6216,"ac85aa653bfc845d""value","id"
48,"caf5f55c49fde7bf""value","id"
4330,"2c33ae658de7a3eb""value","id"
1997,"34caef7b4ae96edd""value","id"

I could not understand the reason for this. I also read the related SO post but that did not help either.

const json2csv = require('json2csv').parse;
const fs = require('fs');

Promise = require('bluebird');

let plist = [];
let count = 0;
let intvl = null;

let fields = ['id', 'value'];

function start() {

    if(count++ > 50) {
            Promise.all(plist)
                    .then(r => {
                            clearInterval(intvl);
                            console.log('file created');
                            process.exit(0);
                    })
                    .catch(err => {
                            console.log(err);
                            process.exit(-1);
                    })
    }

    let data = [{
            value: Math.floor(Math.random() * 9999),
            id: require('crypto').randomBytes(8).toString('hex')
    }];

    plist.push(append(json2csv(data)));

}


function append(data) {

    return new Promise((resolve, reject) => {

            fs.appendFile('./stream.csv', data, (err, resp) => {
                    if(err) reject(err);
                    else resolve();
            });
    });
}


function init() {

    fs.stat('./stream.csv', (err, resp) => {
            if(err) {
                    fs.writeFileSync('./stream.csv', json2csv([], {fields}));
            }
            intvl = setInterval(() => {
                    start();
            }, 1100);
    })
}

init();

What is it that I am missing? Initially, the code checks if the file already exists. If it does not, creates the file with just headers and then the regular write process follows. I tried removing the part where only headers are written. It helps in removing the duplicate headers at the top but does not help the repitition of headers in each row. How could that be prevented?

Suhail Gupta
  • 22,386
  • 64
  • 200
  • 328

1 Answers1

1

I think the problem is due to multiple cycles of your code. Every loop append fields header ("value","id") to the last line so:

First loop we have

"id","value""value","id"
 3405,"6874eb66f714e717"

Second loop

 "id","value""value","id"
3405,"6874eb66f714e717""value","id" <-- "value","id" added
1436,"c91056b1207598bb"

and so on. You should add the header only the first time using fs.writeFileSync('./stream.csv', json2csv([], {fields})); and remove extra header returned from json2csv to isolate your data.

At the moment it seems not possible to pass an empty header to json2csv using call like json2csv(data, {}).

Here an example:

const json2csv = require('json2csv').parse;
const fs = require('fs');

Promise = require('bluebird');

let plist = [];
let count = 0;
let intvl = null;

let fields = ['id', 'value'];

function start() {

if(count++ > 50) {
    Promise.all(plist)
        .then(r => {
            clearInterval(intvl);
            console.log('file created');
            process.exit(0);
        })
        .catch(err => {
            console.log(err);
            process.exit(-1);
        })
}

let data = [{
    id: Math.floor(Math.random() * 9999), //id should be set in this way
    value: require('crypto').randomBytes(8).toString('hex')

}];

plist.push(append(json2csv(data).replace('"id","value"',''))); //remove header

}

function append(data) {

    return new Promise((resolve, reject) => {

    fs.appendFile('./stream.csv', data, (err, resp) => {
        if(err) reject(err);
        else resolve();
    });
});
}

function init() {

fs.stat('./stream.csv', (err, resp) => {
    if(err) {
        fs.writeFileSync('./stream.csv', json2csv([], {fields}));
       }
        intvl = setInterval(() => {
           start();
           }, 1100);
    })
}

init();

And the generated output file stream.csv:

"id","value"
2462,"7c9197ae6c101f27"
7714,"e1bbfa2dc9adba7a"
2728,"3ff6673cd22bb00b"
8686,"c1f61c138e7b9fdc"
6687,"01d006f74412459a"
7888,"7ccf8e40b9cc4192"
2892,"1672a034573d1be3"
6228,"d8d004148c59134b"
2273,"5028b14b40029d4c"
5114,"1e282fd1c9a84e25"
3636,"c2b7d2250e6fad1e"
8096,"9fb35e54f749417f"
8955,"f2ccc57eab5438a0"
3957,"b323e7addc967d29"
baymax
  • 151
  • 3