0

I have been tasked with scrubbing a few million lines of csv files. I am using Node JS and plan to upload the data to a SQL database later once the files are clean. I am able to read the data line by line and am performing the necessary steps to clean the data appropriately, however, I can't seem to write the data to a new csv file. I have tried multiple libraries including fast-csv and csvwriter. How can I write the data to a new csv file? Thank you for reading.

        const csv = require('csv-parser');
    const fastcsv = require('fast-csv');
    const fs = require('fs');
    const mysql = require('mysql');
    const {config} = require('../config.js');
    const _ = require('underscore');
    
    const connection = mysql.createConnection({
      host: 'localhost',
      user: 'root',
      password: config.password,
      database: 'products'
    });
    
    connection.connect((err) => {
      if (err) {
        return console.log('error: ', err);
      } else {
        console.log('Connected to MySQL!')
      }
    });
    
    const ws = fs.createWriteStream('/Users/Documents/clean-product.csv');
    
    
    
    fs.createReadStream('/Users/Downloads/product.csv')
      .pipe(csv())
      .on('data', async (row) => {
        
// Cleaning

        row.id.trim();
        row.name.trim();
        row.slogan.trim();
        row.description.trim();
        row.category.trim();
        row.default_price.trim();
        let id = parseInt(row.id);
        let default_price;
        if (!parseInt(row.default_price)) {
          let matches = row.default_price.match(/(\d+)/);
          defaultPrice = matches[0];
        } else {
          defaultPrice = parseInt(row.default_price);
        }
        let name = _.escape(row.name);
        let slogan = _.escape(row.slogan);
        let description = _.escape(row.description);
        let category = _.escape(row.category);

// Writing

        fastcsv.write([
          id, `'${name}'`, `'${slogan}'`, `'${description}'`, `'${category}'`, default_price
        ], {headers:true})
        .pipe(ws)
      })
      .on('end', () => {
        console.log('CSV file successfully processed');
      });
dd0sxx
  • 27
  • 8
  • Why not just store them in an array, and then join it with `\n` and write that string to file when done? Seems a lot easier. – somethinghere Jan 29 '21 at 18:36
  • @somethinghere there are millions of lines, I cannot store them in an array without running out of memory. – dd0sxx Jan 29 '21 at 19:16
  • You can also do it in batches, appending to a file using the the core `fs` from node (see this answer for info: https://stackoverflow.com/a/11267583/2991619 or https://stackoverflow.com/a/9812799/2991619, if you just want to do it with a stream), just add a check for when you reach maximum array size, write and empty it? Still seems simpler to me :) – somethinghere Jan 29 '21 at 19:23

0 Answers