I've run into a problem uploading a large CSV file to Azure Table Storage: the parser streams records out of the file far faster than the table service can accept the inserts, so many of them never upload properly or come back with timeout errors.
This is my current code:
var fs = require('fs');
var csv = require('csv');
var azure = require('azure');

var AZURE_STORAGE_ACCOUNT = "my storage account";
var AZURE_STORAGE_ACCESS_KEY = "my access key";

var tableService = azure.createTableService(AZURE_STORAGE_ACCOUNT, AZURE_STORAGE_ACCESS_KEY);

// RowKey counter starts at 150000; uploadCount tracks successful inserts
var count = 150000;
var uploadCount = 1;

tableService.createTableIfNotExists('newallactorstable', function (error) {
    if (!error) {
        console.log("Table created / located");
    } else {
        console.log("error");
    }
});

csv()
    .from.path(__dirname + '/actorsb-c.csv', {delimiter: '\t'})
    .transform(function (row) {
        // Move the last column to the front so the actor name is row[0]
        row.unshift(row.pop());
        return row;
    })
    .on('record', function (row, index) {
        // Build one entity per CSV row
        var actorsUpload = {
            PartitionKey: 'actors',
            RowKey: count.toString(),
            Actors: row[0]
        };

        // An insert is fired off for every record as soon as it is parsed
        tableService.insertEntity('newallactorstable', actorsUpload, function (error) {
            if (!error) {
                console.log("Added: " + uploadCount);
                uploadCount++;
            } else {
                console.log(error);
            }
        });

        count++;
    })
    .on('close', function (count) {
        console.log('Number of lines: ' + count);
    })
    .on('error', function (error) {
        console.log(error.message);
    });
The CSV file is roughly 800 MB.
I know that to fix this I probably need to send the data in batches, or at least limit how many inserts are in flight at once, but I have no idea how to do that.
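The only thing I've been able to come up with so far is the rough sketch below, which just throttles my own insertEntity calls rather than doing real entity-group batching. The chunkSize value and the insertChunk helper are names I made up, not anything from the SDK, and collecting all the rows of an 800 MB file into memory first is probably a bad idea in itself, so treat this as nothing more than an illustration of the throttling idea:

// Rough sketch only: collect the rows first, then insert them in small
// chunks, waiting for every insert in a chunk to return before starting
// the next one. chunkSize and insertChunk are my own made-up names.
var rows = [];

csv()
    .from.path(__dirname + '/actorsb-c.csv', {delimiter: '\t'})
    .transform(function (row) {
        row.unshift(row.pop());
        return row;
    })
    .on('record', function (row) {
        rows.push(row[0]);          // keep just the actor name
    })
    .on('close', function () {
        insertChunk(0);             // start uploading once parsing is done
    });

var chunkSize = 100;                // how many inserts to have in flight at once

function insertChunk(start) {
    if (start >= rows.length) {
        console.log('Done: ' + rows.length + ' rows');
        return;
    }

    var chunk = rows.slice(start, start + chunkSize);
    var pending = chunk.length;

    chunk.forEach(function (actor, i) {
        var entity = {
            PartitionKey: 'actors',
            RowKey: (150000 + start + i).toString(),
            Actors: actor
        };

        tableService.insertEntity('newallactorstable', entity, function (error) {
            if (error) {
                console.log(error);
            }
            // Only move on to the next chunk when this chunk has fully returned
            if (--pending === 0) {
                insertChunk(start + chunkSize);
            }
        });
    });
}

Even if something like this works, I'm not sure it counts as "batching" in the Table Storage sense, or whether there's a proper batch API in the SDK that I should be using instead.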