var pdfParser = require('pdf-parser')
var fs = require('fs')
var PDF_PATH = __dirname + '/pdfs'
var results = []
var failed = []
fs.readdir(PDF_PATH, function(err, files){
if(err){
return console.log(err)
}
for(const file of files){
let the_ent = {
'name': '',
'other data': []
}
pdfParser.pdf2json(PDF_PATH + '/' + file, function(error, pdf){
if(error != null){
console.log(error)
}else if(pdf['pages'] == undefined){
failed.push(file)
console.log(file +' failed')
}else{
//populate 'results' array
}
console.log(/*pdf_data*/)
results.push(/*pdf_data*/)
})
}
console.log(results)
console.log(failed)
results = JSON.stringify(results)
//fs.writeFileSync() write results to json
})
I don't know what is wrong with me this morning, I can't work out how to write this in async; obviously the logs/writefile at the bottom fire as soon as the script executes.
I have tried wrapping in async functions and awaiting the readdir / pdf parsing instead of using callbacks - clearly not correctly. I'm just trying to parse every pdf in a folder - push what I want to some arrays and then log them once the loop finishes zzz.