I'm slightly confused about how to use promises. I've read a few things on promises mainly because it seems like I have to use them. I'm working on a small application which is supposed to search through some pdfs using pdfjs, and they use promises. I knocked up something in nodejs, by looking at various examples on the net but I run into a problem.
Let's look at the code first:
require('pdfjs-dist');
var fs = require('fs');
//var searchTerm = "course";
var searchTerm = "designee";
//var searchTerm = "document";
var wordCounter = 0;
var searchResultJSON = [];
//var data = new Uint8Array(fs.readFileSync('testPdf.pdf'));
//var data = new Uint8Array(fs.readFileSync('advanced-javascript.pdf'));
var data = new Uint8Array(fs.readFileSync('iss4.pdf'));
PDFJS.getDocument(data).then(function (pdfDocument) {
console.log('Number of pages: ' + pdfDocument.numPages );
//var div = document.getElementById('viewer');
for(var i = 1; i<=pdfDocument.numPages; i++ ){//loops thru pages
console.log("i is " + (i));
pdfDocument.getPage((i)).then(function(page){//get page(i),
// console.log("page is " + (i));
//console.log("inside getPage()");
page.getTextContent().then( function(textContent){//get content of pdf
//console.log("inside getTextContent()");
//if( null != textContent.items ){
var page_text = "";
var last_block = null;
var lineWithResult = "";
for( var k = 0; k < textContent.items.length; k++ ){
var block = textContent.items[k];
//console.log("word " + textContent.items.length + " k is " + k );
/* if( last_block != null && last_block.str[last_block.str.length-1] != ' '){
if( block.x < last_block.x )
page_text += "\r\n";
else if ( last_block.y != block.y && ( last_block.str.match(/^(\s?[a-zA-Z])$|^(.+\s[a-zA-Z])$/) == null ))
page_text += ' ';
} */
page_text += block.str;
last_block = block;
lineWithResult = searchPdf(block.str);
if(lineWithResult != null){
console.log(lineWithResult + " wordCounter is " + wordCounter);
}
}//end of for(var k...)
//console.log(" page_text " + page_text);
//console.log(searchResultJSON);
//}
});//end of textContent.items
});//end of getPage
}//end of loop
});
function searchPdf(toSearch){//searching pdf for searchTerm
var result = toSearch.toLowerCase().indexOf(searchTerm);
if(result >=0){//if match is found
wordCounter++;
//console.log("toSearch " + toSearch + " result is " + result + " wordCounter " + wordCounter);
constructResult(toSearch, result);//build the result object
return toSearch;
}
else{//if match not found
return null;
}
}
function constructResult(toSearch, result){//construct array of objects containing: search term, search result and index of search term
searchResultJSON.push({
"TextLine":toSearch,
"SearchTerm":searchTerm,
"Result": result,
});
}
The purpose of this code is to:
loop through the pdf's pages
loop through the content
get the pdf text in a variable line by line
search the pdf content with a keyword
if the keyword finds a match, print the match
get the matches in a javascript object
So, it all works OK but you'll notice that from inside the second for loop (where I get the text of the pdf that is) I call a function, searchPdf()
which basically performs the search and from within that function I call another function constructResult(...)
which is supposed to create the javascript object with the results.
I have some problems printing this object though, because if I print it outside the scope of the for loop, it is empty because the print call (in my case the console.log) executes before the loop has actually copied and analysed (read process and found a match) the text. So, promises see the way to resolve the problem. Thing is, I'm not sure how to code this in such a way that I can chain the promises and print my object after everything has executed. Any idea?
EDIT: so to clarify, what I need in sequence is this: 1)loop through pdf (I will have to amend the code to loop through a collection of pdfs at some point soon) 2)get each line of text 3)check that there is a match 4)if so, copy the line of text in the javascript object 5)print the javascript object