The question "How do I replace a string in a PDF file using NodeJS?" has a solution for replacing text in a PDF. Using the same code, I have a puzzling issue: the text is replaced in the source code of the PDF, but it does not render properly. The relevant lines, adapted from the above solution, are:
console.log(replaceText);
// Buffer.from replaces the deprecated `new Buffer(...)` constructor. Decoding as
// latin1 maps every byte to exactly one character, so bytes that are not valid
// UTF-8 are not turned into U+FFFD before the replacement runs.
var string = Buffer.from(data).toString('latin1').replace(findText, replaceText);
console.log(string);
The console shows that it is replaced in the source string of the PDF:
/TT0 1 Tf 65.5689 -24.5097 24.5097 65.5689 363.9941 762.3682 Tm (e)Tj 61.1539 -34.0617 34.0617 61.1539 381.1689 756.6411 Tm (n)Tj 54.8214 -43.5272 43.5272 54.8214 408.6333 741.0947 Tm (d)Tj 48.8331 -50.153 50.153 48.8331 426.3779 726.999 Tm (a)Tj 52 0 0 52 75.8203 226.9756 Tm (abcdefghijklmnopqrstuvwxyz)Tj 33 0 0 33 25.8203 302.9756 Tm (E)Tj (ste cheque-prenda, para:)Tj 1.818 -7.152 Td (www.emocoes.org/abcdefghijklmnopqrstuvwxyz)Tj ET
and the PDF looks like this:
The K, X, and Y are missing in this case. Opening the file in Adobe Illustrator shows they are still there behind other letters:
I could not find a definite pattern: sometimes H and J are also missing with other replacement strings, and the missing letters are different with other fonts (I tested Open Sans and Times New Roman).
What is the problem, and how can I fix it?
My code is:
/**
 * Writes a customised copy of the voucher PDF in which `findText` is replaced
 * by `replaceText` inside the content stream of the page at index 0.
 *
 * The replacement is done with String.prototype.replace, so a string
 * `findText` replaces only its first occurrence.
 *
 * @param {string|RegExp} findText - Pattern to look for in the content stream.
 * @param {string} replaceText - Replacement text; also used in the output file name.
 * @returns {string} The link "/vouchers/cheque-prenda-<replaceText>.pdf" of the
 *     file written below the "../private" directory.
 */
function customizeVoucher(findText, replaceText) {
  var sourceFile = path.join(__dirname, "../private/vouchers/custom-old.pdf");
  // NOTE(review): replaceText goes into the file name unescaped. If it can come
  // from user input, validate it before it reaches this point.
  var link = "/vouchers/cheque-prenda-" + replaceText + ".pdf";
  var targetFile = path.join(__dirname, "../private" + link);
  var pageNumber = 0;

  var writer = hummus.createWriterToModify(sourceFile, {
    modifiedFilePath: targetFile,
    log: path.join(__dirname, "../hummus.md")
  });

  var sourceParser = writer.createPDFCopyingContextForModifiedFile().getSourceDocumentParser();
  var pageObject = sourceParser.parsePage(pageNumber);
  // Assumes the page's Contents entry is an indirect reference to a single
  // stream (not an array of streams) -- TODO confirm for other source PDFs.
  var textObjectId = pageObject.getDictionary().toJSObject().Contents.getObjectID();
  var textStream = sourceParser.queryDictionaryObject(pageObject.getDictionary(), 'Contents');

  // Read the original block of text data.
  var data = [];
  var readStream = sourceParser.startReadingFromStream(textStream);
  while (readStream.notEnded()) {
    Array.prototype.push.apply(data, readStream.read(10000));
  }

  console.log(replaceText);
  // Buffer.from replaces the deprecated `new Buffer(...)` constructor. latin1
  // maps each byte to exactly one character (0x00-0xFF), so the stream survives
  // the bytes -> string -> bytes round trip unchanged; the default UTF-8
  // decoding would turn any byte sequence that is not valid UTF-8 into U+FFFD.
  var string = Buffer.from(data).toString('latin1').replace(findText, replaceText);
  console.log(string);

  // Encode with the same single-byte mapping used for decoding. Characters
  // above U+00FF in replaceText cannot be represented this way and are
  // truncated to their low byte.
  var outputBytes = Array.prototype.slice.call(Buffer.from(string, 'latin1'));

  // Create and write our new text object.
  var objectsContext = writer.getObjectsContext();
  objectsContext.startModifiedIndirectObject(textObjectId);
  var stream = objectsContext.startUnfilteredPDFStream();
  stream.getWriteStream().write(outputBytes);
  objectsContext.endPDFStream(stream);
  objectsContext.endIndirectObject();

  writer.end();
  return link;
}
and the source PDF is here.