I'm trying to convert a PDF to a Google Doc using OCR, but my PDF is too long for a single conversion, so I'd like to manually set the page range to convert.
Here is my code:
/**
 * Runs OCR text extraction over every PDF in the folder identified by
 * FOLDER_ID (defined outside this function) and hands each file's text to
 * extractSpecifics (also defined elsewhere).
 *
 * Nothing is returned; the result of extractSpecifics is currently discarded.
 */
function extractPdfData() {
  // Handle to the active spreadsheet; fetched here but not used yet.
  const spreadsheet = SpreadsheetApp.getActiveSpreadsheet();

  // Restrict the listing to PDFs rather than every file in the folder.
  const pdfIterator = DriveApp.getFolderById(FOLDER_ID).getFilesByType("application/pdf");

  // Walk the PDF files one at a time (this iterates files, not folders).
  for (; pdfIterator.hasNext(); ) {
    const pdfId = pdfIterator.next().getId();
    const converted = getTextFromPDF(pdfId);
    extractSpecifics(converted.text);
  }
}
/**
 * Converts a PDF stored in Drive to a temporary Google Doc using OCR, reads
 * the recognised text, and deletes the temporary document again.
 *
 * Relies on the `Drive` advanced service being enabled for the script
 * (the call shape used here is `Drive.Files.insert`).
 *
 * @param {string} fileID - Drive ID of the PDF file to convert.
 * @returns {{name: string, text: string}} The converted document's name and
 *     its full body text.
 * @throws Propagates any error raised by the Drive or Document services; the
 *     temporary document is still removed in that case.
 */
function getTextFromPDF(fileID) {
  const blob = DriveApp.getFileById(fileID).getBlob();
  const resource = {
    title: blob.getName(),
    mimeType: blob.getContentType(),
  };
  const options = {
    ocr: true,
    ocrLanguage: "en",
  };

  // Convert the PDF to a Google Doc with OCR.
  const file = Drive.Files.insert(resource, blob, options);

  try {
    // Read the text from the newly created document.
    const doc = DocumentApp.openById(file.id);
    return {
      name: doc.getName(),
      text: doc.getBody().getText(),
    };
  } finally {
    // Always delete the temporary document, even when opening or reading it
    // fails, so failed conversions do not pile up in Drive.
    Drive.Files.remove(file.id);
  }
}
Any assistance is greatly appreciated. (Please excuse my use of var instead of let).