0

I'm making a program that consists of three different functions:

  1. downloadPDF: download a PDF from the web
  2. getPDF: read and parse the pdf
  3. getData: loop through getPDF

The problem I'm having is with the third function (getData), which uses a for...of loop to call getPDF: it doesn't seem to wait for getPDF to finish before it tries to console.log the result that getPDF returns.

Here are the three functions:

/**
 * Fetches the resource at `pdfURL` and saves the response body to disk.
 *
 * @param pdfURL - address handed to `request.get` as `uri`
 * @param outputFilename - path the downloaded body is written to
 * @returns a promise that resolves (with no value) once the file is written
 */
async function downloadPDF(pdfURL, outputFilename) {
  // `encoding: null` — presumably asks `request` for the raw body as a Buffer
  // rather than a decoded string; confirm against the request docs.
  const requestOptions = {uri: pdfURL, encoding: null};
  const responseBody = await request.get(requestOptions);

  console.log(`Writing downloaded PDF file to ${outputFilename}...`);
  fs.writeFileSync(outputFilename, responseBody);
}

/**
 * Downloads a PDF to "temp/pdf.pdf", hands its bytes to `pdf(...)`, and collects
 * every text line that contains `query` into `resultsArray`.
 *
 * NOTE: as written, this function has no `return` statement of its own, so the
 * promise it returns resolves to `undefined`. The `return(resultsArray)` below
 * belongs to the callback passed to `.then()`, and the promise produced by
 * `pdf(dataBuffer).then(...)` is neither awaited nor returned.
 *
 * @param query - substring looked for in each line of the PDF text
 * @param siteName - copied into the `site` field of every match
 * @param templateUrl - URL in which `charToReplace` is replaced before downloading
 * @param charToReplace - placeholder in `templateUrl` that is swapped for the
 *   stringified result of `currentWeekNumber()`
 */
async function getPDF(query, siteName, templateUrl, charToReplace) {
  const currentWeek = currentWeekNumber().toString();
  await downloadPDF(templateUrl.replace(charToReplace, currentWeek), "temp/pdf.pdf");
  var resultsArray = []
  let dataBuffer = fs.readFileSync("temp/pdf.pdf");
    // This promise chain is started but not awaited: execution continues
    // straight to fs.unlinkSync below without waiting for the callback to run.
    pdf(dataBuffer).then(function(data) {
      // `pdfContent` is assigned without any declaration in this function.
      pdfContent = data.text;
      const splittedArray = pdfContent.split("\n");
      // `map` is used only for its side effect (pushing into resultsArray); its
      // callback returns nothing, so `parsedArray` ends up empty after the
      // filter and is never read.
      const parsedArray = splittedArray.map((item, index) => {
          if(item.includes(query)) {
              // caseId is the text before the first comma on the previous line.
              resultsArray.push({result: item, caseId: splittedArray[index-1].split(',', 1)[0], site: siteName});
          }
        }).filter(value => value);
        // Returns from the `.then()` callback, not from getPDF.
        return(resultsArray);
  });
  fs.unlinkSync("temp/pdf.pdf"); //deletes the downloaded file
}

/**
 * Runs getPDF once per requested site, strictly one after another, and prints
 * whatever each call resolves to.
 *
 * @param query - passed through unchanged to getPDF
 * @param desiredSites - iterable of keys into `sitesList.sites`
 * @returns a promise that resolves (with no value) after the last site is logged
 */
async function getData(query, desiredSites) {
  for (const siteKey of desiredSites) {
    // Pull the three settings getPDF needs from this site's entry.
    const { name, templateUrl, charToReplace } = sitesList.sites[siteKey];
    const result = await getPDF(query, name, templateUrl, charToReplace);
    console.log(result);
  }
}
// Entry point: runs the lookup for sites 'a' and 'b'. The promise returned by
// getData is not awaited here and has no rejection handler attached.
getData("test", ['a', 'b']);

In the bottom function (getData), the console.log prints undefined. I'm guessing this has something to do with promises. Any ideas? Thanks a lot!

Leomania
  • 79
  • 1
  • 4
  • Where is `sitesList` coming from? – radarbob Sep 06 '21 at 18:20
  • 1
    Inside `getPDF` you make a call to a function called `pdf`. This appears to be an async function (specifically a promise) because you call `then`. This function is not awaited at all. Also `getPDF` doesn't appear to return anything, so `result` will appear `undefined`. – Wing Sep 06 '21 at 18:24
  • 3
    For folks voting to close as a duplicate, it's not really a duplicate. Async loop isn't the problem – OP has implemented this bit perfectly fine. The problem appears deeper in the stack. I'd probably still vote to close but as "not reproducible or caused by a typo" because the resolution isn't likely to help future readers. – Wing Sep 06 '21 at 18:29
  • @Wing I do `return(resultsArray);` near the end of getPDF. – Leomania Sep 06 '21 at 18:44
  • That return is inside the function passed to `then`. It won't be returned from `getPDF`. See [How to return the response from an asynchronous call?](https://stackoverflow.com/questions/14220321/how-to-return-the-response-from-an-asynchronous-call) or [Return from a promise then()](https://stackoverflow.com/questions/34094806/return-from-a-promise-then). – Wing Sep 06 '21 at 18:49
  • You never `await` the promise `pdf(dataBuffer).then(…)` – Bergi Sep 06 '21 at 20:24

1 Answer

0

In getPDF, you should chain all of your asynchronous calls with await instead of .then, or vice versa.

You can mix await with .then, but then it is harder to chain the calls in a linear way. People use await precisely because it makes the code read linearly and keeps it easy to maintain.

/**
 * Downloads `pdfURL` and stores the response body at `outputFilename`.
 *
 * @param pdfURL - passed to `request.get` as the `uri` option
 * @param outputFilename - destination path for the downloaded bytes
 * @returns a promise that resolves (with no value) after the file has been written
 */
async function downloadPDF(pdfURL, outputFilename) {
  // NOTE(review): `encoding: null` looks like the `request` way of getting the
  // body back as a Buffer instead of a string — confirm against the library docs.
  const downloaded = await request.get({ uri: pdfURL, encoding: null });

  const progressMessage = `Writing downloaded PDF file to ${outputFilename}...`;
  console.log(progressMessage);

  fs.writeFileSync(outputFilename, downloaded);
}

/**
 * Downloads one site's PDF, extracts its text, and returns every line that
 * contains `query`.
 *
 * @param query - substring to look for in each line of the PDF text
 * @param siteName - copied into the `site` field of every match
 * @param templateUrl - URL in which `charToReplace` is replaced before downloading
 * @param charToReplace - placeholder in `templateUrl` that is swapped for the
 *   stringified result of `currentWeekNumber()`
 * @returns a promise for an array of `{ result, caseId, site }` objects, where
 *   `result` is the matching line and `caseId` is the text before the first
 *   comma on the line directly above it ("" when the match is the first line)
 */
async function getPDF(query, siteName, templateUrl, charToReplace) {
  const tempPath = "temp/pdf.pdf";
  const currentWeek = currentWeekNumber().toString();
  await downloadPDF(templateUrl.replace(charToReplace, currentWeek), tempPath);

  const dataBuffer = fs.readFileSync(tempPath);
  // The bytes are in memory now, so delete the temp file before parsing; this
  // way it is cleaned up even if pdf() rejects.
  fs.unlinkSync(tempPath);

  const data = await pdf(dataBuffer);
  const lines = data.text.split("\n");

  const resultsArray = [];
  // Iterate with the index into the *unfiltered* lines: the case id lives on
  // the line directly above each match, so filtering first would lose it.
  lines.forEach((line, index) => {
    if (!line.includes(query)) {
      return;
    }
    // A match on the very first line has no line above it.
    const previousLine = index > 0 ? lines[index - 1] : "";
    resultsArray.push({
      result: line,
      caseId: previousLine.split(",", 1)[0],
      site: siteName,
    });
  });

  return resultsArray;
}

/**
 * Looks up each requested site in `sitesList.sites`, runs getPDF for it, and
 * logs the result. Sites are processed sequentially: the next getPDF call does
 * not start until the previous one has settled.
 *
 * @param query - passed through unchanged to getPDF
 * @param desiredSites - iterable of keys into `sitesList.sites`
 * @returns a promise that resolves (with no value) after every site is logged
 */
async function getData(query, desiredSites) {
  for (const siteKey of desiredSites) {
    const site = sitesList.sites[siteKey];
    const matches = await getPDF(query, site.name, site.templateUrl, site.charToReplace);
    console.log(matches);
  }
}

// Entry point: run the lookup for sites "a" and "b", print "done" once getData
// resolves, and print the rejection reason if anything in the chain rejects.
getData("test", ["a", "b"])
  .then(() => console.log("done"))
  .catch(console.log);
ikhvjs
  • 5,316
  • 2
  • 13
  • 36