-2

My intent is to write a function that takes an html file converted to a string and returns an array populated with all of the links from the html file.

The following code returns an empty array:

const fs = require("fs");

/**
 * Scans the HTML file at `filePath` for `a href="..."` attributes and collects
 * each quoted value as a `{ URL: ... }` object.
 *
 * NOTE(review): `fs.readFile` is asynchronous. The callback below runs only
 * after this function has already executed `return URLArray`, so the caller
 * receives the array while it is still empty; the entries are pushed into
 * that same array later, once the read has completed.
 *
 * @param {string} filePath - Path of the HTML file to read.
 * @returns {Array<{URL: string}>} The array that the read callback fills in
 *     later (still empty at the moment it is returned).
 */
function findURLs(filePath) {
    let URLArray = [];
    fs.readFile(filePath, 'utf-8', (err, data) => {
        // Thrown from inside the async callback, so the caller of findURLs
        // cannot catch it with try/catch.
        if (err) throw err
        // `index` is the position to resume searching from; -1 ends the loop.
        let index = 0
        while (index !== -1) {
            let positionOfUrlMarker = data.indexOf("a href=", index);
            if (positionOfUrlMarker === -1) {
                // No further links: stop scanning.
                index = -1;
            } else {
                // The URL is taken to be the text between the next two
                // double quotes that follow the marker.
                let firstIndexOfUrl = data.indexOf("\"", positionOfUrlMarker);
                let lastIndexOfUrl = data.indexOf("\"", firstIndexOfUrl + 1);
                let foundUrl = data.slice(firstIndexOfUrl + 1, lastIndexOfUrl);
                let URLObject = { "URL": foundUrl };
                URLArray.push(URLObject);
                // Resume after this URL; if no closing quote was found this
                // is -1, which also ends the loop.
                index = lastIndexOfUrl;
            }
        }
    })
    // Executes before the readFile callback above has run.
    return URLArray
}

I have also tried replacing "index = -1" with the return statement. When I do this, the function returns undefined.

I tend to run into problems like this when I code and I assume it's because there's some important rule that I don't understand. Can anybody identify what I'm doing wrong?

  • In a browser? See https://stackoverflow.com/questions/10585029/parse-an-html-string-with-js/10585079#10585079 – danh Dec 04 '20 at 21:54
  • @danh - The code in the question is clearly designed to run on Node.js and does two major things that browsers don’t support (require and file system access). – Quentin Dec 04 '20 at 21:56

1 Answer

0

This should work

const fs = require("fs");
const path = require("path");

/**
 * Extracts the value of every quoted `a href=` attribute from an HTML string.
 *
 * Only values wrapped in double or single quotes are collected. An unquoted
 * value is skipped, and an unterminated quote ends the scan, so the search
 * position always moves forward and the loop is guaranteed to terminate.
 *
 * @param {string} html - HTML source text to scan.
 * @returns {Array<{URL: string}>} One object per link, in document order.
 */
function extractURLsFromHtml(html) {
    const marker = "a href=";
    const URLArray = [];
    let index = 0;
    while (true) {
        const positionOfUrlMarker = html.indexOf(marker, index);
        if (positionOfUrlMarker === -1) {
            break;
        }
        // Tolerate whitespace between `=` and the opening quote.
        let firstIndexOfUrl = positionOfUrlMarker + marker.length;
        while (firstIndexOfUrl < html.length && /\s/.test(html[firstIndexOfUrl])) {
            firstIndexOfUrl += 1;
        }
        const quote = html[firstIndexOfUrl];
        if (quote !== "\"" && quote !== "'") {
            // Unquoted value: skip it instead of grabbing the next quoted
            // attribute of some unrelated tag.
            index = positionOfUrlMarker + marker.length;
            continue;
        }
        const lastIndexOfUrl = html.indexOf(quote, firstIndexOfUrl + 1);
        if (lastIndexOfUrl === -1) {
            // Unterminated quote: nothing trustworthy is left to extract.
            break;
        }
        URLArray.push({ "URL": html.slice(firstIndexOfUrl + 1, lastIndexOfUrl) });
        index = lastIndexOfUrl + 1;
    }
    return URLArray;
}

/**
 * Reads an HTML file and passes every link found in it to `callback`.
 *
 * `fs.readFile` is asynchronous, so the result cannot be returned directly;
 * it is delivered through `callback` once the file has been read and scanned.
 *
 * @param {string} filePath - Path of the HTML file to read.
 * @param {function(Array<{URL: string}>): void} callback - Called exactly once
 *     with the extracted links.
 * @throws {Error} A read error is rethrown from inside the readFile callback;
 *     it surfaces as an uncaught exception, not to the caller of findURLs.
 */
function findURLs(filePath, callback) {
    fs.readFile(filePath, 'utf-8', (err, data) => {
        if (err) throw err;
        callback(extractURLsFromHtml(data));
    });
}

// Wrap the callback-style findURLs in a promise and print the links once the
// file has been read and scanned.
new Promise((resolve) => findURLs(path.join(__dirname, 'test.html'), resolve))
    .then((links) => console.log(links, 'data'));
coder fire
  • 993
  • 2
  • 11
  • 24
  • Good advice to use a promise, but fs.promises.readFile() already returns a promise (fs.readFile() itself requires a callback). Just have the op function `return fs.promises.readFile(filePath, 'utf-8').then(...)` – danh Dec 04 '20 at 22:31