0

I am building a function that returns an array of links that are present on a webpage. Later on, this function will be used to further scrape information on the website. The function (cf. infra) works fine, but I have no clue how to access the final hrefArray. I have tried adding multiple return statements (within the for loop and the different then()'s but without success.


const {Builder, By, Key, WebDriver, elementLocated, Browser, tagName} = require('selenium-webdriver');


var driver = new Builder().forBrowser('chrome').build()

function findURLS(webpage){
    console.log('Starting to look for URLS on webpage: ' + webpage)
    var result = 
    driver.get(webpage)
    .then(res => {
        if(driver.getCurrentUrl() === webpage){
            return true;
        }
    }).then(res => {
        console.log('Webpage opened')
    }).then(res => {
        var links = driver.findElements(By.tagName('a'))
        return links;
       
    }).then(links => {
        var hrefArray = []

        function findhref(array, input){
            var href = driver.wait(array[input].getAttribute('href'))
            return href;
        }

    
        for(i=0; i<links.length; i++){
            findhref(links, i)
            .then(href => {
                hrefArray.push(href)
                if(hrefArray.includes(href)){
                    console.log("Href " + href + " succesfully pushed to hrefArray")
                    return hrefArray;
                }
                else{
                    console.log("something went wrong while pushing the link to the hrefArray")
                }
            })
            .then(hrefArray => {
                if(hrefArray.length == links.length){
                    console.log("hrefArray constructed successfully: " + hrefArray)
                    return hrefArray //this is the final hrefArray that need to be returned after calling the findURLS(webpage) function
                }
            })
        } //end of for statement
    })
    .catch(error => {
        console.log(error)
    })
}



function testexample(){
    findURLS('https://dbrc.be/rechtspraak');
}

testexample()

``
frankmurphy
  • 194
  • 1
  • 3
  • 13
  • I would suggest splitting this into 2 steps - finding the links, then processing them, using async/await to avoid deeply nested promise handling. – match Jan 04 '22 at 16:09
  • Thank you for the response. In the end (after nearly throwing my computer out the window :p), this solved the question: ```function getMessage() { return new Promise(function(resolve, reject) { setTimeout(function() { resolve('Hello asynchronous world!'); }, 0); }); } getMessage().then(function(message) { console.log(message); });``` I added resolve at the hrefArray and then used that – frankmurphy Jan 04 '22 at 18:33

0 Answers0