1

I have a recursive JavaScript function that gets the links from one Wikipedia page, follows them, and then gets all of the links on those pages (repeating a specified number of times).

It calls itself an unknown number of times to construct an object of a known depth. When it completes, I want to output the object. Currently the object immediately outputs, and is empty, meaning the function obviously isn't waiting for all the recursive calls to complete.

As you can see, I have attempted to use callbacks, but I assume I have used them incorrectly. What am I doing wrong, and how should I be doing it? I presume there are a few other things wrong that I haven't spotted too; I'm relatively new to JavaScript.

// Entry point: once the DOM is ready, start crawling from START_PAGE.
// NOTE(review): `pageLinks` and `links` are not declared in this snippet;
// presumably they are file-level variables declared elsewhere - confirm.
$(document).ready(function ()
{
  pageLinks[START_PAGE] = {};
  //Get initial pages
  //printLinks is handed to getLinks as its callback; `links` keeps the returned object
  links = getLinks(START_PAGE, 0, printLinks);
});

/**
 * Logs the current value of the outer-scope `links` variable to the console.
 * Takes no parameters and returns nothing.
 */
function printLinks() {
  var collected = links;
  console.log(collected);
}

function getLinks(currentPage, level, callback)
{
  visitedPages.push(currentPage)
  var pageLinks = {}
  var data = $.getJSON(URL_BEGIN + currentPage + URL_END, function(data)
  {
    var pages = data.query.pages;
    for(var page in pages)
    {
      pageContentObj = pages[page].revisions[0];
      for(var key in pageContentObj) if(pageContentObj[key].length > 100)
      {
        var pageContent = pageContentObj[key];
        //Get links
        hyperlinks = getFromBetween.get(pageContent,"[[","]]");
        for(var link in hyperlinks)
        {
          link = hyperlinks[link].split("|")[0]; //Remove friendly name
          link = link.replaceAll(" ", "%20");

          //Add to pagelist object
          prefix = link.split(":")[0];
          if(prefix != "Category" && prefix != "File" && prefix != "wikipedia")
            if(level < ITERATIONS && !visitedPages.includes(arguments, link))
            {
              console.log(level + ": " + link)
              pageLinks[link] = getLinks(link, level+1, callback); //===Recursive call===
            }
        }
      }
    }
  });
  if(level == 0 && callback) callback();
  return pageLinks;
}

Any help is appreciated, thanks in advance.

**EDIT:** Link: https://github.com/JakeStanger/Wikipedia-Mapper/blob/master/init.js#L53

Jake Stanger
  • 449
  • 1
  • 8
  • 24
  • This has the very real possibility of sucking wikipedia down to your server. Are you sure you want to do this to your server and allowed to do this by Wikipedia? – mplungjan Feb 05 '17 at 17:07
  • 1
    Substitute `Promise.all()`, `Array.prototype.map()` for `for..in` loops. See also [multiple, sequential fetch() Promise](http://stackoverflow.com/questions/38034574/multiple-sequential-fetch-promise) – guest271314 Feb 05 '17 at 17:10
  • I'm only running it on my home pc as a bit of an experiment, so max 38Mb/s - I've tested it a few times and Wikipedia seems to hold up fine... – Jake Stanger Feb 05 '17 at 17:10
  • So you are planning to print it out? https://img.labnol.org/di/wikipedia-print.jpg – mplungjan Feb 05 '17 at 17:12
  • @mplungjan https://what-if.xkcd.com/59/ – Jonas Wilms Feb 05 '17 at 17:16
  • @Jonasw: https://en.wikipedia.org/wiki/Wikipedia:Size_in_volumes – mplungjan Feb 05 '17 at 17:19

1 Answer

2

The recursive call needs to be like this:

//counter = number of things still outstanding for this page.
//It starts at 1 (this page's own response) so that a page with no links to
//follow still reaches 0 and calls back instead of stalling the whole chain.
var counter = 1;
//the big for loop
counter++; //one more child request in flight
getLinks(link, level + 1, function(res) {
    for (var key in res) { //with an array it would be concat...
        pageLinks[key] = res[key];
    }
    done();
});
//after the big for loop
done(); //this page's own response has now been processed

//Marks one outstanding item as finished
function done() {
    counter--;
    if (counter == 0 && callback) callback(pageLinks); //callback if all callbacks called
}

Also remove this weird code:

if(level == 0 && callback) callback(); //this sits outside the $.getJSON handler, so it runs right after the request is started, not when responses arrive

Now you can do:

getLinks(START_PAGE, 0, console.log); //console.log is the callback here, so it is called with the finished pageLinks object
Ömer Erden
  • 7,680
  • 5
  • 36
  • 45
Jonas Wilms
  • 132,000
  • 20
  • 149
  • 151