-3

I am trying to create a list of all the URLs of the requests that are listed below.

enter image description here

I found a similar question without the exact results I’m looking for here: How to get list of network requests done by HTML. I want to create a function that listens in the background and continues to add new requests to the list. The closest answer I’ve found is not a complete list.

hackr
  • 79
  • 7
ISAAC
  • 149
  • 9
  • Could you specify if you intend this to collect metrics on your users' machines, or is this for your development purposes? If for development purposes, you could use a proxy like Charles, or you could intercept the requests by using Puppeteer. The answers to this question depend on your use-case. – 000 Jul 31 '19 at 20:10
  • @JoeFrambach It is for development purposes for my website. I need to get all the URLs to see whether any URL sends more than X requests. – ISAAC Jul 31 '19 at 20:17

2 Answers2

0

To do that, you can use XMLHttpRequest to make a request to the URL you want and then handle the response according to its status code.

    // Fetch a page asynchronously and report the outcome on the console.
    const req = new XMLHttpRequest();
    req.open('GET', 'http://www.mozilla.org/', true);
    req.onreadystatechange = function () {
      // Only act once the request has finished (readyState 4 === XMLHttpRequest.DONE).
      if (req.readyState !== XMLHttpRequest.DONE) {
        return;
      }
      // console.* replaces dump(), which is a non-standard Mozilla function and
      // is not defined in ordinary web pages.
      if (req.status === 200) {
        console.log(req.responseText);
      } else {
        console.error("Error loading page\n");
      }
    };
    req.send(null);

Source using xmlhttp request

Following your comment above, here is how you can crawl a website and gather all of its internal and external links by parsing the HTML.

Obtain the URL to start from and pass it to the getHtml() function. I suggest trying it with this Index.html:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <title>Document</title>
    <!-- main.js uses jQuery ($), so jQuery is loaded first. -->
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js"></script>
    <script src="main.js"></script>
    <style>
        /* main.js writes its status notes into #div1 as <p> elements;
           removing their margins keeps the log compact. The shorthand
           already covers the bottom / block-end margin. */
        p {
            margin: 0;
        }
    </style>
</head>
<body>

    <!-- Address of the page to inspect. The caption is a real <label> so it is
         associated with the input for assistive technology and click-to-focus. -->
    <input type="text" id="link"><label for="link">Ingrese enlace</label>
    <button type="button" onclick="getHtml(document.getElementById('link').value)">Get External Content</button>

    <!-- Output panel that main.js fills with progress messages and links. -->
    <div id="div1"></div>
</body>
</html>

main.js

/**
 * Walks the anchors found in a fetched page, writes each URL together with a
 * classification note to the #div1 log panel, and passes the (possibly
 * rewritten) URL on to getHtmlChild().
 *
 * Reads the shared `initiator` variable and `window.location.href`; both are
 * defined outside this function.
 *
 * @param {ArrayLike<{href: string}>} links - <link> elements of the page. Kept
 *   for the existing call signature; not used by this function.
 * @param {ArrayLike<{href: string}>} as - <a> elements whose `href` is inspected.
 */
function getHref(links, as){
    // The URLs come from a downloaded page, so they are escaped before being
    // placed into markup; otherwise a crafted href (e.g. a mailto: link
    // containing tags) would be interpreted as HTML in the log panel.
    const escapeHtml = (text) => String(text)
        .replace(/&/g, "&amp;")
        .replace(/</g, "&lt;")
        .replace(/>/g, "&gt;")
        .replace(/"/g, "&quot;")
        .replace(/'/g, "&#39;");

    // Append only the new fragment instead of re-serialising and re-parsing
    // the whole panel for every message.
    const log = (fragment) => {
        $("#div1").append(fragment);
    };

    for (const anchor of Array.from(as)) {
        let element = anchor.href;
        log(`<br>> ${escapeHtml(element)}`);

        if (element.includes(window.location.href)) {
            log(` - <p style='color:purple;'> current path DETECTED action: discarting... </p>`);
            // Sentinel string: none of the checks below match it.
            element = "0";
        }

        if (element.includes("file:///C:/")) {
            // A replacer function keeps "$" sequences in `initiator` literal.
            element = element.replace("file:///C:/", () => initiator);
            log(` -   ${escapeHtml(element)}`);
        } else if (element.includes(initiator)) {
            log(` - ${escapeHtml(element)}`);
        } else if (element.includes("file://")) {
            // NOTE(review): the message says the link is discarded, yet the
            // rewritten URL is still passed to getHtmlChild() below - confirm intent.
            element = element.replace("file://", () => initiator);
            log(` - <p style='color:red;'> External domain DETECTED action: discarting...</p>`);
        } else if (element.includes("mailto:")) {
            // NOTE(review): numeric 0 here versus the string "0" above; kept
            // as-is because getHtmlChild() is defined elsewhere.
            element = 0;
            log(` - <p style='color:cyan;'> External action DETECTED action: discarting... </p>`);
        } else if (element.includes("tel:")) {
            element = 0;
            log(` - <p style='color:cyan;'> External action DETECTED action: discarting...</p>`);
        }

        getHtmlChild(element);
    }
}
/**
 * Parses downloaded markup, reports in the #div1 log panel how many <link>
 * and <a> tags it contains, and passes both collections to getHref().
 *
 * @param {string} htmls - Raw HTML text of the fetched page.
 */
function parseHtml(htmls)
{
    // Append the new message rather than re-serialising the whole panel.
    $("#div1").append("<br>>Parsing...");

    // The markup comes from another site. Assigning it to innerHTML of an
    // element created by the live document can start resource loads and fire
    // inline handlers (e.g. <img onerror>) even while the element is detached.
    // DOMParser builds a separate document that is not rendered or scripted.
    // NOTE(review): a <base> tag in the fetched page now influences how
    // relative hrefs resolve inside the parsed document - confirm this is acceptable.
    const parsed = new DOMParser().parseFromString(htmls, "text/html");
    console.log(parsed);

    const link_tag = parsed.getElementsByTagName('link');
    const a_tag = parsed.getElementsByTagName('a');
    $("#div1").append(`<br>>found ${link_tag.length} <b><u>link</u></b> tags and ${a_tag.length} <b><u>a</u></b> tags., BAD TAGS ARE ELIMINATED! `);
    getHref(link_tag, a_tag);
}
/**
 * Downloads the page at `urls` and, on success, passes its markup to parseHtml().
 *
 * Side effects: stores the normalised URL in the shared `initiator` variable,
 * increments the shared `proceses` counter, and writes status text to #div1.
 * NOTE(review): `initiator` and `proceses` are not declared in this function;
 * they are assumed to be declared elsewhere in the file - confirm.
 *
 * @param {string} urls - Address of the page to fetch; a trailing "/" is
 *   added when it is missing.
 */
function getHtml(urls){
    console.log("INICIADOR:" + urls);

    // Work on a local copy instead of reassigning the parameter.
    const target = urls.endsWith("/") ? urls : `${urls}/`;

    initiator = target;
    proceses++;
    $.ajax({
        url: target,
        method: "GET",
        crossDomain: true,
        success: function(data) {
            $("#div1").html(">Content is succefull gathered....");
            parseHtml(data);
        },
        // Without an error callback a failed request left the panel untouched
        // and gave no feedback.
        error: function(jqXHR, textStatus, errorThrown) {
            const detail = errorThrown ? `: ${errorThrown}` : "";
            // .text() keeps server-provided status text from being parsed as HTML.
            $("#div1").text(`>Request failed (${textStatus}${detail})`);
        }
    });
}

It will display all the links :)

Carlos Montiel
  • 301
  • 1
  • 4
  • 16
0

Use Puppeteer in non-headless mode (headed? headful?), using the interception to log the network requests. See the Puppeteer documentation at https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#pagesetrequestinterceptionvalue.

Copied and modified:

const puppeteer = require('puppeteer');

// Opens https://example.com in a Puppeteer-controlled browser and prints the
// URL of every network request the page issues while it loads.
// NOTE(review): launch() is called without options; if a visible browser
// window is wanted, pass the appropriate `headless` option - confirm.
puppeteer.launch().then(async browser => {
  try {
    const page = await browser.newPage();
    // With interception enabled, each request has to be resumed explicitly,
    // which is why the handler calls continue() after logging.
    await page.setRequestInterception(true);
    page.on('request', interceptedRequest => {
      console.log(interceptedRequest.url());
      interceptedRequest.continue();
    });
    await page.goto('https://example.com');
  } finally {
    // Close the browser even when page creation or navigation fails, so the
    // browser is not left open.
    await browser.close();
  }
}).catch(error => {
  // Report launch/navigation failures instead of leaving the rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});
000
  • 26,951
  • 10
  • 71
  • 101