0

I'm working on a Chrome extension that should run a script on every YouTube watch page (e.g., https://www.youtube.com/watch?v=YisbVr69r7U).

In that script I want to get the itag's of the video (which I can get from a script that is in every youtube video page just by parsing "url_encoded_fmt_stream_map" property which is in yt.config)

The problem is that I can't find that property by parsing the (document.body.innerHTML) of some pages.

here's my manifest.json:

{
  "manifest_version": 2,

  "name"            : "Test Extension",
  "version"         : "0.0",

  "background": {
    "scripts": ["background.js"]
  },

  "permissions": [
    "https://www.youtube.com/*", "tabs", "webNavigation"
  ]
}

I know that YouTube uses transitions between pages (for example, if you click on a video to watch it, a red bar appears at the top of the page and then the video page appears). I use the webNavigation onHistoryStateUpdated event to execute a script in the page after the transition ends.

background.js:

// Unanchored pattern that recognises URLs containing
// "https://www.youtube.com/watch?v=".
const r = /https:\/\/www\.youtube\.com\/watch\?v=(.*?)(&.*)?/;

// Each time a tab reports a history-state update, inject script.js into that
// tab, but only when the reported URL is a YouTube watch URL.
chrome.webNavigation.onHistoryStateUpdated.addListener(function (details) {
    if (!r.test(details.url)) {
        return;
    }
    chrome.tabs.executeScript(details.tabId, { file: "script.js" });
});

and script.js:

/**
 * Pulls the value of the "url_encoded_fmt_stream_map" key out of page markup.
 *
 * @param {string} bodyHTML - Markup to search.
 * @returns {string} The text between the double quotes that follow the key
 *     (everything up to the next double quote).
 * @throws {TypeError} When the key does not occur in bodyHTML, because the
 *     failed match yields null.
 */
function getURLMap(bodyHTML) {
    return bodyHTML.match(/"url_encoded_fmt_stream_map":"(.*?)"/)[1];
}

/**
 * Collects the itag value of every stream description that carries one.
 *
 * An itag is the text between "itag=" and the next literal backslash-u
 * sequence in the entry.
 *
 * @param {string[]} fmts_info - Stream descriptions, one per array element.
 * @returns {string[]} The captured itags, in input order. Entries without an
 *     itag are skipped instead of raising a TypeError.
 */
function getTags(fmts_info) {
    // Declared locally: assigning to an undeclared name would create a global
    // in sloppy mode and throw a ReferenceError in strict mode.
    const itagPattern = /itag=(.*?)\\u/;
    const tags = [];
    for (const entry of fmts_info) {
        const found = entry.match(itagPattern);
        if (found) {
            tags.push(found[1]);
        }
    }
    return tags;
}

// Entry point of script.js: read the stream map out of the current page
// markup, split it on commas, and log the itag found in each piece.
console.log(getTags(getURLMap(document.body.innerHTML).split(',')));

The extension works well when I go to a YouTube watch page directly (opening a new tab in Chrome and going straight to, say, https://www.youtube.com/watch?v=YisbVr69r7U): it shows the itags of that video correctly in the console. The problem comes when I reach a YouTube watch page through a transition (for example, from the YouTube index page to a video page by clicking on a video); in this case I get this error in the console:

Uncaught TypeError: Cannot read property '1' of null  script.js:4

When I let my script.js print document.body to the console, I can't find "url_encoded_fmt_stream_map" there.

It seems like the problem is with how I deal with the transitions in the page.

I searched a lot to solve my problem but nothing worked for me.

I tried using content scripts, but it seems that content scripts are injected when the page is loaded, not when a transition occurs.

I want to get the actual HTML of the page, that has the itag's in it!

EDIT:

This is not a duplicate of this question.

Tried this manifest.json:

{
  "manifest_version": 2,

  "name"            : "Test Extension",
  "version"         : "0.0",

  "content_scripts": [{
      "matches": [ "*://*.youtube.com/*" ],
      "js": [ "script.js" ],
      "run_at": "document_start"
  }]
}

script.js:

// Run process() both when the document reports "spfdone" and when it reports
// "DOMContentLoaded".
["spfdone", "DOMContentLoaded"].forEach(function (eventName) {
    document.addEventListener(eventName, process);
});

/**
 * Pulls the value of the "url_encoded_fmt_stream_map" key out of page markup.
 *
 * @param {string} bodyHTML - Markup to search.
 * @returns {string} The text between the double quotes that follow the key
 *     (everything up to the next double quote).
 * @throws {TypeError} When the key does not occur in bodyHTML, because the
 *     failed match yields null.
 */
function getURLMap(bodyHTML) {
    return bodyHTML.match(/"url_encoded_fmt_stream_map":"(.*?)"/)[1];
}

/**
 * Collects the itag value of every stream description that carries one.
 *
 * An itag is the text between "itag=" and the next literal backslash-u
 * sequence in the entry.
 *
 * @param {string[]} fmts_info - Stream descriptions, one per array element.
 * @returns {string[]} The captured itags, in input order. Entries without an
 *     itag are skipped instead of raising a TypeError.
 */
function getTags(fmts_info) {
    // Declared locally: assigning to an undeclared name would create a global
    // in sloppy mode and throw a ReferenceError in strict mode.
    const itagPattern = /itag=(.*?)\\u/;
    const tags = [];
    for (const entry of fmts_info) {
        const found = entry.match(itagPattern);
        if (found) {
            tags.push(found[1]);
        }
    }
    return tags;
}

/**
 * Event handler: when the current path is exactly "/watch", reads the stream
 * map from the page markup and logs the itags it contains. On any other path
 * it does nothing.
 */
function process() {
    if (location.pathname === "/watch") {
        var streamEntries = getURLMap(document.body.innerHTML).split(',');
        console.log(getTags(streamEntries));
    }
}

but the problem is not solved!

Community
  • 1
  • 1
Ammar
  • 770
  • 4
  • 11
  • 1
    I can detect page history state changes by using the onHistoryStateUpdated event in my background.js – Ammar Jul 21 '16 at 14:52
  • I tried using your code there and let the process function do what I did in my script.js, but the same problem occurred! – Ammar Jul 21 '16 at 15:09

1 Answers1

1

If you debug your script you will see that url_encoded_fmt_stream_map isn't added anywhere in the document after in-site navigation. Hacking the site JS shows that ytplayer.config variable is updated directly in such cases.

We'll have to inject our script into the page itself.

Declare a content script that runs on all of youtube in manifest.json:

"content_scripts": [{
  "matches": [ "*://*.youtube.com/*" ],
  "js": [ "content.js" ],
  "run_at": "document_start"
}]

content.js:

/**
 * Payload whose body text is extracted with getFunctionText() and inserted
 * into the document as a <script> element, so it must be self-contained and
 * must not rely on anything from the content script's scope.
 *
 * Registers `process` for the "spfdone" and "DOMContentLoaded" events on
 * document. On URLs containing "watch?", `process` logs the itags found in
 * ytplayer.config.args.url_encoded_fmt_stream_map.
 */
function injectedCode() {
    document.addEventListener("spfdone", process);
    document.addEventListener("DOMContentLoaded", process);

    function process() {
        /**
         * Returns the numeric itag of every entry that has one, in order.
         */
        function getTags(fmts_info) {
            // Kept local: an undeclared `r` would become a global variable of
            // the page this code is injected into.
            var itagPattern = /itag=(\d+)/;
            var tags = [];
            for (var i = 0; i < fmts_info.length; i++) {
                var matches = fmts_info[i].match(itagPattern);
                if (matches) {
                    tags.push(matches[1]);
                }
            }
            return tags;
        }

        if (location.href.indexOf('watch?') < 0) {
            return;
        }

        // The handler can fire when the player config, or this particular
        // key, is not available; bail out instead of throwing a TypeError.
        var args = typeof ytplayer !== 'undefined' && ytplayer &&
            ytplayer.config && ytplayer.config.args;
        var streamMap = args && args.url_encoded_fmt_stream_map;
        if (typeof streamMap !== 'string') {
            return;
        }

        console.log(getTags(streamMap.split(',')));
    }
}

/**
 * Returns the source text of a function starting at its first "{" and running
 * through the final "}" of its string form.
 *
 * @param {Function} f - Function whose source text is wanted.
 * @returns {string} The brace-delimited text, braces included.
 * @throws {TypeError} When the string form of f does not end with a
 *     brace-delimited span, because the failed match yields null.
 */
function getFunctionText(f) {
    const source = f.toString();
    const bracedSpan = source.match(/\{[\s\S]*\}$/);
    return bracedSpan[0];
}

// Append an empty <script> element to the document root, then fill it with
// the body text of injectedCode.
(function injectIntoPage() {
    const scriptElement = document.createElement("script");
    document.documentElement.appendChild(scriptElement);
    scriptElement.text = getFunctionText(injectedCode);
})();

To pass the results back to content script use custom events, or externally_connectable to send data directly to extension's background page script.

Community
  • 1
  • 1
wOxxOm
  • 65,848
  • 11
  • 132
  • 136