The page is loaded dynamically, and `requests` doesn't support dynamically loaded pages.
However, the data is available in JSON format inside the page source, so you can use the `re` and `json` modules to extract the correct data.
For example, to get the "view count":
import re
import json
import requests
from bs4 import BeautifulSoup
url = "https://www.youtube.com/watch?v=1OfK8UmLMl0&ab_channel=HitraNtheUnnecessaryProgrammer"
soup = BeautifulSoup(requests.get(url).text, "html.parser")

# We locate the JSON data using a regular-expression pattern.
# re.search() only accepts str/bytes, so the BeautifulSoup object must be
# converted with str() first; passing `soup` directly raises TypeError.
match = re.search(r"var ytInitialData = ({.*?});", str(soup))
if match is None:
    # Fail with a clear message instead of an AttributeError on `.group`.
    raise ValueError("Could not find 'ytInitialData' in the page source")

# Group 1 is the object literal assigned to `ytInitialData`.
data = json.loads(match.group(1))

print(
    data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][0][
        "videoPrimaryInfoRenderer"
    ]["viewCount"]["videoViewCountRenderer"]["viewCount"]["simpleText"]
)
Output:
124 views
The variable `data` contains all the data as a Python dictionary (`dict`). To print all of it, you can use:
# Serialize the whole dictionary with 4-space indentation so the nesting is readable.
pretty = json.dumps(data, indent=4)
print(pretty)
Output (truncated):
{
"responseContext": {
"serviceTrackingParams": [
{
"service": "CSI",
"params": [
{
"key": "c",
"value": "WEB"
},
{
"key": "cver",
"value": "2.20210701.07.00"
},
{
"key": "yt_li",
"value": "0"
},
{
"key": "GetWatchNext_rid",
"value": "0x1d62a299beac9e1f"
}
]
},
{
"service": "GFEEDBACK",
"params": [
{
"key": "logged_in",
"value": "0"
},
{
"key": "e",
"value": "24037443,24058293,24058128,24003103,24042870,23882685,24023960,23944779,24027649,24046896,24059898,24049577,23983296,23966208,24056265,23891346,1714258,24049575,24045412,24003105,23999405,24051884,23891344,23986022,24049573,24056839,24053866,24058240,23744176,23998056,24010336,24037586,23934970,23974595,23735348,23857950,24036947,24051353,24038425,23990875,24052245,24063702,24058380,23983813,24058812,24026834,23996830,23946420,24001373,24049820,24030040,24062848,23968386,24027689,24004644,23804281,24049569,23973490,24044110,23884386,24012512,24044124,24059521,23918597,24007246,24049567,24022729,24037794"
}
]
},
{
"service": "GUIDED_HELP",
"params": [
{
"key": "logged_in",
"value": "0"
}
]
},
{
"service": "ECATCHER",
"params": [
{
"key": "client.version",
"value": "2.20210701"
},
{
"key": "client.name",
"value": "WEB"
}
]
}
],
"mainAppWebResponseContext": {
"loggedOut": true
},
"webResponseContextExtensionData": {
"ytConfigData": {
"visitorData": "CgtoanprT1pPbmtWTSjYk46HBg%3D%3D",
"rootVisualElementType": 3832
},