I have been trying to learn something about API so I did some calls to YT API. I have the following code:
# Import Libraries
import requests
import pandas as pd
import time
# KEY
API_KEY = 'MY KEY'
CHANNEL_ID = 'SOME YT CHANNEL'
def get_video_details(video_id):
# Segundo call al API
url_video_stats = 'https://www.googleapis.com/youtube/v3/videos?id='+video_id+'&part=statistics&key='+API_KEY
response_video_stats = requests.get(url_video_stats).json()
# Recoleccion de views, likes, dislikes y comentarios de cada video
view_count = response_video_stats['items'][0]['statistics']['viewCount']
like_count = response_video_stats['items'][0]['statistics']['likeCount']
comment_count = response_video_stats['items'][0]['statistics']['commentCount']
return view_count, like_count, comment_count
def get_videos(df):
# Make API call
pageToken = ''
while 1: # while 1 is the same as while true. It means loop forever. The only way to stop the loop is to use a break statement.
url = 'https://www.googleapis.com/youtube/v3/search?key='+API_KEY+'&channelId='+CHANNEL_ID+'&part=snippet,id&order=date&maxResults=50&'+pageToken
response = requests.get(url).json()
time.sleep(2) # Para evitar que se quede en el loop infinito, esperamos 1 segundo antes de hacer el siguiente llamado.
# Crear el loop para extraer todos los datos de los videos
for video in response['items']:
if video['id']['kind'] == 'youtube#video': # Para asegurarme que el objeto sea un video y no otra cosa
video_id = video['id']['videoId']
video_title = video['snippet']['title']
video_title = str(video_title).replace('&', '')
upload_date = video['snippet']['publishedAt']
upload_date = str(upload_date).split('T')[0]
view_count, like_count, comment_count = get_video_details(video_id)
'''# Guardando los datos en el DataFrame 'df' vacio que creamos antes
df = df.append({
'video_id' : video_id,
'video_title' : video_title,
'upload_date' : upload_date,
'view_count' : view_count,
'like_count' : like_count,
#'dislike_count' : dislike_count,
'comment_count' : comment_count
},
ignore_index=True
)'''
# Guardamos los datos en el df usando pd.concat:
df = pd.concat([df, pd.DataFrame({
'video_id' : video_id,
'video_title' : video_title,
'upload_date' : upload_date,
'view_count' : view_count,
'like_count' : like_count,
'comment_count' : comment_count
},
index=[0]
)])
try:
if response['nextPageToken'] != None: # Si hay una pagina siguiente, entonces seguimos con el loop
pageToken = "pageToken=" + response['nextPageToken']
except:
break
return df
# Main df
df = pd.DataFrame(columns=['video_id', 'video_title', 'upload_date', 'view_count', 'like_count', 'comment_count'])
df = get_videos(df)
And if I try, for example, this channel: UCaY_-ksFSQtTGk0y1HA_3YQ I only get 322 videos in the DF, nevertheless the channel has +1000 videos
{'kind': 'youtube#searchListResponse',
'etag': 'JkC3s6SSNCamNNEIDoC_IcYw9dY',
'nextPageToken': 'CDIQAA',
'regionCode': 'AR',
'pageInfo': {'totalResults': 1063, 'resultsPerPage': 50},
'items': [{ ... }]}
I was doing some calls from others channels with less videos and I get all of them, but if the channel has, I don't know, 500+ videos, the built functions don't work very well like you can see...
Any idea? What am I doing wrong?
Thanks all!