I have this code and i want to extract github user info with github api and insert all the info into a mysql table but this inserting into mysql is taking so long nearly 30-40 minutes for a single user for data and that also only for info regarding repo i want so want to add info about commit that may take a day please suggest me what should i do to increase the speed of the code and insertion in table the code is big so sorry for that but i want to show the whole code so i could get suggestions regarding overall code.
connection = mysql.connector.connect(host='host',
user='root',
password='pas',
database='db',)
cursor = connection.cursor()
#user name--------------------------------------------------------------------------
username = 'waldekmastykarz'
#-----------------------------------------------------------------------------------
token = os.getenv('GITHUB_TOKEN', '-----------githubtoken---------')
params = {}
headers = {'Authorization': f'token {token}'}
data = requests.get('https://api.github.com/users/' +
username, headers=headers, params=params)
data = data.json()
#initial details ----------------------------------------------------------------
initial_details = {}
initial_details['username'] = username
initial_details['name'] = data['name']
initial_details['email'] = data['email']
initial_details['id'] = data['id']
initial_details['location'] = data['location']
initial_details['followers'] = data['followers']
initial_details['following'] = data['following']
#total details------------------------------------------------------------------
Total_commits_of_user = 0
total_repos = 0
total_stars = 0
#total repos -------------------------------------------------------------------
url = data['repos_url']
page_no = 1
repos_data = []
while (True):
response = requests.get(url, headers=headers, params={"per_page": 100})
response = response.json()
repos_data = repos_data + response
total_repos = len(response)+total_repos
repos_fetched = len(response)
if (repos_fetched == 100):
page_no = page_no + 1
url = data['repos_url'] + '?page=' + str(page_no)
else:
break
print(total_repos)
#get all repos info ---------------------------------------------------------------
all_repo_data = {}
for i, repo in enumerate(repos_data):
all_repo_data[(repo['name'])] = {}
all_repo_data[(repo['name'])]['name'] = (repo['name'])
all_repo_data[(repo['name'])]['id'] = (repo['id'])
all_repo_data[(repo['name'])]['description'] = repo['description']
all_repo_data[(repo['name'])]['created_at'] = repo['created_at']
all_repo_data[(repo['name'])]['updated_at'] = repo['updated_at']
all_repo_data[(repo['name'])]['owner'] = repo['owner']['login']
all_repo_data[(repo['name'])]['stars'] = repo['stargazers_count']
all_repo_data[(repo['name'])]['url'] = repo['url']
all_repo_data[(repo['name'])
]['commits_url'] = repo['commits_url'].split("{")[0]
all_repo_data[(repo['name'])]['languages'] = (repo['url'] + '/languages')
all_repo_data[(repo['name'])]['contributors'] = repo['contributors_url']
#total stars---------------------------------------------------------------------------------------
total_stars = all_repo_data[(repo['name'])]['stars']+total_stars
#corect format for date time ----------------------------------------------------------------------
d1 = datetime.datetime.strptime(
all_repo_data[(repo['name'])]['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
d2 = datetime.datetime.strptime(
all_repo_data[(repo['name'])]['created_at'], "%Y-%m-%dT%H:%M:%SZ")
new_format = "%Y-%m-%d"
d1.strftime(new_format)
d2.strftime(new_format)
d1.date()
d2.date()
all_repo_data[(repo['name'])]['created_at'] = d2
all_repo_data[(repo['name'])]['updated_at'] = d1
#total commits----------------------------------------------------------------------------------------------
total_commit = 0
url = all_repo_data[(repo['name'])]['commits_url']
page_no = 1
while (True):
response = requests.get(url, headers=headers, params={"per_page": 100})
response = response.json()
total_commit = len(response)+total_commit
commit_fetched = len(response)
if (commit_fetched == 100):
page_no = page_no + 1
url = all_repo_data[(repo['name'])]['commits_url'] + \
'?page=' + str(page_no)
else:
break
#total commits -------------------------------------------------------------------------------------
all_repo_data[(repo['name'])]['total_commits'] = total_commit
# commit_by_user ----------------------------------------------------------------------------------------
commit_by_user = 0
url = all_repo_data[(repo['name'])]['commits_url']
page_no = 1
while (True):
response = requests.get(url, headers=headers, params={
"author": username, "per_page": 100})
response = response.json()
commit_by_user = len(response)+commit_by_user
commit_by_user_fetched = len(response)
if (commit_by_user_fetched == 100):
page_no = page_no + 1
url = all_repo_data[(repo['name'])]['commits_url'] + \
'?page=' + str(page_no)
else:
break
#total commits by user---------------------------------------------------------------------------------
all_repo_data[(repo['name'])]['commit_by_user'] = commit_by_user
Total_commits_of_user = commit_by_user+Total_commits_of_user
#contributors----------------------------------------------------------------------------------------------
total_contributors = 0
url = all_repo_data[(repo['name'])]['contributors']
page_no = 1
while (True):
response = requests.get(url, headers=headers, params={"per_page": 100})
response = response.json()
total_contributors = len(response)+total_contributors
contributors_fetched = len(response)
if (contributors_fetched == 100):
page_no = page_no + 1
url = all_repo_data[(repo['name'])]['contributors'] + \
'?page=' + str(page_no)
else:
break
#total contributors---------------------------------------------------------------------------------------
all_repo_data[(repo['name'])]['contributors'] = total_contributors
##commit dates-------------------------------------------------------------------------------------------------
total_commit = 0
commit_data = {}
url = all_repo_data[(repo['name'])]['commits_url']
page_no = 1
while (True):
response = requests.get(url, headers=headers, params={"per_page": 100})
response = response.json()
for commits in response:
d1 = datetime.datetime.strptime(
commits['commit']['author']['date'], "%Y-%m-%dT%H:%M:%SZ")
new_format = "%Y-%m-%d"
d1.strftime(new_format)
d1.date()
# print(all_repo_data[(repo['name'])]['name'],d1 )
commit_fetched = len(response)
if (commit_fetched == 100):
page_no = page_no + 1
url = all_repo_data[(repo['name'])]['commits_url'] + \
'?page=' + str(page_no)
else:
break
sql = "INSERT INTO user_repo (user_id, github_id, repo_id, user_connection, repo_name, contributors, total_commits, commit_by_user, created_date, last_commit) VALUES ( %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
cursor.execute(sql, (
initial_details['id'],
username,
all_repo_data[(repo['name'])]['id'],
all_repo_data[(repo['name'])]['owner'],
all_repo_data[(repo['name'])]['name'],
all_repo_data[(repo['name'])]['contributors'],
all_repo_data[(repo['name'])]['total_commits'],
all_repo_data[(repo['name'])]['commit_by_user'],
all_repo_data[(repo['name'])]['created_at'],
all_repo_data[(repo['name'])]['updated_at'],
))
connection.commit()