I am using pydriller to extract metrics from certain github repos. while implementing a branch specific extraction function as you can see in the following code. the terminal returns the following errors: Problem reading repository at repoX
from pydriller import RepositoryMining
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
end_date = datetime.now()
start_date = end_date - timedelta(days=10) - relativedelta(years=0)
# Replace this path with your own repository of interest
paths = ['repoX']
branch_name = "main" # Replace with the desired branch name
commits = []
for path in paths:
try:
for commit in RepositoryMining(path, credentials=("username", "access_token"), since=start_date, to=end_date, only_in_branch=branch_name).traverse_commits():
hash = commit.hash
for f in commit.modified_files:
record = {
'hash': hash,
'message': commit.msg,
'author_name': commit.author.name,
'author_email': commit.author.email,
'author_date': commit.author_date,
'author_tz': commit.author_timezone,
'committer_name': commit.committer.name,
'committer_email': commit.committer.email,
'committer_date': commit.committer_date,
'committer_tz': commit.committer_timezone,
'in_main': commit.in_main_branch,
'is_merge': commit.merge,
'num_deletes': commit.deletions,
'num_inserts': commit.insertions,
'net_lines': commit.insertions - commit.deletions,
'num_files': commit.files,
'branches': ', '.join(commit.branches),
'filename': f.filename,
'old_path': f.old_path,
'new_path': f.new_path,
'project_name': commit.project_name,
'project_path': commit.project_path,
'parents': ', '.join(commit.parents),
}
# Omitted: modified_files (list), project_path, project_name
commits.append(record)
except Exception:
print('Problem reading repository at ' + path)
continue
# Save it to FileCommits.csv
df_file_commits = pd.DataFrame(commits)
df_file_commits.to_csv('FileCommits.csv')
I tried using my name and generating a git personal token sadly the problem persists. removing the branch specific function extracts the necessary functions. I also tried using Repository instead of RepositoryMining. P.S: the data extracted is going to imported into elasticSearch.