I made myself a Coursera downloader today using Python, the Selenium package, and the Chrome WebDriver (chromedriver). (These three tools are needed for the rest of this answer.)
First of all, you need to find the course you want on Coursera and enroll in it.
After that, fill in the variables at the top of the code below and run it. It will take a while, but the result (all video links) will be written to a text file:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# ------------------------------------------------------------------ #
# User settings: fill in every value in this section before running. #
# ------------------------------------------------------------------ #

# File in which the collected links are saved, one link per line.
# e.g. : path_to_save = "E:\\interactive-python-links.txt"
path_to_save = "E:\\programming-languages-coursera-links.txt"

# Course to download. IMPORTANT: you must already be enrolled in it.
# Give the link to the first week of the course,
# e.g. : path_course = "https://www.coursera.org/learn/game-theory-1/home/week/1"
path_course = "https://www.coursera.org/learn/programming-languages/home/week/1"

# Number of course weeks (or of weeks you want to download), counted from week 1.
# e.g. : num_of_weeks = 5
num_of_weeks = 5

# Coursera login information.
# e.g. : username = "john@doe.com", password = "12345asdfg"
username = "~"
password = "~"

# Set to True to also echo each link to the console while the file is written.
print_each_link = False
# defining functions :
def get_links_of_week(week_add):
    """
    Collect the mp4 download links reachable from one week page.

    The week page is loaded first and every distinct anchor whose href
    contains "lecture" is recorded. Each of those lecture pages is then
    opened in turn, and every anchor whose href contains "mp4" is collected.

    Uses the module-level ``driver`` to load pages and query anchors.

    :param week_add: address to the specific week in order to get links
    :return: a list containing the distinct download links found for the
             specific week, in the order they were discovered.
    """
    driver.get(week_add)
    print("going for" + week_add)
    driver.implicitly_wait(5)

    # The generic find_elements(by, value) call is used instead of the
    # find_elements_by_xpath helper, which newer Selenium releases no longer
    # provide; "xpath" is the locator-strategy name that By.XPATH stands for.
    lecture_links = []
    seen_lectures = set()  # O(1) duplicate check; the list keeps page order
    for elem in driver.find_elements("xpath", "//a[@href]"):
        sublink = elem.get_attribute("href")
        # Skip anchors whose href could not be read instead of crashing on None.
        if not sublink or "lecture" not in sublink or sublink in seen_lectures:
            continue
        seen_lectures.add(sublink)
        lecture_links.append(sublink)

    # Only the href strings were kept above (not the element objects), since
    # the loop below navigates the browser away from the week page.
    inner_links = []
    seen_videos = set()
    for link in lecture_links:
        driver.get(link)
        driver.implicitly_wait(5)
        for inelem in driver.find_elements("xpath", "//a[@href]"):
            sub_elem = inelem.get_attribute("href")
            # Keep each mp4 link once, even if several anchors point to it.
            if not sub_elem or "mp4" not in sub_elem or sub_elem in seen_videos:
                continue
            seen_videos.add(sub_elem)
            # A 40-character slice of the URL serves as a short progress label.
            print("the link : " + sub_elem[37:77] + "... fetched")
            inner_links.append(sub_elem)
    return inner_links
def get_week_list(course_url=None, week_count=None):
    """
    Build the URL of every week page that should be visited.

    The trailing week number of the given URL is removed (however many
    digits it has) and replaced by 1, 2, ..., ``week_count`` in turn. The
    original code dropped exactly one character, which produced wrong URLs
    whenever the supplied link ended in a multi-digit week such as
    ``/week/10``.

    :param course_url: link to one week page of the course; when omitted,
                       the module-level ``path_course`` is used.
    :param week_count: how many weeks to list, starting from week 1; when
                       omitted, the module-level ``num_of_weeks`` is used.
    :return: a list containing each week main page.
    """
    if course_url is None:
        course_url = path_course
    if week_count is None:
        week_count = num_of_weeks

    # Strip the whole trailing week number, leaving e.g. ".../home/week/".
    base = course_url.rstrip("0123456789")

    weeks = []
    print('list of weeks are : ')
    for x in range(1, week_count + 1):
        week_url = base + str(x)
        weeks.append(week_url)
        print(week_url)
    return weeks
# Main script: log in to Coursera, collect the video links of every week,
# then save them to path_to_save (one link per line).

# loading chrome driver
# NOTE(review): newer Selenium releases may expect the chromedriver path to be
# supplied differently (via a Service object) - confirm against the installed
# version.
driver = webdriver.Chrome("E:\\chromedriver.exe")
try:
    # login to Coursera
    driver.get(path_course)
    driver.implicitly_wait(10)
    # The generic find_element(by, value) call is used because the
    # find_element_by_* helpers are not available in newer Selenium releases.
    email = driver.find_element("name", "email")
    email.click()
    email.send_keys(username)
    pas = driver.find_element("name", "password")
    pas.click()
    pas.send_keys(password)
    # Pressing RETURN on the form's button submits the login form.
    driver.find_element(
        "xpath",
        "//*[@id=\"rendered-content\"]/div/div/div/div[3]/div/div/div/form/button",
    ).send_keys(Keys.RETURN)

    # fetching links from each week web page
    weeks_link = get_week_list()
    all_links = []
    for week in weeks_link:
        all_links += get_links_of_week(week)
finally:
    # Always end the WebDriver session, even when login or crawling fails, so
    # no browser is left running; quit() ends the whole session, whereas
    # close() only closes the current window.
    driver.quit()

# write to file
print("now writing to file ...")
# The context manager closes the file even if a write raises; an explicit
# encoding keeps the output independent of the platform default.
with open(path_to_save, "w", encoding="utf-8") as text_file:
    for each_link in all_links:
        if print_each_link:
            print(each_link + "\n")
        text_file.write(each_link)
        text_file.write("\n")
print("---------------------------------")
print("all Links are fetched successfully")
Comment here if you run into any trouble.