I am trying to scrape the company names from the `<li>` items of the `<ul>` list that sits under the "Final Result" heading. My source code looks like this:
import string
import re
import urllib2
import datetime
import bs4
from bs4 import BeautifulSoup
class AJSpider(object):
    """Scrape the entries listed under the "Final Result" heading.

    The spider downloads the forward-diary page for a given date, parses it
    with BeautifulSoup, and collects the text of every ``<li>`` in the list
    that follows the ``<h3>Final Result</h3>`` heading.
    """

    def __init__(self):
        print("initisizing")
        # Today's date in ISO form (YYYY-MM-DD); used as the default query date.
        self.date = str(datetime.date.today())
        # URL template; ``{date}`` is filled in by get_page().
        self.cur_url = "https://youinvest.moneyam.com/modules/forward-diary/?date={date}&period=month"
        # Entries collected by the most recent start_spider() run.
        self.datas = []
        print("initisization done")

    def get_page(self, cur_date):
        """Download and parse the diary page for ``cur_date``.

        Args:
            cur_date: value substituted for ``{date}`` in the URL template.

        Returns:
            The parsed BeautifulSoup document, or ``None`` when the page
            could not be fetched or decoded.
        """
        url = self.cur_url.format(date=cur_date)
        try:
            my_page = urllib2.urlopen(url).read().decode("utf-8")
        except (IOError, ValueError):
            # urllib2.URLError/HTTPError derive from IOError and
            # UnicodeDecodeError from ValueError.  Return None explicitly:
            # falling through to a ``return my_soup`` here would raise
            # UnboundLocalError and hide the real failure.
            print("Failed")
            return None
        return BeautifulSoup(my_page, "html.parser")

    def get_final(self, soup_page):
        """Return the text of each ``<li>`` under the "Final Result" heading.

        Args:
            soup_page: parsed document as returned by get_page(); may be
                ``None`` when the download failed.

        Returns:
            A list with the stripped text of every list item, or an empty
            list when the page, the heading, or the list is missing.
        """
        if soup_page is None:
            return []
        # The heading has no id or class, so it is matched by its text.
        final_result_section = soup_page.find("h3", text="Final Result")
        print(final_result_section)
        if final_result_section is None:
            return []
        # NOTE(review): assumes the wanted <ul> is the first <ul> after the
        # heading in document order -- confirm against the live page markup.
        result_list = final_result_section.find_next("ul")
        if result_list is None:
            return []
        return [item.get_text(strip=True) for item in result_list.find_all("li")]

    def start_spider(self):
        """Fetch today's page, store the extracted entries and return them."""
        my_page = self.get_page(self.date)
        self.datas = self.get_final(my_page)
        return self.datas
def main():
    """Create a spider and run a single scrape."""
    AJSpider().start_spider()


# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
I found a similar question on Stack Overflow, "Beautiful Soup: Accessing <li> elements from <ul> with no id", but the list in that question has a class attribute, which makes things a lot easier.
In my scenario, how can I extract the li elements from the ul list? The only identifier is the content of the h3 tag, which is "Final Result"; however, it is not an id, so I have no idea how to make use of it.