I want to crawl a website here is a link (http://legacy-assignments.uspto.gov/assignments/q?db=pat&qt=rf&reel=044841&frame=0221&pat=&pub=&intn=&asnr=&asnri=&asne=&asnei=&asns=) which containd table tr and td . If i change the reel and frame from url then the result changes so i want to make a module in which i run a loop and all the data is crawled. But I stucked in looping or I am new to crawl html parser website.
Please help me to find out the perfect code for this website. I give input in reel and frame from csv and i know how to do that i just want to know how to extract data from html parser
Here is my code:-
from selenium import webdriver
from bs4 import BeautifulSoup
import requests
from time import sleep
import csv
import re
from lxml import html
import lxml.html
doc=lxml.html.parse("http://legacy-assignments.uspto.gov/assignments/q?db=pat&qt=rf&reel=044841&frame=0221&pat=&pub=&intn=&asnr=&asnri=&asne=&asnei=&asns=")
for row in doc.xpath('/html/body/table[3]/tbody/tr'):
try:
reelframe1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[1]/td[2]/a/text()')[0]
reelframe=reelframe1.strip().encode('utf8')
print reelframe
except:
print error
try:
recorded1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[2]/td[4]/text()')[0]
recorded= recorded1.strip().encode('utf8')
print recorded
except:
print error
try:
attorney1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[3]/td[2]/span/text()')[0]
attorney =attorney1.strip().encode('utf8')
print attorney
except:
print error
try:
conveyance1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[1]/tbody/tr/td[2]/table/tbody/tr[4]/td[2]/span/text()')[0]
conveyance= conveyance1.strip().encode('utf8')
print conveyance
except:
print error
try:
totalproperties1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[1]/td/div/text()')[0]
totalproperties= totalproperties1.strip().encode('utf8').replace(' ','').replace('Total properties:','').strip()
print totalproperties
except:
print error
try:
patent1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[2]/div/a/text()')[0]
patent= patent1.strip().encode('utf8')
print patent
except:
print error
try:
issuedate1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[4]/div/text()')[0]
issuedate = issuedate1.strip().encode('utf8')
print issuedate
except:
print error
try:
application1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[6]/div/text()')[0]
application=application1.strip().encode('utf8')
print application
except:
print error
try:
filing=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[8]/div/text()')[0]
filingdate=filing.strip().encode('utf8')
print filingdate
except:
print error
try:
publication=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[2]/td[2]/div/a/text()')[0]
publicationno=publication.strip().encode('utf8')
print publicationno
except:
print error
try:
pubdt=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[2]/td[4]/div/text()')[0]
publicationdate=pubdt.strip().encode('utf8')
print publicationdate
except:
print error
try:
title1=row.xpath('/html/body/table[3]/tbody/tr/td/table/tbody/tr/td/table[2]/tbody/tr[2]/td[2]/table/tbody/tr[3]/td[2]/div/text()')[0]
title=title1.strip().encode('utf8')
print title
except:
print error