This is a code with Web crawler. I'm a beginer in learning python.So i don't know how to solve. It seems wrong with search()
# -*- coding:utf-8 -*-
import urllib,urllib2,re
class BDTB:
def __init__(self,baseUrl,seeLZ):
self.baseUrl = baseUrl
self.seeLZ = '?see_lz' + str(seeLZ)
def getPage(self,pageNum):
try:
url = self.baseUrl + self.seeLZ + '&pn=' + str(pageNum)
request = urllib2.Request(url)
response = urllib2.urlopen(request)
#print response.read().decode('utf-8')
return response
except urllib2.URLError,e:
if hasattr(e,'reason'):
print u'连接百度贴吧失败,错误原因',e.reason
return None
def getTitle(self):
page = self.getPage(1)
pattern = re.compile('<h3 class.*?px">(.*?)</h3>',re.S)
result = re.search(pattern,page)
if result:
print result.group(1)
return result.group(1).strip()
else:
return None
baseURL = 'http://tieba.baidu.com/p/4095047339'
bdtb = BDTB(baseURL,1)
bdtb.getTitle()