本文整理汇总了Python中analyzer.Analyzer.get_followhtml方法的典型用法代码示例。如果您正苦于以下问题：Python Analyzer.get_followhtml方法的具体用法？Python Analyzer.get_followhtml怎么用？Python Analyzer.get_followhtml使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类analyzer.Analyzer的用法示例。
在下文中一共展示了Analyzer.get_followhtml方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_follow
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_followhtml [as 别名]
def parse_follow(self,response):
# Generator callback: parses a "follow list" response and yields Request
# objects for the followed users' weibo pages (callback: self.get_userurl).
#
# Visible flow:
#   1. Parse response.body with Analyzer.get_followhtml, then extract the
#      list of followed uids with Analyzer.get_follow.
#   2. Read (oldflag, stopflag) via getinfo.get_followflag(WeiboSpider.filename).
#   3. Pull the page number out of the request URL; on page 1 store
#      followlist[0] together with the string 'False' via getinfo.set_followflag.
#   4. Walk the first two uids of followlist and build the third-load URL
#      for each page in range(WeiboSpider.page_num).
#
# NOTE(review): the original indentation was lost in this listing, so the
# nesting of the if/for/else/break statements below cannot be read from the
# text itself -- restore it from the upstream project before running.
# NOTE(review): this is Python 2 code (print statements).
#print '************************ source request url:',response.request.url
item = WeibospiderItem()
analyzer = Analyzer()
total_pq = analyzer.get_followhtml(response.body)
#item['followuidlist'] = analyzer.get_follow(total_pq)
followlist = analyzer.get_follow(total_pq)
#item['userinfo'] = {}
oldflag,stopflag= getinfo.get_followflag(WeiboSpider.filename)
# NOTE(review): (\d) captures exactly one digit, so a URL with a multi-digit
# page number would yield only a single digit here -- confirm whether
# page numbers >= 10 can occur.
p = re.compile('.*_page=(\d).*',re.S)
# NOTE(review): p.search(...) returns None when the URL has no "_page=<digit>",
# which would make .group(1) raise AttributeError -- confirm URLs always match.
current_page = p.search(response.request.url).group(1) # get the page number of the current follow-list page
if int(current_page) == 1:
# NOTE(review): followlist[0] presumably raises IndexError when no followed
# uid was parsed -- verify get_follow never returns an empty list.
getinfo.set_followflag(WeiboSpider.filename,followlist[0],'False')
print 'page is equal 1 '
else:
print 'page is NOT equal 1'
# Only the first two uids of the follow list are visited.
for follow_uid in followlist[:2]:
print '%%%%%%%%%%%%%%%%%%%%%%%%%%',follow_uid
#item['uid'] = follow_uid
if follow_uid != oldflag: # do not re-crawl a uid that was already crawled, i.e. incremental crawling
# crawl the weibo content of this uid's home page
if stopflag == 'False':
getinfo.set_followflag(WeiboSpider.filename,followlist[0],'True')
mainpageurl = 'http://weibo.com/u/'+str(follow_uid)+'?from=otherprofile&wvr=3.6&loc=tagweibo'
# The uid and page are written to the class-level GetWeibopage.data dict.
GetWeibopage.data['uid'] = follow_uid
getweibopage = GetWeibopage()
for page in range(WeiboSpider.page_num):
GetWeibopage.data['page'] = page+1
# first load of the current page
# second load of the current page
# third load of the current page
thirdloadurl = mainpageurl + getweibopage.get_thirdloadurl()
# NOTE(review): 'pagebar' is not set in this function; presumably
# get_thirdloadurl() populates GetWeibopage.data['pagebar'] -- confirm.
if int(GetWeibopage.data['pagebar']) == 1 and page == WeiboSpider.page_num-1: # on the last load of the last page, fetch the user's basic info
print 'hhhhhhhhhhhhhhhhhhhh',followlist
yield Request(url=thirdloadurl,meta={'cookiejar':response.meta['cookiejar'],'item':item,'uid':follow_uid,'followlist':followlist},callback=self.get_userurl)
#continue
#yield Request(url=thirdloadurl,meta={'cookiejar':response.meta['cookiejar'],'item':item,'uid':follow_uid},callback=self.parse_thirdload)
#firstloadurl = mainpageurl + getweibopage.get_firstloadurl()
#yield Request(url=firstloadurl,meta={'cookiejar':response.meta['cookiejar'],'item':item,'uid':follow_uid},callback=self.parse_firstload)
else:
break
else:
break
示例2: parse_follow
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_followhtml [as 别名]
def parse_follow(self, response):
    """Return an item carrying the followed-uid list parsed from ``response``.

    The response body is handed to ``Analyzer.get_followhtml``; its result
    is passed to ``Analyzer.get_follow`` and the outcome is stored under the
    ``'followuidlist'`` key of a fresh ``WeibospiderItem``.
    """
    follow_item = WeibospiderItem()
    page_analyzer = Analyzer()
    follow_item['followuidlist'] = page_analyzer.get_follow(
        page_analyzer.get_followhtml(response.body)
    )
    return follow_item