本文整理汇总了 Python 中 analyzer.Analyzer.get_content 方法的典型用法代码示例。如果您正苦于以下问题：Python Analyzer.get_content 方法的具体用法？Python Analyzer.get_content 怎么用？Python Analyzer.get_content 使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 analyzer.Analyzer 的用法示例。
在下文中一共展示了Analyzer.get_content方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse_secondload
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_secondload(self, response):
    """Complete the item carried in ``response.meta['item']`` and return it.

    The response body is handed to ``Analyzer.get_mainhtml``; the result is
    then used to fill the ``content``, ``time``, ``atuser`` and
    ``repostuser`` fields of the item.

    NOTE(review): presumably a Scrapy spider callback -- confirm against the
    enclosing class, which is not visible here.
    """
    record = response.meta['item']
    parser = Analyzer()
    page = parser.get_mainhtml(response.body)

    record['content'] = parser.get_content(page)
    record['time'] = parser.get_time(page)

    # get_atuser_repostuser returns a pair: (@-mentioned users, repost users).
    mentioned, reposters = parser.get_atuser_repostuser(page)
    record['atuser'] = mentioned
    record['repostuser'] = reposters
    return record
示例2: parse_load
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_load(self, response):
    """Build a ``WeibospiderItem`` from the response and yield it.

    Fields set: ``uid`` (taken from ``response.meta``), ``content``,
    ``time``, ``timestamp`` and ``repost_user`` (all obtained through
    ``Analyzer`` from the parsed response body).

    NOTE(review): presumably a Scrapy spider callback -- confirm against the
    enclosing class, which is not visible here.
    """
    weibo = WeibospiderItem()
    parser = Analyzer()
    # Instantiated by the original code but never used in this method; kept
    # so that any constructor side effects are preserved.
    friendcircle = FriendCircle()

    page = parser.get_mainhtml(response.body)
    weibo['uid'] = response.meta['uid']
    weibo['content'] = parser.get_content(page)

    posted_at, posted_ts = parser.get_time(page)
    weibo['time'] = posted_at
    weibo['timestamp'] = posted_ts

    # The first element (@-mentioned user info) is not used here.
    _mentioned, reposters = parser.get_atuser_repostuser(page)
    weibo['repost_user'] = reposters
    yield weibo
示例3: parse_thirdload
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_thirdload(self, response):
    """Create a ``WeibospiderItem`` from the response and return it.

    Fields set: ``uid`` (taken from ``response.meta``), ``content``,
    ``time``, ``atuser`` and ``repostuser`` (obtained through ``Analyzer``
    from the parsed response body).

    NOTE(review): presumably a Scrapy spider callback -- confirm against the
    enclosing class, which is not visible here.
    """
    weibo = WeibospiderItem()
    parser = Analyzer()
    page = parser.get_mainhtml(response.body)

    weibo['uid'] = response.meta['uid']
    weibo['content'] = parser.get_content(page)
    weibo['time'] = parser.get_time(page)

    # get_atuser_repostuser returns a pair: (@-mentioned users, repost users).
    mentioned, reposters = parser.get_atuser_repostuser(page)
    weibo['atuser'] = mentioned
    weibo['repostuser'] = reposters
    return weibo
示例4: parse_load
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_load(self, response):
    """Yield a ``WeibospiderItem`` holding a user's weibo content information.

    The feed markup is selected from the response body with the
    ``script:contains("WB_feed WB_feed_v3")`` selector. Fields set: ``uid``
    (taken from ``response.meta``), ``content``, ``time``, ``timestamp``,
    ``repost_nums``, ``comment_nums`` and ``like_nums``.

    NOTE(review): presumably a Scrapy spider callback -- confirm against the
    enclosing class, which is not visible here.
    """
    weibo = WeibospiderItem()
    parser = Analyzer()
    # Instantiated by the original code but never used in this method; kept
    # so that any constructor side effects are preserved.
    friendcircle = FriendCircle()

    feed = parser.get_html(response.body, 'script:contains("WB_feed WB_feed_v3")')
    weibo['uid'] = response.meta['uid']
    weibo['content'] = parser.get_content(feed)

    posted_at, posted_ts = parser.get_time(feed)
    weibo['time'] = posted_at
    weibo['timestamp'] = posted_ts

    # Repost / comment / like counters come from a separate analyzer.
    counters = weibocontent_analyzer()
    reposts, comments, likes = counters.get_weibo_relative_args(feed)
    weibo['repost_nums'] = reposts
    weibo['comment_nums'] = comments
    weibo['like_nums'] = likes
    yield weibo
示例5: parse_thirdload
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_thirdload(self, response):
    """Complete the item carried in ``response.meta['item']`` and return it.

    ``uid`` and ``followuidlist`` are copied from ``response.meta`` (keys
    ``uid`` and ``followlist``); ``content``, ``time``, ``atuser`` and
    ``repostuser`` are obtained through ``Analyzer`` from the parsed
    response body.

    NOTE(review): presumably a Scrapy spider callback -- confirm against the
    enclosing class, which is not visible here.
    """
    meta = response.meta
    record = meta['item']
    record['uid'] = meta['uid']
    record['followuidlist'] = meta['followlist']
    # NOTE: copying 'userinfo' from response.meta was disabled (commented
    # out) in the original code and is intentionally not performed here.

    parser = Analyzer()
    page = parser.get_mainhtml(response.body)
    record['content'] = parser.get_content(page)
    record['time'] = parser.get_time(page)

    # get_atuser_repostuser returns a pair: (@-mentioned users, repost users).
    mentioned, reposters = parser.get_atuser_repostuser(page)
    record['atuser'] = mentioned
    record['repostuser'] = reposters
    return record
示例6: parse_load
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_load(self,response):
# Generator: yields follow-up Request objects and one WeibospiderItem built
# from `response`.
#
# Reads response.request.url, response.body and response.meta['uid'] /
# response.meta['is_search'].
#
# NOTE(review): the indentation of this snippet has been lost, so it cannot
# run as shown. The nesting must be restored first -- in particular, confirm
# whether the item-building section (from `item = WeibospiderItem()` onward)
# belongs inside `if match:` or at function level.
# NOTE(review): the regex below is a non-raw string containing `\d`; a raw
# string (r'...') has the same value and avoids the invalid-escape warning on
# newer Python 3 -- confirm the target interpreter before changing it.
request_url = response.request.url
p=re.compile('&pre_page=(\d).*&page=(\d)') # used to detect the first load of the first page
match = p.search(request_url)
if match:
if int(match.group(1)) == 0 and int(match.group(2)) == 1: # fetch info for the current main user (i.e. not an @-mentioned or reposting user)
is_search = response.meta['is_search']
if not is_search: # this main user has not been searched yet (is_search=0 in that case)
analyzer = Analyzer()
total_pq = analyzer.get_html(response.body,'script:contains("PCD_person_info")')
user_property = analyzer.get_userproperty(total_pq)
if not user_property == 'icon_verify_co_v': # the account is not a public account
userinfo_url = analyzer.get_userinfohref(total_pq)
yield Request(url=userinfo_url,cookies=random.choice(COOKIES),meta={'uid':response.meta['uid'],'is_friend':0},callback=self.parse_userinfo)
item = WeibospiderItem() # collect the user's weibo info plus @-mentioned-user and repost info
analyzer = Analyzer()
friendcircle = FriendCircle()
total_pq = analyzer.get_html(response.body,'script:contains("WB_feed WB_feed_v3")')
item['uid'] = response.meta['uid']
item['content'] = analyzer.get_content(total_pq)
item['time'],item['timestamp'] = analyzer.get_time(total_pq)
weibo_analyzer = weibocontent_analyzer()
item['repost_nums'],item['comment_nums'],item['like_nums'] = weibo_analyzer.get_weibo_relative_args(total_pq)
atuser_info,item['repost_user'] = analyzer.get_atuser_repostuser(total_pq)
atuser_list = friendcircle.atuser_parser(atuser_info)
item['atuser_nickname_list'] = atuser_list
yield item
frc_analyzer = friendcircle_analyzer()
# get the uid and basic info of @-mentioned users
atuser_set = self.get_atuser_set(atuser_list)
for atuser_alias in atuser_set:
friend_url = frc_analyzer.get_frienduid_url(atuser_alias)
yield Request(url=friend_url,cookies=random.choice(COOKIES),meta={'uid':response.meta['uid'],'is_friend':1},callback=self.parse_friend_uid) # is_friend=1 means crawling basic info of an @-mentioned user
# get the uid and basic info of reposting users
for repostuser_alias in item['repost_user']:
if repostuser_alias: # repostuser_alias is non-empty, i.e. there is a reposting user
friend_url = frc_analyzer.get_frienduid_url(repostuser_alias)
yield Request(url=friend_url,cookies=random.choice(COOKIES),meta={'uid':response.meta['uid'],'is_friend':2},callback=self.parse_friend_uid) # is_friend=2 means crawling basic info of a reposting user
示例7: parse_load
# 需要导入模块: from analyzer import Analyzer [as 别名]
# 或者: from analyzer.Analyzer import get_content [as 别名]
def parse_load(self,response):
# Generator: yields follow-up Request objects and one WeibospiderItem built
# from `response`.
#
# Reads response.request.url, response.body, response.meta['uid'],
# response.meta['cookiejar'] and self.uid; queries table t_user_info through
# OracleStore.
#
# NOTE(review): the indentation of this snippet has been lost, so it cannot
# run as shown. The nesting must be restored first -- confirm where
# `db.close_connection(conn,cursor)` sits and which `if` the trailing `else`
# pairs with.
# NOTE(review): `match` is used without a None check; `p.search` returns None
# when the URL does not match, in which case `match.group` raises
# AttributeError.
# NOTE(review): the SQL text is built by string interpolation of self.uid;
# consider a bound parameter if `select_operation` supports one -- its
# signature is not visible here.
# NOTE(review): `user_info` is assigned but not used anywhere in the visible
# code.
# NOTE(review): the regex below is a non-raw string containing `\d`; a raw
# string (r'...') has the same value and avoids the invalid-escape warning on
# newer Python 3 -- confirm the target interpreter before changing it.
user_info = userinfo.WeiboSpider()
request_url = response.request.url
p=re.compile('&pre_page=(\d).*&page=(\d)') # used to detect the first load of the first page
match = p.search(request_url)
if int(match.group(1)) == 0 and int(match.group(2)) == 1: # fetch the user's info
db = OracleStore();conn = db.get_connection()
sql = "select count(*) from t_user_info where userID='%s'" % self.uid
cursor = db.select_operation(conn,sql);count = cursor.fetchone()
if not count[0]: # if this uid has not been crawled yet, crawl the user's basic info
analyzer = Analyzer()
total_pq = analyzer.get_html(response.body,'script:contains("PCD_person_info")')
user_property = analyzer.get_userproperty(total_pq)
if user_property == 'icon_verify_co_v': # the account is a public account
public_userinfo_url = analyzer.get_public_userinfohref(total_pq)
#yield Request(url=public_userinfo_url,meta={'cookiejar':response.meta['cookiejar'],'uid':response.meta['uid'],'user_property':user_property},callback=self.parse_public_userinfo) public accounts are not handled for now; the database needs a foreign key set up first
else:
userinfo_url = analyzer.get_userinfohref(total_pq)
yield Request(url=userinfo_url,meta={'cookiejar':response.meta['cookiejar'],'uid':response.meta['uid'],'user_property':user_property},callback=self.parse_userinfo)
db.close_connection(conn,cursor)
item = WeibospiderItem() # collect the user's weibo info and @-mentioned-user info
analyzer = Analyzer()
friendcircle = FriendCircle()
total_pq = analyzer.get_mainhtml(response.body)
item['uid'] = response.meta['uid']
item['content'] = analyzer.get_content(total_pq)
item['time'],item['timestamp'] = analyzer.get_time(total_pq)
atuser_info,item['repost_user'] = analyzer.get_atuser_repostuser(total_pq)
atuser_list = friendcircle.atuser_parser(atuser_info)
item['atuser_nickname_list'] = atuser_list
yield item
for atuser_inlist in atuser_list:
if atuser_inlist != []:
for atuser in atuser_inlist:
uid_url = "http://s.weibo.com/user/"+quote(quote(str(atuser)))+"&Refer=SUer_box"
yield Request(url=uid_url,meta={'cookiejar':response.meta['cookiejar'],'uid':self.uid,'atuser_nickname':atuser},callback=self.parse_atuser_uid)
else:
continue