

Python Analyzer.get_html Method Code Examples

This article collects typical usage examples of the Python method analyzer.Analyzer.get_html. If you are wondering what Analyzer.get_html does, how to call it, or what real uses of it look like, the curated examples below should help. You can also explore further usage examples of the containing class, analyzer.Analyzer.


Fifteen code examples of the Analyzer.get_html method are shown below, sorted by popularity by default.
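All of the examples pass a raw response body and a jQuery-style selector such as 'script:contains("pf_photo")' to get_html, which suggests the method picks a <script> block out of the page and returns its embedded markup as a queryable object. The real implementation lives in the analyzer module of the cited projects; the following is only a minimal sketch of that presumed behavior, assuming pyquery and Weibo's FM.view({...}) script payloads (both are assumptions, not confirmed by this article):

# Hypothetical sketch of Analyzer.get_html -- an illustration of the
# presumed behavior, not the implementation from the cited projects.
import json
import re

from pyquery import PyQuery as pq


class Analyzer(object):
    def get_html(self, body, selector):
        # Select the <script> tag matching the jQuery-style selector,
        # e.g. 'script:contains("pf_photo")'.
        script_text = pq(body)(selector).text()
        # Weibo pages embed rendered markup as JSON inside FM.view({...})
        # calls (assumed here); extract that payload.
        match = re.search(r'FM\.view\((\{.*\})\)', script_text, re.S)
        if not match:
            return pq('')
        payload = json.loads(match.group(1))
        # Return the embedded HTML as a PyQuery object for further querying.
        return pq(payload.get('html', ''))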

Example 1: get_user_info

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def get_user_info(self, response):
        item = WeibospiderItem()
        item['uid'] = response.meta['user_id']
        analyzer = Analyzer()
        keyword_analyzer = keyword_info_analyzer()  # instantiated but not used in this method
        total_pq1 = analyzer.get_html(response.body, 'script:contains("pf_photo")')
        item['image_urls'] = analyzer.get_userphoto_url(total_pq1) + "?uid=" + str(response.meta['user_id'])
        total_pq2 = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
        total_pq3 = analyzer.get_html(response.body, 'script:contains("PCD_counter")')
        item['userinfo'] = analyzer.get_userinfo(total_pq2, total_pq3)
#        logger.info(item)
        yield item
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 14, Source: cauc_keyword_info.py

Example 2: parse_userinfo

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_userinfo(self, response):
        '''Parse profile info for a personal (non-official) account.'''
        item = WeibospiderItem()
        analyzer = Analyzer()
        try:
            total_pq1 = analyzer.get_html(response.body, 'script:contains("pf_photo")')
            item['image_urls'] = analyzer.get_userphoto_url(total_pq1)

            total_pq2 = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
            item['userinfo'] = analyzer.get_userinfo(total_pq2)
        except Exception:
            # On failure, fall back to an empty profile dict; the keys are the
            # Chinese field labels shown on the Weibo profile page.
            item['userinfo'] = {}.fromkeys(('昵称:'.decode('utf-8'), '所在地:'.decode('utf-8'), '性别:'.decode('utf-8'), '博客:'.decode('utf-8'), '个性域名:'.decode('utf-8'), '简介:'.decode('utf-8'), '生日:'.decode('utf-8'), '注册时间:'.decode('utf-8')), '')
            item['image_urls'] = None
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 15, Source: weibocontent_userinfo.py

Example 3: parse_public_userinfo

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_public_userinfo(self, response):
        '''Parse profile info for an official (verified organization) account.'''
        item = WeibospiderItem()
        analyzer = Analyzer()
        try:
            total_pq1 = analyzer.get_html(response.body, 'script:contains("pf_photo")')
            item['image_urls'] = analyzer.get_userphoto_url(total_pq1)

            total_pq2 = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
            item['userinfo'] = analyzer.get_public_userinfo(total_pq2)
        except Exception:
            # Fallback: empty profile dict keyed by the Weibo profile-page field labels.
            item['userinfo'] = {}.fromkeys(('联系人:'.decode('utf-8'), '电话:'.decode('utf-8'), '邮箱:'.decode('utf-8'), '友情链接:'.decode('utf-8')), '')
            item['image_urls'] = None
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 15, Source: weibocontent_userinfo.py

Example 4: atuser_uid_parser

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def atuser_uid_parser(self, response):
        item = WeibospiderItem()  # instantiated but not used in this method
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
        uid = friendcircle.get_user_uid(total_pq)
        self.atuser_dict[response.meta['atuser_nickname']] = uid
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 9, Source: keyweibocontent.py

Example 5: parse_keyuser

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_keyuser(self, response):
        item = WeibospiderItem()
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("feed_content wbcon")')
        item['keyword_uid'] = analyzer.get_keyuser(total_pq)
        item['keyword'] = response.meta['keyword']
        return item
Developer: commonfire, Project: scrapy-weibospider, Lines: 9, Source: keyuser.py

Example 6: parse_total_page

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_total_page(self, response):
        '''Get the total number of search-result pages to crawl.'''
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("W_pages")')
        keyword_analyzer = keyword_info_analyzer()
        total_pages = keyword_analyzer.get_totalpages(total_pq)  # total number of result pages to crawl
        for page in range(1):  # change to range(total_pages) to crawl every page
            search_url = response.meta['search_url'] + str(page + 1)  # page numbers are 1-based
            yield Request(url=search_url, meta={'cookiejar': response.meta['cookiejar'], 'keyword': response.meta['keyword']}, callback=self.parse_keyword_info)
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 11, Source: cauc_keyword_info.py

Example 7: parse_keyword_info

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_keyword_info(self, response):
        '''Extract the search-result information.'''
        item = WeibospiderItem()
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("feed_content wbcon")')
        keyword_analyzer = keyword_info_analyzer()
        item['keyword_uid'], item['keyword_alias'], item['keyword_content'], item['keyword_publish_time'] = keyword_analyzer.get_keyword_info(total_pq)
        item['keyword'] = response.meta['keyword']
        return item
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 11, Source: cauc_keyword_info.py
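
Examples 6 and 7 form a two-step pipeline: parse_total_page counts the result pages and schedules one request per page, and parse_keyword_info extracts an item from each page. A minimal, hypothetical Scrapy spider skeleton wiring the two together might look like this (the spider name, keyword, and search-URL template are placeholders, not taken from the cited projects):

# Hypothetical wiring of the two callbacks above into a Scrapy spider;
# the spider name, keyword, and URL template are placeholders.
import scrapy
from scrapy import Request


class KeywordInfoSpider(scrapy.Spider):
    name = 'keyword_info_demo'  # placeholder name

    def start_requests(self):
        keyword = 'example'  # placeholder keyword
        # URL template ending in 'page=' so that callbacks can append a
        # page number, matching how parse_total_page builds its requests.
        search_url = 'http://s.weibo.com/weibo/' + keyword + '&page='
        yield Request(url=search_url + '1',
                      meta={'cookiejar': 1, 'search_url': search_url,
                            'keyword': keyword},
                      callback=self.parse_total_page)

    # parse_total_page and parse_keyword_info would be pasted in here,
    # exactly as shown in Examples 6 and 7.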

Example 8: parse_load

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_load(self, response):
        request_url = response.request.url
        p = re.compile(r'&pre_page=(\d).*&page=(\d)')  # used to detect the very first load of page one
        match = p.search(request_url)
        if match:
            if int(match.group(1)) == 0 and int(match.group(2)) == 1:  # fetch the main user's own info (i.e. not an @-mentioned or reposting user)
                is_search = response.meta['is_search']
                if not is_search:  # is_search == 0 means this main user has not been crawled yet
                    analyzer = Analyzer()
                    total_pq = analyzer.get_html(response.body, 'script:contains("PCD_person_info")')
                    user_property = analyzer.get_userproperty(total_pq)
                    if user_property != 'icon_verify_co_v':  # not an official (verified organization) account
                        userinfo_url = analyzer.get_userinfohref(total_pq)
                        yield Request(url=userinfo_url, cookies=random.choice(COOKIES), meta={'uid': response.meta['uid'], 'is_friend': 0}, callback=self.parse_userinfo)

        item = WeibospiderItem()  # collect the user's weibo posts plus @-mention and repost info
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        total_pq = analyzer.get_html(response.body, 'script:contains("WB_feed WB_feed_v3")')
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)

        weibo_analyzer = weibocontent_analyzer()
        item['repost_nums'], item['comment_nums'], item['like_nums'] = weibo_analyzer.get_weibo_relative_args(total_pq)

        atuser_info, item['repost_user'] = analyzer.get_atuser_repostuser(total_pq)
        atuser_list = friendcircle.atuser_parser(atuser_info)
        item['atuser_nickname_list'] = atuser_list
        yield item

        frc_analyzer = friendcircle_analyzer()
        # Fetch the uid and basic info of each @-mentioned user.
        atuser_set = self.get_atuser_set(atuser_list)
        for atuser_alias in atuser_set:
            friend_url = frc_analyzer.get_frienduid_url(atuser_alias)
            yield Request(url=friend_url, cookies=random.choice(COOKIES), meta={'uid': response.meta['uid'], 'is_friend': 1}, callback=self.parse_friend_uid)  # is_friend=1 marks an @-mentioned user

        # Fetch the uid and basic info of each reposting user.
        for repostuser_alias in item['repost_user']:
            if repostuser_alias:  # a non-empty alias means the post was reposted
                friend_url = frc_analyzer.get_frienduid_url(repostuser_alias)
                yield Request(url=friend_url, cookies=random.choice(COOKIES), meta={'uid': response.meta['uid'], 'is_friend': 2}, callback=self.parse_friend_uid)  # is_friend=2 marks a reposting user
Developer: commonfire, Project: scrapy-weibospider-mysql-tianjin, Lines: 46, Source: cauc_friendcircle_userinfo.py

Example 9: parse_atuser_uid

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_atuser_uid(self, response):
        '''Resolve the uid of the corresponding @-mentioned user.'''
        item = WeibospiderItem()  # instantiated but not used in this method
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        atuser_nickname = response.meta['atuser_nickname']
        total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
        #uid = friendcircle.get_user_uid(total_pq)
        atuser_uid = friendcircle.get_user_uid2(atuser_nickname, total_pq)  # look up the @user's uid by nickname
        self.atuser_dict[atuser_nickname] = atuser_uid
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 12, Source: weibocontent_userinfo_intime.py

Example 10: parse_total_page

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_total_page(self, response):
        '''Get the total number of search-result pages to crawl.'''
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("W_pages")')
        keyword_analyzer = keyword_info_analyzer()
        total_pages = keyword_analyzer.get_totalpages(total_pq)  # total number of result pages to crawl
        logger.info("the total_pages is: %d", total_pages)
        for page in range(1):  # TODO: change to range(total_pages) to crawl every page
            search_url = response.meta['search_url'] + str(page + 1)  # page numbers are 1-based
            yield Request(url=search_url, cookies=random.choice(COOKIES), meta={'keyword': response.meta['keyword']}, callback=self.parse_keyword_info)
Developer: commonfire, Project: scrapy-weibospider-mysql-tianjin, Lines: 12, Source: cauc_keyword_info.py

Example 11: parse_atuser_uid

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_atuser_uid(self, response):
        item = WeibospiderItem()
        analyzer = Analyzer()
        friendcircle = FriendCircle()
        item['atuser_nickname'] = response.meta['atuser_nickname']
        total_pq = analyzer.get_html(response.body, 'script:contains("W_face_radius")')
        atuser_uid = friendcircle.get_user_uid2(item['atuser_nickname'], total_pq)
        item['atuser_uid'] = atuser_uid
        item['uid'] = response.meta['uid']
        yield item
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 12, Source: userid_info_list.py

Example 12: get_userurl

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def get_userurl(self, response):
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("PCD_person_info")')
        user_property = analyzer.get_userproperty(total_pq)
        if user_property == 'icon_verify_co_v':  # official (verified organization) account
            public_userinfo_url = analyzer.get_public_userinfohref(total_pq)
            yield Request(url=public_userinfo_url, meta={'cookiejar': response.meta['cookiejar'], 'uid': response.meta['uid'], 'user_property': user_property}, callback=self.parse_public_userinfo)
        else:  # personal account
            userinfo_url = analyzer.get_userinfohref(total_pq)
            yield Request(url=userinfo_url, meta={'cookiejar': response.meta['cookiejar'], 'uid': response.meta['uid'], 'user_property': user_property}, callback=self.parse_userinfo)
Developer: commonfire, Project: scrapy-weibospider-mysql, Lines: 12, Source: userid_info_list.py

Example 13: parse_load

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_load(self, response):
        item = WeibospiderItem()  # collect the user's weibo content
        analyzer = Analyzer()
        friendcircle = FriendCircle()  # instantiated but not used in this method
        total_pq = analyzer.get_html(response.body, 'script:contains("WB_feed WB_feed_v3")')
        item['uid'] = response.meta['uid']
        item['content'] = analyzer.get_content(total_pq)
        item['time'], item['timestamp'] = analyzer.get_time(total_pq)

        weibo_analyzer = weibocontent_analyzer()
        item['repost_nums'], item['comment_nums'], item['like_nums'] = weibo_analyzer.get_weibo_relative_args(total_pq)
        yield item
Developer: commonfire, Project: scrapy-weibospider-mysql-tianjin, Lines: 14, Source: cauc_warningman_weibo.py

Example 14: repostuser_uid_parser

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def repostuser_uid_parser(self, repostuser_list):
        '''Get the uid for each reposting user's nickname.'''
        # Python 2 code: also requires `import urllib2` and `from urllib import quote`.
        analyzer = Analyzer()
        repostuser_uid_list = []
        for repostuser_nickname in repostuser_list:
            if repostuser_nickname != "":
                response = urllib2.urlopen("http://s.weibo.com/user/" + quote(quote(str(repostuser_nickname))) + "&Refer=SUer_box")
                total_pq = analyzer.get_html(response.read(), 'script:contains("W_texta")')
                uid = self.get_user_uid(total_pq)
                repostuser_uid_list.append(uid)
            else:
                repostuser_uid_list.append('')
        return repostuser_uid_list
Developer: fzhenyu, Project: scrapy-weibospider-2, Lines: 15, Source: friendcircle.py
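
The double quote(quote(...)) in Example 14 URL-encodes the nickname twice, apparently because the keyword segment of s.weibo.com search URLs expects a doubly encoded value. A short Python 2 illustration of the effect (the nickname is a placeholder):

# -*- coding: utf-8 -*-
# Effect of the double URL-encoding used above (Python 2).
from urllib import quote

nickname = '张三'  # placeholder nickname
print(quote(nickname))         # %E5%BC%A0%E4%B8%89
print(quote(quote(nickname)))  # %25E5%25BC%25A0%25E4%25B8%2589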

Example 15: parse_userinfo

# Required import: from analyzer import Analyzer [as alias]
# Or: from analyzer.Analyzer import get_html [as alias]
    def parse_userinfo(self, response):
        item = response.meta['item']
        #f = open('./text2.html', 'w')
        #f.write(response.body)
        analyzer = Analyzer()
        total_pq = analyzer.get_html(response.body, 'script:contains("PCD_text_b")')
        #userinfo_dict = analyzer.get_userinfo(total_pq)
        item['userinfo'] = analyzer.get_userinfo(total_pq)
        #uid = item['uid']
        mainpageurl = 'http://weibo.com/u/' + str(response.meta['uid']) + '?from=otherprofile&wvr=3.6&loc=tagweibo'
        GetWeibopage.data['uid'] = response.meta['uid']
        getweibopage = GetWeibopage()
        GetWeibopage.data['page'] = WeiboSpider.page_num - 1
        thirdloadurl = mainpageurl + getweibopage.get_thirdloadurl()
        yield Request(url=thirdloadurl, meta={'cookiejar': response.meta['cookiejar'], 'item': item, 'uid': response.meta['uid'], 'followlist': response.meta['followlist']}, callback=self.parse_thirdload)
Developer: commonfire, Project: scrapy-weibospider, Lines: 17, Source: weibo.py


Note: the analyzer.Analyzer.get_html examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by various developers; copyright in the source code remains with the original authors, and distribution and use are subject to each project's license. Do not reproduce without permission.