当前位置: 首页>>代码示例>>Python>>正文


Python VCR.recognize方法代码示例

本文整理汇总了Python中vcr.VCR.recognize方法的典型用法代码示例。如果您正苦于以下问题:Python VCR.recognize方法的具体用法?Python VCR.recognize怎么用?Python VCR.recognize使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在vcr.VCR的用法示例。


在下文中一共展示了VCR.recognize方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Fayuan

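# Required import: from vcr import VCR [as alias]
# Note: recognize is a method of VCR instances (e.g. VCR().recognize(path)); it cannot be imported on its own.
# The example below also uses json, os, random, re, requests and BeautifulSoup.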
class Fayuan(object):
    """Client for the search form served at zhixing.court.gov.cn.

    A query runs in three stages that all share one ``requests`` session:

    1. ``get_code`` downloads a captcha image and has ``VCR`` recognise it.
    2. ``get_page`` / ``get_basic_info`` post the search form and collect the
       case ids found on each result page.
    3. ``get_detail`` fetches the JSON detail record of every collected id.

    ``search`` is the entry point that resets the state and runs all stages.
    """

    # How many captchas are tried for a single request before giving up.
    MAX_CAPTCHA_ATTEMPTS = 3

    def __str__(self):
        # NOTE(review): 'Enging' looks like a typo for 'Engine'; the text is
        # runtime output, so it is kept verbatim here.
        return 'Fayuan Zhixing Enging.'

    def __init__(self, max_pages=5):
        """Create the scraper.

        Args:
            max_pages: Upper bound on the number of result pages that
                ``get_basic_info`` requests for one query.
        """
        # Headers sent with the very first request of a search.
        self.headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
                        'Host': 'zhixing.court.gov.cn',
                        'Upgrade-Insecure-Requests': '1',
                        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
                        }
        self.j_captcha = '0000'  # most recently recognised captcha text
        self.s = None  # requests session, created by search()
        self.total_pages = 1  # page count reported by the last parsed result page
        self.case_ids = set()  # case ids collected from the result pages
        self.detail_info = []  # decoded JSON detail records
        self.vcr = VCR()  # captcha recogniser
        self.MAX_PAGES = max_pages

    def get_code(self):
        """Download a fresh captcha image and store its recognised text.

        The image is written to ``code.jpg`` in the current working directory
        and that path is handed to ``self.vcr.recognize``; the result is
        stored in ``self.j_captcha``. The request goes through ``self.s``, so
        the session must already exist (``search`` creates it).
        """
        code_url = "http://zhixing.court.gov.cn/search/security/jcaptcha.jpg?" + str(random.randint(1, 99))
        # Fetches the captcha image together with its cookies (required).
        code_response = self.s.get(code_url)
        # Build the path once so the file written and the file recognised are
        # guaranteed to be the same one.
        image_path = os.path.join(os.getcwd(), 'code.jpg')
        with open(image_path, 'wb') as f:
            f.write(code_response.content)
        self.j_captcha = self.vcr.recognize(image_path)

    def get_page(self, name, page=1):
        """Fetch one page of search results and record the case ids on it.

        Args:
            name: Value sent as the ``pname`` form field.
            page: Result page number, sent as ``currentPage``.

        Returns:
            True when the page was fetched and parsed. False when the captcha
            was rejected ``MAX_CAPTCHA_ATTEMPTS`` times in a row, or when the
            response lacks the pagination text or result table this method
            relies on.

        Side effects:
            Updates ``self.total_pages`` and adds ids to ``self.case_ids``.
        """
        data = {'searchCourtName': u'全国法院(包含地方各级法院)',
                'selectCourtId': 1,
                'selectCourtArrange': 1,
                'pname': name,
                'cardNum': '',
                'currentPage': page,
                }
        self.s.headers.update({'Origin': 'http://zhixing.court.gov.cn',
                               'Referer': 'http://zhixing.court.gov.cn/search/',
                               'Upgrade-Insecure-Requests': '1'})

        for _ in range(self.MAX_CAPTCHA_ATTEMPTS):
            self.get_code()  # every attempt needs a fresh captcha
            data['j_captcha'] = self.j_captcha
            response = self.s.post('http://zhixing.court.gov.cn/search/newsearch', data=data)
            if u"验证码出现错误,请重新输入" not in response.text:
                break
        else:
            # Every attempt was answered with the captcha-error message.
            return False

        # The pagination text carries "<current>/<total>"; without it the page
        # count is unknown and the response is not treated as a result page.
        pagination = re.search(u' .+页 \\d+/(.+) 共.+', response.text)
        if pagination is None:
            return False
        try:
            self.total_pages = int(pagination.group(1))
        except ValueError:
            return False

        soup = BeautifulSoup(response.text, 'lxml')
        table_body = soup.find('tbody')
        if table_body is None:
            return False
        # The first <tr> is skipped, as in the original code (presumably a
        # header row -- confirm against the live page).
        for row in table_body.find_all('tr')[1:]:
            cells = row.find_all('td')
            link = cells[4].find('a') if len(cells) > 4 else None
            # The case id is the id attribute of the link in the fifth cell;
            # rows without it are skipped instead of aborting the whole page.
            if link is None or not link.get('id'):
                continue
            self.case_ids.add(link['id'])
        return True

    def get_basic_info(self, name):
        """Collect case ids for ``name`` from up to ``MAX_PAGES`` result pages.

        Page 1 is fetched first because it reveals the total page count. If
        it cannot be fetched, no further pages are requested.
        """
        if not self.get_page(name, 1):
            return
        last_page = min(self.total_pages, self.MAX_PAGES)
        for page_number in range(2, last_page + 1):
            self.get_page(name, page_number)

    def get_detail(self):
        """Fetch the JSON detail record for every id in ``self.case_ids``.

        Returns:
            True when every case was fetched. False as soon as one case has
            its captcha rejected ``MAX_CAPTCHA_ATTEMPTS`` times; the remaining
            cases are then not requested, and the records fetched so far stay
            in ``self.detail_info``.
        """
        for case_id in self.case_ids:
            for _ in range(self.MAX_CAPTCHA_ATTEMPTS):
                self.get_code()  # every attempt needs a fresh captcha
                # Passing the values as params lets requests URL-encode them.
                detail_response = self.s.get(
                    'http://zhixing.court.gov.cn/search/newdetail',
                    params=[('id', case_id), ('j_captcha', self.j_captcha)])
                # An empty JSON object is the captcha-error answer.
                if detail_response.text != '{}':
                    break
            else:
                return False
            self.detail_info.append(json.loads(detail_response.text))
        return True

    def search(self, name='英孚'):
        """Run a complete query for ``name`` and return the detail records.

        All per-query state is reset first, so one instance can be reused for
        several searches.

        Returns:
            ``self.detail_info``: the list of decoded JSON detail records.
        """
        # Release the connections held by a previous search before replacing
        # the session.
        if self.s is not None:
            self.s.close()
        self.s = requests.Session()
        self.total_pages = 1
        self.case_ids = set()
        self.detail_info = []

        # First visit to the search page (required before querying).
        self.s.get("http://zhixing.court.gov.cn/search/", headers=self.headers)
        # Fetching explain.html?v=20130408 (the text shown below the form) is
        # not needed for the query and is therefore skipped.
        self.get_basic_info(name)
        self.get_detail()
        return self.detail_info
开发者ID:ExtraYin,项目名称:Credit_Report,代码行数:95,代码来源:fayuan_zhixing.py


注:本文中的vcr.VCR.recognize方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。