本文整理汇总了Python中vcr.VCR.recognize方法的典型用法代码示例。如果您正苦于以下问题:Python VCR.recognize方法的具体用法?Python VCR.recognize怎么用?Python VCR.recognize使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类vcr.VCR的用法示例。
在下文中一共展示了VCR.recognize方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: Fayuan
# 需要导入模块: from vcr import VCR [as 别名]
# 或者: from vcr.VCR import recognize [as 别名]
class Fayuan(object):
    """Scraper for the case search hosted at zhixing.court.gov.cn.

    A search is a three-step conversation with the site, all on one
    ``requests`` session so that the captcha cookies are shared:

    1. fetch a captcha image and let ``VCR.recognize`` read it,
    2. post the search form page by page, collecting case ids,
    3. fetch the JSON detail record of every collected case id.

    Every request that the site protects with a captcha is attempted at
    most ``CAPTCHA_TRIES`` times, each time with a freshly fetched captcha.
    """

    _HOME_URL = "http://zhixing.court.gov.cn/search/"
    _CAPTCHA_URL = "http://zhixing.court.gov.cn/search/security/jcaptcha.jpg?"
    _SEARCH_URL = 'http://zhixing.court.gov.cn/search/newsearch'
    _DETAIL_URL = "http://zhixing.court.gov.cn/search/newdetail?id="
    _CAPTCHA_FILE = 'code.jpg'
    # Text the search page contains when the submitted captcha was rejected.
    _CAPTCHA_ERROR = u"验证码出现错误,请重新输入"
    # Same pattern value as a raw string would give; the backslash is escaped
    # so the ``u`` prefix can stay and no invalid-escape warning is raised.
    _PAGE_COUNT_PATTERN = u' .+页 \\d+/(.+) 共.+'

    # Maximum number of captcha attempts per protected request.
    CAPTCHA_TRIES = 3

    def __str__(self):
        return 'Fayuan Zhixing Enging.'

    def __init__(self, max_pages=5):
        """Create an idle scraper.

        :param max_pages: upper bound on the number of result pages that
            ``get_basic_info`` will request for one search.
        """
        self.headers = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Host': 'zhixing.court.gov.cn',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'
        }
        self.j_captcha = '0000'  # most recently recognized captcha text
        self.s = None  # requests session, created by search()
        self.total_pages = 1
        self.case_ids = set()
        self.detail_info = []
        self.vcr = VCR()
        self.MAX_PAGES = max_pages

    @classmethod
    def _parse_total_pages(cls, text):
        """Return the total page count found in *text*, or None.

        None is returned when the pagination text is absent or when the
        captured value is not an integer.
        """
        match = re.search(cls._PAGE_COUNT_PATTERN, text)
        if match is None:
            return None
        try:
            return int(match.group(1))
        except ValueError:
            return None

    def _detail_url(self, case_id):
        """Build the detail URL for *case_id* using the current captcha."""
        return self._DETAIL_URL + str(case_id) + "&j_captcha=" + str(self.j_captcha)

    def _fetch_detail(self, case_id):
        """Return the raw detail text for *case_id*, or None.

        The site answers ``{}`` when the captcha is wrong; in that case a
        new captcha is fetched and the request repeated, up to
        ``CAPTCHA_TRIES`` attempts in total.
        """
        for _ in range(self.CAPTCHA_TRIES):
            self.get_code()
            text = self.s.get(self._detail_url(case_id)).text
            if text != '{}':
                return text
        return None

    def get_code(self):
        """Download a captcha image and store its recognized text.

        The image is written to ``code.jpg`` in the current working
        directory and that file is handed to ``self.vcr.recognize``; the
        result is stored in ``self.j_captcha``.  The request must go
        through the session because it also sets the required cookies.
        """
        # The random query string varies the URL between requests.
        code_url = self._CAPTCHA_URL + str(random.randint(1, 99))
        code_response = self.s.get(code_url)
        image_path = os.path.join(os.getcwd(), self._CAPTCHA_FILE)
        with open(image_path, 'wb') as image_file:
            image_file.write(code_response.content)
        self.j_captcha = self.vcr.recognize(image_path)

    def get_page(self, name, page=1):
        """Request one page of search results and record its case ids.

        Updates ``self.total_pages`` from the pagination text and adds the
        case ids found in the result table to ``self.case_ids``.

        :param name: value sent as the ``pname`` form field.
        :param page: 1-based result page to request.
        :return: True when the page was parsed; False when the captcha was
            rejected ``CAPTCHA_TRIES`` times or the response contains no
            pagination text / result table.
        """
        self.get_code()
        form = {
            'searchCourtName': u'全国法院(包含地方各级法院)',
            'selectCourtId': 1,
            'selectCourtArrange': 1,
            'pname': name,
            'cardNum': '',
            'currentPage': page,
            'j_captcha': self.j_captcha
        }
        self.s.headers.update({
            'Origin': 'http://zhixing.court.gov.cn',
            'Referer': 'http://zhixing.court.gov.cn/search/',
            'Upgrade-Insecure-Requests': '1'
        })
        response = self.s.post(self._SEARCH_URL, data=form)

        attempts = 1
        while self._CAPTCHA_ERROR in response.text:
            if attempts >= self.CAPTCHA_TRIES:
                return False
            attempts += 1
            self.get_code()
            form['j_captcha'] = self.j_captcha
            response = self.s.post(self._SEARCH_URL, data=form)

        total = self._parse_total_pages(response.text)
        if total is None:
            # No pagination text: nothing to parse on this page.
            return False
        self.total_pages = total

        table_body = BeautifulSoup(response.text, 'lxml').find('tbody')
        if table_body is None:
            return False
        # The first row is skipped; the case id is the ``id`` attribute of
        # the link in the fifth cell of every following row.
        for row in table_body.find_all('tr')[1:]:
            self.case_ids.add(row.find_all('td')[4].find('a')['id'])
        return True

    def get_basic_info(self, name):
        """Collect case ids from the first pages of the result list.

        Page 1 is always requested; further pages follow up to the total
        reported by the site, capped at ``self.MAX_PAGES``.
        """
        self.get_page(name, 1)
        last_page = min(self.total_pages, self.MAX_PAGES)
        for page in range(2, last_page + 1):
            self.get_page(name, page)

    def get_detail(self):
        """Fetch and decode the detail record of every collected case id.

        Decoded records are appended to ``self.detail_info``.

        :return: True when every case was fetched; False as soon as one
            case exhausts its captcha attempts (remaining cases are not
            requested, records gathered so far are kept).
        :raises ValueError: if a response is not valid JSON
            (from ``json.loads``).
        """
        for case_id in self.case_ids:
            text = self._fetch_detail(case_id)
            if text is None:
                return False
            self.detail_info.append(json.loads(text))
        return True

    def search(self, name='英孚'):
        """Run a complete search for *name* and return the detail records.

        All per-search state (session, page count, case ids, details) is
        reset first, so one instance can be reused for several searches.

        :return: list of decoded detail records (``self.detail_info``).
        """
        # Release the connections of a previous search before replacing
        # the session.
        if self.s is not None:
            self.s.close()
        self.s = requests.Session()
        self.total_pages = 1
        self.case_ids = set()
        self.detail_info = []

        # The landing page has to be visited once before anything else.
        self.s.get(self._HOME_URL, headers=self.headers)
        self.get_basic_info(name)
        self.get_detail()
        return self.detail_info