本文整理汇总了 Python 中 pytesseract.image_to_string 方法的典型用法代码示例。如果您正苦于以下问题:pytesseract.image_to_string 方法的具体用法是什么?怎么调用?有哪些使用示例?那么这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pytesseract 的用法示例。
在下文中一共展示了pytesseract.image_to_string方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: full_OCR
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def full_OCR(self):
    """Run OCR on the source image and mark every recognised character box.

    Returns:
        A ``(bounded, res)`` tuple. ``bounded`` is a copy of ``self.img``
        with a green rectangle drawn around each box reported by
        ``image_to_boxes``; ``res`` is an array shaped like
        ``self.gray_img`` with the same boxes filled with 255. Both are
        returned untouched when ``image_to_string`` finds no text.
    """
    bounded = self.img.copy()
    res = np.zeros_like(self.gray_img)
    # Open the file once and reuse the handle for both OCR passes.
    with Image.open(self.image_file) as image:
        if image_to_string(image) == u'':
            return bounded, res
        raw_boxes = image_to_boxes(image)
    # Every non-empty line is "<char> <x1> <y1> <x2> <y2> <page>"; keep only
    # the four coordinates. Build real lists (a bare ``map`` object cannot be
    # indexed on Python 3) and skip blank lines, which carry no coordinates.
    boxes = [
        [int(value) for value in line.split(" ")[1:-1]]
        for line in raw_boxes.split("\n")
        if line.strip()
    ]
    for box in boxes:
        # The y values are subtracted from the image height ``self.h`` to
        # flip them into the top-left-origin system cv2 draws in.
        first_corner = (int(box[0]), int(self.h - box[1]))
        second_corner = (int(box[2]), int(self.h - box[3]))
        cv2.rectangle(bounded, first_corner, second_corner, (0, 255, 0), 2)
        cv2.rectangle(res, first_corner, second_corner, 255, -1)
    return bounded, res
示例2: autocaptcha
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def autocaptcha(path):
    """Recognise the captcha stored at ``path`` with pytesseract.

    The image is converted to greyscale, its contrast is boosted by a
    factor of 3, and the result is pasted onto a white 150x60 RGB canvas
    at offset (25, 10) before being handed to the OCR engine.

    Args:
        path: string, image path.

    Returns:
        string, OCR identified code.
    """
    greyscale = Image.open(path).convert('L')
    boosted = ImageEnhance.Contrast(greyscale).enhance(3)
    canvas = Image.new('RGB', (150, 60), (255, 255, 255))
    canvas.paste(boosted.copy(), (25, 10))
    # TODO: add auto environment detect
    return pytesseract.image_to_string(canvas)
示例3: create_session
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def create_session(self):
    """Create a session by solving the captcha challenge.

    Downloads a fresh captcha image, stores the request timestamp and the
    response cookies in ``self.session``, OCRs the image and stores the
    evaluated left-hand side of the recognised "<expr>=" text under
    ``self.session['captcha']``. A new captcha is requested whenever the
    recognised text cannot be evaluated; the retry is a loop rather than
    recursion so repeated OCR failures cannot exhaust the call stack.

    Returns:
        None. Returns early (without a captcha value) when the response
        body cannot be opened as an image.
    """
    while True:
        self.session['timestamp'] = int(time.time() * 1000)
        url = "http://www.indianrail.gov.in/enquiry/captchaDraw.png?{}".format(self.session['timestamp'])
        r = requests.get(url)
        self.session['cookies'] = r.cookies
        try:
            # Image.open is the call that raises OSError on undecodable
            # data, so it has to sit inside the guarded region.
            im = Image.open(BytesIO(r.content))
        except OSError:
            return None
        text = pytesseract.image_to_string(im, lang='eng')
        try:
            # SECURITY(review): eval() runs text OCR'd from a remote image.
            # That is untrusted input; replace it with a restricted
            # arithmetic parser before using this outside a sandbox.
            self.session['captcha'] = eval(text.split("=")[0])
        except Exception:
            # OCR produced something that is not an expression; fetch a
            # fresh captcha and try again.
            continue
        return None
示例4: handle_tweet
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def handle_tweet(self, tweet_json):
    """Collect a tweet's text (plus text OCR'd from its photos) and dispatch it.

    Args:
        tweet_json: decoded tweet payload; must provide
            ``["user"]["screen_name"]``, ``["id_str"]`` and ``["text"]``.
            Photo attachments are read from ``["entities"]["media"]`` when
            that path exists.

    The combined text, the author's screen name and a link to the tweet are
    passed to ``self.tweet_callback``. A failing callback is logged instead
    of propagating, so one bad tweet does not stop the caller.
    """
    import logging

    screen_name = tweet_json["user"]["screen_name"]
    tweet_id = tweet_json["id_str"]
    text = tweet_json["text"].replace("\\", "")
    # Get media if present. Only the payload lookup is guarded: a missing
    # key simply means the tweet carries no photos.
    try:
        urls = [x["media_url"].replace("\\", "") for x in tweet_json["entities"]["media"] if x["type"] == "photo"]
    except KeyError:
        urls = []
    for url in urls:
        response = requests.get(url)
        img = Image.open(io.BytesIO(response.content))
        # Extract text from image
        img_text = pytesseract.image_to_string(img)
        text += f' . {img_text}'
    link = f'https://twitter.com/{screen_name}/status/{tweet_id}'
    try:
        self.tweet_callback(text, screen_name, link)
    except Exception:
        # Best effort: keep going, but leave a trace of what went wrong.
        logging.getLogger(__name__).exception("tweet_callback failed for %s", link)
示例5: extracttext
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def extracttext(imgpath, preprocess):
    """OCR the image at ``imgpath`` after optional preprocessing.

    Args:
        imgpath: local file path, or an ``http://``, ``https://`` or
            ``ftp://`` URL (fetched through ``url_to_image``).
        preprocess: ``"thresh"`` applies Otsu binarisation, ``"blur"``
            applies a 3x3 median blur; any other value leaves the
            greyscale image unchanged.

    Returns:
        dict with a single ``"text"`` key holding the recognised text.
    """
    if imgpath.startswith(('http://', 'https://', 'ftp://')):
        image = url_to_image(imgpath)
    else:
        image = cv2.imread(imgpath)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if preprocess == "thresh":
        gray = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    elif preprocess == "blur":
        gray = cv2.medianBlur(gray, 3)
    # Hand the array to Pillow in memory instead of round-tripping through a
    # "<pid>.png" file in the working directory: that file could collide
    # with other users of the name and was left behind whenever OCR raised.
    text = pytesseract.image_to_string(Image.fromarray(gray))
    return {"text": text}
示例6: vcode
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def vcode(self):
    """Fetch the verification-code image and return the text OCR reads from it.

    Returns:
        The recognised code, when ``self.code_rule`` finds a match in it.

    Raises:
        VerifyCodeError: when ``self.code_rule`` finds no match in the
            recognised text.
    """
    response = self._session.get(
        'https://jy.yongjinbao.com.cn/winner_gj/gjzq/user/extraCode.jsp',
        params={'randomStamp': random.random()}
    )
    response.raise_for_status()
    # Keep the image data in memory
    buffer = BytesIO(response.content)
    captcha = Image.open(buffer)
    code = pytesseract.image_to_string(captcha)
    captcha.close()
    buffer.close()
    if self.code_rule.findall(code) != []:
        logger.debug('Verify Code is: %s' % code)
        return code
    raise VerifyCodeError('Wrong verify code: %s' % code)
示例7: get_battle_id
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def get_battle_id(img_path: str):
    """Read the battle id from a fixed region of the screenshot at ``img_path``.

    The region (1286, 15)-(1378, 62) is cropped, converted to greyscale and
    binarised at level 200, then OCR'd as a single text line restricted to
    the characters ``/1234``. The recognised text is printed.

    Returns:
        The first recognised character as an int, or 0 when nothing was
        recognised or that character is not a digit.
    """
    cutoff = 200
    lookup = [int(level >= cutoff) for level in range(256)]
    screenshot = Image.open(img_path)
    id_area = screenshot.crop((1286, 15, 1378, 62))
    binarised = id_area.convert('L').point(lookup, '1')
    text = image_to_string(
        binarised, config='--psm 7 --oem 3 -c tessedit_char_whitelist=/1234')
    print(text)
    try:
        return int(text[0])
    except (IndexError, ValueError):
        print("Failed to recognize battle id.")
        return 0
示例8: detect_gf_result
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def detect_gf_result(image_path):
    """Clean up the image at ``image_path`` and return its OCR text without spaces.

    Pixels comparing below (100, 100, 100) are painted white, the image is
    converted to greyscale, grey levels strictly between 68 and 90 become
    black and everything else white, then one min filter and three median
    filter passes are applied before OCR.

    Args:
        image_path: path of the image to recognise.

    Returns:
        The recognised text with every space character removed.
    """
    from PIL import ImageFilter, Image
    import pytesseract
    img = Image.open(image_path)
    for x in range(img.width):
        for y in range(img.height):
            # NOTE(review): tuple comparison is lexicographic, so this is
            # decided mostly by the first channel - confirm that is intended.
            if img.getpixel((x, y)) < (100, 100, 100):
                # 255 is the largest valid 8-bit channel value (was 256).
                img.putpixel((x, y), (255, 255, 255))
    gray = img.convert('L')
    two = gray.point(lambda x: 0 if 68 < x < 90 else 255)
    min_res = two.filter(ImageFilter.MinFilter)
    med_res = min_res.filter(ImageFilter.MedianFilter)
    for _ in range(2):
        med_res = med_res.filter(ImageFilter.MedianFilter)
    # Use the double-dash "--psm" spelling, matching the other OCR calls in
    # this file; the single-dash form is the deprecated legacy spelling.
    res = pytesseract.image_to_string(med_res, config='--psm 6')
    return res.replace(' ', '')
示例9: deobfuscator
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def deobfuscator(dict_of_dicts):
    """Render a graph's adjacency matrix as an image and OCR it.

    Args:
        dict_of_dicts: adjacency mapping accepted by ``nx.DiGraph``.

    Returns:
        The text pytesseract reads from the rendered matrix. As a side
        effect the image is written to ``obfuscated.png`` in the current
        working directory.
    """
    # Work backwards: graph -> adjacency matrix -> greyscale image.
    adjacency = nx.to_numpy_array(nx.DiGraph(dict_of_dicts))
    # Entries equal to 1 become 255 so they show up as white pixels.
    adjacency[adjacency == 1] = 255
    # The array could go straight to OCR; the file is kept for inspection.
    output_path = "obfuscated.png"
    Image.fromarray(adjacency).convert("L").save(output_path)
    return pytesseract.image_to_string(output_path)
示例10: getImgFromScreenCapture
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def getImgFromScreenCapture(ques, ans_one, ans_two, ans_thr):
    """Capture the question and answer screen regions and OCR the question.

    Args:
        ques, ans_one, ans_two, ans_thr: region arguments substituted into
            ``screencapture -R`` for the question and the three answers.

    Returns:
        ``(question, answers)``: the question text recognised with the
        ``chi_sim`` language data, and a list of three empty strings (the
        answer images are captured and enhanced but not OCR'd here).
    """
    captures = (
        ("screencapture -R {} ./question_screenshot.png", ques),
        ("screencapture -R {} ./answers_one.png", ans_one),
        ("screencapture -R {} ./answers_two.png", ans_two),
        ("screencapture -R {} ./answers_thr.png", ans_thr),
    )
    for template, region in captures:
        os.system(template.format(region))
    question_img = Image.open("./question_screenshot.png")
    answer_imgs = [
        Image.open(path)
        for path in ("./answers_one.png", "./answers_two.png", "./answers_thr.png")
    ]
    question_enh = getImageFromImageEnhanceForQuestion(question_img)
    for answer_img in answer_imgs:
        getImageFromImageEnhance(answer_img)
    # Recognise the picture as Simplified Chinese
    print('OCR ' + datetime.datetime.now().strftime('%H:%M:%S'))
    question = pytesseract.image_to_string(question_enh, lang='chi_sim')
    return question, ['', '', '']
示例11: get_name
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def get_name(img):
    """Recognise the name field in ``img`` from its third colour channel.

    The third channel returned by ``cv2.split`` is passed through
    ``hist_equal``, adaptively thresholded, and resized with
    ``img_resize(..., 150)``; the original image is resized the same way.
    Both are handed to ``get_result_vary_length`` with the ``chi_sim``
    language and ``--psm 7``.

    Returns:
        Whatever ``get_result_vary_length`` returns.
    """
    print('name')
    # cv2.split converts a UMat back to Mat automatically, hence the re-wrap.
    _, _, red_channel = cv2.split(img)
    equalised = hist_equal(cv2.UMat(red_channel))
    binarised = cv2.adaptiveThreshold(
        equalised, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 151, 50)
    binarised = img_resize(binarised, 150)
    enlarged = img_resize(img, 150)
    return get_result_vary_length(binarised, 'chi_sim', enlarged, '--psm 7')
示例12: captcha_recognize
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def captcha_recognize(img_path):
    """Binarise the captcha image at ``img_path`` and return its OCR text.

    The image is converted to greyscale; levels below 200 map to 0 and all
    others to 1, producing a 1-bit image that is passed to tesseract.
    """
    import pytesseract
    cutoff = 200
    lookup = [0 if level < cutoff else 1 for level in range(256)]
    greyscale = Image.open(img_path).convert("L")
    return pytesseract.image_to_string(greyscale.point(lookup, "1"))
示例13: ocr_get_port
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def ocr_get_port(self, data):
    """Extract the port number shown in an image using OCR.

    The bytes are decoded in memory rather than through a fixed
    ``port.png`` in the working directory, which concurrent calls would
    overwrite and which was left behind whenever OCR raised.

    :param data: raw binary content of the returned image
    :return: text recognised by tesseract in single-character mode
        (``--psm 10``), restricted to the digits 0-9
    """
    from io import BytesIO

    # Sets the module-wide tesseract executable path (Windows install path).
    pytesseract.pytesseract.tesseract_cmd = 'C://Program Files//Tesseract-OCR//tesseract.exe'
    with Image.open(BytesIO(data)) as image:
        return pytesseract.image_to_string(
            image,
            config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789')
示例14: ocr_image
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def ocr_image(orig_image_arr):
    """OCR ``orig_image_arr`` with the ``letsgodigital`` language data.

    The array is first passed through ``process_image`` and wrapped in a
    PIL image; recognition is restricted to digits and the decimal point.
    """
    processed = process_image(orig_image_arr)
    otsu_thresh_image = PIL.Image.fromarray(processed)
    # NOTE(review): "-psm 100" uses the legacy single-dash flag and a mode
    # number outside tesseract's documented range - confirm the intended
    # page segmentation mode.
    ocr_config = "-psm 100 -c tessedit_char_whitelist=.0123456789"
    return image_to_string(otsu_thresh_image, lang="letsgodigital", config=ocr_config)
示例15: image_ocr
# 需要导入模块: import pytesseract [as 别名]
# 或者: from pytesseract import image_to_string [as 别名]
def image_ocr(image_path):
    """
    Recognise the English text in the image at ``image_path``.

    :return: the text returned by pytesseract
    """
    # English: lang='eng'
    # Chinese: lang='chi_sim'
    image = Image.open(image_path)
    recognised = pytesseract.image_to_string(image, lang='eng')
    return recognised