本文整理汇总了Python中pytesseract.image_to_string函数的典型用法代码示例。如果您正苦于以下问题：Python image_to_string函数的具体用法？Python image_to_string怎么用？Python image_to_string使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了image_to_string函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _get_captcha
def _get_captcha(self, type):
    """Download a captcha image and read it with Tesseract.

    :param type: ``'R'`` fetches the SUNAT captcha, ``'D'`` the RENIEC one
        (case-insensitive). Any other value makes the method return ``None``.
    :return: ``(session, text)`` on success, where ``session`` is the
        ``requests.Session`` used for the download, or ``(False, exception)``
        when the HTTP request fails.
    """
    urls = {
        'R': 'http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image',
        'D': 'https://cel.reniec.gob.pe/valreg/codigo.do',
    }
    s = requests.Session()
    kind = type.upper()
    if kind not in urls:
        return None
    try:
        r = s.get(urls[kind])
    # The exception classes live on the ``requests`` package, not on the
    # Session instance.
    except requests.exceptions.RequestException as e:
        return (False, e)
    img = Image.open(StringIO.StringIO(r.content))
    if kind == 'R':
        return (s, pytesseract.image_to_string(img).strip().upper())
    # 'D' captcha: paint every pixel whose blue channel is below 100 white
    # before running OCR.
    img = img.convert("RGBA")
    pixdata = img.load()
    width, height = img.size
    for y in xrange(height):
        for x in xrange(width):
            if pixdata[x, y][2] < 100:
                pixdata[x, y] = (255, 255, 255, 255)
    raw = pytesseract.image_to_string(img).strip().upper()
    # Keep only letters and digits from the OCR output.
    captcha_val = ''.join(ch for ch in raw if ch.isalpha() or ch.isdigit())
    return (s, captcha_val)
示例2: get_text
def get_text(self):
    """Does OCR on this image.

    The image is exported to the ``temp`` directory, read with Tesseract
    (language ``fin``) and the exported file is removed afterwards.

    :return: the recognised text, or ``u""`` when the image stream cannot
        be exported (``PDFNotImplementedError``).
    """
    image_writer = ImageWriter("temp")
    try:
        temp_image = image_writer.export_image(self._image_obj)
    except PDFNotImplementedError:
        # No filter method available for this stream
        # https://github.com/euske/pdfminer/issues/99
        return u""
    temp_path = "temp/" + temp_image
    try:
        try:
            text = image_to_string(Image.open(temp_path), lang="fin")
        except IOError:
            # PdfMiner did not return an image
            # Let's try to create one ourselves
            # TODO: Create proper color_mode values from ColorSpace
            # Most of the times "L" will create something good enough
            # for OCR, though
            # A separate name keeps ``temp_path`` valid for the cleanup below.
            fallback_image = Image.frombuffer("L",
                                              self._image_obj.srcsize,
                                              self._stream.get_data(), "raw",
                                              "L", 0, 1)
            text = image_to_string(fallback_image, lang="fin")
    finally:
        try:
            unlink(temp_path)
        except OSError:
            # Best-effort cleanup: the exported file may not exist.
            pass
    return text
示例3: ocrend
def ocrend(img_src):
    """Download an image, clean it up and print the text Tesseract finds.

    The download is attempted up to 10 times with a 30 second timeout.
    When every attempt fails nothing is printed and the function returns.

    :param img_src: URL of the image to read.
    :return: ``None``; the OCR result is written to stdout.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
    heads = {'User-Agent': user_agent}
    req = urllib2.Request(img_src, headers=heads)
    retry_message = "Handing brand,the network may be not Ok,please wait..."
    html = None
    for fails in range(1, 11):
        try:
            response = urllib2.urlopen(req, timeout=30)
            try:
                html = response.read()
            finally:
                response.close()
        except Exception:
            print("%s %s" % (retry_message, fails))
        else:
            break
    if html is None:
        # All attempts failed: there is no image data to process.
        return None
    im = Image.open(StringIO(html))
    # Median filter, doubled contrast, then 1-bit conversion before OCR.
    im = im.filter(ImageFilter.MedianFilter())
    im = ImageEnhance.Contrast(im).enhance(2)
    im = im.convert('1')
    print(pytesseract.image_to_string(im))
示例4: getNumbers
def getNumbers(img, name, threshhold):
    """Isolate number-like regions of ``img`` and print their OCR result.

    Intermediate images are written to ``threshHold/``, ``out/`` and
    ``contour/`` as ``<name>.tiff``.

    :param img: single-channel image array (its shape is unpacked as
        ``height, width``).
    :param name: base file name for the debug images.
    :param threshhold: unused; kept for interface compatibility (the fixed
        threshold it fed is replaced by the adaptive threshold below).
    :return: ``None``; the recognised text is written to stdout.
    """
    thresh = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
                                   cv2.THRESH_BINARY, THRESH_HOLD_BOX, 8)
    cv2.imwrite('threshHold/' + name + '.tiff', thresh)
    # findContours returns 3 values on OpenCV 3 and 2 on OpenCV 2/4; the
    # contour list is always the second-to-last item.
    contours = cv2.findContours(thresh.copy(), cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    imT = thresh.copy()
    # Start from an all-white canvas and copy accepted regions onto it.
    outImg = np.full(img.shape, 255, np.uint8)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if isNumber(x, y, w, h, img.shape):
            cv2.rectangle(imT, (x, y), (x + w, y + h), (0, 255, 0), 5)
            outImg[y:y + h, x:x + w] = thresh[y:y + h, x:x + w]
            cv2.drawContours(outImg, [cnt], 0, (125, 125, 125), 5)
    cv2.imwrite('out/' + name + '.tiff', outImg)
    cv2.imwrite('contour/' + name + '.tiff', imT)
    height, width = outImg.shape
    # Floor division keeps the target size an integer, as cv2.resize
    # requires, also under true division.
    outImg = cv2.resize(outImg, (400, 400 * height // width))
    image = Image.fromarray(outImg)
    print(pytesseract.image_to_string(image, config="-psm 6 config"))
示例5: process
def process(self, dir):
    """OCR the two annotated fields of one image and score the result.

    For each of the ``fapiaodaima`` and ``fapiaohaoma`` entries in
    ``<dir>.desc.json`` the bounding box is cropped from the image, read
    with Tesseract through a temporary ``./tmp.png`` and compared with the
    annotated text via ``self.cal_digits_right``. ``self.digits_num`` is
    increased by the length of each annotated text.

    :param dir: path of the image file; the description file is expected
        at ``dir + '.desc.json'``.
    """
    img = cv2.imread(dir, 0)
    desc_path = dir + '.desc.json'
    tmp_path = './tmp.png'
    for field in ('fapiaodaima', 'fapiaohaoma'):
        box = self.jsonLoad(desc_path, field, 'bounding_box')
        left = int(float(box['top_left'][0]))
        top = int(float(box['top_left'][1]))
        width = int(float(box['low_right'][0])) - left
        height = int(float(box['low_right'][1])) - top
        expected = self.jsonLoad(desc_path, field, 'text').encode('utf-8')
        self.digits_num += len(expected)
        # cropImage takes height before width, as in the original calls.
        cropped = cropImage(img, left, top, height, width)
        cv2.imwrite(tmp_path, cropped)
        try:
            predicted = pytesseract.image_to_string(Image.open(tmp_path))
            self.cal_digits_right(dir, expected, predicted)
        finally:
            # Remove the scratch file even when OCR or scoring fails.
            os.remove(tmp_path)
示例6: image2text
def image2text(filepath, lang='rus'):
    """Print and return the text Tesseract reads from an image file.

    :param filepath: path of the image to read.
    :param lang: Tesseract language code, ``'rus'`` by default.
    :return: the recognised text, or ``None`` when ``filepath`` does not
        exist.
    """
    import os

    if not os.path.exists(filepath):
        return None

    from PIL import Image
    import pytesseract

    # Run OCR once and reuse the result for both the print and the return.
    text = pytesseract.image_to_string(Image.open(filepath), lang=lang)
    print(text)
    return text
示例7: ocr_img
def ocr_img(image):
    """Read the question and the answer options from a screenshot.

    Two fixed regions are cropped from ``image``, converted to grayscale,
    binarized with threshold 190 and read with Tesseract (``chi_sim``).

    :param image: PIL image of the whole screen.
    :return: ``(question, choices)`` where ``question`` is the question
        text with line breaks removed and its first two characters dropped,
        and ``choices`` is the list of non-empty option lines.
    """
    # Crop boxes are (left, top, right, bottom); tune them for the screen
    # resolution in use. These values are for the Smartisan Nut Pro 1
    # (an iPhone 7 Plus question box would be (75, 315, 1167, 789)).
    question_box = (50, 350, 1000, 560)
    choices_box = (75, 535, 990, 1150)

    # Crop -> grayscale -> binary image, for each region.
    question_region = binarizing(image.crop(question_box).convert('L'), 190)
    choices_region = binarizing(image.crop(choices_box).convert('L'), 190)

    # Location of the tesseract executable.
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'
    # Language data directory and page segmentation mode.
    ocr_config = '--tessdata-dir "C:\\Program Files (x86)\\Tesseract-OCR\\tessdata" --psm 6'

    # lang selects Simplified Chinese.
    question_text = pytesseract.image_to_string(
        question_region, lang='chi_sim', config=ocr_config)
    question = question_text.replace("\n", "")[2:]

    choices_text = pytesseract.image_to_string(
        choices_region, lang='chi_sim', config=ocr_config)
    choices = [line for line in choices_text.strip().split("\n") if line]
    return question, choices
示例8: pic2str
def pic2str(x, lang=None):
    """OCR an image given as an HTTP(S) URL or as a base64 data string.

    The image is converted to grayscale, thresholded at half of its
    maximum pixel value, doubled in size and re-encoded as JPEG before
    being passed to Tesseract.

    :param x: a string starting with ``http`` (downloaded with urllib2) or
        containing ``base64,`` followed by the encoded image bytes.
    :param lang: optional Tesseract language code.
    :return: the recognised text, or ``None`` when ``x`` is neither form
        or the JPEG re-encoding fails.
    """
    if x[:4] == 'http':
        res = urllib2.urlopen(x)
        try:
            raw = res.read()
        finally:
            res.close()
    elif 'base64' in x:
        payload = x.partition('base64,')[2]
        if not payload:
            # "base64" without the "base64," marker: nothing to decode.
            return None
        raw = base64.decodestring(payload)
    else:
        return None
    # Normalise to 3-channel RGB so cvtColor also accepts palette, gray
    # and RGBA inputs.
    image = Image.open(cStringIO.StringIO(raw)).convert('RGB')
    pixels = numpy.asarray(image)
    # PIL arrays are in RGB channel order.
    gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    # Binarize at half of the brightest pixel value.
    threshold = int(pixels.max() / 2)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    rows, cols = binary.shape
    # cv2.resize takes (width, height).
    binary = cv2.resize(binary, (cols * 2, rows * 2))
    ok, encoded = cv2.imencode('.jpeg', binary)
    if not ok:
        return None
    image_ok = Image.open(cStringIO.StringIO(encoded.tobytes()))
    if lang:
        return pytesseract.image_to_string(image_ok, lang=lang)
    return pytesseract.image_to_string(image_ok)
示例9: omocr2
def omocr2(img_name,path1,co):#ex:20150508104553-1
    """OCR the data strip of one photo and write it to a ``.txt`` file.

    Reads ``<path1>\\<img_name>.jpg``; when the directory
    ``<path1>\\<img_name>`` exists, crops the date, time, two speed fields
    and one more region into PNG files there, reads the first four with
    Tesseract and writes a ``;``-separated record to
    ``<img_name>.txt`` in the same directory. Progress is printed.

    Relies on the names ``y`` and ``piece1`` defined outside this
    function (assumed to be a count and a flat list of key/value strings
    -- TODO confirm against the caller).

    :param img_name: image base name; characters 8-12 are used as the time.
    :param path1: directory containing the image (backslash separated).
    :param co: record counter, only used in the progress message.
    """
    content = ""
    patnum = path1[path1.rfind(u'\\'):]  # rfind searches from the right
    patnum1 = path1.replace(patnum, u'&')
    patnum2 = patnum1[patnum1.rfind(u'\\')+1:patnum1.rfind(u'&')-11]
    ispath = os.path.exists(u'path_OCR\\path_ocr')
    print(u"第"+str(co)+u"筆 ")
    wanted = patnum2.encode('utf-8')
    for x in range(y):
        if piece1[x] == wanted:
            content = piece1[x+1]
            print(content.decode('UTF-8'))
    strsp = path1+"\\"+img_name
    im = Image.open(strsp+u'.jpg').convert('L')
    if not os.path.exists(strsp):
        return

    def read_digits(file_name, prefix=""):
        # OCR one cropped PNG, treat a letter O as zero, keep digits only.
        text = prefix + pytesseract.image_to_string(Image.open(strsp + file_name))
        return ''.join(ch for ch in text.replace("O", "0") if ch.isdigit())

    im.crop((200, 2680, 650, 2770)).save(strsp+u'\\day.png')
    im.crop((200, 2770, 550, 2860)).save(strsp+u'\\time.png')
    im.crop((3240, 2770, 3560, 2850)).save(strsp+u'\\spdown.png')
    im.crop((3080, 2680, 3400, 2770)).save(strsp+u'\\spup.png')
    im.crop((15, 2130, 900, 2680)).save(strsp+u'\\li.png')
    day = read_digits(u'\\day.png')
    time = read_digits(u'\\time.png')
    spdown = read_digits(u'\\spdown.png', "0")
    spup = read_digits(u'\\spup.png')
    day2 = day[0:8]
    time3 = img_name[8:12]  # time taken from the file name
    # Slicing avoids an IndexError when OCR found no digits at all.
    if day[:1] == "2":
        # Presumably converts a Gregorian yyyymmdd date to the ROC
        # calendar (year - 1911) -- verify.
        day2 = int(day2)-19110000
    time2 = time[0:4]
    spup2 = spup[0:3]
    spdown2 = spdown[0:3]
    # Both branches of the original comparison yield the file-name time.
    timefin = time2 if time3 == time2 else time3
    with open(strsp+'\\'+img_name+'.txt', 'w+') as f:
        f.write(img_name+u';'+str(day2)+u';'+str(timefin)+u';'+str(spup2)+u';'+str(spdown2)+u';')
        if ispath:
            f.write(content)  # location matched from piece1
        else:
            f.write(' ; ')
    print(u'檔 名:'+img_name)
    print(u'日 期:'+str(day2))
    print(u'時 間:'+timefin)
    print(u'速 限:'+spup2)
    print(u'車 速:'+spdown2)
    print("OK2")
示例10: setperson
def setperson(u0, p0):
    """Log in to the student portal and store the student's profile.

    A PhantomJS browser opens the login page, the captcha is cropped from
    a screenshot and read with Tesseract, and the login form is submitted.
    The profile page fields are then read, printed and saved as a ``User``
    row. If reading the profile fails (for example after a wrong captcha)
    the whole procedure is retried by calling ``setperson`` again.

    :param u0: user name typed into ``j_username``.
    :param p0: password typed into ``j_password``.
    """
    driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\phantomjs.exe')
    url = 'http://ssfw.tjut.edu.cn/ssfw/login/ajaxlogin.do'
    driver.get(url)
    driver.maximize_window()
    driver.save_screenshot(r'static\images\i2.jpg')
    image = Image.open(r'static\images\i2.jpg')
    # Region of the screenshot that holds the captcha.
    box = (703, 149, 766, 170)
    image = image.crop(box).convert('L').convert('RGB')
    image.save(r'static\images\i4.jpg')
    # OCR once and reuse the value for the log line and the form.
    y0 = pytesseract.image_to_string(image).replace('\t', '')
    print(y0)
    driver.find_element_by_id('j_username').send_keys(u0)
    driver.find_element_by_id('j_password').send_keys(p0)
    driver.find_element_by_id('validateCode').send_keys(y0)
    driver.find_element_by_id('loginBtn').click()
    time.sleep(2)
    js = 'window.location.href="http://ssfw.tjut.edu.cn/ssfw/xjgl/jbxx.do"'
    driver.execute_script(js)
    print(driver.current_url)
    # (User keyword, form element id) in the order the page is read.
    fields = (
        ('student_id', 'xh'),            # student number
        ('student_name', 'xm'),          # name
        ('sex', 'xbdm'),                 # sex
        ('grade', 'njdm'),               # grade
        ('college', 'yxdm'),             # college
        ('major', 'zydm'),               # major
        ('inclass', 'bjh'),              # class
        ('length_of_schooling', 'xzdm'),  # length of schooling
    )
    try:
        profile = {}
        for keyword, element_id in fields:
            value = driver.find_element_by_id(element_id).get_attribute('value')
            print(value)
            profile[keyword] = value
        driver.close()
    except Exception as e:
        driver.close()
        print(e)
        # Retry from scratch with a fresh browser.
        setperson(u0, p0)
        return
    try:
        print(".................")
        u = User(**profile)
        db.session.add(u)
        db.session.commit()
    except Exception as e:
        print(e)
        # Leave the session usable after a failed commit.
        db.session.rollback()
示例11: omocr
def omocr(img_name,path1,co):#ex:20150512_124104_906_1794_
    """OCR the data strip of one photo and write it to a ``.txt`` file.

    Reads ``<path1>\\<img_name>.jpg``; when the directory
    ``<path1>\\<img_name>`` exists, crops the date, time, two speed fields
    and one more region into PNG files there, reads the first four with
    Tesseract and writes a ``;``-separated record to
    ``<img_name>.txt`` in the same directory. Progress is printed.

    Relies on the names ``y`` and ``piece1`` defined outside this
    function (assumed to be a count and a flat list of key/value strings
    -- TODO confirm against the caller).

    :param img_name: image base name, also used for the output file name.
    :param path1: directory containing the image (backslash separated).
    :param co: record counter, only used in the progress message.
    """
    content = ""
    patnum = path1[path1.rfind(u'\\'):]  # rfind searches from the right
    patnum1 = path1.replace(patnum, u'&')
    patnum2 = patnum1[patnum1.rfind(u'\\')+1:patnum1.rfind(u'&')-11]
    ispath = os.path.exists(u'path_OCR\\path_ocr')
    print(u"第"+str(co)+u"筆 ")
    wanted = patnum2.encode('utf-8')
    for x in range(y):
        if piece1[x] == wanted:
            content = piece1[x+1]
            print(content.decode('UTF-8'))
    strsp = path1+"\\"+img_name
    im = Image.open(strsp+u'.jpg').convert('L')
    if not os.path.exists(strsp):
        return

    def read_digits(file_name):
        # OCR one cropped PNG, treat a letter O as zero, keep digits only.
        text = pytesseract.image_to_string(Image.open(strsp + file_name))
        return ''.join(ch for ch in text.replace("O", "0") if ch.isdigit())

    im.crop((185, 30, 480, 85)).save(strsp+u'\\day.png')
    im.crop((185, 90, 330, 150)).save(strsp+u'\\time.png')
    im.crop((959, 90, 1180, 150)).save(strsp+u'\\spdown.png')
    im.crop((959, 25, 1180, 85)).save(strsp+u'\\spup.png')
    im.crop((1, 1400, 610, 1710)).save(strsp+u'\\li.png')
    day = read_digits(u'\\day.png')
    time = read_digits(u'\\time.png')
    spdown = read_digits(u'\\spdown.png')
    spup = read_digits(u'\\spup.png')
    day2 = day[0:8]
    # Slicing avoids an IndexError when OCR found no digits at all.
    if day[:1] == "2":
        # Presumably converts a Gregorian yyyymmdd date to the ROC
        # calendar (year - 1911) -- verify.
        day2 = int(day2)-19110000
    time2 = time[0:4]
    spdown2 = spdown[0:3]
    spup2 = spup[0:3]
    with open(strsp+'\\'+img_name+'.txt', 'w+') as f:
        f.write(img_name+u';'+str(day2)+u';'+str(time2)+u';'+str(spup2)+u';'+str(spdown2)+u';')
        if ispath:
            f.write(content)  # location matched from piece1
        else:
            f.write(' ; ')
    print(u'檔 名:'+img_name)
    print(u'日 期:'+str(day2))
    print(u'時 間:'+time2)
    print(u'速 限:'+spup2)
    print(u'車 速:'+spdown2)
    print("OK")
示例12: main
def main(argv):
    """Download an image and print dash-separated keys found in it.

    The image is saved as ``local.png``, converted to grayscale, doubled
    in size and read twice with Tesseract: once restricted to digits, ``-``
    and upper-case letters, once to digits, ``-`` and lower-case letters.
    Every whitespace-separated chunk containing exactly 2 or 4 dashes is
    printed as a possible key.

    :param argv: command line arguments (without the program name);
        ``--url=<image url>`` is required.
    :raises SystemExit: with status 2 when the arguments are invalid or no
        URL was given.
    """
    import getopt
    import sys

    usage = "Run with --url=http://mysite.com/myImage.png"
    try:
        opts, _ = getopt.getopt(argv, "", ["url="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    url = ""
    for opt, arg in opts:
        if opt == "--url":
            url = arg
    if not url:
        # Without a URL there is nothing to download.
        print(usage)
        sys.exit(2)

    import urllib.request

    import pytesseract
    from PIL import Image
    from PIL import ImageOps

    urllib.request.urlretrieve(url, "local.png")
    img = ImageOps.grayscale(Image.open("local.png"))
    img = img.resize((img.width * 2, img.height * 2))

    whitelists = (
        "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ",
        "0123456789-abcdefghijklmnopqrstuvwxyz",
    )
    # ``config`` is passed by keyword so the call does not depend on the
    # positional parameter order of the installed pytesseract version.
    text = " ".join(
        pytesseract.image_to_string(
            img, config="-c tessedit_char_whitelist=" + allowed)
        for allowed in whitelists)

    validChunks = [c for c in text.split() if c.count("-") in (2, 4)]
    print("Possible keys:")
    for c in validChunks:
        print(c)
示例13: main
def main():
    """Show the default camera in grayscale and OCR it periodically.

    Frames from capture device 0 are converted to grayscale and displayed
    in a window named ``camera``. The 16th frame, and every 15th frame
    after it, is colour-mapped with ``cm.gist_earth`` and passed to
    Tesseract; the recognised text is printed.

    The loop ends when a frame cannot be read or when ESC is pressed in
    the window; the capture device and the window are then released.
    """
    cap = cv2.VideoCapture(0)
    i = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame available (camera missing or disconnected).
                break
            bw_img = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if i == 15:
                im = Image.fromarray(np.uint8(cm.gist_earth(bw_img)*255))
                print(pytesseract.image_to_string(im))
                i = 0
            i += 1
            cv2.imshow("camera", bw_img)
            # waitKey also drives the window's event loop; 27 is ESC.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
示例14: get_ocr_strings
def get_ocr_strings(evidence, helper):
    """Return the text Tesseract reads from an evidence file.

    The cached copy at ``evidence['file_cache_path']`` is used when it
    exists; otherwise the file is opened through
    ``helper.pathspec_helper`` using ``evidence['pathspec']``.

    :param evidence: mapping with ``file_cache_path`` and ``pathspec``.
    :param helper: object exposing ``pathspec_helper._open_file_object``
        and ``pathspec_helper._close_file_object``.
    :return: the recognised text. On any failure a warning is logged and
        ``abort(400, ...)`` is called.
    """
    # This is the actual OCR call
    try:
        cache_path = evidence['file_cache_path']
        if os.path.isfile(cache_path):
            return pytesseract.image_to_string(Image.open(cache_path))
        file_object = helper.pathspec_helper._open_file_object(evidence['pathspec'])
        try:
            return pytesseract.image_to_string(Image.open(file_object))
        finally:
            # Close the file object even when OCR fails.
            helper.pathspec_helper._close_file_object(evidence['pathspec'])
    except Exception:
        logging.warning('Failed to perform OCR on file "' + evidence['file_cache_path'] + '"')
        abort(400, 'It appears that the pathspec is for a file that the Tesseract cannot perform OCR on')
示例15: performTessOCR
def performTessOCR(self,imagePath):
    """
    Performs OCR with Tesseract. Please train Tesseract Elsewhere if necessary.
    JTessBox is a nice program implemented with VietOCR. SVM is used for OCR
    (e.g. covariance matrices with letter images appended as pixels) or transforms.

    *Required Parameters*

    :param imagePath: a PIL Image, or anything ``Image.open`` accepts
        (a path string or a file-like object such as a cStringIO buffer)
    :return: the value returned by ``image_to_string``
    """
    # A PIL image is used directly; every other input (str or unicode
    # path, file object, in-memory buffer) is opened first. The original
    # type comparison against the cStringIO module could never match.
    if isinstance(imagePath, Image.Image):
        return image_to_string(imagePath, True)
    return image_to_string(Image.open(imagePath), True)