当前位置: 首页>>代码示例>>Python>>正文


Python pytesseract.image_to_string函数代码示例

本文整理汇总了Python中pytesseract.image_to_string函数的典型用法代码示例。如果您正苦于以下问题:Python image_to_string函数的具体用法?Python image_to_string怎么用?Python image_to_string使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了image_to_string函数的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _get_captcha

 def _get_captcha(self, type):
     """Download a captcha image and return the session plus its OCR text.

     :param type: ``'R'`` or ``'D'`` (case-insensitive); selects which of the
         two captcha URLs is requested.  For ``'D'`` every pixel whose blue
         channel is below 100 is painted white before OCR, and the result is
         reduced to letters and digits.
     :returns: ``(session, captcha_text)`` on success, ``(False, exception)``
         when the HTTP request fails, and ``None`` for any other ``type``.
     """
     s = requests.Session()
     kind = type.upper()
     if kind == 'R':
         # The exception classes live on the ``requests`` package; a Session
         # object has no ``exceptions`` attribute.
         try:
             r = s.get('http://www.sunat.gob.pe/cl-ti-itmrconsruc/captcha?accion=image')
         except requests.exceptions.RequestException as e:
             return (False, e)
         img = Image.open(StringIO.StringIO(r.content))
         captcha_val = pytesseract.image_to_string(img)
         return (s, captcha_val.strip().upper())
     elif kind == 'D':
         try:
             r = s.get('https://cel.reniec.gob.pe/valreg/codigo.do')
         except requests.exceptions.RequestException as e:
             return (False, e)
         img = Image.open(StringIO.StringIO(r.content))
         img = img.convert("RGBA")
         pixdata = img.load()
         # Blank out every pixel with a low blue component before OCR.
         for y in xrange(img.size[1]):
             for x in xrange(img.size[0]):
                 red, green, blue, alpha = pixdata[x, y]
                 if blue < 100:
                     pixdata[x, y] = (255, 255, 255, 255)
         raw_text = pytesseract.image_to_string(img).strip().upper()
         # Keep only alphanumeric characters from the OCR output.
         captcha_val = ''.join(
             ch for ch in raw_text if ch.isalpha() or ch.isdigit())
         return (s, captcha_val.upper())
开发者ID:vmcloudsolution,项目名称:comunidad,代码行数:31,代码来源:res_partner.py

示例2: get_text

 def get_text(self):
     """Run OCR (``lang="fin"``) on this image and return the text.

     The image is exported into the ``temp`` directory through
     ``ImageWriter``.  If the exported file cannot be opened as an image, a
     grayscale image is built from the raw stream data instead.  The exported
     file is removed before returning, also when OCR fails.

     :returns: the recognised text, or ``u""`` when the export raises
         ``PDFNotImplementedError``.
     """
     image_writer = ImageWriter("temp")
     try:
         temp_name = image_writer.export_image(self._image_obj)
     except PDFNotImplementedError:
         # No filter method available for this stream
         # https://github.com/euske/pdfminer/issues/99
         return u""
     # Keep the path in its own variable: the fallback below creates an
     # Image object, which must not be confused with the file name to delete.
     temp_path = "temp/" + temp_name
     try:
         try:
             text = image_to_string(Image.open(temp_path), lang="fin")
         except IOError:
             # PdfMiner did not return an image
             # Let's try to create one ourselves
             # TODO: Create proper color_mode values from ColorSpace
             # Most of the times "L" will create something good enough
             # for OCR, though
             fallback_image = Image.frombuffer("L",
                                               self._image_obj.srcsize,
                                               self._stream.get_data(), "raw",
                                               "L", 0, 1)
             text = image_to_string(fallback_image, lang="fin")
     finally:
         unlink(temp_path)
     return text
开发者ID:jensfinnas,项目名称:ktweb-scraper,代码行数:25,代码来源:pdf.py

示例3: ocrend

def ocrend(img_src):
    """Download the image at ``img_src``, clean it up and print its OCR text.

    The download is attempted up to 10 times with a 30 second timeout each.
    If every attempt fails the function returns without printing any OCR
    output.  The image is median-filtered, contrast-enhanced (factor 2) and
    converted to 1-bit mode before being handed to Tesseract.

    :param img_src: URL of the image to fetch.
    :returns: ``None``; the recognised text is printed.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'
    heads = {'User-Agent': user_agent}
    req = urllib2.Request(img_src, headers=heads)

    html = None
    for fails in range(1, 11):
        try:
            html = urllib2.urlopen(req, timeout=30).read()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C still
            # interrupts the retry loop.
            print("Handing brand,the network may be not Ok,please wait... %d" % fails)
        else:
            break
    if html is None:
        # Every attempt failed; there is nothing to run OCR on.
        return

    im = Image.open(StringIO(html))
    im = im.filter(ImageFilter.MedianFilter())
    im = ImageEnhance.Contrast(im).enhance(2)
    im = im.convert('1')
    print(pytesseract.image_to_string(im))
开发者ID:southwolf,项目名称:carinfo_spider,代码行数:26,代码来源:update_51auto_telenum.py

示例4: getNumbers

def getNumbers(img, name, threshhold):
    # normal threshHold
    #ret1, thresh = cv2.threshold(img, threshhold, 255, cv2.THRESH_BINARY)
    # adaptive threshHold
    thresh = cv2.adaptiveThreshold(img,255,cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, THRESH_HOLD_BOX,8)
    cv2.imwrite('threshHold/' + name + '.tiff', thresh)

    _, contours, hierarchy = cv2.findContours(thresh.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    imT = thresh.copy()
    outImg = np.zeros(img.shape, np.uint8)
    outImg += 255

    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if isNumber(x, y, w, h, img.shape):
            cv2.rectangle(imT,(x,y),(x+w,y+h),(0,255,0),5)
            outImg[y:y + h, x:x + w] = thresh[y:y + h, x:x + w]
            cv2.drawContours(outImg, [cnt], 0, (125, 125, 125), 5 )

    cv2.imwrite('out/' + name + '.tiff', outImg)
    cv2.imwrite( 'contour/' + name + '.tiff', imT )

    height, width = outImg.shape
    outImg = cv2.resize( outImg, ( 400, 400 * height / width ) )

    image = Image.fromarray( outImg )
    print pytesseract.image_to_string( image, config = "-psm 6 config" )
开发者ID:ABQpro,项目名称:ImageProcessing,代码行数:28,代码来源:tesseract.py

示例5: process

    def process(self, dir):
        """OCR the two annotated fields of one image and score the result.

        For each of the fields ``fapiaodaima`` and ``fapiaohaoma`` the
        bounding box and expected text are read from ``dir + '.desc.json'``,
        the region is cropped, written to ``./tmp.png``, recognised with
        Tesseract and passed to ``cal_digits_right`` together with the
        expected text.  ``self.digits_num`` grows by the length of each
        expected text.

        :param dir: path of the image file; the description file is expected
            next to it with a ``.desc.json`` suffix.
        """
        img = cv2.imread(dir, 0)
        desc_path = dir + '.desc.json'
        tmp_path = './tmp.png'

        for field in ('fapiaodaima', 'fapiaohaoma'):
            box = self.jsonLoad(desc_path, field, 'bounding_box')
            left = int(float(box['top_left'][0]))
            top = int(float(box['top_left'][1]))
            width = int(float(box['low_right'][0])) - left
            height = int(float(box['low_right'][1])) - top
            expected = self.jsonLoad(desc_path, field, 'text').encode('utf-8')
            self.digits_num += len(expected)

            # cropImage takes height before width.
            region = cropImage(img, left, top, height, width)
            cv2.imwrite(tmp_path, region)
            predicted = pytesseract.image_to_string(Image.open(tmp_path))
            self.cal_digits_right(dir, expected, predicted)
            os.remove(tmp_path)
开发者ID:DunHe,项目名称:ocr_digits,代码行数:33,代码来源:tesseract.py

示例6: image2text

def image2text(filepath, lang='rus'):
    """OCR the image at ``filepath``, print the recognised text and return it.

    :param filepath: path of the image file to read.
    :param lang: Tesseract language code handed to pytesseract
        (default ``'rus'``).
    :returns: the recognised text, or ``None`` when ``filepath`` does not
        exist.
    """
    import os
    if not os.path.exists(filepath):
        return None
    # The OCR dependencies are only needed once there is a file to process.
    from PIL import Image
    import pytesseract
    # Run the expensive OCR pass once and reuse the result for both the
    # console output and the return value.
    text = pytesseract.image_to_string(Image.open(filepath), lang=lang)
    print(text)
    return text
开发者ID:vnishukov,项目名称:motors,代码行数:7,代码来源:pylib.py

示例7: ocr_img

def ocr_img(image):
    """Cut a screenshot into question and answer areas and OCR both.

    :param image: PIL image of the full screenshot.
    :returns: ``(question, choices)`` where ``question`` is the recognised
        question text with line breaks removed and its first two characters
        dropped, and ``choices`` is the list of non-empty recognised lines
        from the answer area.
    """
    # Crop boxes for question and answers (top-left and bottom-right corner).
    # These values were tuned for a "Nut Pro 1" phone; adjust them for other
    # screen resolutions.
    question_box = (50, 350, 1000, 560)
    choices_box = (75, 535, 990, 1150)

    # Crop, convert to grayscale, then binarise with threshold 190.
    question_im = binarizing(image.crop(question_box).convert('L'), 190)
    choices_im = binarizing(image.crop(choices_box).convert('L'), 190)

    # Location of the tesseract executable.
    pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract'
    # Language data directory plus page segmentation mode.
    tessdata_dir_config = '--tessdata-dir "C:\\Program Files (x86)\\Tesseract-OCR\\tessdata" --psm 6'

    # lang='chi_sim' selects Simplified Chinese.
    raw_question = pytesseract.image_to_string(
        question_im, lang='chi_sim', config=tessdata_dir_config)
    # NOTE(review): the [2:] presumably strips a leading question number.
    question = raw_question.replace("\n", "")[2:]

    raw_choices = pytesseract.image_to_string(
        choices_im, lang='chi_sim', config=tessdata_dir_config)
    choices = [line for line in raw_choices.strip().split("\n") if line != '']

    return question, choices
开发者ID:JamesXu20150314,项目名称:TopSup,代码行数:30,代码来源:ocr.py

示例8: pic2str

def pic2str(x, lang=None):
    """OCR an image given as an HTTP(S) URL or as base64 data.

    The image is converted to grayscale, binarised at half of its maximum
    pixel value, doubled in size, re-encoded as JPEG and passed to Tesseract.

    :param x: string that either starts with ``http`` (the image is
        downloaded) or contains ``base64,`` followed by the encoded image.
    :param lang: optional Tesseract language code; pytesseract's default is
        used when it is falsy.
    :returns: the recognised text, or ``None`` when ``x`` has neither form or
        the JPEG encoding step fails.
    """
    if x.startswith('http'):
        res = urllib2.urlopen(x)
        try:
            raw = res.read()
        finally:
            # Close the connection even if reading fails.
            res.close()
    elif 'base64,' in x:
        raw = base64.decodestring(x.split('base64,')[1])
    else:
        return None

    # Normalise to 3-channel RGB first: palette or grayscale images would
    # otherwise reach cvtColor as 2-D arrays and make it fail.
    image = Image.open(cStringIO.StringIO(raw)).convert('RGB')
    pixels = numpy.asarray(image)
    # PIL delivers channels in RGB order, so use the RGB conversion code.
    gray = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    # Binarise at half of the brightest value found in the image.
    threshold = int(pixels.max() / 2)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)
    rows, cols = binary.shape
    # cv2.resize expects (width, height).
    binary = cv2.resize(binary, (cols * 2, rows * 2))
    encoded_ok, encoded = cv2.imencode('.jpeg', binary)
    if not encoded_ok:
        return None
    image_ok = Image.open(cStringIO.StringIO(encoded.tostring()))
    if not lang:
        return pytesseract.image_to_string(image_ok)
    return pytesseract.image_to_string(image_ok, lang=lang)
开发者ID:yangmingsong,项目名称:python,代码行数:28,代码来源:my_ocr.py

示例9: omocr2

def omocr2(img_name,path1,co):#ex:20150508104553-1
    """OCR the date, time and speed fields of one photo and save them to a text file.

    Fixed regions of ``path1\\img_name.jpg`` are cropped into the directory
    ``path1\\img_name`` (only when that directory exists), recognised with
    Tesseract and reduced to digits.  The values are printed and written as a
    semicolon-separated record to ``img_name.txt`` in that directory.

    Relies on the module-level names ``y`` and ``piece1``, which are not
    defined in this function.

    :param img_name: image file name without the ``.jpg`` extension;
        characters 8-11 are used as the time.
    :param path1: directory containing the image (Windows-style path).
    :param co: running number, only used in the progress message.
    """
    #print "B"
    timefin = ''
    content = ""
    # Derive a lookup key from the parent directory name of path1: the last
    # path component is replaced by '&', then the text between the previous
    # backslash and 11 characters before the '&' is taken.
    patnum = path1[path1.rfind(u'\\'):len(path1)] #rfind find from right
    patnum1 = path1.replace(patnum, u'&')
    patnum2 = patnum1[patnum1.rfind(u'\\')+1:patnum1.rfind(u'&')-11]
    ispath = os.path.exists(u'path_OCR\\path_ocr')
    #print ispath
    print u"第"+str(co)+u"筆 "
    
    # Look the key up in piece1; the element following a match is used as
    # the content (written to the file as the photo location below).
    for x in range(y):
        str1 = piece1[x]
        str2 = patnum2.encode('utf-8')
        if str1 == str2 :
            content = piece1[x+1]
            print content.decode('UTF-8')
    strsp = path1+"\\"+img_name
    im = Image.open(strsp+u'.jpg').convert('L')
    isExists = os.path.exists(strsp)
    if isExists :
        # Crop the fixed regions of the grayscale photo into separate files.
        im.crop((200, 2680, 650, 2770)).save(strsp+u'\\day.png')
        im.crop((200, 2770, 550, 2860)).save(strsp+u'\\time.png')
        im.crop((3240, 2770, 3560, 2850)).save(strsp+u'\\spdown.png')
        im.crop((3080, 2680, 3400, 2770)).save(strsp+u'\\spup.png')
        im.crop((15, 2130, 900, 2680)).save(strsp+u'\\li.png')
        # OCR each crop; separators and spaces are removed and the letter
        # "O" is replaced by the digit "0".
        day = pytesseract.image_to_string(Image.open(strsp+u'\\day.png')).replace('/','').replace(" ", "").replace("O", "0")
        time = pytesseract.image_to_string(Image.open(strsp+u'\\time.png')).replace(':','').replace(" ", "").replace("O", "0")
        spdown ="0"+pytesseract.image_to_string(Image.open(strsp+u'\\spdown.png')).replace(" ", "").replace("O", "0")
        spup = pytesseract.image_to_string(Image.open(strsp+u'\\spup.png')).replace(" ", "").replace("O", "0")
        # Keep digits only.
        day=filter(str.isdigit, day)
        time=filter(str.isdigit, time)
        spdown=filter(str.isdigit, spdown)
        spup=filter(str.isdigit, spup)
        day2 = day[0:8]
        time3 = img_name[8:12]# time is taken from the file name
        # NOTE(review): raises IndexError when no digit was recognised in the
        # date crop.  Subtracting 19110000 presumably converts a Gregorian
        # yyyymmdd date to the ROC calendar - confirm.
        if day[0] == "2":
            day2 = int(day2)-19110000
        time2 = time[0:4]
        spup2 = spup[0:3]
        spdown2 = spdown[0:3]
        # Both branches yield the file-name time: when the two values are
        # equal time2 is the same string as time3.
        if time3 == time2 :
            timefin = time2
        else:
            timefin = time3
        f = file(strsp+'\\'+img_name+'.txt', 'w+')
        f.write(img_name+u';'+str(day2)+u';'+str(timefin)+u';'+str(spup2)+u';'+str(spdown2)+u';') # write text to file
        if ispath :
            f.write(content)# write the photo location that was found
        else:
            f.write(' ; ')
        f.close()
        # Printed labels: file name, date, time, speed limit, vehicle speed.
        print u'檔 名:'+img_name
        print u'日 期:'+str(day2)
        print u'時 間:'+timefin
        print u'速 限:'+spup2
        print u'車 速:'+spdown2
        print "OK2"
开发者ID:Jhougo,项目名称:Python_Work,代码行数:58,代码来源:Jhougo-OCR.py

示例10: setperson

def setperson(u0, p0):
    """Log in with an OCR-solved captcha, scrape the profile and store it.

    A PhantomJS browser opens the login page, the captcha is cut out of a
    screenshot and recognised with Tesseract, and the login form is
    submitted.  The profile page fields are then read, printed and saved as a
    ``User`` row.  When reading the profile fails (for example because the
    captcha was misread) the whole procedure is repeated with a new browser.

    :param u0: user name typed into the ``j_username`` field.
    :param p0: password typed into the ``j_password`` field.
    :returns: ``None``.
    """
    # (element id on the profile page, User keyword argument)
    fields = (
        ('xh', 'student_id'),             # student number
        ('xm', 'student_name'),           # name
        ('xbdm', 'sex'),                  # sex
        ('njdm', 'grade'),                # grade
        ('yxdm', 'college'),              # college
        ('zydm', 'major'),                # major
        ('bjh', 'inclass'),               # class
        ('xzdm', 'length_of_schooling'),  # length of schooling
    )

    # Retry in a loop instead of by recursion so that a long run of failed
    # attempts cannot exhaust the interpreter's recursion limit.
    while True:
        # Raw strings keep the backslashes literal without relying on
        # unrecognised escape sequences.
        driver = webdriver.PhantomJS(executable_path=r'D:\phantomjs-2.1.1-windows\phantomjs.exe')
        url = 'http://ssfw.tjut.edu.cn/ssfw/login/ajaxlogin.do'
        driver.get(url)
        driver.maximize_window()
        driver.save_screenshot(r'static\images\i2.jpg')
        image = Image.open(r'static\images\i2.jpg')
        # Screen region that contains the captcha.
        box = (703, 149, 766, 170)
        image = image.crop(box)
        image = image.convert('L')
        image = image.convert('RGB')
        image.save(r'static\images\i4.jpg')
        # Recognise the captcha once and reuse the text.
        y0 = pytesseract.image_to_string(image).replace('\t', '')
        print(y0)
        driver.find_element_by_id('j_username').send_keys(u0)
        driver.find_element_by_id('j_password').send_keys(p0)
        driver.find_element_by_id('validateCode').send_keys(y0)
        driver.find_element_by_id('loginBtn').click()
        time.sleep(2)
        js = 'window.location.href="http://ssfw.tjut.edu.cn/ssfw/xjgl/jbxx.do"'
        driver.execute_script(js)
        print(driver.current_url)

        try:
            values = {}
            for element_id, attribute in fields:
                # One browser round-trip per field; print and keep the value.
                value = driver.find_element_by_id(element_id).get_attribute('value')
                print(value)
                values[attribute] = value
            driver.close()
        except Exception as e:
            driver.close()
            print(e)
            continue

        try:
            print(".................")
            u = User(**values)
            db.session.add(u)
            db.session.commit()
        except Exception as e:
            # Storing the record is best-effort: report the error and finish.
            print(e)
        return
开发者ID:Zephery,项目名称:flasktest,代码行数:57,代码来源:personalmessage.py

示例11: omocr

def omocr(img_name,path1,co):#ex:20150512_124104_906_1794_
    """OCR the date, time and speed fields of one photo and save them to a text file.

    Fixed regions of ``path1\\img_name.jpg`` are cropped into the directory
    ``path1\\img_name`` (only when that directory exists), recognised with
    Tesseract and reduced to digits.  The values are printed and written as a
    semicolon-separated record to ``img_name.txt`` in that directory.

    Relies on the module-level names ``y`` and ``piece1``, which are not
    defined in this function.

    :param img_name: image file name without the ``.jpg`` extension.
    :param path1: directory containing the image (Windows-style path).
    :param co: running number, only used in the progress message.
    """
   # print "A"
    content = ""
    # Derive a lookup key from the parent directory name of path1: the last
    # path component is replaced by '&', then the text between the previous
    # backslash and 11 characters before the '&' is taken.
    patnum = path1[path1.rfind(u'\\'):len(path1)] #rfind find from right
    patnum1 = path1.replace(patnum, u'&')
    patnum2 = patnum1[patnum1.rfind(u'\\')+1:patnum1.rfind(u'&')-11]
    ispath = os.path.exists(u'path_OCR\\path_ocr')
   # print ispath
    print u"第"+str(co)+u"筆 "

    # Look the key up in piece1; the element following a match is used as
    # the content (written to the file as the photo location below).
    for x in range(y):
        str1 = piece1[x]
        str2 = patnum2.encode('utf-8')
        if str1 == str2 :
            content = piece1[x+1]
            print content.decode('UTF-8')  

    strsp = path1+"\\"+img_name
    im = Image.open(strsp+u'.jpg').convert('L')
    isExists = os.path.exists(strsp)
    if isExists :
        # Crop the fixed regions of the grayscale photo into separate files.
        im.crop((185, 30, 480, 85)).save(strsp+u'\\day.png')
        im.crop((185, 90, 330, 150)).save(strsp+u'\\time.png')
        im.crop((959, 90, 1180, 150)).save(strsp+u'\\spdown.png')
        im.crop((959, 25, 1180, 85)).save(strsp+u'\\spup.png')
        im.crop((1, 1400, 610, 1710)).save(strsp+u'\\li.png')
        # OCR each crop; separators and spaces are removed and the letter
        # "O" is replaced by the digit "0".
        day = pytesseract.image_to_string(Image.open(strsp+u'\\day.png')).replace('/','').replace(" ", "").replace("O", "0")
        time = pytesseract.image_to_string(Image.open(strsp+u'\\time.png')).replace(':','').replace(" ", "").replace("O", "0")
        spdown = pytesseract.image_to_string(Image.open(strsp+u'\\spdown.png')).replace(" ", "").replace("O", "0")
        spup = pytesseract.image_to_string(Image.open(strsp+u'\\spup.png')).replace(" ", "").replace("O", "0")
        # Keep digits only.
        day=filter(str.isdigit, day)
        time=filter(str.isdigit, time)
        spdown=filter(str.isdigit, spdown)
        spup=filter(str.isdigit, spup)
        day2 = day[0:8]
        #day2 = img_name[0:6]+day[len(day)-2:len(day)]# year and month of the date from the file name, day from OCR
        # NOTE(review): raises IndexError when no digit was recognised in the
        # date crop.  Subtracting 19110000 presumably converts a Gregorian
        # yyyymmdd date to the ROC calendar - confirm.
        if day[0] == "2":
            day2 = int(day2)-19110000
        time2 = time[0:4]
        spdown2 = spdown[0:3]
        spup2 = spup[0:3]
        f = file(strsp+'\\'+img_name+'.txt', 'w+')
        f.write(img_name+u';'+str(day2)+u';'+str(time2)+u';'+str(spup2)+u';'+str(spdown2)+u';') # write text to file
        if ispath :
            f.write(content)# write the photo location that was found
        else:
            f.write(' ; ')
        f.close()
        # Printed labels: file name, date, time, speed limit, vehicle speed.
        print u'檔 名:'+img_name 
        print u'日 期:'+str(day2)
        print u'時 間:'+time2
        print u'速 限:'+spup2
        print u'車 速:'+spdown2
        print "OK"
开发者ID:Jhougo,项目名称:Python_Work,代码行数:54,代码来源:Jhougo-OCR.py

示例12: main

def main(argv):
    """Download an image, OCR it twice and print key-like chunks of the text.

    The image named by ``--url`` is saved as ``local.png``, converted to
    grayscale and doubled in size.  It is recognised once with an upper-case
    and once with a lower-case character whitelist (digits and ``-`` are
    allowed in both).  Every whitespace-separated chunk of the combined text
    that contains exactly two or exactly four dashes is printed.

    :param argv: command-line arguments without the program name.
    :raises SystemExit: with status 2 when the options cannot be parsed or no
        ``--url`` value was supplied.
    """
    import getopt
    import string
    import sys

    usage = "Run with --url=http://mysite.com/myImage.png"
    url = ""

    try:
        opts, args = getopt.getopt(argv, "", ["url="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    for opt, arg in opts:
        if opt == "--url":
            url = arg

    if not url:
        # Without a URL there is nothing to download.
        print(usage)
        sys.exit(2)

    from PIL import Image
    from PIL import ImageOps

    import pytesseract

    import urllib.request
    urllib.request.urlretrieve(url, "local.png")

    img = Image.open("local.png")
    img = ImageOps.grayscale(img)
    img = img.resize((img.width * 2, img.height * 2))

    # Build the whitelists from the string constants so no letter can be
    # mistyped, and pass the options by keyword so the call does not depend
    # on the positional parameter order of the installed pytesseract.
    readings = []
    for letters in (string.ascii_uppercase, string.ascii_lowercase):
        whitelist = string.digits + "-" + letters
        readings.append(pytesseract.image_to_string(
            img,
            lang=None,
            config="-c tessedit_char_whitelist=" + whitelist))
    text = " ".join(readings)

    validChunks = [c for c in text.split() if c.count("-") in (2, 4)]

    print("Possible keys:")
    for c in validChunks:
        print(c)
开发者ID:MintPaw,项目名称:codeStealer,代码行数:52,代码来源:steal.py

示例13: main

def main():
    """Show the default camera in grayscale and periodically OCR a frame.

    Frames are read from capture device 0, converted to grayscale and shown
    in a window named ``camera``.  Each time the frame counter reaches 15 the
    current frame is colour-mapped with ``gist_earth``, recognised with
    Tesseract and the text is printed; the counter then restarts.  The loop
    ends when the camera stops delivering frames; the capture device and the
    window are released on the way out.
    """
    cap = cv2.VideoCapture(0)
    i = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame was delivered; converting it would fail.
                break
            bw_img = cv2.cvtColor(frame, cv.CV_BGR2GRAY)
            if i == 15:
                im = Image.fromarray(np.uint8(cm.gist_earth(bw_img)*255))
                print(pytesseract.image_to_string(im))
                i = 0
            i += 1
            cv2.imshow("camera", bw_img)
            # waitKey gives the GUI time to draw the frame.
            cv2.waitKey(1)
    finally:
        cap.release()
        cv2.destroyAllWindows()
开发者ID:imPoEssible,项目名称:OCR,代码行数:13,代码来源:test_static_ocr.py

示例14: get_ocr_strings

 def get_ocr_strings(evidence, helper):
     """Return the text Tesseract recognises in the evidence file.

     The cached copy at ``evidence['file_cache_path']`` is used when it
     exists; otherwise the file is opened through
     ``helper.pathspec_helper`` using ``evidence['pathspec']`` and closed
     again afterwards, also when OCR fails.

     :param evidence: mapping with the keys ``file_cache_path`` and
         ``pathspec``.
     :param helper: object whose ``pathspec_helper`` opens and closes file
         objects for a pathspec.
     :returns: the recognised text.  On any error a warning is logged and
         ``abort(400, ...)`` is called.
     """
     # This is the actual OCR call
     try:
         cache_path = evidence['file_cache_path']
         if os.path.isfile(cache_path):
             return pytesseract.image_to_string(Image.open(cache_path))
         file_object = helper.pathspec_helper._open_file_object(evidence['pathspec'])
         try:
             return pytesseract.image_to_string(Image.open(file_object))
         finally:
             # Release the file object even when OCR raises.
             helper.pathspec_helper._close_file_object(evidence['pathspec'])
     except Exception:
         # ``Exception`` rather than a bare ``except`` so that interpreter
         # exit and keyboard interrupts are not turned into HTTP errors.
         logging.warning('Failed to perform OCR on file "' + evidence['file_cache_path'] + '"')
         abort(400, 'It appears that the pathspec is for a file that the Tesseract cannot perform OCR on')
开发者ID:maurermj08,项目名称:efetch,代码行数:14,代码来源:fa_image_ocr.py

示例15: performTessOCR

 def performTessOCR(self,imagePath):
     """
     Performs OCR with Tesseract. Please train Tesseract elsewhere if necessary.

     *Required Parameters*

     :param imagePath: path string, open file or other readable file-like
         object (for example a cStringIO buffer) holding an image, or an
         already loaded PIL Image
     :returns: whatever ``image_to_string`` returns for the image
     """
     # Paths (str or unicode) and anything readable are opened with PIL.
     # ``cStringIO`` is a module rather than a type, so a type comparison
     # against it can never match; checking for ``read`` covers such buffers
     # as well as regular files.
     if isinstance(imagePath, basestring) or hasattr(imagePath, 'read'):
         return image_to_string(Image.open(imagePath), True)
     # Anything else is handed to Tesseract unchanged.
     return image_to_string(imagePath, True)
开发者ID:asevans48,项目名称:CrawlerAids,代码行数:14,代码来源:GetImage.py


注:本文中的pytesseract.image_to_string函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。