当前位置: 首页>>代码示例>>Python>>正文


Python pyocr.builders模块代码示例

本文整理汇总了Python中pyocr.builders模块的典型用法代码示例。如果您正苦于以下问题：Python pyocr.builders模块的具体用法？Python pyocr.builders怎么用？Python pyocr.builders使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的pyocr包的用法示例。


在下文中一共展示了pyocr.builders模块的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: image2txt_pyocr

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def image2txt_pyocr(self, image, do_orientation):
        """Run OCR on one encoded image and return the recognized text.

        Args:
            image: Raw bytes of an encoded image; they are wrapped in
                ``io.BytesIO`` and opened with ``PI.open``.
            do_orientation: Only when this is exactly ``True`` is the page
                orientation detected (if the OCR tool supports it) and the
                image rotated before recognition.

        Returns:
            The text produced by ``self.tool.image_to_string``, or ``""``
            when that call raises ``pyocr.error.TesseractError``.
        """
        txt = ""
        orientation = ""
        img_per_page = PI.open(io.BytesIO(image))

        if do_orientation is True:
            try:
                if self.tool.can_detect_orientation():
                    orientation = self.tool.detect_orientation(
                        img_per_page, lang=self.lang
                    )
                    angle = orientation["angle"]
                    if angle != 0:
                        # rotate() returns a new image instead of modifying
                        # the receiver, so the result has to be kept;
                        # otherwise the detected orientation is never
                        # applied. expand=True grows the canvas so that a
                        # non-square page is not cropped by the rotation.
                        # NOTE(review): assumes PI is PIL's Image module.
                        img_per_page = img_per_page.rotate(angle, expand=True)
            except pyocr.PyocrException as exc:
                # Best effort: OCR still runs on the unrotated image.
                print("Orientation detection failed: {}".format(exc))
            print("Orientation: {}".format(orientation))

        try:
            txt = self.tool.image_to_string(
                img_per_page, lang=self.lang,
                builder=pyocr.builders.TextBuilder()
            )
        except pyocr.error.TesseractError as e:
            # Report the failure and fall through with the empty default.
            print("{}".format(e))
        return txt
开发者ID:lucab85,项目名称:PDFtoTXT,代码行数:26,代码来源:LocalOCR.py

示例2: ocr_one_image

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def ocr_one_image(self, area, image, threadList=-1, threadNum=None):
		"""Recognize the text of ``image`` and return it.

		The result is printed together with ``area``. When ``threadList`` is
		anything other than the ``-1`` default, the text is also stored in
		``threadList[threadNum]``.
		"""
		print("Starting image...")
		recognize = self.tool.image_to_string
		txt = recognize(
			image,
			lang=self.langs[0],
			builder=pyocr.builders.TextBuilder(),
		)
		header = "==RESULT==" + str(area)
		print(header + "\n" + txt + "\n==========================")
		if threadList != -1:
			# Hand the result back through the caller-supplied container.
			threadList[threadNum] = txt
		return txt
开发者ID:008karan,项目名称:PAN_OCR,代码行数:9,代码来源:tesseract_ocr.py

示例3: __get_boxes

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __get_boxes(self):
        """
        Load the boxes stored in this page's box file.

        The file is first parsed as line boxes. If that yields nothing, it
        is parsed again as word boxes (old format) and the words are wrapped
        in a single line box. Returns an empty list when the file holds no
        boxes or cannot be read.
        """
        box_path = self.__box_path

        def parse_with(builder):
            # Open the box file and let the given builder parse it.
            with self.fs.open(box_path, 'r') as handle:
                return builder.read_file(handle)

        try:
            line_boxes = parse_with(pyocr.builders.LineBoxBuilder())
            if line_boxes != []:
                return line_boxes

            # Legacy fallback: files written with the old word-box format.
            word_boxes = parse_with(pyocr.builders.WordBoxBuilder())
            if len(word_boxes) <= 0:
                return []

            logger.warning("WARNING: Doc %s uses old box format" %
                           (str(self.doc)))
            first_position = word_boxes[0].position
            return [pyocr.builders.LineBox(word_boxes, first_position)]
        except IOError as exc:
            logger.error("Unable to get boxes for '%s': %s"
                         % (self.doc.docid, exc))
            return []
开发者ID:openpaperwork,项目名称:paperwork-backend,代码行数:28,代码来源:page.py

示例4: __set_boxes

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __set_boxes(self, boxes):
        """Write ``boxes`` to this page's box file with a LineBoxBuilder."""
        target = self.__box_path
        with self.fs.open(target, 'w') as handle:
            builder = pyocr.builders.LineBoxBuilder()
            builder.write_file(handle, boxes)
开发者ID:openpaperwork,项目名称:paperwork-backend,代码行数:6,代码来源:page.py

示例5: __set_boxes

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __set_boxes(self, boxes):
        """Write ``boxes`` to the file returned by ``__get_box_path()``."""
        target = self.__get_box_path()
        with self.fs.open(target, 'w') as handle:
            builder = pyocr.builders.LineBoxBuilder()
            builder.write_file(handle, boxes)
开发者ID:openpaperwork,项目名称:paperwork-backend,代码行数:6,代码来源:page.py

示例6: image_to_dict

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def image_to_dict(self, image):
        """OCR a Japanese text snippet and return the text to display for it.

        The image is upscaled 3x, recognized with ``lang="jpn"``, stripped of
        every character found in ``special_chars`` and looked up through
        ``myougiden_api.run``.

        Args:
            image: Image object exposing ``size`` and ``resize``
                (presumably a PIL image -- it is resized with
                ``Image.BICUBIC``).

        Returns:
            The dictionary entry (surrounding newlines stripped) when the
            lookup returns one, otherwise the recognized string, or
            ``"Nothing recognized"`` when the result is empty. The text is
            wrapped to 120 columns with whitespace preserved.
        """
        # The layout mode is passed as TextBuilder's first argument.
        # NOTE(review): in Tesseract's page segmentation modes 5 is a
        # vertical text block, 10 a single character and 7 a single text
        # line -- confirm against the Tesseract documentation.
        mode = 5
        size = image.size
        image = image.resize((size[0] * 3, size[1] * 3), Image.BICUBIC)
        # The aspect ratio is judged on the original (pre-upscale) size.
        if size[0] / size[1] < 1.15 and size[1] / size[0] < 1.15:
            mode = 10
        if size[0] > size[1] * 1.5:
            mode = 7
        string = self.image_to_string(image, lang="jpn",
                                      builder=pyocr.builders.TextBuilder(mode))
        string = "".join([c for c in string.strip()
                          if c not in special_chars])
        self.draw("Looking up " + string)
        if string != "":
            dict_entry = myougiden_api.run(string)
            if dict_entry is not None:
                string = dict_entry.strip("\n")
        # Covers both "nothing recognized" and an entry that was only
        # newlines.
        if string == "":
            string = "Nothing recognized"
        return textwrap.fill(string, 120, replace_whitespace=False,
                             drop_whitespace=False)
开发者ID:klaxa,项目名称:OCR-Manga,代码行数:28,代码来源:Reader.py

示例7: __get_boxes

# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __get_boxes(self):
        """
        Return the line boxes of this page, computing them on first use.

        A previously computed result is returned as-is. Otherwise the boxes
        are read from the page's box file when it exists; if it does not
        exist or cannot be read, they are rebuilt from the text and text
        layout reported by ``self.pdf_page``.
        """
        if self.__boxes is not None:
            return self.__boxes

        # Prefer an existing OCR box file over the PDF's own text layer.
        box_path = self.__get_box_path()
        if self.fs.exists(box_path):
            builder = pyocr.builders.LineBoxBuilder()

            try:
                with self.fs.open(box_path, 'r') as handle:
                    self.__boxes = builder.read_file(handle)
                return self.__boxes
            except IOError as exc:
                # Log and continue with the PDF-based fallback below.
                logger.error("Unable to get boxes for '%s': %s"
                             % (self.doc.docid, exc))

        # Fallback: derive the boxes from the PDF text layer.
        page_text = self.pdf_page.get_text()
        self.__boxes = []

        layout_result = self.pdf_page.get_text_layout()
        if not layout_result[0]:
            # No layout available: the page has no boxes.
            return self.__boxes
        rects = layout_result[1]

        line_pairs = custom_split(
            page_text, rects, lambda x: x == "\n"
        )
        for (line, line_rects) in line_pairs:
            word_boxes = [
                PdfWordBox(word, word_rects)
                for (word, word_rects) in custom_split(
                    line, line_rects, lambda x: x.isspace()
                )
            ]
            self.__boxes.append(PdfLineBox(word_boxes, line_rects))
        return self.__boxes
开发者ID:openpaperwork,项目名称:paperwork-backend,代码行数:46,代码来源:page.py


注:本文中的pyocr.builders方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。