本文整理汇总了Python中pyocr.builders模块的典型用法代码示例。如果您正苦于以下问题：Python pyocr.builders的具体用法？Python pyocr.builders怎么用？Python pyocr.builders使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该模块所在的包pyocr的用法示例。
在下文中一共展示了pyocr.builders模块的7个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: image2txt_pyocr
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def image2txt_pyocr(self, image, do_orientation):
    """Run OCR on one encoded image and return the recognised text.

    Args:
        image: Raw bytes of an encoded image. They are wrapped in
            ``io.BytesIO`` and opened with ``PI.open`` (``PI`` is presumably
            ``PIL.Image`` -- confirm against the file's imports).
        do_orientation: When exactly ``True``, ask ``self.tool`` for the
            page orientation and rotate the image before recognition.

    Returns:
        The text produced by ``self.tool.image_to_string``, or ``""`` when
        that call raises ``pyocr.error.TesseractError`` (the error is
        printed, not re-raised).
    """
    txt = ""
    orientation = ""
    img_per_page = PI.open(io.BytesIO(image))
    if do_orientation is True:
        try:
            if self.tool.can_detect_orientation():
                orientation = self.tool.detect_orientation(
                    img_per_page, lang=self.lang
                )
                angle = orientation["angle"]
                if angle != 0:
                    # rotate() returns a NEW image; the result has to be
                    # kept, otherwise OCR still runs on the unrotated page.
                    # expand=True grows the canvas so a quarter-turn of a
                    # non-square page is not cropped.
                    # NOTE(review): the angle is passed through unchanged,
                    # as in the original call -- confirm its sign matches
                    # Pillow's counter-clockwise convention.
                    img_per_page = img_per_page.rotate(angle, expand=True)
        except pyocr.PyocrException as exc:
            # Best effort: fall through and OCR the image as-is.
            print("Orientation detection failed: {}".format(exc))
        print("Orientation: {}".format(orientation))
    try:
        txt = self.tool.image_to_string(
            img_per_page, lang=self.lang,
            builder=pyocr.builders.TextBuilder()
        )
    except pyocr.error.TesseractError as e:
        print("{}".format(e))
    return txt
示例2: ocr_one_image
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def ocr_one_image(self, area, image, threadList=-1, threadNum=None):
    """OCR a single image, print the result and optionally store it.

    Args:
        area: Label for the image; only used (via ``str``) in the printed
            result banner.
        image: Image handed to ``self.tool.image_to_string``.
        threadList: Indexable container that receives the text at
            ``threadNum``; the default ``-1`` means "do not store".
        threadNum: Index into ``threadList`` for the result.

    Returns:
        The recognised text.
    """
    print("Starting image...")
    language = self.langs[0]
    text_builder = pyocr.builders.TextBuilder()
    recognised = self.tool.image_to_string(
        image, lang=language, builder=text_builder
    )
    banner = (
        "==RESULT==" + str(area) + "\n"
        + recognised
        + "\n=========================="
    )
    print(banner)
    if threadList != -1:
        # Hand the text back through the caller-supplied slot.
        threadList[threadNum] = recognised
    return recognised
示例3: __get_boxes
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __get_boxes(self):
    """
    Load the boxes stored in this page's box file.

    The file is first parsed as line boxes. If that yields nothing, it is
    parsed again in the older word-box format and the words are wrapped in
    a single line box. Returns an empty list when the file holds no boxes
    or cannot be read (the IOError is logged).
    """
    path = self.__box_path
    try:
        line_builder = pyocr.builders.LineBoxBuilder()
        with self.fs.open(path, 'r') as box_fd:
            line_boxes = line_builder.read_file(box_fd)
        if line_boxes != []:
            return line_boxes

        # Legacy files stored bare word boxes; this path should no longer
        # be needed.
        word_builder = pyocr.builders.WordBoxBuilder()
        with self.fs.open(path, 'r') as box_fd:
            word_boxes = word_builder.read_file(box_fd)
        if len(word_boxes) < 1:
            return []

        logger.warning("WARNING: Doc %s uses old box format" %
                       (str(self.doc)))
        # Wrap every word in one line positioned at the first word.
        first_position = word_boxes[0].position
        return [pyocr.builders.LineBox(word_boxes, first_position)]
    except IOError as exc:
        logger.error("Unable to get boxes for '%s': %s"
                     % (self.doc.docid, exc))
        return []
示例4: __set_boxes
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __set_boxes(self, boxes):
    """Write ``boxes`` to this page's box file with pyocr's line-box builder."""
    target = self.__box_path
    with self.fs.open(target, 'w') as box_fd:
        writer = pyocr.builders.LineBoxBuilder()
        writer.write_file(box_fd, boxes)
示例5: __set_boxes
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __set_boxes(self, boxes):
    """Write ``boxes`` to the file named by ``__get_box_path()`` as line boxes."""
    target = self.__get_box_path()
    with self.fs.open(target, 'w') as box_fd:
        writer = pyocr.builders.LineBoxBuilder()
        writer.write_file(box_fd, boxes)
示例6: image_to_dict
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def image_to_dict(self, image):
    """OCR ``image`` as Japanese text and return a wrapped dictionary lookup.

    The image is upscaled 3x, recognised with a layout mode chosen from its
    original aspect ratio, stripped of any character in ``special_chars``,
    and the remaining text is looked up with ``myougiden_api.run``.

    Args:
        image: Object exposing ``size`` and ``resize`` (presumably a PIL
            image -- confirm with callers).

    Returns:
        Text wrapped at 120 columns: the dictionary entry when the lookup
        returned one, otherwise the recognised text, or
        ``"Nothing recognized"`` when the text to display is empty.
    """
    width, height = image.size

    # The mode is passed as TextBuilder's first positional argument.
    # NOTE(review): in pyocr this is ``tesseract_layout`` (Tesseract's page
    # segmentation mode); 5 / 10 / 7 look like "vertical block" / "single
    # character" / "single line" -- confirm against the Tesseract docs.
    mode = 5
    # Ratios are compared by cross-multiplication rather than division, so
    # the result does not depend on integer-division semantics and cannot
    # raise ZeroDivisionError.
    if width < 1.15 * height and height < 1.15 * width:
        mode = 10  # roughly square crop
    if width > height * 1.5:
        mode = 7  # clearly wider than tall

    image = image.resize((width * 3, height * 3), Image.BICUBIC)
    string = self.image_to_string(image, lang="jpn",
                                  builder=pyocr.builders.TextBuilder(mode))
    string = "".join([c for c in string.strip()
                      if c not in special_chars])
    self.draw("Looking up " + string)

    # Only query the dictionary when something was recognised.
    dict_entry = myougiden_api.run(string) if string != "" else None
    if dict_entry is not None:
        string = dict_entry.strip("\n")
    if string == "":
        string = "Nothing recognized"
    return textwrap.fill(string, 120, replace_whitespace=False,
                         drop_whitespace=False)
示例7: __get_boxes
# 需要导入模块: import pyocr [as 别名]
# 或者: from pyocr import builders [as 别名]
def __get_boxes(self):
    """
    Return the line boxes of this page, computing and caching them once.

    Lookup order: the cached ``__boxes`` value, then the box file returned
    by ``__get_box_path()`` if it exists and can be read, and finally the
    text and layout reported by ``self.pdf_page``.
    """
    if self.__boxes is not None:
        return self.__boxes

    # Prefer a stored OCR box file when one is available.
    path = self.__get_box_path()
    if self.fs.exists(path):
        line_builder = pyocr.builders.LineBoxBuilder()
        try:
            with self.fs.open(path, 'r') as box_fd:
                self.__boxes = line_builder.read_file(box_fd)
        except IOError as exc:
            # Unreadable file: log it and use the PDF's own text instead.
            logger.error("Unable to get boxes for '%s': %s"
                         % (self.doc.docid, exc))
        else:
            return self.__boxes

    # Fallback: rebuild the boxes from the PDF page itself.
    page_text = self.pdf_page.get_text()
    self.__boxes = []
    layout_result = self.pdf_page.get_text_layout()
    if not layout_result[0]:
        # No layout reported; the page yields no boxes.
        return self.__boxes
    rects = layout_result[1]

    line_chunks = custom_split(page_text, rects, lambda x: x == "\n")
    for (line, line_rects) in line_chunks:
        words = [
            PdfWordBox(word, word_rects)
            for (word, word_rects) in custom_split(
                line, line_rects, lambda x: x.isspace()
            )
        ]
        self.__boxes.append(PdfLineBox(words, line_rects))
    return self.__boxes