本文整理汇总了Python中fileinput.hook_compressed方法的典型用法代码示例。如果您正苦于以下问题：Python fileinput.hook_compressed方法的具体用法？Python fileinput.hook_compressed怎么用？Python fileinput.hook_compressed使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块fileinput的用法示例。
在下文中一共展示了fileinput.hook_compressed方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def main():
    """Normalize and tokenize text with the Indic NLP Library.

    Command-line arguments:
        --indic-nlp-path  root of an Indic NLP Library checkout (required);
                          it and its ``src`` subdirectory are appended to
                          ``sys.path`` before importing the library.
        --language        language code handed to the normalizer and the
                          tokenizer (required).
        --remove-nuktas   flag forwarded to the normalizer factory.
        input             input file, or ``-`` for stdin; compressed files
                          are opened via ``fileinput.hook_compressed``.

    Each input line is normalized, tokenized, re-joined with single spaces
    and written to stdout as UTF-8.

    Raises:
        Exception: if the Indic NLP Library cannot be imported.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--indic-nlp-path", required=True,
                        help="path to Indic NLP Library root")
    parser.add_argument("--language", required=True)
    parser.add_argument("--remove-nuktas", default=False, action="store_true")
    parser.add_argument("input", help="input file; use - for stdin")
    args = parser.parse_args()

    # Make the library importable from the user-supplied location.
    sys.path.extend([
        args.indic_nlp_path,
        os.path.join(args.indic_nlp_path, "src"),
    ])
    try:
        from indicnlp.tokenize import indic_tokenize
        from indicnlp.normalize.indic_normalize import IndicNormalizerFactory
    except ImportError:
        # Only a failed import means the path is wrong; a bare ``except``
        # would also trap KeyboardInterrupt/SystemExit and mislabel
        # unrelated errors as a path problem.
        raise Exception(
            "Cannot load Indic NLP Library, make sure --indic-nlp-path is correct"
        )

    # create normalizer
    factory = IndicNormalizerFactory()
    normalizer = factory.get_normalizer(
        args.language, remove_nuktas=args.remove_nuktas,
    )

    # Write UTF-8 bytes to the binary layer of stdout when there is one
    # (Python 3); otherwise fall back to stdout itself (Python 2, where
    # it accepts byte strings directly).
    out = getattr(sys.stdout, "buffer", sys.stdout)

    # normalize and tokenize
    for line in fileinput.input([args.input], openhook=fileinput.hook_compressed):
        # Depending on interpreter version and file type the hook yields
        # either bytes or text; decode only when needed.
        if isinstance(line, bytes):
            line = line.decode("utf-8")
        line = normalizer.normalize(line)
        line = " ".join(indic_tokenize.trivial_tokenize(line, args.language))
        out.write(line.encode("utf-8"))
示例2: main
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def main():
    """Detokenize every input line with Moses and print the cleaned result.

    Positional ``files`` arguments name the inputs; they are read through
    ``fileinput`` with the compressed-file hook. After detokenization a
    fixed sequence of substring replacements is applied, in order, to
    remove spacing around ``@``, ``=`` and the en dash.
    """
    arg_parser = argparse.ArgumentParser(description='')
    arg_parser.add_argument('files', nargs='*', help='input files')
    options = arg_parser.parse_args()

    detokenizer = sacremoses.MosesDetokenizer()

    # (needle, replacement) pairs; order matters and mirrors the
    # left-to-right application of the replacements.
    cleanup_rules = (
        (' @', ''),
        ('@ ', ''),
        (' =', '='),
        ('= ', '='),
        (' – ', '–'),
    )

    reader = fileinput.input(options.files, openhook=fileinput.hook_compressed)
    for raw_line in reader:
        tokens = raw_line.strip().split(' ')
        text = detokenizer.detokenize(tokens)
        for needle, replacement in cleanup_rules:
            text = text.replace(needle, replacement)
        print(text)
示例3: test_gz_ext_fake
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def test_gz_ext_fake(self):
    """Check that a ``.gz`` filename is opened through ``gzip.open``.

    ``gzip.open`` is temporarily replaced by ``self.fake_open`` so the
    call made by ``fileinput.hook_compressed`` can be inspected.
    """
    expected_call = (("test.gz", 3), {})

    saved_gzip_open = gzip.open
    gzip.open = self.fake_open
    try:
        fileinput.hook_compressed("test.gz", 3)
    finally:
        # Restore the real opener even if the hook raised.
        gzip.open = saved_gzip_open

    # Exactly one call, with the filename and mode passed through untouched.
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, expected_call)
示例4: test_bz2_ext_fake
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def test_bz2_ext_fake(self):
    """Check that a ``.bz2`` filename is opened through ``bz2.BZ2File``.

    ``bz2.BZ2File`` is temporarily replaced by ``self.fake_open`` so the
    call made by ``fileinput.hook_compressed`` can be inspected.
    """
    expected_call = (("test.bz2", 4), {})

    saved_bz2_file = bz2.BZ2File
    bz2.BZ2File = self.fake_open
    try:
        fileinput.hook_compressed("test.bz2", 4)
    finally:
        # Restore the real class even if the hook raised.
        bz2.BZ2File = saved_bz2_file

    # Exactly one call, with the filename and mode passed through untouched.
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, expected_call)
示例5: do_test_use_builtin_open
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def do_test_use_builtin_open(self, filename, mode):
    """Check that ``hook_compressed`` opens *filename* via the builtin open.

    The builtin is swapped for ``self.fake_open`` through
    ``self.replace_builtin_open``; the value that helper returns is handed
    back to it afterwards to undo the swap.
    """
    previous_open = self.replace_builtin_open(self.fake_open)
    try:
        fileinput.hook_compressed(filename, mode)
    finally:
        # Undo the swap even if the hook raised.
        self.replace_builtin_open(previous_open)

    expected_call = ((filename, mode), {})
    self.assertEqual(self.fake_open.invocation_count, 1)
    self.assertEqual(self.fake_open.last_invocation, expected_call)
示例6: __iter__
# 需要导入模块: import fileinput [as 别名]
# 或者: from fileinput import hook_compressed [as 别名]
def __iter__(self):
    """Yield every non-header record of the GTF input as a list of fields.

    The files in ``self._files`` are read through ``fileinput`` with the
    compressed-file hook. Leading lines that start with ``#`` are skipped
    as header; every line from the first non-``#`` line onward is split on
    tab characters and yielded (later ``#`` lines are not filtered, and the
    trailing newline stays on the last field).

    Empty input, or input consisting solely of header lines, yields
    nothing.
    """
    hook = fileinput.hook_compressed
    with fileinput.input(self._files, openhook=hook, mode='r') as f:
        # A single loop with a flag replaces bare next() calls: inside a
        # generator an escaping StopIteration becomes RuntimeError
        # (PEP 479), which used to happen for empty or header-only input.
        in_header = True
        for record in f:
            if in_header:
                if record.startswith('#'):
                    continue
                in_header = False
            yield record.split('\t')