本文整理汇总了Python中argparse.open方法的典型用法代码示例。如果您正苦于以下问题：Python argparse.open方法的具体用法？Python argparse.open怎么用？Python argparse.open使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类argparse的用法示例。
在下文中一共展示了argparse.open方法的8个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _process_lines
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def _process_lines(bpe, filename, outfile, dropout, begin, end):
    """Run ``bpe.process_line`` over one slice of ``filename``.

    Args:
        bpe: object exposing ``process_line(line, dropout)``; its return
            value is written to the output.
        filename: path of the input file, read as UTF-8.
        outfile: either a path (``str``), which is opened for writing here
            and closed before returning, or an already-open object with a
            ``write`` method, which is left open for the caller.
        dropout: passed through unchanged to ``bpe.process_line``.
        begin: position handed to ``seek`` before the first line is read.
        end: a line is skipped, and reading stops, as soon as the position
            reported after reading it is greater than ``end``. Values
            ``<= 0`` disable the limit, so the file is read to the end.
    """
    owns_output = isinstance(outfile, str)
    fo = open(outfile, "w", encoding="utf-8") if owns_output else outfile
    try:
        with open(filename, encoding="utf-8") as f:
            f.seek(begin)
            line = f.readline()
            while line:
                pos = f.tell()
                # An implausible position is treated as a symptom of an
                # unsupported line separator (see the assertion message).
                assert 0 <= pos < 1e20, "Bad new line separator, e.g. '\\r'"
                if end > 0 and pos > end:
                    break
                fo.write(bpe.process_line(line, dropout))
                line = f.readline()
    finally:
        # Only close what was opened here, and do so even when reading or
        # processing raised, so the handle is never leaked.
        if owns_output:
            fo.close()
示例2: _get_vocabulary
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def _get_vocabulary(infile, outfile, begin, end):
    """Count space-separated words in one slice of ``infile`` and pickle them.

    Args:
        infile: path of the input file, read as UTF-8.
        outfile: path that receives the pickled ``Counter`` of word counts.
        begin: position handed to ``seek`` before the first line is read.
        end: a line is skipped, and reading stops, as soon as the position
            reported after reading it is greater than ``end``. Values
            ``<= 0`` disable the limit, so the file is read to the end.
    """
    import pickle

    vocab = Counter()
    with open(infile, encoding="utf8") as src:
        src.seek(begin)
        line = src.readline()
        while line:
            pos = src.tell()
            # An implausible position is treated as a symptom of an
            # unsupported line separator (see the assertion message).
            assert 0 <= pos < 1e20, "Bad new line separator, e.g. '\\r'"
            if end > 0 and pos > end:
                break
            # Consecutive spaces produce empty strings; those are not words.
            vocab.update(word for word in line.strip('\r\n ').split(' ') if word)
            line = src.readline()
    with open(outfile, 'wb') as dst:
        pickle.dump(vocab, dst)
示例3: __init__
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def __init__(self, codes, separator='@@'):
    """Load BPE merge operations from a codes file.

    Args:
        codes: object whose ``name`` attribute is the path of the codes
            file; the file is re-opened here as UTF-8.
        separator: stored unchanged on ``self.separator``.

    Each line is split on whitespace into a tuple. ``self.bpe_codes`` maps
    each tuple to the zero-based index of the line it came from.
    """
    # A separate name for the re-opened handle keeps the `codes` argument
    # intact instead of shadowing it.
    with codecs.open(codes.name, encoding='utf-8') as codes_file:
        merge_ops = [tuple(item.split()) for item in codes_file]

    # some hacking to deal with duplicates (only consider first instance):
    # walking the list backwards lets the earliest index be written last.
    self.bpe_codes = {code: i for (i, code) in reversed(list(enumerate(merge_ops)))}

    self.separator = separator
示例4: __init__
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def __init__(self, codes, separator='__'):
    """Load BPE merge operations from a codes file and set up a cache.

    Args:
        codes: object whose ``name`` attribute is the path of the codes
            file; the file is re-opened here in UTF-8 text mode.
        separator: stored unchanged on ``self.separator``.

    Each line is split on whitespace into a tuple. ``self.bpe_codes`` maps
    each tuple to the zero-based index of the line it came from, and
    ``self.cache`` starts out as an empty dict.
    """
    # A separate name for the re-opened handle keeps the `codes` argument
    # intact instead of shadowing it.
    with io.open(codes.name, 'rt', encoding='utf-8') as codes_file:
        merge_ops = [tuple(item.split()) for item in codes_file]

    # some hacking to deal with duplicates (only consider first instance):
    # walking the list backwards lets the earliest index be written last.
    self.bpe_codes = {code: i for (i, code) in reversed(list(enumerate(merge_ops)))}

    self.separator = separator
    self.cache = {}
示例5: __init__
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def __init__(self, codes, merges=-1, separator='@@', vocab=None, glossaries=None):
    """Load versioned BPE merge operations from a codes file.

    Args:
        codes: path of the codes file, opened here as UTF-8.
        merges: keep only the first ``merges`` merge lines; ``-1`` keeps
            all of them.
        separator: stored unchanged on ``self.separator``.
        vocab: stored unchanged on ``self.vocab``.
        glossaries: stored on ``self.glossaries``; a falsy value becomes
            an empty list.

    If the first line starts with ``#version:``, its last whitespace
    separated token is parsed into ``self.version`` (trailing ``.0``
    groups removed) and the line is not treated as a merge. Otherwise the
    version is ``(0, 1)`` and the first line is read again as a merge.
    """
    # A separate name for the handle keeps the `codes` argument intact
    # instead of shadowing it.
    with codecs.open(codes, encoding="utf-8") as codes_file:
        # check version information
        firstline = codes_file.readline()
        if firstline.startswith('#version:'):
            version_text = re.sub(r'(\.0+)*$', '', firstline.split()[-1])
            self.version = tuple(int(x) for x in version_text.split("."))
        else:
            self.version = (0, 1)
            # No header: rewind so the first line is parsed as a merge.
            codes_file.seek(0)

        merge_ops = [tuple(item.split()) for (n, item) in enumerate(codes_file)
                     if (n < merges or merges == -1)]

    # some hacking to deal with duplicates (only consider first instance):
    # walking the list backwards lets the earliest index be written last.
    self.bpe_codes = {code: i for (i, code) in reversed(list(enumerate(merge_ops)))}

    # Maps the concatenation of a pair back to the pair itself.
    self.bpe_codes_reverse = {pair[0] + pair[1]: pair for pair in self.bpe_codes}

    self.separator = separator
    self.vocab = vocab
    self.glossaries = glossaries if glossaries else []
    self.cache = {}
示例6: process_lines
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def process_lines(self, filename, outfile, dropout=0, num_workers=1):
    """Apply BPE to every line of ``filename``, optionally in parallel.

    Args:
        filename: path of the UTF-8 input file.
        outfile: destination handed to ``_process_lines`` when
            ``num_workers == 1``; with more workers it must provide a
            ``write`` method, because the per-worker results are copied
            into it line by line.
        dropout: passed through unchanged to ``_process_lines``.
        num_workers: ``1`` processes the file in this process; a larger
            value splits the file into that many slices and processes
            each slice in a pool worker, writing to a temporary file.

    Raises:
        ValueError: if ``num_workers`` is less than 1.
        Exception: whatever a pool worker raised is re-raised here rather
            than being dropped, so a failed slice cannot silently yield
            truncated output.
    """
    if sys.version_info < (3, 0):
        print("Parallel mode is only supported in Python3.")
        sys.exit(1)

    if num_workers == 1:
        _process_lines(self, filename, outfile, dropout, 0, 0)
    elif num_workers > 1:
        with open(filename, encoding="utf-8") as f:
            size = os.fstat(f.fileno()).st_size
            chunk_size = int(size / num_workers)
            # offsets[i] is where slice i starts; the trailing 0 is the
            # "no limit" end marker for the last slice.
            offsets = [0 for _ in range(num_workers + 1)]
            for i in range(1, num_workers):
                f.seek(chunk_size * i)
                pos = f.tell()
                while True:
                    try:
                        # Consume the remainder of the current line so
                        # the slice begins at a line start.
                        f.readline()
                        break
                    except UnicodeDecodeError:
                        # Decoding failed from this position; step back
                        # one position and try again.
                        pos -= 1
                        f.seek(pos)
                offsets[i] = f.tell()
                assert 0 <= offsets[i] < 1e20, "Bad new line separator, e.g. '\\r'"

        tmp_paths = []
        try:
            pool = Pool(processes=num_workers)
            pending = []
            for i in range(num_workers):
                tmp = tempfile.NamedTemporaryFile(delete=False)
                tmp.close()
                tmp_paths.append(tmp.name)
                pending.append(pool.apply_async(
                    _process_lines,
                    (self, filename, tmp.name, dropout, offsets[i], offsets[i + 1])))
            pool.close()
            pool.join()
            # get() re-raises any exception raised inside a worker.
            for result in pending:
                result.get()
            # Concatenate in slice order to preserve the input line order.
            for path in tmp_paths:
                with open(path, encoding="utf-8") as fi:
                    for line in fi:
                        outfile.write(line)
        finally:
            # Temporary files are removed on failure as well as success.
            for path in tmp_paths:
                if os.path.exists(path):
                    os.remove(path)
    else:
        raise ValueError('`num_workers` is expected to be a positive number, but got {}.'.format(num_workers))
示例7: learn_joint_bpe_and_vocab
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def learn_joint_bpe_and_vocab(args):
    """Learn BPE on the combined inputs, then write one vocabulary per input.

    Reads these attributes of ``args``: ``input`` and ``vocab`` (sequences
    of objects with a ``name`` path attribute), ``output`` (object with a
    ``name`` path attribute), ``num_workers``, ``symbols``,
    ``min_frequency``, ``verbose``, ``total_symbols`` and ``separator``.

    Side effects: ``args.input`` and ``args.vocab`` are replaced by
    re-opened UTF-8 handles, which are closed before returning. The codes
    are written to ``args.output.name`` and each vocabulary file receives
    ``"<token> <count>"`` lines sorted by descending count. The process
    exits with status 1 if a vocabulary list is given whose length does
    not match the input list.
    """
    if args.vocab and len(args.input) != len(args.vocab):
        sys.stderr.write('Error: number of input files and vocabulary files must match\n')
        sys.exit(1)

    # read/write files as UTF-8
    args.input = [codecs.open(f.name, encoding='UTF-8') for f in args.input]
    args.vocab = [codecs.open(f.name, 'w', encoding='UTF-8') for f in args.vocab]

    try:
        # get combined vocabulary of all input texts
        full_vocab = Counter()
        for f in args.input:
            full_vocab += learn_bpe.get_vocabulary(f, num_workers=args.num_workers)
            f.seek(0)

        vocab_list = ['{0} {1}'.format(key, freq) for (key, freq) in full_vocab.items()]

        # learn BPE on combined vocabulary
        with codecs.open(args.output.name, 'w', encoding='UTF-8') as output:
            learn_bpe.learn_bpe(vocab_list, output, args.symbols, args.min_frequency,
                                args.verbose, is_dict=True, total_symbols=args.total_symbols)

        with codecs.open(args.output.name, encoding='UTF-8') as codes:
            bpe = apply_bpe.BPE(codes, separator=args.separator)

        # apply BPE to each training corpus and get vocabulary
        for train_file, vocab_file in zip(args.input, args.vocab):
            tmp = tempfile.NamedTemporaryFile(delete=False)
            tmp.close()
            try:
                with codecs.open(tmp.name, 'w', encoding='UTF-8') as tmpout:
                    train_file.seek(0)
                    bpe.process_lines(train_file.name, tmpout, num_workers=args.num_workers)

                with codecs.open(tmp.name, encoding='UTF-8') as tmpin:
                    vocab = learn_bpe.get_vocabulary(tmpin, num_workers=args.num_workers)
            finally:
                # Both handles are closed by now; remove the scratch file
                # whether or not segmentation succeeded.
                os.remove(tmp.name)

            for key, freq in sorted(vocab.items(), key=lambda x: x[1], reverse=True):
                vocab_file.write("{0} {1}\n".format(key, freq))
            vocab_file.close()
    finally:
        # Release every handle opened above, including on error paths.
        # Closing an already-closed handle is harmless.
        for handle in list(args.input) + list(args.vocab):
            handle.close()
示例8: get_vocabulary
# 需要导入模块: import argparse [as 别名]
# 或者: from argparse import open [as 别名]
def get_vocabulary(fobj, is_dict=False, num_workers=1):
    """Read text and return dictionary that encodes vocabulary

    Args:
        fobj: iterable of text lines. Its ``name`` attribute is read
            whenever ``is_dict`` is false and ``num_workers != 1``; in
            parallel mode the file at that path is re-opened as UTF-8.
        is_dict: when true, every line must be ``"<word> <count>"`` and
            the counts are summed per word. A line that does not split
            into exactly two fields prints an error and exits with
            status 1.
        num_workers: ``1`` counts words in this process. A larger value
            counts slices of the file in a process pool, unless
            ``fobj.name`` is ``'<stdin>'``, in which case a warning is
            issued and counting happens in this process.

    Returns:
        A ``Counter`` mapping each word to its frequency.

    Raises:
        ValueError: if ``num_workers`` is less than 1 (and the input is
            neither a dictionary nor stdin).
        Exception: whatever a pool worker raised is re-raised here rather
            than being dropped, so a failed slice cannot silently yield
            incomplete counts.
    """
    vocab = Counter()
    if is_dict:
        for i, line in enumerate(fobj):
            try:
                word, count = line.strip('\r\n ').split(' ')
            except ValueError:
                # Only the unpacking failure is a format error; a bare
                # except would also swallow KeyboardInterrupt.
                print('Failed reading vocabulary file at line {0}: {1}'.format(i, line))
                sys.exit(1)
            vocab[word] += int(count)
    elif num_workers == 1 or fobj.name == '<stdin>':
        if num_workers > 1:
            warnings.warn("In parallel mode, the input cannot be STDIN. Using 1 processor instead.")
        for line in fobj:
            # Consecutive spaces produce empty strings; those are not words.
            vocab.update(word for word in line.strip('\r\n ').split(' ') if word)
    elif num_workers > 1:
        if sys.version_info < (3, 0):
            print("Parallel mode is only supported in Python3.")
            sys.exit(1)

        with open(fobj.name, encoding="utf8") as f:
            size = os.fstat(f.fileno()).st_size
            chunk_size = int(size / num_workers)
            # offsets[i] is where slice i starts; the trailing 0 is the
            # "no limit" end marker for the last slice.
            offsets = [0 for _ in range(num_workers + 1)]
            for i in range(1, num_workers):
                f.seek(chunk_size * i)
                pos = f.tell()
                while True:
                    try:
                        # Consume the remainder of the current line so
                        # the slice begins at a line start.
                        f.readline()
                        break
                    except UnicodeDecodeError:
                        # Decoding failed from this position; step back
                        # one position and try again.
                        pos -= 1
                        f.seek(pos)
                offsets[i] = f.tell()
                assert 0 <= offsets[i] < 1e20, "Bad new line separator, e.g. '\\r'"

        import pickle

        tmp_paths = []
        try:
            pool = Pool(processes=num_workers)
            pending = []
            for i in range(num_workers):
                tmp = tempfile.NamedTemporaryFile(delete=False)
                tmp.close()
                tmp_paths.append(tmp.name)
                pending.append(pool.apply_async(
                    _get_vocabulary,
                    (fobj.name, tmp.name, offsets[i], offsets[i + 1])))
            pool.close()
            pool.join()
            # get() re-raises any exception raised inside a worker.
            for result in pending:
                result.get()
            for path in tmp_paths:
                # These pickles were written by the workers above, not
                # taken from external input.
                with open(path, 'rb') as f:
                    vocab += pickle.load(f)
        finally:
            # Temporary files are removed on failure as well as success.
            for path in tmp_paths:
                if os.path.exists(path):
                    os.remove(path)
    else:
        raise ValueError('`num_workers` is expected to be a positive number, but got {}.'.format(num_workers))
    return vocab