本文整理汇总了Python中opencc.OpenCC方法的典型用法代码示例。如果您正苦于以下问题:Python opencc.OpenCC方法的具体用法?Python opencc.OpenCC怎么用?Python opencc.OpenCC使用的例子?那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类opencc的用法示例。
在下文中一共展示了opencc.OpenCC方法的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: traditional2simple
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def traditional2simple(input_path, output_path):
    """Convert a Traditional Chinese text file to Simplified Chinese.

    Each line of ``input_path`` is converted with the OpenCC ``t2s``
    configuration and written to ``output_path``. Both files are opened
    as UTF-8 text. Progress is logged every 10000 lines.

    :param input_path: path of the text file to convert; must exist.
    :param output_path: path of the file to write. Its parent directory,
        including any missing ancestors, is created when absent.
    :return: None
    :raises AssertionError: if ``input_path`` does not exist.
    """
    # input check (kept as an assert so callers see the same exception type)
    assert os.path.exists(input_path)
    # output check: make sure the destination directory exists, creating
    # intermediate directories as well instead of failing on a deep path
    path = Path(output_path).resolve()
    path.parent.mkdir(parents=True, exist_ok=True)
    # convert
    # Build the converter before touching the files so that a converter
    # failure does not leave a truncated output file behind.
    cc = opencc.OpenCC('t2s')
    # ``with`` guarantees both handles are closed even if conversion or
    # writing raises part-way through.
    with open(input_path, 'r', encoding='utf8') as f_in, \
            open(output_path, 'w', encoding='utf8') as f_out:
        # Iterate the file object directly: streams line by line instead of
        # loading the entire input into memory via readlines().
        for (i, line) in enumerate(f_in):
            f_out.write(cc.convert(line))
            if i % 10000 == 0:
                logger.info("t2s %d lines complete" % i)
    logger.info("Finished Text T2S")
示例2: segment_lyric_convert_pinyin_mir1k
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def segment_lyric_convert_pinyin_mir1k():
    """Write segmented and pinyin annotations for the lyrics under ``mir1k_root``.

    For every distinct name returned by ``get_filenames_in_folder`` for the
    ``Lyrics`` folder, the first entry returned by ``read_mir1k_lyrics`` for
    ``<name>.txt`` is converted with the OpenCC ``tw2s`` configuration. Two
    one-line files are then written into the ``annotation`` folder:
    ``<name>_phrase_char.txt`` (space-joined output of ``fool.cut``) and
    ``<name>_phrase_pinyin.txt`` (output of ``pinyin.get``).

    A ``UnicodeDecodeError`` raised while handling a file is not propagated;
    the file's name is printed and processing continues with the next one.
    """
    converter = OpenCC('tw2s')
    lyrics_dir = os.path.join(mir1k_root, 'Lyrics')
    annotation_dir = os.path.join(mir1k_root, 'annotation')

    # set() drops duplicate names; iteration order is whatever the set yields
    for fn in set(get_filenames_in_folder(lyrics_dir)):
        lyric_path = os.path.join(lyrics_dir, fn + '.txt')
        try:
            first_line = read_mir1k_lyrics(lyric_path)[0]
            simplified = converter.convert(first_line)
            pinyin_line = pinyin.get(simplified, format='strip', delimiter=' ')
            char_line = ' '.join(fool.cut(simplified)[0])
            # segmented text is written first, then the pinyin transcription
            outputs = (
                ('_phrase_char.txt', char_line),
                ('_phrase_pinyin.txt', pinyin_line),
            )
            for suffix, text in outputs:
                write_lyrics_one_line(
                    filename=os.path.join(annotation_dir, fn + suffix),
                    line=text)
        except UnicodeDecodeError:
            print(fn)
示例3: __init__
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def __init__(self, infile, outfile):
    """Store the paths, build the converter and run the whole pipeline.

    Construction immediately calls ``self.read``, ``self.t2s`` and
    ``self.write`` in that order, so creating an instance performs the
    complete conversion from ``infile`` to ``outfile``.

    :param infile: value passed to ``self.read``.
    :param outfile: value passed to ``self.write`` as the destination.
    """
    self.infile, self.outfile = infile, outfile
    self.cc = opencc.OpenCC('t2s')
    # separate list objects for the two corpora
    self.t_corpus, self.s_corpus = [], []

    # pipeline: load -> convert -> save
    self.read(self.infile)
    self.t2s()
    self.write(self.s_corpus, self.outfile)
示例4: test_class_convert
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def test_class_convert():
    """Check that a default-constructed ``OpenCC`` converts the sample phrases."""
    converter = OpenCC()
    # (input text, expected conversion result)
    cases = (
        ('乾坤一擲', '乾坤一掷'),
        ('開放中文轉換', '开放中文转换'),
    )
    for text, expect in cases:
        assert converter.convert(text) == expect
示例5: main
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def main():
    """Command-line entry point: convert text using an OpenCC configuration.

    Parses ``-i/--input``, ``-o/--output``, ``-c/--config``, ``--in-enc``
    and ``--out-enc`` from the command line. The whole input is read,
    converted with ``OpenCC(args.config)`` and written out. When no input
    (or output) file is given, file descriptor 0 (or 1) is used instead.

    :return: 0 on success, 1 when no conversion was specified (a message
        is printed to stderr in that case).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-i', '--input', metavar='<file>',
                        help='Read original text from <file>.')
    parser.add_argument('-o', '--output', metavar='<file>',
                        help='Write converted text to <file>.')
    parser.add_argument('-c', '--config', metavar='<conversion>',
                        help='Conversion')
    parser.add_argument('--in-enc', metavar='<encoding>', default='UTF-8',
                        help='Encoding for input')
    parser.add_argument('--out-enc', metavar='<encoding>', default='UTF-8',
                        help='Encoding for output')
    args = parser.parse_args()

    if args.config is None:
        print("Please specify a conversion.", file=sys.stderr)
        return 1

    cc = OpenCC(args.config)

    # When falling back to descriptor 0/1, pass closefd=False so leaving the
    # ``with`` block flushes and closes only the wrapper, not the process's
    # real stdin/stdout. closefd must stay True when a file name is given.
    with io.open(args.input if args.input else 0, encoding=args.in_enc,
                 closefd=bool(args.input)) as f:
        input_str = f.read()
    output_str = cc.convert(input_str)

    with io.open(args.output if args.output else 1, 'w',
                 encoding=args.out_enc, closefd=bool(args.output)) as f:
        f.write(output_str)

    return 0
示例6: setUp
# 需要导入模块: import opencc [as 别名]
# 或者: from opencc import OpenCC [as 别名]
def setUp(self):
    """Create the two converter objects stored on the test instance."""
    # openCC:  converter built with no constructor arguments
    # openCC2: converter configured through the constructor with 'hk2s'
    self.openCC, self.openCC2 = OpenCC(), OpenCC('hk2s')