本文整理汇总了Python中pypinyin.pinyin方法的典型用法代码示例。如果您正苦于以下问题:Python pypinyin.pinyin方法的具体用法?Python pypinyin.pinyin怎么用?Python pypinyin.pinyin使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pypinyin的用法示例。
在下文中一共展示了pypinyin.pinyin方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: match_tone
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def match_tone(ci, tone_ci):
    """Return whether every character of ``ci`` fits its required tone class.

    ``tone_ci`` is indexed in step with ``ci`` and holds one marker per
    character:

    * ``'x'`` accepts any tone;
    * ``'0'`` accepts a reading whose last character is ``'1'`` or ``'2'``
      (level tones);
    * ``'1'`` accepts a reading whose last character is ``'3'``, ``'4'`` or
      ``'i'`` (oblique tones).

    Any other marker, or any other combination, counts as a mismatch.  The
    scan does not stop at the first mismatch: every character is still passed
    to ``pinyin``, so an empty result for a later character raises
    ``IndexError`` from the indexing below.
    """
    level_marks = ('1', '2')
    # NOTE(review): 'i' presumably catches a toneless final ending in "i";
    # confirm against the pypinyin style selected by ``style=9``.
    oblique_marks = ('3', '4', 'i')

    matched = True
    for position, char in enumerate(ci):
        # Last character of the first reading: the tone digit when present.
        tone_mark = pinyin(char, style=9, errors='ignore')[0][0][-1]
        required = tone_ci[position]
        fits = (
            required == 'x'
            or (required == '0' and tone_mark in level_marks)
            or (required == '1' and tone_mark in oblique_marks)
        )
        if not fits:
            matched = False
    return matched
# yn:首行是否押韵,0押,1不押
示例2: _add_lab
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def _add_lab(txtlines, wav_dir_path):
    """Write one ``<numstr>.lab`` file of tone-numbered pinyin per input line.

    Args:
        txtlines: Iterable of ``"<numstr> <text>"`` strings; each line must
            contain exactly one space separating the two fields.
        wav_dir_path: Directory that receives the ``.lab`` files.

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    logger = logging.getLogger('mtts')
    for line in txtlines:
        numstr, txt = line.split(' ')
        # Strip "#<digit>" markers (presumably prosody-break tags).  The raw
        # string keeps "\d" from being treated as an invalid str escape.
        txt = re.sub(r'#\d', '', txt)
        new_pinyin_list = []
        for item in pinyin(txt, style=Style.TONE3):
            if not item or not item[0]:
                # Nothing usable came back for this position: report it and
                # skip, instead of crashing on item[0][-1] below.
                logger.warning('%s do not generate right pinyin', numstr)
                continue
            phone = item[0]
            if not phone[-1].isdigit():
                # Readings without a trailing tone digit are tagged with '5'.
                phone += '5'
            new_pinyin_list.append(phone)
        lab_file = os.path.join(wav_dir_path, numstr + '.lab')
        with open(lab_file, 'w') as oid:
            oid.write(' '.join(new_pinyin_list))
示例3: _add_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def _add_pinyin(txtlines, output_path):
    """Convert every text line to pinyin and collect them in one file.

    Each input line ``"<numstr> <text>"`` becomes one output line
    ``"<numstr> <syllable> <syllable> ..."`` in
    ``<output_path>/all_pinyin.lab``.

    Args:
        txtlines: Iterable of ``"<numstr> <text>"`` strings; each line must
            contain exactly one space separating the two fields.
        output_path: Directory in which ``all_pinyin.lab`` is written.

    Raises:
        ValueError: If a line does not split into exactly two fields.
    """
    logger = logging.getLogger('mtts')
    all_pinyin = []
    for line in txtlines:
        numstr, txt = line.split(' ')
        # Strip "#<digit>" markers (presumably prosody-break tags).  The raw
        # string keeps "\d" from being treated as an invalid str escape.
        txt = re.sub(r'#\d', '', txt)
        new_pinyin_list = []
        for item in pinyin(txt, style=Style.TONE3):
            if not item or not item[0]:
                # Nothing usable came back for this position: report it and
                # skip, instead of crashing on item[0][-1] below.
                logger.warning('%s do not generate right pinyin', numstr)
                continue
            if not item[0][-1].isdigit():
                # Readings without a trailing tone digit are tagged with '5'.
                phone = item[0] + '5'
            else:
                # NOTE(review): only readings that already carry a tone digit
                # get 'v' rewritten to 'u'; untoned ones keep 'v'.  Kept as in
                # the original - confirm whether that asymmetry is intended.
                phone = item[0].replace('v', 'u')
            new_pinyin_list.append(phone)
        all_pinyin.append(numstr + ' ' + ' '.join(new_pinyin_list))
    all_pinyin_file = os.path.join(output_path, 'all_pinyin.lab')
    with open(all_pinyin_file, 'w') as oid:
        oid.writelines(entry + '\n' for entry in all_pinyin)
示例4: pinyinformat
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def pinyinformat(syllable):
    """Format one pinyin syllable into the mtts representation.

    A syllable that does not end in a digit gets ``'5'`` appended.  If the
    part before that final digit is a key of ``TRANSFORM_DICT``, it is
    replaced by the mapped spelling.

    Args:
        syllable: Non-empty pinyin syllable, with or without a tone digit.

    Returns:
        The syllable with a trailing tone digit and any mapped spelling.

    Raises:
        IndexError: If ``syllable`` is empty.
    """
    if not syllable[-1].isdigit():
        syllable = syllable + '5'
    syl_no_tone = syllable[:-1]
    if syl_no_tone in TRANSFORM_DICT:
        syllable = syllable.replace(syl_no_tone, TRANSFORM_DICT[syl_no_tone])
    return syllable
示例5: rhyme
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def rhyme(a, b):
    """Return how many syllables two sentences rhyme on; 0 means no rhyme.

    Identical sentences also return 0.  A first pass compares finals together
    with their tones over the whole sentence (last syllable first); its score
    is returned when it exceeds 1.  Otherwise only the last two syllables are
    compared on finals alone, so tones may differ for single and double
    rhymes.
    """
    if a == b:
        return 0

    # Multi-syllable rhyme: finals and tones must both agree.
    toned_a = pinyin(a, style=FINALS_TONE3)[::-1]
    toned_b = pinyin(b, style=FINALS_TONE3)[::-1]
    strict_score = n_rhyme(toned_a, toned_b)
    if strict_score > 1:
        return strict_score

    # Single / double rhyme: same finals, tones are allowed to differ.
    plain_a = pinyin(a, style=FINALS)[-2:][::-1]
    plain_b = pinyin(b, style=FINALS)[-2:][::-1]
    return n_rhyme(plain_a, plain_b)
# index -> sentence
示例6: pypinyin_g2p_phone
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def pypinyin_g2p_phone(text) -> List[str]:
    """Split ``text`` into a flat list of pinyin initials and finals.

    Each reading returned by ``pinyin(text, style=Style.TONE3)`` is passed to
    ``get_initials`` and ``get_finals`` (both with ``strict=True``); the
    non-empty results are emitted in that order, reading by reading.
    """
    from pypinyin import Style
    from pypinyin import pinyin
    from pypinyin.style._utils import get_finals
    from pypinyin.style._utils import get_initials

    phones = []
    for reading in pinyin(text, style=Style.TONE3):
        syllable = reading[0]
        initial = get_initials(syllable, strict=True)
        final = get_finals(syllable, strict=True)
        # Either part may come back empty; only keep the parts that exist.
        if len(initial) != 0:
            phones.append(initial)
        if len(final) != 0:
            phones.append(final)
    return phones
示例7: get_pinyin_first_letters
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def get_pinyin_first_letters(chinese_characters):
    """
    Get the first letters of the pinyin of Chinese characters.

    If the text contains characters with more than one reading, every
    combination is returned, separated by "|"; for "调向" the result is
    "dx|tx".

    :param chinese_characters: Chinese characters to get pinyin for.
    :return: "|"-separated first-letter strings, without a trailing "|".
    """
    combinations = _get_pinyin_all([], chinese_characters)
    joined = '|'.join(''.join(letters) for letters in combinations)
    # Trailing separators (left by empty trailing combinations) are removed.
    return joined.rstrip('|')
示例8: process_poetry
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def process_poetry(self, data_dir='/media/pony/DLdigest/data/languageModel/chinese-poetry/json'):
    """
    Process Tang and Song poems dataset.

    Every file in ``data_dir`` whose name starts with ``poet`` is loaded as
    JSON.  Each poem's ``paragraphs`` are joined, converted to simplified
    Chinese and stripped of punctuation; every resulting fragment longer than
    one character is appended as ``<fragment>,<pinyin>`` to a numbered text
    file under ``<self.save_dir>/poem``.

    :param data_dir: directory holding the poem JSON files
    """
    save_dir = os.path.join(self.save_dir, 'poem')
    check_path_exists(save_dir)
    count = 0
    for entry in os.scandir(data_dir):
        if not entry.name.startswith('poet'):
            continue
        # Explicit UTF-8: the data is Chinese text and must not depend on the
        # platform's default encoding.
        with open(entry.path, 'r', encoding='utf-8') as json_file:
            poems = json.load(json_file)
        for p in poems:
            paras = HanziConv.toSimplified(''.join(p['paragraphs']).replace('\n', ''))
            paras = filter_punctuation(paras)
            for para in paras.split(' '):
                if len(para.strip()) <= 1:
                    continue
                pys = ' '.join(np.array(pinyin(para)).flatten())
                # Output is sharded: a new numbered file every 400000 lines.
                shard_path = os.path.join(save_dir, str(count // 400000 + 1) + '.txt')
                with open(shard_path, 'a', encoding='utf-8') as out_file:
                    out_file.write(para + ',' + pys + '\n')
                # NOTE(review): counted once per written line; the flattened
                # source lost its indentation, so confirm this placement.
                count += 1
示例9: process_audioLabels
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def process_audioLabels(self, data_dir='/media/pony/DLdigest/data/ASR_zh/'):
    """
    Process the label files of the collected Chinese audio dataset.

    ``data_dir`` is walked recursively.  Every file whose name ends with
    ``label`` is read, converted to simplified Chinese and stripped of
    punctuation; every resulting fragment longer than one character is
    appended as ``<fragment>,<pinyin>`` to a numbered text file under
    ``<self.save_dir>/audioLabels``.

    :param data_dir: root directory of the audio dataset
    """
    save_dir = os.path.join(self.save_dir, 'audioLabels')
    check_path_exists(save_dir)
    count = 0
    for subdir, dirs, files in os.walk(data_dir):
        print(subdir)
        for filename in files:
            if not filename.endswith("label"):
                continue
            full_filename = os.path.join(subdir, filename)
            # Explicit UTF-8: the labels are Chinese text and must not depend
            # on the platform's default encoding.
            # NOTE(review): assumes the label files are UTF-8 - confirm.
            with open(full_filename, 'r', encoding='utf-8') as label_file:
                line = label_file.read()
            con = HanziConv.toSimplified(line)
            con = filter_punctuation(con)
            for c in con.split(' '):
                if len(c.strip()) <= 1:
                    continue
                pys = ' '.join(np.array(pinyin(c)).flatten())
                # Incremented before the shard number is computed, exactly as
                # in the original, so shard file names stay the same.
                count += 1
                shard_path = os.path.join(save_dir, str(count // 400000 + 1) + '.txt')
                with open(shard_path, 'a', encoding='utf-8') as out_file:
                    out_file.write(c + ',' + pys + '\n')
示例10: __init__
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def __init__(self, encoding_type, composing_func, embedding_size, hidden_size, num_layers=1):
    """Set up the sub-character embedding and, optionally, a recurrent composer.

    :param encoding_type: sub-character scheme; 'wubi' and 'pinyin' each
        create ``self.embedding`` sized from the matching vocabulary
    :param composing_func: composition function; 'LSTM' and 'GRU' each create
        a bidirectional ``self.composing`` layer, other values create none
    :param embedding_size: embedding dimension, also the recurrent input size
    :param hidden_size: hidden size of the recurrent layer
    :param num_layers: number of recurrent layers
    """
    super(SubCharComponent, self).__init__()
    self.encoding_type = encoding_type  # pinyin or wubi
    self.composing_func = composing_func  # composition function: lstm, cnn, avg, max
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    # The recurrent layer is built before the embedding, as in the original,
    # so sub-modules are registered in the same order.
    if composing_func in ('LSTM', 'GRU'):
        rnn_cls = nn.LSTM if composing_func == 'LSTM' else nn.GRU
        self.composing = rnn_cls(input_size=embedding_size,
                                 hidden_size=hidden_size,
                                 num_layers=num_layers,
                                 bidirectional=True)

    if encoding_type == 'wubi':
        vocab = dict_wubi
    elif encoding_type == 'pinyin':
        vocab = dict_pinyin
    else:
        vocab = None
    if vocab is not None:
        self.embedding = nn.Embedding(len(vocab['idx2char']), embedding_size)
示例11: token_indexing
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def token_indexing(idx, encoding_type, return_type):
    """
    Map a word id to the ids of the characters of its wubi or pinyin code.

    The word is looked up in ``dict_word['idx2word']`` ('<eos>' is treated as
    '。'), encoded, and padded with '。' to exactly 8 characters.

    :param idx: word id, used as an index into ``dict_word['idx2word']``
    :param encoding_type: 'wubi' or 'pinyin'; anything else raises
        NotImplementedError
    :param return_type: 'tokens' returns the 8 character ids; any other value
        returns 8 booleans marking which positions are real (non-padding)
    """
    c = dict_word['idx2word'][idx]
    if c == '<eos>':
        c = '。'

    if encoding_type == 'wubi':
        codes = wubi(c)
        # Fall back to the word itself when no code is available.
        encoding = codes[0] if codes else c
        table = dict_wubi
    elif encoding_type == 'pinyin':
        readings = pinyin(c)
        encoding = readings[0][0] if readings else c
        table = dict_pinyin
    else:
        raise NotImplementedError

    padding = 8 - len(encoding)
    full_encoding = encoding if padding == 0 else encoding + '。' * padding
    # Codes longer than 8 characters cannot be padded and are rejected here.
    assert len(full_encoding) == 8, full_encoding
    tokens = [table['char2idx'][ch] for ch in full_encoding]
    length = [position < len(encoding) for position in range(len(tokens))]
    return tokens if return_type == 'tokens' else length
示例12: get_edit_distance_close_2d_code
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def get_edit_distance_close_2d_code(a, b):
    """Return a distance between two pinyin objects based on 2D codes.

    The consonant and vowel of each argument are mapped to 2D codes and their
    distances are summed; the smaller of that sum and the hard-coded
    similarity distance is taken, and one tenth of the absolute tone
    difference is added.

    Args:
        a: Pinyin object exposing ``consonant``, ``vowel``, ``tone`` and
            ``toString()``, or ``None``.
        b: Same as ``a``.

    Returns:
        The distance, or ``0`` (after printing a message) when either
        argument is ``None``.

    Raises:
        Exception: If the distance cannot be computed for the given pair; the
            underlying error is chained as the cause.
    """
    res = 0
    if a is None or b is None:
        # Only call toString() on the argument that actually exists; calling
        # it on None is what made this guard raise instead of returning 0.
        shown = [p.toString() if p is not None else None for p in (a, b)]
        print("Error:pinyin({},{})".format(*shown))
        return res
    try:
        twoDcode_consonant_a = consonantMap_TwoDCode[a.consonant]
        twoDcode_consonant_b = consonantMap_TwoDCode[b.consonant]
        cDis = abs(get_distance_2d_code(twoDcode_consonant_a, twoDcode_consonant_b))

        twoDcode_vowel_a = vowelMap_TwoDCode[a.vowel]
        twoDcode_vowel_b = vowelMap_TwoDCode[b.vowel]
        vDis = abs(get_distance_2d_code(twoDcode_vowel_a, twoDcode_vowel_b))

        hcDis = get_sim_dis_from_hardcod_map(a, b)
        res = min((cDis + vDis), hcDis) + 1.0 * abs(a.tone - b.tone) / 10
    except Exception as err:
        # Same exception type and message as before, now with the cause kept.
        raise Exception("Error pinyin {}{}".format(a.toString(), b.toString())) from err
    return res
示例13: match_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def match_pinyin(pinyin1, pinyin2):
    """Similarity score between two pinyin.

    The two inputs are compared position by position from the start; the
    number of equal positions is divided by the length of the longer input,
    so the score lies in [0, 1].  Both inputs must be non-empty.
    """
    assert pinyin1 != "", "pinyin1 can not be empty"
    assert pinyin2 != "", "pinyin2 can not be empty"
    # zip stops at the shorter input, i.e. only the shared prefix is compared.
    same_positions = sum(
        1 for left, right in zip(pinyin1, pinyin2) if left == right
    )
    return same_positions / max(len(pinyin1), len(pinyin2))
示例14: jaccard_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def jaccard_pinyin(pv1, pv2, threshold=0.7):
    """Similarity score between two pinyin vectors with jaccard.

    Every pinyin of ``pv1`` is scored against every pinyin of ``pv2`` with
    ``match_pinyin``; the resulting score matrix (one row per ``pv1`` entry)
    is summarised by ``sum_cosine`` and combined according to the semantic
    jaccard model.  The similarity score interval for each two pinyin
    vectors is [0, 1].
    """
    score_rows = [
        [match_pinyin(first, second) for second in pv2]
        for first in pv1
    ]
    summary = sum_cosine(mat(score_rows), threshold)
    matched_total = summary["total"]
    matched_dif = summary["total_dif"]
    unmatched = summary["num_not_match"]
    return matched_total / (matched_total + unmatched * (1 - matched_dif))
示例15: to_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import pinyin [as 别名]
def to_pinyin(name):
    """Return the readings from ``pinyin(name, 0)`` joined into one string.

    The per-character lists are flattened in order and concatenated without
    any separator.
    """
    pieces = []
    for readings in pinyin(name, 0):
        pieces.extend(readings)
    return ''.join(pieces)