本文整理汇总了Python中pypinyin.lazy_pinyin方法的典型用法代码示例。如果您正苦于以下问题:Python pypinyin.lazy_pinyin方法的具体用法?Python pypinyin.lazy_pinyin怎么用?Python pypinyin.lazy_pinyin使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pypinyin的用法示例。
在下文中一共展示了pypinyin.lazy_pinyin方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def _pinyin(s):
    """Return `s` as a single string of TONE2-style pinyin tokens.

    Each token produced by ``lazy_pinyin`` is reduced to digits, lowercase
    ASCII letters and spaces; tokens that are exactly one space are dropped.
    The surviving tokens are joined with single spaces and every run of
    consecutive spaces in the joined text is collapsed to one space.
    Leading and trailing spaces are not stripped.
    """
    allowed = '0123456789abcdefghijklmnopqrstuvwxyz '
    cleaned_tokens = [
        ''.join(ch for ch in piece if ch in allowed)
        for piece in lazy_pinyin(s, style=Style.TONE2)
        if piece != ' '
    ]
    joined = ' '.join(cleaned_tokens)

    # Tokens may be empty or contain spaces themselves, so the joined text
    # can hold runs of spaces; keep only one space per run.
    collapsed = []
    for ch in joined:
        if ch == ' ' and collapsed and collapsed[-1] == ' ':
            continue
        collapsed.append(ch)
    return ''.join(collapsed)
示例2: speak
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def speak(self, text):
    """Play `text` by starting one audio thread per syllable.

    The text is converted with ``lazy_pinyin`` (TONE3 style), the raw
    syllable list is printed, and each cleaned syllable is mapped to
    ``syllables/<syllable>.wav``.  Playback of successive syllables is
    scheduled 0.355 apart through ``TextToSpeech._play_audio``.
    """
    def preprocess(syllables):
        # Strip punctuation from every syllable and spell digit-only
        # syllables out via atc.num2chinese before converting them again.
        cleaned = []
        for raw in syllables:
            for mark in TextToSpeech.punctuation:
                raw = raw.replace(mark, "")
            if not raw.isdigit():
                cleaned.append(raw)
                continue
            spelled_out = atc.num2chinese(raw)
            cleaned.extend(lazy_pinyin(spelled_out, style=pypinyin.TONE3))
        return cleaned

    syllables = lazy_pinyin(text, style=pypinyin.TONE3)
    print(syllables)

    offset = 0
    for name in preprocess(syllables):
        wav_path = "".join(("syllables/", name, ".wav"))
        _thread.start_new_thread(TextToSpeech._play_audio, (wav_path, offset))
        offset += 0.355
示例3: __convert_transcript
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def __convert_transcript(raw_transcript):
    """
    Converts a Chinese transcript to a Chinese pinyin sequence.

    `raw_transcript` is one tab-separated line whose first two fields are
    the wave id and the raw text.  Returns ``(wavename, pinyin_text)`` where
    `wavename` is the wave id plus ".wav" and `pinyin_text` is the
    space-joined TONE3 pinyin of the retained characters.  Pinyin tokens
    that do not end in a tone digit get "0" appended; punctuation tokens
    are passed through unchanged.

    Raises ValueError if the line has fewer than two tab-separated fields.
    """
    waveid, raw_trans = raw_transcript.split("\t")[:2]
    wavename = waveid + ".wav"
    symbols = ",.!?"
    # For simplicity, we only retain the Chinese chars and symbols
    trans = ''.join(
        _char for _char in __replace_symbols(raw_trans)
        if __is_chinese(_char) or _char in symbols
    )
    pinyin_trans = []
    for pinyin in lazy_pinyin(trans, style=Style.TONE3):
        # Adjacent punctuation marks can come back as one token (e.g. ",,").
        # Test every character rather than using a substring check, which
        # would miss such runs and tag them with a tone digit.
        is_symbol_token = all(ch in symbols for ch in pinyin)
        if is_symbol_token or pinyin[-1].isdigit():
            pinyin_trans.append(pinyin)
        else:
            pinyin_trans.append(pinyin + "0")
    return wavename, " ".join(pinyin_trans)
示例4: check_homepage_validity
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def check_homepage_validity(name, res):
    """
    Check if the homepage satisfies the basic rules.

    Input: name - name of the expert (space-separated parts)
           res  - homepage info as a 4-item sequence (title, url, detail, cited)
    Returns False when the URL ends in 'pdf' or 'doc', or contains
    'linkedin', 'researchgate' or 'citations' (case-insensitive); otherwise
    returns True only if at least one part of `name` occurs in the
    pinyin-converted, lower-cased title.  A name with no usable parts
    yields False.
    """
    title, url, detail, cited = res
    url_lower = url.lower()
    if url.endswith(('pdf', 'doc')) or any(
            marker in url_lower
            for marker in ('linkedin', 'researchgate', 'citations')):
        return False

    # to check if the title contains the name
    title = ' '.join(lazy_pinyin(title)).lower()
    name = name.replace('?', '')
    # Escape each part so characters such as '.' or '(' are matched
    # literally, and drop empty parts: an empty alternative would match
    # every title.
    parts = [re.escape(part) for part in name.lower().split(' ') if part]
    if not parts:
        return False
    return re.search('|'.join(parts), title) is not None
示例5: transform_chinese_to_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def transform_chinese_to_pinyin(data_path, output_path,type='corpus'):
    """Convert a UTF-8 text file to TONE2-style pinyin, line by line.

    data_path   -- input file, read as UTF-8
    output_path -- output file, written as UTF-8 (created or truncated)
    type        -- 'corpus': every non-blank line is converted as a whole;
                   'training_data': every non-blank line is "index|text",
                   the index is copied through and only the text is
                   converted.  Any other value writes nothing, leaving an
                   empty output file.

    Blank lines are skipped.  In 'training_data' mode a line without '|'
    raises ValueError.
    """
    with open(data_path, 'rb') as fin, open(output_path, 'wb') as fout:
        if type not in ('corpus', 'training_data'):
            return
        for raw_line in fin:
            line = raw_line.decode('utf-8').strip('\r\n ')
            if not line:
                continue
            if type == 'corpus':
                prefix, chinese_text = '', line
            else:
                # Split on the first '|' only, so a '|' inside the text does
                # not break the unpacking.
                index, chinese_text = line.split('|', 1)
                prefix = f'{index}|'
            pinyin_text = ' '.join(lazy_pinyin(chinese_text, style=Style.TONE2))
            fout.write(f'{prefix}{pinyin_text}\n'.encode('utf-8'))
示例6: synthesize
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def synthesize(self, text, src, dst):
    """
    Synthesize .wav from text
    src is the folder that contains all syllables .wav files
    dst is the destination folder to save the synthesized file

    The result is exported as "generated.wav" inside dst, which is created
    if it does not exist.  Syllables with no matching .wav file in src are
    skipped without advancing the timeline.
    """
    print("Synthesizing ...")
    delay = 0
    increment = 355 # milliseconds
    pause = 500 # pause for punctuation
    syllables = lazy_pinyin(text, style=pypinyin.TONE3)

    # initialize to be complete silence, each character takes up ~500ms
    result = AudioSegment.silent(duration=500*len(text))
    for syllable in syllables:
        # insert 500 ms silence for punctuation marks
        if syllable in TextToSpeech.punctuation:
            short_silence = AudioSegment.silent(duration=pause)
            result = result.overlay(short_silence, position=delay)
            delay += increment
            continue

        # Join with os.path so src works with or without a trailing separator.
        path = os.path.join(src, syllable + ".wav")
        # skip sound file that doesn't exist
        if not Path(path).is_file():
            continue
        segment = AudioSegment.from_wav(path)
        result = result.overlay(segment, position=delay)
        delay += increment

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(dst, exist_ok=True)
    # Joining keeps the file inside dst even when dst has no trailing slash.
    result.export(os.path.join(dst, "generated.wav"), format="wav")
    print("Exported.")
示例7: word_parser
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def word_parser(word: str) -> List[Tuple[str, List[str]]]:
    """Convert `word` with ``lazy_pinyin`` and return ``pinyin_parser``'s result for it."""
    return pinyin_parser(lazy_pinyin(word))
示例8: _confusion_word_set
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def _confusion_word_set(self, word):
    """Return the set of known candidate words whose pinyin equals that of `word`.

    Candidates come from ``edit_distance_word(word, self.cn_char_set)``
    filtered through ``self.known``; a candidate is kept when
    ``lazy_pinyin`` gives the same result for it as for `word`.
    """
    # The pinyin of `word` does not change across candidates; compute it once.
    target_pinyin = lazy_pinyin(word)
    candidate_words = self.known(edit_distance_word(word, self.cn_char_set))
    return {
        candidate_word
        for candidate_word in candidate_words
        if lazy_pinyin(candidate_word) == target_pinyin
    }
示例9: to_pinyin
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def to_pinyin(word):
    """Return the pinyin of `word` concatenated into one string.

    `word` may be text or UTF-8 encoded bytes; bytes are decoded first.
    """
    # `bytes` exists on both Python 2 (alias of str) and Python 3, unlike
    # `unicode`, which raises NameError on Python 3.
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    return ''.join(lazy_pinyin(word))
示例10: get_full
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_full(word: str) -> List[str]:
    """Return the ``lazy_pinyin`` tokens of `word`, requiring lowercase ASCII letters only.

    Raises RuntimeError on the first character of any token that is not one
    of a-z.
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    checked = []
    for token in lazy_pinyin(word):
        # First offending character of this token, if there is one.
        e = next((ch for ch in token if ch not in lowercase), None)
        if e is not None:
            raise RuntimeError(f"{e} not alphe, word is: {word}")
        checked.append(token)
    return checked
示例11: get_pinyin_correct_candidates
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_pinyin_correct_candidates(self, word, tolerance=1):  # by default tolerate a change in at most one pinyin
    """Return the known mentions whose pinyin matches `word`, exactly or nearly.

    word      -- text to look up
    tolerance -- 0: only the exact pinyin sequence of `word` is considered;
                 1 (default): sequences that differ in one position, using
                 the neighbours listed in ``self.pinyin_adjlist``, are
                 considered as well.

    Returns a list (order unspecified) of the mentions stored in
    ``self.pinyin_mention_dict`` under any of the considered sequences.
    """
    assert tolerance in [0, 1]
    pinyins = lazy_pinyin(word)
    pinyin_cands = {tuple(pinyins)}
    if tolerance == 1:
        for i, pinyin in enumerate(pinyins):
            if pinyin in self.pinyin_adjlist:
                # Swap position i for each neighbouring pinyin in turn.
                pinyin_cands |= {
                    tuple(pinyins[:i] + [neibr] + pinyins[i + 1:])
                    for neibr in self.pinyin_adjlist[pinyin]
                }
    # Keep only sequences that are keys of the mention dict, so the lookups
    # below cannot miss.
    pinyin_cands = pinyin_cands & set(self.pinyin_mention_dict.keys())
    mention_cands = set()
    for pinyin in pinyin_cands:
        mention_cands |= self.pinyin_mention_dict[pinyin]
    return list(mention_cands)
示例12: build_trie
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def build_trie(self, new_word, entity, entity_type):
    """Register `new_word` as a mention of `entity` with type `entity_type`.

    The type is stored as "#<entity_type>#".  The first time a type is
    seen it is checked for punctuation, added to ``self.entity_types``, and
    the ``prepared`` / ``hanlp_prepared`` flags are reset.  The word is then
    added to ``self.mentions``, indexed by its pinyin in
    ``self.pinyin_mention_dict``, and inserted character by character into
    the trie rooted at ``self.trie_root``; the final node's 'leaf' set holds
    ``(entity, type)`` pairs.

    Raises Exception if a new `entity_type` contains punctuation.
    """
    type0 = "#%s#" % entity_type
    if type0 not in self.entity_types:
        # The pattern is written with escapes only, so no quote or bracket
        # inside it can end the string literal or the character class early.
        # NOTE(review): the fullwidth ranges (U+FF01..) assume the original
        # list contained fullwidth marks that were later folded to ASCII -
        # confirm against the upstream source.
        punct_regex = (
            r"["
            r"\u2013\u2014\u2018\u2019\u201b-\u201f\u2026\u2027"  # dashes, curly quotes, ellipsis
            r"\u2985\u2986"                                        # white parentheses
            r"\u3001-\u3003\u300b-\u3011\u3014-\u301f"             # CJK stops, quotes and brackets
            r"\u3030\u303e\u303f\ufe4f"
            r"\uff01-\uff0f\uff1a-\uff20\uff3b-\uff40\uff5b-\uff64"  # fullwidth / halfwidth forms
            r"\x21-\x2f\x3a-\x40\x5b-\x60\x7b-\x7e"                # all ASCII punctuation
            r"]"
        )
        matched = re.search(punct_regex, entity_type, re.MULTILINE | re.UNICODE)
        if matched:
            punct0 = matched.group()
            raise Exception("Your type input '{}' includes punctuation '{}', please remove them first".format(entity_type,punct0))
        self.entity_types.add(type0)
        self.prepared = False
        self.hanlp_prepared = False

    self.mentions.add(new_word)
    self.pinyin_mention_dict[tuple(lazy_pinyin(new_word))].add(new_word)

    # Walk (and extend) the trie one character at a time.
    trie_node = self.trie_root
    for ch in new_word:
        if ch not in trie_node:
            trie_node[ch] = {}
        trie_node = trie_node[ch]

    if 'leaf' not in trie_node:
        trie_node['leaf'] = {(entity, type0)}
    else:
        # One entity may not carry different types: drop its old entries
        # before adding the new pair.
        for (entity_orig, type_orig) in trie_node['leaf'].copy():
            if entity_orig == entity:
                trie_node['leaf'].remove((entity_orig, type_orig))
        trie_node['leaf'].add((entity, type0))
示例13: get_pinyin_correct_candidates
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_pinyin_correct_candidates(self, word, tolerance=1):  # by default tolerate a change in at most one pinyin
    """Look up known mentions by the pinyin of `word`.

    With ``tolerance == 0`` only the exact pinyin sequence is used.  With
    ``tolerance == 1`` every sequence obtained by replacing one pinyin with
    one of its neighbours from ``self.pinyin_adjlist`` is used too.  The
    mentions stored in ``self.pinyin_mention_dict`` under those sequences
    are returned as a list.
    """
    assert tolerance in [0, 1]
    base = lazy_pinyin(word)
    variants = {tuple(base)}

    if tolerance == 1:
        for pos, syllable in enumerate(base):
            if syllable not in self.pinyin_adjlist:
                continue
            head, tail = base[:pos], base[pos + 1:]
            variants |= {
                tuple(head + [neibr] + tail)
                for neibr in self.pinyin_adjlist[syllable]
            }

    # Restrict to sequences that are actual keys of the mention dict.
    variants = variants & set(self.pinyin_mention_dict.keys())

    found = set()
    for key in variants:
        found |= self.pinyin_mention_dict[key]
    return list(found)
示例14: generateIDInNameYearFormat
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def generateIDInNameYearFormat(self, cnkiNetEntry):
    """Set ``self.ID`` to "<first author><year>" for the given entry.

    The first author is the part of ``cnkiNetEntry["Author"]`` before the
    first ";" and then before the first ",", with ASCII and ideographic
    (U+3000) spaces removed.  If ``self.__isFullEnglish`` accepts the name
    it is used as is; otherwise each item returned by ``pinyin(name)`` is
    title-cased and the items are concatenated.
    """
    firstAuthor = cnkiNetEntry["Author"]
    # NOTE(review): the original applied the "," split twice; the second
    # call was a no-op and may have been meant for another separator - confirm.
    for separator in (";", ","):
        firstAuthor = firstAuthor.split(separator)[0]
    firstAuthor = firstAuthor.replace(" ", "").replace(u"\u3000", "")

    year = cnkiNetEntry["Year"]
    if self.__isFullEnglish(firstAuthor):
        prefix = firstAuthor
    else:
        prefix = "".join([i.title() for i in pinyin(firstAuthor)])
    self.ID = prefix + year
示例15: generateIDInTitleFormat
# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def generateIDInTitleFormat(self, cnkiNetEntry):
    """Set ``self.ID`` from the leading words of the entry's title.

    Digits and the characters "_", "," and ";" are removed from
    ``cnkiNetEntry["Title"]`` first.  If ``self.__isFullEnglish`` accepts
    the title, the ID is the first four space-separated words concatenated.
    Otherwise all spaces are removed, the title is segmented with
    ``jieba.cut``, and the first three segments are passed to ``pinyin``
    and concatenated.
    """
    withoutDigits = re.sub(r"[0-9]", "", cnkiNetEntry["Title"])
    title = re.sub(r"[_,;]", "", withoutDigits)

    if not self.__isFullEnglish(title):
        jieba.setLogLevel(logging.INFO)
        compact = title.replace(" ", "").replace(u"\u3000", "")
        leadingSegments = list(jieba.cut(compact))[:3]
        self.ID = "".join(pinyin("".join(leadingSegments)))
        return

    leadingWords = title.strip().split(" ")[:4]
    self.ID = "".join(leadingWords)