当前位置: 首页>>代码示例>>Python>>正文


Python pypinyin.lazy_pinyin方法代码示例

本文整理汇总了Python中pypinyin.lazy_pinyin方法的典型用法代码示例。如果您正苦于以下问题:Python pypinyin.lazy_pinyin方法的具体用法?Python pypinyin.lazy_pinyin怎么用?Python pypinyin.lazy_pinyin使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pypinyin的用法示例。


在下文中一共展示了pypinyin.lazy_pinyin方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: _pinyin

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def _pinyin(s):
    """Convert text into a space-separated TONE2 pinyin string.

    Every token returned by ``lazy_pinyin`` is reduced to the characters that
    are digits, lowercase ASCII letters or spaces; tokens that consist of
    exactly one space are skipped.  The reduced tokens are joined with single
    spaces and each run of consecutive spaces in the joined text is collapsed
    to one space.
    """
    allowed = '0123456789abcdefghijklmnopqrstuvwxyz '
    tokens = lazy_pinyin(s, style=Style.TONE2)
    cleaned = [
        ''.join(ch for ch in token if ch in allowed)
        for token in tokens
        if token != ' '
    ]
    joined = ' '.join(cleaned)
    last = len(joined) - 1
    # Keep a space only when it is not immediately followed by another space.
    return ''.join(
        ch
        for pos, ch in enumerate(joined)
        if not (ch == ' ' and pos < last and joined[pos + 1] == ' ')
    )
开发者ID:KinglittleQ,项目名称:GST-Tacotron,代码行数:20,代码来源:generate.py

示例2: speak

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def speak(self, text):
        """Play the pre-recorded syllable files that spell out ``text``.

        The text is converted to TONE3 pinyin syllables, which are printed,
        cleaned by ``preprocess`` and then handed one by one to
        ``TextToSpeech._play_audio`` on a new thread together with an
        accumulated delay that grows by 0.355 per syllable.
        """
        raw_syllables = lazy_pinyin(text, style=pypinyin.TONE3)
        print(raw_syllables)

        def preprocess(syllables):
            # Strip every punctuation mark from each syllable; a syllable that
            # is purely digits is passed through atc.num2chinese and replaced
            # by the pinyin of that result.
            cleaned = []
            for item in syllables:
                for mark in TextToSpeech.punctuation:
                    item = item.replace(mark, "")
                if not item.isdigit():
                    cleaned.append(item)
                    continue
                chinese_number = atc.num2chinese(item)
                cleaned.extend(lazy_pinyin(chinese_number, style=pypinyin.TONE3))
            return cleaned

        start_delay = 0
        for syllable in preprocess(raw_syllables):
            wav_path = "syllables/" + syllable + ".wav"
            _thread.start_new_thread(TextToSpeech._play_audio, (wav_path, start_delay))
            start_delay += 0.355
开发者ID:junzew,项目名称:HanTTS,代码行数:26,代码来源:main.py

示例3: __convert_transcript

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def __convert_transcript(raw_transcript):
    """
    Converts a Chinese transcript to a Chinese pinyin sequence.

    Args:
        raw_transcript: one tab-separated record; the first field is the wave
            id and the second field is the raw text.  Further fields are
            ignored.

    Returns:
        A ``(wavename, pinyin_text)`` tuple where ``wavename`` is the wave id
        with ``.wav`` appended and ``pinyin_text`` is the space-joined TONE3
        pinyin sequence.  A syllable that does not end in a tone digit gets
        ``"0"`` appended; punctuation tokens are kept unchanged.

    Raises:
        ValueError: if the record has fewer than two tab-separated fields.
    """
    waveid, raw_trans = raw_transcript.split("\t")[:2]
    wavename = waveid + ".wav"
    symbols = ",.!?"
    # For simplicity, we only retain the Chinese chars and symbols
    trans = ''.join(
        _char for _char in __replace_symbols(raw_trans)
        if __is_chinese(_char) or _char in symbols
    )
    pinyin_trans = []
    for pinyin in lazy_pinyin(trans, style=Style.TONE3):
        # A token can hold several adjacent symbols (e.g. "?!" or ".."), so
        # test every character instead of relying on a substring match
        # against ``symbols``, which only recognises runs in that exact order.
        is_symbol_run = all(ch in symbols for ch in pinyin)
        if is_symbol_run or pinyin[-1].isdigit():
            pinyin_trans.append(pinyin)
        else:
            pinyin_trans.append(pinyin + "0")
    return wavename, " ".join(pinyin_trans)
开发者ID:NVIDIA,项目名称:NeMo,代码行数:18,代码来源:get_databaker_data.py

示例4: check_homepage_validity

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def check_homepage_validity(name, res):
    """
    Check if the homepage satisfies the basic rules.

    Args:
        name: name of the expert.  '?' characters are removed and the name is
            split on whitespace; each part is matched case-insensitively.
        res: homepage info as a 4-item sequence ``(title, url, detail, cited)``.

    Returns:
        False when the URL ends with 'pdf' or 'doc', when the URL contains
        'linkedin', 'researchgate' or 'citations', when the name has no
        usable part, or when no part of the name occurs in the pinyin form of
        the title.  True otherwise.
    """
    title, url, _detail, _cited = res
    lowered_url = url.lower()
    if url.endswith(('pdf', 'doc')) or any(
            marker in lowered_url
            for marker in ('linkedin', 'researchgate', 'citations')):
        return False

    # Escape every part so regex metacharacters in a name are matched
    # literally, and split on whitespace runs so that repeated spaces cannot
    # produce an empty alternative (which would match any title).
    name_parts = [re.escape(part) for part in name.replace('?', '').lower().split()]
    if not name_parts:
        # Nothing to look for, so the title cannot contain the name.
        return False

    # to check if the title contains the name
    title = ' '.join(lazy_pinyin(title))
    pattern = re.compile('|'.join(name_parts))
    return pattern.search(title.lower()) is not None
开发者ID:geekinglcq,项目名称:aca,代码行数:22,代码来源:pg2.py

示例5: transform_chinese_to_pinyin

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def transform_chinese_to_pinyin(data_path, output_path, type='corpus'):
    """Rewrite a UTF-8 text file line by line as TONE2 pinyin.

    Args:
        data_path: path of the input file.
        output_path: path of the file to write.
        type: ``'corpus'`` converts each whole line; ``'training_data'``
            expects ``index|text`` lines and converts only the text after the
            first ``|``, keeping the index.

    Blank lines are skipped.  Each output line is the space-joined pinyin
    (prefixed with ``index|`` for training data) followed by a newline.

    Raises:
        ValueError: if ``type`` is not one of the two supported values, or if
            a training-data line contains no ``|``.
    """
    # Fail before touching any file: an unknown mode used to create an empty
    # output file without any error.
    if type not in ('corpus', 'training_data'):
        raise ValueError(f'unsupported type: {type!r}')

    with open(data_path, 'rb') as fin, open(output_path, 'wb') as fout:
        for raw_line in fin:
            line = raw_line.decode('utf-8').strip('\r\n ')
            if not line:
                continue
            if type == 'corpus':
                transformed_line = ' '.join(lazy_pinyin(line, style=Style.TONE2))
            else:
                # Split once only, so a '|' inside the text does not break
                # the unpacking.
                index, chinese_text = line.split('|', 1)
                pinyin_text = ' '.join(lazy_pinyin(chinese_text, style=Style.TONE2))
                transformed_line = f'{index}|{pinyin_text}'
            fout.write(f'{transformed_line}\n'.encode('utf-8'))
开发者ID:cnlinxi,项目名称:style-token_tacotron2,代码行数:19,代码来源:chinese_to_pinyin.py

示例6: synthesize

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def synthesize(self, text, src, dst):
        """
        Synthesize .wav from text
        src is the folder that contains all syllables .wav files
        dst is the destination folder to save the synthesized file

        The result is written to ``generated.wav`` inside ``dst``; ``dst`` is
        created when it does not exist.  Both folders may be given with or
        without a trailing path separator.
        """
        print("Synthesizing ...")
        delay = 0
        increment = 355 # milliseconds
        pause = 500 # pause for punctuation
        syllables = lazy_pinyin(text, style=pypinyin.TONE3)

        # initialize to be complete silence, each character takes up ~500ms
        result = AudioSegment.silent(duration=500*len(text))
        for syllable in syllables:
            # insert 500 ms silence for punctuation marks
            if syllable in TextToSpeech.punctuation:
                short_silence = AudioSegment.silent(duration=pause)
                result = result.overlay(short_silence, position=delay)
                delay += increment
                continue
            # join with the platform separator so that src does not need to
            # end with a slash
            path = os.path.join(src, syllable + ".wav")
            # skip sound file that doesn't exist
            if not os.path.isfile(path):
                continue
            segment = AudioSegment.from_wav(path)
            result = result.overlay(segment, position=delay)
            delay += increment

        # exist_ok avoids failing when the folder appears between a check
        # and the creation
        os.makedirs(dst, exist_ok=True)

        result.export(os.path.join(dst, "generated.wav"), format="wav")
        print("Exported.")
开发者ID:junzew,项目名称:HanTTS,代码行数:38,代码来源:main.py

示例7: word_parser

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def word_parser(word: str) -> List[Tuple[str, List[str]]]:
    """Return ``pinyin_parser`` applied to the ``lazy_pinyin`` syllables of ``word``."""
    return pinyin_parser(lazy_pinyin(word))
开发者ID:jiaeyan,项目名称:chinese-rhymer,代码行数:5,代码来源:parser.py

示例8: _confusion_word_set

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def _confusion_word_set(self, word):
        """Return the known candidate words that share the pinyin of ``word``.

        Candidates are produced by ``edit_distance_word(word, self.cn_char_set)``
        and filtered through ``self.known``; a candidate is kept when its
        ``lazy_pinyin`` result equals that of ``word``.

        Returns:
            A set of candidate words (possibly empty).
        """
        # The pinyin of the input does not change per candidate: compute once.
        target_pinyin = lazy_pinyin(word)
        candidate_words = self.known(edit_distance_word(word, self.cn_char_set))
        return {
            candidate_word
            for candidate_word in candidate_words
            if lazy_pinyin(candidate_word) == target_pinyin
        }
开发者ID:shibing624,项目名称:pycorrector,代码行数:10,代码来源:corrector.py

示例9: to_pinyin

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def to_pinyin(word):
    """Return the pinyin of ``word`` concatenated without separators.

    Args:
        word: text to convert.  A byte string is decoded as UTF-8 first.

    Returns:
        The ``lazy_pinyin`` syllables of ``word`` joined into one string.
    """
    # ``bytes`` is the same type as ``str`` on Python 2, so this matches the
    # former ``not isinstance(word, unicode)`` check for string input while
    # no longer raising NameError on Python 3, where ``unicode`` is undefined.
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    return ''.join(lazy_pinyin(word))
开发者ID:python-cn,项目名称:slack_bot,代码行数:6,代码来源:utils.py

示例10: get_full

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_full(word: str) -> List[str]:
    """Return the ``lazy_pinyin`` syllables of ``word``.

    Raises:
        RuntimeError: if any syllable contains a character that is not a
            lowercase ASCII letter.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"
    fulls = []
    for full in lazy_pinyin(word):
        # Find the first offending character of this syllable, if any.
        e = next((ch for ch in full if ch not in letters), None)
        if e is not None:
            raise RuntimeError(f"{e} not alphe, word is: {word}")
        fulls.append(full)
    return fulls
开发者ID:ledao,项目名称:lufly-im,代码行数:10,代码来源:common.py

示例11: get_pinyin_correct_candidates

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_pinyin_correct_candidates(self, word, tolerance):  # tolerate at most one changed pinyin syllable
        """Return the mentions whose pinyin matches ``word`` within ``tolerance``.

        With ``tolerance == 0`` only the exact pinyin tuple of ``word`` is
        looked up.  With ``tolerance == 1`` every variant obtained by
        replacing one syllable with one of its entries in
        ``self.pinyin_adjlist`` is looked up as well.  Only tuples that are
        keys of ``self.pinyin_mention_dict`` contribute mentions.

        Returns:
            A list of the collected mentions (order unspecified).
        """
        assert tolerance in [0, 1]
        syllables = lazy_pinyin(word)
        candidate_keys = {tuple(syllables)}
        if tolerance == 1:
            for pos, syllable in enumerate(syllables):
                if syllable not in self.pinyin_adjlist:
                    continue
                head, tail = syllables[:pos], syllables[pos + 1:]
                candidate_keys |= {tuple(head + [neibr] + tail) for neibr in self.pinyin_adjlist[syllable]}
        # Keep only the pinyin tuples that are actually known.
        candidate_keys &= set(self.pinyin_mention_dict.keys())
        mentions = set()
        for key in candidate_keys:
            mentions |= self.pinyin_mention_dict[key]
        return list(mentions)
开发者ID:blmoistawinde,项目名称:HarvestText,代码行数:16,代码来源:entity_discoverer.py

示例12: build_trie

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def build_trie(self, new_word, entity, entity_type):
        """Register the mention ``new_word`` for ``entity`` in the trie.

        Args:
            new_word: the mention text; one trie level is created per character.
            entity: the entity the mention refers to.
            entity_type: type name of the entity; stored as ``"#<type>#"``.

        A type seen for the first time is validated (it must not contain
        punctuation), added to ``self.entity_types`` and resets the
        ``prepared`` / ``hanlp_prepared`` flags.  The mention is also indexed
        by its pinyin tuple in ``self.pinyin_mention_dict``.

        Raises:
            Exception: if a new ``entity_type`` contains a punctuation mark.
        """
        type0 = "#%s#" % entity_type
        if type0 not in self.entity_types:
            # The former one-line literal was cut short by an unescaped double
            # quote, leaving the unterminated class "[、!?。" that fails to
            # compile.  Build the class from plain character lists instead and
            # let re.escape take care of ']', '\\', '^' and '-'.
            punct_chars = (
                "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"  # ASCII punctuation
                "、。〃〈〉《》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿"
                "⦅⦆–—‘’‛“”„‟…‧﹏"
            )
            # NOTE(review): the full-width forms U+FF01-FF0F, FF1A-FF20,
            # FF3B-FF40 and FF5B-FF65 are included on the assumption that the
            # duplicated ASCII list in the former literal was a full-width
            # list before the text was normalised; the opening brackets
            # 〈 〉 《 were added to pair with the 》 that was listed - confirm.
            punct_regex = (
                "[" + re.escape(punct_chars)
                + "\uff01-\uff0f\uff1a-\uff20\uff3b-\uff40\uff5b-\uff65]"
            )
            matched = re.search(punct_regex, entity_type, re.MULTILINE | re.UNICODE)
            if matched:
                punct0 = matched.group()
                raise Exception("Your type input '{}' includes punctuation '{}', please remove them first".format(entity_type,punct0))
            self.entity_types.add(type0)
            self.prepared = False
            self.hanlp_prepared = False
        self.mentions.add(new_word)
        self.pinyin_mention_dict[tuple(lazy_pinyin(new_word))].add(new_word)

        # Walk down the trie, creating the missing levels on the way.
        trie_node = self.trie_root
        for ch in new_word:
            trie_node = trie_node.setdefault(ch, {})
        leaf = trie_node.setdefault('leaf', set())
        # The same entity may not carry different types: drop earlier entries
        # of this entity before storing the current one.
        leaf -= {pair for pair in leaf if pair[0] == entity}
        leaf.add((entity, type0))
开发者ID:blmoistawinde,项目名称:HarvestText,代码行数:28,代码来源:harvesttext.py

示例13: get_pinyin_correct_candidates

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def get_pinyin_correct_candidates(self, word, tolerance=1):  # by default tolerate at most one changed pinyin syllable
        """Return the mentions whose pinyin matches ``word`` within ``tolerance``.

        With ``tolerance == 0`` only the exact pinyin tuple of ``word`` is
        looked up.  With ``tolerance == 1`` (the default) every variant
        obtained by replacing one syllable with one of its entries in
        ``self.pinyin_adjlist`` is looked up as well.  Only tuples that are
        keys of ``self.pinyin_mention_dict`` contribute mentions.

        Returns:
            A list of the collected mentions (order unspecified).
        """
        assert tolerance in [0, 1]
        syllables = lazy_pinyin(word)
        candidate_keys = {tuple(syllables)}
        if tolerance == 1:
            for pos, syllable in enumerate(syllables):
                if syllable not in self.pinyin_adjlist:
                    continue
                head, tail = syllables[:pos], syllables[pos + 1:]
                candidate_keys |= {tuple(head + [neibr] + tail) for neibr in self.pinyin_adjlist[syllable]}
        # Keep only the pinyin tuples that are actually known.
        candidate_keys &= set(self.pinyin_mention_dict.keys())
        mentions = set()
        for key in candidate_keys:
            mentions |= self.pinyin_mention_dict[key]
        return list(mentions)
开发者ID:blmoistawinde,项目名称:HarvestText,代码行数:16,代码来源:harvesttext.py

示例14: generateIDInNameYearFormat

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def generateIDInNameYearFormat(self, cnkiNetEntry):
        """Set ``self.ID`` to ``<first author><year>``.

        Args:
            cnkiNetEntry: mapping with the string entries ``"Author"`` and
                ``"Year"``.

        The first author is the text before the first separator in the
        author field, with ASCII and ideographic spaces removed.  A name that
        ``__isFullEnglish`` accepts is used as is; any other name is replaced
        by the title-cased syllables returned by ``pinyin``.
        """
        name = cnkiNetEntry["Author"]
        # The previous chain split on the ASCII comma twice, which made the
        # second split a no-op; the full-width comma is handled here instead.
        # NOTE(review): the full-width semicolon is accepted as well on the
        # assumption that author lists may use it - confirm against real data.
        for separator in (";", u"\uff1b", ",", u"\uff0c"):
            name = name.split(separator)[0]
        name = name.replace(" ", "").replace(u"\u3000", "")
        year = cnkiNetEntry["Year"]
        if self.__isFullEnglish(name):
            self.ID = name + year
        else:
            self.ID = "".join([i.title() for i in pinyin(name)]) + year
开发者ID:Vopaaz,项目名称:CNKI_2_BibTeX,代码行数:10,代码来源:BibTexEntries.py

示例15: generateIDInTitleFormat

# 需要导入模块: import pypinyin [as 别名]
# 或者: from pypinyin import lazy_pinyin [as 别名]
def generateIDInTitleFormat(self, cnkiNetEntry):
        """Set ``self.ID`` from the leading words of the entry title.

        Digits and the characters ``_``, ``,`` and ``;`` are removed from the
        title first.  A title that ``__isFullEnglish`` accepts contributes its
        first four space-separated words; any other title is stripped of
        spaces, segmented with ``jieba`` and the first three segments are
        passed to ``pinyin``.  In both cases the pieces are concatenated
        without a separator.
        """
        title = re.sub(r"[0-9_,;]", "", cnkiNetEntry["Title"])
        if not self.__isFullEnglish(title):
            jieba.setLogLevel(logging.INFO)
            compact_title = title.replace(" ", "").replace(u"\u3000", "")
            leading_segments = list(jieba.cut(compact_title))[:3]
            self.ID = "".join(pinyin("".join(leading_segments)))
            return
        leading_words = title.strip().split(" ")[:4]
        self.ID = "".join(leading_words)
开发者ID:Vopaaz,项目名称:CNKI_2_BibTeX,代码行数:16,代码来源:BibTexEntries.py


注:本文中的pypinyin.lazy_pinyin方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。