當前位置: 首頁>>代碼示例>>Python>>正文


Python pypinyin.lazy_pinyin方法代碼示例

本文整理匯總了Python中pypinyin.lazy_pinyin方法的典型用法代碼示例。如果您正苦於以下問題:Python pypinyin.lazy_pinyin方法的具體用法?Python pypinyin.lazy_pinyin怎麼用?Python pypinyin.lazy_pinyin使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在模塊pypinyin的用法示例。


在下文中一共展示了pypinyin.lazy_pinyin方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: _pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def _pinyin(s):
    """Return `s` as a space-separated TONE2 pinyin string.

    Each token produced by ``lazy_pinyin`` (except a token that is exactly one
    space) is reduced to digits, lowercase ASCII letters and spaces; every
    other character, including uppercase letters, is dropped. The reduced
    tokens are joined with single spaces and runs of spaces are collapsed
    to one.
    """
    allowed = '0123456789abcdefghijklmnopqrstuvwxyz '
    cleaned_tokens = [
        ''.join(ch for ch in token if ch in allowed)
        for token in lazy_pinyin(s, style=Style.TONE2)
        if token != ' '
    ]
    joined = ' '.join(cleaned_tokens)
    # A space is dropped whenever the next character is also a space, so only
    # the last space of each run survives. The one-character slice is empty at
    # the end of the string, which keeps a trailing space just as an explicit
    # bounds check would.
    return ''.join(
        ch
        for pos, ch in enumerate(joined)
        if not (ch == ' ' and joined[pos + 1:pos + 2] == ' ')
    )
開發者ID:KinglittleQ,項目名稱:GST-Tacotron,代碼行數:20,代碼來源:generate.py

示例2: speak

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def speak(self, text):
        """Start one playback thread per syllable of `text`.

        The text is converted to TONE3 pinyin, printed, cleaned up by the
        nested ``preprocess`` helper, and each resulting syllable is mapped to
        ``syllables/<syllable>.wav``. Every thread receives that path together
        with an offset that grows by 0.355 per syllable (presumably a start
        delay in seconds -- the meaning is defined by
        ``TextToSpeech._play_audio``, which is not shown here).
        """
        syllables = lazy_pinyin(text, style=pypinyin.TONE3)
        print(syllables)

        def preprocess(raw_syllables):
            # Strip every punctuation mark from each token; a token that is
            # then purely digits is spelled out via atc.num2chinese and
            # replaced by the pinyin of that spelling.
            expanded = []
            for token in raw_syllables:
                for mark in TextToSpeech.punctuation:
                    token = token.replace(mark, "")
                if not token.isdigit():
                    expanded.append(token)
                    continue
                spoken = atc.num2chinese(token)
                expanded.extend(lazy_pinyin(spoken, style=pypinyin.TONE3))
            return expanded

        offset = 0
        for syllable in preprocess(syllables):
            wav_path = "syllables/"+syllable+".wav"
            _thread.start_new_thread(TextToSpeech._play_audio, (wav_path, offset))
            offset += 0.355
開發者ID:junzew,項目名稱:HanTTS,代碼行數:26,代碼來源:main.py

示例3: __convert_transcript

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def __convert_transcript(raw_transcript):
    """
    Convert one tab-separated transcript record to a pinyin sequence.

    The first two tab-separated fields are the wave id and the raw Chinese
    text. Returns a ``(wav file name, pinyin string)`` tuple, where the file
    name is the wave id plus ".wav" and the pinyin string is TONE3 syllables
    joined by single spaces.
    """
    symbols = ",.!?"
    waveid, raw_trans = raw_transcript.split("\t")[:2]

    # For simplicity, only Chinese characters and the symbols above are kept.
    kept_chars = []
    for _char in __replace_symbols(raw_trans):
        if __is_chinese(_char) or _char in symbols:
            kept_chars.append(_char)

    tokens = []
    for token in lazy_pinyin(''.join(kept_chars), style=Style.TONE3):
        # Symbols and syllables that already end in a tone digit pass through;
        # anything else gets an explicit "0" tone suffix.
        # NOTE(review): `token in symbols` is a substring test, so a token
        # holding several symbols in an order that does not occur in `symbols`
        # would receive a "0" as well -- confirm whether that can happen.
        if token in symbols or token[-1].isdigit():
            tokens.append(token)
        else:
            tokens.append(token + "0")
    return waveid + ".wav", " ".join(tokens)
開發者ID:NVIDIA,項目名稱:NeMo,代碼行數:18,代碼來源:get_databaker_data.py

示例4: check_homepage_validity

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def check_homepage_validity(name, res):
    """
    Check whether a search result satisfies the basic homepage rules.

    Input:
        name -- name of the expert (space-separated parts)
        res  -- homepage info, a 4-item sequence (title, url, detail, cited);
                only title and url are inspected here

    Returns False when the URL ends with 'pdf' or 'doc', or mentions
    linkedin / researchgate / citations (case-insensitive). Otherwise returns
    True only if at least one non-empty part of the name occurs in the
    pinyin-converted, lower-cased title.

    Name parts are matched as literal substrings. They used to be joined into
    a regular expression unescaped, so a metacharacter in the name could raise
    re.error or match unintended text, and an empty part (double space, empty
    name) produced an alternative that matched every title.
    """
    title, url, detail, cited = res
    lowered_url = url.lower()
    if url.endswith(('pdf', 'doc')) or any(
            marker in lowered_url
            for marker in ('linkedin', 'researchgate', 'citations')):
        return False

    # Compare in pinyin so a Chinese title can match a romanised name.
    title = ' '.join(lazy_pinyin(title)).lower()
    # '?' is stripped from the name before matching, as before.
    name_parts = [part for part in name.replace('?', '').lower().split(' ') if part]
    return any(part in title for part in name_parts)
開發者ID:geekinglcq,項目名稱:aca,代碼行數:22,代碼來源:pg2.py

示例5: transform_chinese_to_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def transform_chinese_to_pinyin(data_path, output_path,type='corpus'):
    """Rewrite a UTF-8 text file as TONE2 pinyin, line by line.

    data_path   -- input file, read as bytes and decoded as UTF-8
    output_path -- output file, written as UTF-8 bytes
    type        -- 'corpus': every non-empty line is converted as a whole;
                   'training_data': every non-empty line must be
                   "<index>|<text>", and only <text> is converted.
                   Any other value leaves the output file empty.

    Leading/trailing '\\r', '\\n' and spaces are stripped from each line and
    blank lines are skipped. Syllables are joined with single spaces.
    """
    with open(data_path, 'rb') as fin, open(output_path, 'wb') as fout:
        if type == 'corpus':
            has_index = False
        elif type == 'training_data':
            has_index = True
        else:
            # Unknown mode: both files were opened, nothing is read or written.
            return

        for raw_line in fin:
            text = raw_line.decode('utf-8').strip('\r\n ')
            if not text:
                continue
            if has_index:
                index, chinese_text = text.split('|')
                pinyin_text = ' '.join(lazy_pinyin(chinese_text, style=Style.TONE2))
                record = f'{index}|{pinyin_text}\n'
            else:
                transformed_line = ' '.join(lazy_pinyin(text, style=Style.TONE2))
                record = f'{transformed_line}\n'
            fout.write(record.encode('utf-8'))
開發者ID:cnlinxi,項目名稱:style-token_tacotron2,代碼行數:19,代碼來源:chinese_to_pinyin.py

示例6: synthesize

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def synthesize(self, text, src, dst):
        """
        Synthesize a .wav file from text.

        text -- text to synthesize; converted to TONE3 pinyin syllables
        src  -- folder that contains one "<syllable>.wav" file per syllable
        dst  -- destination folder; created if missing, receives
                "generated.wav"

        Both folders may be given with or without a trailing path separator
        (plain string concatenation used to produce a wrong path when the
        separator was missing). Syllables without a matching sound file are
        skipped silently.
        """
        print("Synthesizing ...")
        delay = 0
        increment = 355 # milliseconds
        pause = 500 # pause for punctuation
        syllables = lazy_pinyin(text, style=pypinyin.TONE3)

        # os.path.join(folder, "") appends a separator only when one is
        # missing and leaves an empty string untouched.
        src_dir = os.path.join(src, "")
        dst_dir = os.path.join(dst, "")

        # initialize to be complete silence, each character takes up ~500ms
        result = AudioSegment.silent(duration=500*len(text))
        for syllable in syllables:
            # insert 500 ms silence for punctuation marks
            if syllable in TextToSpeech.punctuation:
                short_silence = AudioSegment.silent(duration=pause)
                result = result.overlay(short_silence, position=delay)
                delay += increment
                continue
            path = src_dir+syllable+".wav"
            # skip sound file that doesn't exist
            if not Path(path).is_file():
                continue
            segment = AudioSegment.from_wav(path)
            result = result.overlay(segment, position=delay)
            delay += increment

        # exist_ok avoids the check-then-create race of exists() + makedirs()
        os.makedirs(dst, exist_ok=True)

        result.export(dst_dir+"generated.wav", format="wav")
        print("Exported.")
開發者ID:junzew,項目名稱:HanTTS,代碼行數:38,代碼來源:main.py

示例7: word_parser

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def word_parser(word: str) -> List[Tuple[str, List[str]]]:
    """Convert `word` to pinyin with ``lazy_pinyin`` and return what ``pinyin_parser`` makes of the syllable list."""
    return pinyin_parser(lazy_pinyin(word))
開發者ID:jiaeyan,項目名稱:chinese-rhymer,代碼行數:5,代碼來源:parser.py

示例8: _confusion_word_set

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def _confusion_word_set(self, word):
        """Return the set of known edit-distance candidates of `word` that share its pinyin.

        Candidates come from ``self.known(edit_distance_word(word,
        self.cn_char_set))``; a candidate is kept when its ``lazy_pinyin``
        result equals that of `word`.
        """
        # The pinyin of `word` does not change inside the loop, so it is
        # computed once instead of once per candidate.
        word_pinyin = lazy_pinyin(word)
        candidate_words = self.known(edit_distance_word(word, self.cn_char_set))
        return {
            candidate_word
            for candidate_word in candidate_words
            if lazy_pinyin(candidate_word) == word_pinyin
        }
開發者ID:shibing624,項目名稱:pycorrector,代碼行數:10,代碼來源:corrector.py

示例9: to_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def to_pinyin(word):
    """Return the pinyin syllables of `word` concatenated without separators.

    `word` may be text or UTF-8 encoded bytes; bytes are decoded first.
    The check uses ``bytes`` instead of the Python-2-only name ``unicode``,
    which raises NameError on Python 3. On Python 2 ``bytes`` is ``str``, so
    byte strings are still decoded and unicode objects still pass through.
    """
    if isinstance(word, bytes):
        word = word.decode('utf-8')
    return ''.join(lazy_pinyin(word))
開發者ID:python-cn,項目名稱:slack_bot,代碼行數:6,代碼來源:utils.py

示例10: get_full

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def get_full(word: str) -> List[str]:
    """Return the ``lazy_pinyin`` syllables of `word`.

    Raises:
        RuntimeError: at the first character of any syllable that is not a
            lowercase ASCII letter.
    """
    lowercase = "abcdefghijklmnopqrstuvwxyz"
    syllables = lazy_pinyin(word)
    for syllable in syllables:
        offending = next((ch for ch in syllable if ch not in lowercase), None)
        if offending is not None:
            raise RuntimeError(f"{offending} not alphe, word is: {word}")
    return list(syllables)
開發者ID:ledao,項目名稱:lufly-im,代碼行數:10,代碼來源:common.py

示例11: get_pinyin_correct_candidates

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def get_pinyin_correct_candidates(self, word, tolerance=1):  # by default tolerate at most one changed pinyin syllable
        """Return the known mentions whose pinyin equals, or nearly equals, that of `word`.

        word      -- text converted with ``lazy_pinyin``
        tolerance -- 0: exact pinyin match only; 1 (default): additionally
                     allow one syllable to be replaced by one of its
                     neighbours from ``self.pinyin_adjlist``

        Returns a list (in no particular order) of the mentions stored in
        ``self.pinyin_mention_dict`` under any of the candidate pinyin tuples.
        """
        assert tolerance in [0, 1]
        pinyins = lazy_pinyin(word)
        pinyin_cands = {tuple(pinyins)}
        if tolerance == 1:
            for i, pinyin in enumerate(pinyins):
                if pinyin in self.pinyin_adjlist:
                    pinyin_cands |= {
                        tuple(pinyins[:i] + [neibr] + pinyins[i + 1:])
                        for neibr in self.pinyin_adjlist[pinyin]
                    }
        mention_cands = set()
        for cand in pinyin_cands:
            # A membership test per candidate replaces building a set of every
            # key in the dictionary on each call; `in` also never inserts a
            # key, even if the mapping is a defaultdict.
            if cand in self.pinyin_mention_dict:
                mention_cands |= self.pinyin_mention_dict[cand]
        return list(mention_cands)
開發者ID:blmoistawinde,項目名稱:HarvestText,代碼行數:16,代碼來源:entity_discoverer.py

示例12: build_trie

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def build_trie(self, new_word, entity, entity_type):
        """Register the mention `new_word` for `entity` in the mention trie.

        new_word    -- surface form; indexed in ``self.mentions``, in
                       ``self.pinyin_mention_dict`` (keyed by its pinyin tuple)
                       and character by character in ``self.trie_root``
        entity      -- entity the mention refers to
        entity_type -- type name, stored wrapped as "#<entity_type>#"

        A type seen for the first time is validated and added to
        ``self.entity_types``, and the ``prepared`` / ``hanlp_prepared`` flags
        are reset to False.

        Raises:
            Exception: if a new `entity_type` contains a punctuation character.
        """
        type0 = "#%s#" % entity_type
        if type0 not in self.entity_types:
            # The pattern used to contain an unescaped double quote, which
            # ended the raw string early and turned the rest of the line into
            # a comment. What remained was an unterminated character class,
            # so re.search raised re.error for every new type. It is now a
            # complete class: CJK punctuation, full-width punctuation, and
            # ASCII punctuation.
            # NOTE(review): the full-width ranges are an assumption -- the
            # damaged pattern listed the ASCII punctuation twice, which
            # suggests the first copy was full-width before the text was
            # normalised. Confirm against the upstream project.
            punct_regex = (
                r"[、!?。⦅⦆「」〃》『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏"
                r"\uFF01-\uFF0F\uFF1A-\uFF20\uFF3B-\uFF40\uFF5B-\uFF65"
                r"!\"\#$%&'()*+,\-./:;<=>?@\[\\\]\^_`{|}~]"
            )
            matched = re.search(punct_regex, entity_type, re.MULTILINE | re.UNICODE)
            if matched:
                punct0 = matched.group()
                raise Exception("Your type input '{}' includes punctuation '{}', please remove them first".format(entity_type,punct0))
            self.entity_types.add(type0)
            self.prepared = False
            self.hanlp_prepared = False
        self.mentions.add(new_word)
        self.pinyin_mention_dict[tuple(lazy_pinyin(new_word))].add(new_word)

        # Walk (and extend where needed) one trie level per character.
        trie_node = self.trie_root
        for ch in new_word:
            trie_node = trie_node.setdefault(ch, {})

        leaf = trie_node.setdefault('leaf', set())
        # One entity may not carry different types: drop earlier
        # (entity, type) pairs for this entity before adding the new one.
        leaf.difference_update({pair for pair in leaf if pair[0] == entity})
        leaf.add((entity, type0))
開發者ID:blmoistawinde,項目名稱:HarvestText,代碼行數:28,代碼來源:harvesttext.py

示例13: get_pinyin_correct_candidates

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def get_pinyin_correct_candidates(self, word, tolerance=1):  # by default tolerate at most one changed pinyin syllable
        """Return the known mentions whose pinyin equals, or nearly equals, that of `word`.

        word      -- text converted with ``lazy_pinyin``
        tolerance -- 0: exact pinyin match only; 1 (default): additionally
                     allow one syllable to be replaced by one of its
                     neighbours from ``self.pinyin_adjlist``

        Returns a list (in no particular order) of the mentions stored in
        ``self.pinyin_mention_dict`` under any of the candidate pinyin tuples.
        """
        assert tolerance in [0, 1]
        pinyins = lazy_pinyin(word)
        pinyin_cands = {tuple(pinyins)}
        if tolerance == 1:
            for i, pinyin in enumerate(pinyins):
                if pinyin in self.pinyin_adjlist:
                    # `pinyins` is never mutated, so it can be sliced directly
                    # without a defensive copy.
                    pinyin_cands |= {
                        tuple(pinyins[:i] + [neibr] + pinyins[i + 1:])
                        for neibr in self.pinyin_adjlist[pinyin]
                    }
        mention_cands = set()
        for cand in pinyin_cands:
            # A membership test per candidate replaces building a set of every
            # key in the dictionary on each call; `in` also never inserts a
            # key, even if the mapping is a defaultdict.
            if cand in self.pinyin_mention_dict:
                mention_cands |= self.pinyin_mention_dict[cand]
        return list(mention_cands)
開發者ID:blmoistawinde,項目名稱:HarvestText,代碼行數:16,代碼來源:harvesttext.py

示例14: generateIDInNameYearFormat

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def generateIDInNameYearFormat(self, cnkiNetEntry):
        """Set ``self.ID`` to "<first author><year>".

        The first author is the part of ``cnkiNetEntry["Author"]`` before the
        first ";" and before the first ",", with ASCII spaces and ideographic
        spaces (U+3000) removed. If ``self.__isFullEnglish`` rejects the name,
        it is replaced by its pinyin syllables, each title-cased and
        concatenated. ``cnkiNetEntry["Year"]`` is appended as-is.
        """
        # NOTE(review): the previous code split on "," twice in a row; the
        # second split could never change the result. It may originally have
        # targeted a full-width comma -- confirm against the upstream project.
        firstAuthor = cnkiNetEntry["Author"].split(";")[0].split(",")[0]
        for blank in (" ", u"\u3000"):
            firstAuthor = firstAuthor.replace(blank, "")
        year = cnkiNetEntry["Year"]
        if not self.__isFullEnglish(firstAuthor):
            firstAuthor = "".join(syllable.title() for syllable in pinyin(firstAuthor))
        self.ID = firstAuthor + year
開發者ID:Vopaaz,項目名稱:CNKI_2_BibTeX,代碼行數:10,代碼來源:BibTexEntries.py

示例15: generateIDInTitleFormat

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import lazy_pinyin [as 別名]
def generateIDInTitleFormat(self, cnkiNetEntry):
        """Set ``self.ID`` from the leading words of ``cnkiNetEntry["Title"]``.

        ASCII digits and the characters "_", "," and ";" are removed first.
        If ``self.__isFullEnglish`` accepts the title, the ID is the first
        four space-separated words concatenated. Otherwise spaces (ASCII and
        U+3000) are removed, the title is segmented with jieba, and the ID is
        the pinyin of the first three segments concatenated.
        """
        title = cnkiNetEntry["Title"]
        for pattern in (r"[0-9]", r"[_,;]"):
            title = re.sub(pattern, "", title)

        if not self.__isFullEnglish(title):
            jieba.setLogLevel(logging.INFO)
            compactTitle = title.replace(" ", "").replace(u"\u3000", "")
            leadingWords = list(jieba.cut(compactTitle))[:3]
            self.ID = "".join(pinyin("".join(leadingWords)))
            return

        # Slicing past the end is safe, so no explicit length check is needed.
        self.ID = "".join(title.strip().split(" ")[:4])
開發者ID:Vopaaz,項目名稱:CNKI_2_BibTeX,代碼行數:16,代碼來源:BibTexEntries.py


注:本文中的pypinyin.lazy_pinyin方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。