當前位置: 首頁>>代碼示例>>Python>>正文


Python pypinyin.pinyin方法代碼示例

本文整理匯總了Python中pypinyin.pinyin方法的典型用法代碼示例。如果您正苦於以下問題:Python pypinyin.pinyin方法的具體用法?Python pypinyin.pinyin怎麼用?Python pypinyin.pinyin使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在pypinyin的用法示例。


在下文中一共展示了pypinyin.pinyin方法的15個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: match_tone

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def match_tone(ci, tone_ci):
    """Report whether each character of ``ci`` fits the tone pattern ``tone_ci``.

    ``tone_ci`` is read position by position alongside ``ci`` (level/oblique
    tone pattern):

    * ``'x'`` accepts any tone mark;
    * ``'0'`` accepts the tone marks ``'1'`` and ``'2'``;
    * ``'1'`` accepts the tone marks ``'3'``, ``'4'`` and ``'i'``.

    Any other combination makes the result ``False``.  The tone mark is the
    last character of the first reading returned by
    ``pinyin(char, style=9, errors='ignore')``.
    NOTE(review): style 9 is presumably pypinyin's FINALS_TONE3 -- confirm.

    Every position is examined, even after a mismatch has been found.

    Returns:
        ``True`` when all positions fit (also for empty ``ci``), else
        ``False``.
    """
    level_marks = ('1', '2')
    oblique_marks = ('3', '4', 'i')
    all_match = True
    for position, char in enumerate(ci):
        mark = pinyin(char, style=9, errors='ignore')[0][0][-1]
        expected = tone_ci[position]
        fits = (
            expected == 'x'
            or (expected == '0' and mark in level_marks)
            or (expected == '1' and mark in oblique_marks)
        )
        if not fits:
            all_match = False
    return all_match

    # yn:首行是否押韻,0押,1不押 
開發者ID:ZubinGou,項目名稱:AI_Poet_Totoro,代碼行數:18,代碼來源:poetize_plus.py

示例2: _add_lab

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def _add_lab(txtlines, wav_dir_path):
    """Write one ``<numstr>.lab`` pinyin file per input line.

    Each line must split on a single space into exactly two fields,
    ``numstr`` and the text.  Markers matching ``#<digit>`` are removed from
    the text, the text is converted with ``pinyin(..., style=Style.TONE3)``
    and every syllable that does not end in a digit gets ``'5'`` appended
    (presumably marking the neutral tone -- confirm).  The syllables are
    written space-separated to ``<wav_dir_path>/<numstr>.lab``.

    Args:
        txtlines: iterable of ``"<numstr> <text>"`` strings.
        wav_dir_path: directory that receives the ``.lab`` files.

    Raises:
        ValueError: if a line does not split into exactly two fields.
    """
    logger = logging.getLogger('mtts')
    for line in txtlines:
        numstr, txt = line.split(' ')
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        txt = re.sub(r'#\d', '', txt)
        new_pinyin_list = []
        for item in pinyin(txt, style=Style.TONE3):
            if not item:
                # The placeholder is named, so it needs a keyword argument;
                # a positional argument raises KeyError instead of logging.
                logger.warning(
                    '{file_num} do not generate right pinyin'.format(
                        file_num=numstr))
                # An empty entry has nothing to index; skip it.
                continue
            phone = item[0]
            if not phone[-1].isdigit():
                phone = phone + '5'
            new_pinyin_list.append(phone)
        lab_file = os.path.join(wav_dir_path, numstr + '.lab')
        with open(lab_file, 'w') as oid:
            oid.write(' '.join(new_pinyin_list))
開發者ID:Jackiexiao,項目名稱:MTTS,代碼行數:21,代碼來源:mtts.py

示例3: _add_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def _add_pinyin(txtlines, output_path):
    """Convert every input line to pinyin and write them all to one file.

    Each line must split on a single space into exactly two fields,
    ``numstr`` and the text.  Markers matching ``#<digit>`` are removed from
    the text and the text is converted with
    ``pinyin(..., style=Style.TONE3)``.  A syllable that does not end in a
    digit gets ``'5'`` appended; a syllable that does end in a digit has
    every ``'v'`` replaced by ``'u'``.
    NOTE(review): the 'v' -> 'u' replacement is not applied to syllables
    that receive the '5' suffix -- confirm this asymmetry is intended.

    The result is written to ``<output_path>/all_pinyin.lab``, one
    ``"<numstr> <syllables...>"`` line per input line.

    Args:
        txtlines: iterable of ``"<numstr> <text>"`` strings.
        output_path: directory that receives ``all_pinyin.lab``.

    Raises:
        ValueError: if a line does not split into exactly two fields.
    """
    logger = logging.getLogger('mtts')
    all_pinyin = []
    for line in txtlines:
        numstr, txt = line.split(' ')
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        txt = re.sub(r'#\d', '', txt)
        new_pinyin_list = []
        for item in pinyin(txt, style=Style.TONE3):
            if not item:
                # The placeholder is named, so it needs a keyword argument;
                # a positional argument raises KeyError instead of logging.
                logger.warning(
                    '{file_num} do not generate right pinyin'.format(
                        file_num=numstr))
                # An empty entry has nothing to index; skip it.
                continue
            if not item[0][-1].isdigit():
                phone = item[0] + '5'
            else:
                phone = item[0].replace('v', 'u')
            new_pinyin_list.append(phone)
        all_pinyin.append(numstr + ' ' + ' '.join(new_pinyin_list))
    all_pinyin_file = os.path.join(output_path, 'all_pinyin.lab')
    with open(all_pinyin_file, 'w') as oid:
        oid.writelines(entry + '\n' for entry in all_pinyin)
開發者ID:Jackiexiao,項目名稱:MTTS,代碼行數:26,代碼來源:mtts.py

示例4: pinyinformat

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def pinyinformat(syllable):
    """Format a pinyin syllable to mtts's format.

    A syllable that does not end in a digit gets ``'5'`` appended.  If the
    syllable without its final character is a key of ``TRANSFORM_DICT``,
    every occurrence of that part in the syllable is replaced by the mapped
    value.

    Args:
        syllable: non-empty pinyin syllable, with or without a trailing
            tone digit.

    Returns:
        The formatted syllable, always ending in a digit.

    Raises:
        IndexError: if ``syllable`` is empty.
    """
    if not syllable[-1].isdigit():
        syllable = syllable + '5'
    # From here on the last character is always a digit, so no further
    # check is needed before splitting it off.
    syl_no_tone = syllable[:-1]
    if syl_no_tone in TRANSFORM_DICT:
        syllable = syllable.replace(syl_no_tone, TRANSFORM_DICT[syl_no_tone])
    return syllable
開發者ID:Jackiexiao,項目名稱:MTTS,代碼行數:21,代碼來源:txt2pinyin.py

示例5: rhyme

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def rhyme(a, b):
    """Return the rhyme count of two sentences; 0 means they do not rhyme.

    Two identical sentences also return 0.

    The sentences are compared in two passes, both scored by ``n_rhyme`` on
    reversed (last syllable first) pinyin lists:

    1. Whole sentences with ``FINALS_TONE3`` (final and tone must both
       match).  A score greater than 1 is returned immediately.
    2. Only the last two syllables with ``FINALS`` (final must match, tone
       may differ).  This score is returned as is.
    """
    if a == b:
        return 0

    # Pass 1: finals including tone, over the whole sentence.
    toned_a = pinyin(a, style=FINALS_TONE3)[::-1]
    toned_b = pinyin(b, style=FINALS_TONE3)[::-1]
    toned_score = n_rhyme(toned_a, toned_b)
    if toned_score > 1:
        return toned_score

    # Pass 2: finals without tone, last two syllables only.
    plain_a = pinyin(a, style=FINALS)[-2:][::-1]
    plain_b = pinyin(b, style=FINALS)[-2:][::-1]
    return n_rhyme(plain_a, plain_b)


# index -> sentence 
開發者ID:TobiasLee,項目名稱:Chinese-Hip-pop-Generation,代碼行數:26,代碼來源:rhyme.py

示例6: pypinyin_g2p_phone

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def pypinyin_g2p_phone(text) -> List[str]:
    """Split ``text`` into a flat list of pinyin initials and finals.

    ``text`` is converted with ``pinyin(text, style=Style.TONE3)``.  For the
    first reading of each entry, the strict initial and then the strict
    final are taken; empty parts are left out.

    Returns:
        The non-empty initials and finals in reading order.
    """
    from pypinyin import pinyin
    from pypinyin import Style
    from pypinyin.style._utils import get_finals
    from pypinyin.style._utils import get_initials

    phones = []
    for entry in pinyin(text, style=Style.TONE3):
        reading = entry[0]
        initial = get_initials(reading, strict=True)
        final = get_finals(reading, strict=True)
        for part in (initial, final):
            if len(part) != 0:
                phones.append(part)
    return phones
開發者ID:espnet,項目名稱:espnet,代碼行數:18,代碼來源:phoneme_tokenizer.py

示例7: get_pinyin_first_letters

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def get_pinyin_first_letters(chinese_characters):
    """
    Get the first letters of the pinyin of Chinese characters.

    If any character has more than one reading, all combinations are
    returned, separated by "|".  For example, for "調向" the result
    "dx|tx" is returned.

    :param chinese_characters: Chinese characters to get pinyin for.
    :return: first letters of the pinyin, combinations joined by "|"
    """
    combinations = _get_pinyin_all([], chinese_characters)
    # Each combination is followed by "|"; trailing separators are stripped.
    joined = ''.join(''.join(letters) + "|" for letters in combinations)
    return joined.rstrip('|')
開發者ID:betterlife,項目名稱:betterlifepsi,代碼行數:18,代碼來源:format_util.py

示例8: process_poetry

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def process_poetry(self, data_dir='/media/pony/DLdigest/data/languageModel/chinese-poetry/json'):
        """
        Process the Tang and Song poems dataset into "text,pinyin" lines.

        Every directory entry of ``data_dir`` whose name starts with
        ``poet`` is loaded as JSON.  For each poem the ``paragraphs`` are
        joined, newlines removed, the text converted to Simplified Chinese
        and passed through ``filter_punctuation``.  Each space-separated
        piece longer than one character (after stripping) is appended as
        ``<piece>,<pinyin>`` to ``<self.save_dir>/poem/<k>.txt``, where
        ``k`` advances after every 400000 written lines.
        """
        out_dir = os.path.join(self.save_dir, 'poem')
        check_path_exists(out_dir)
        written = 0
        for entry in os.scandir(data_dir):
            if not entry.name.startswith('poet'):
                continue
            with open(entry.path, 'r') as json_file:
                poems = json.load(json_file)
                for poem in poems:
                    joined = ''.join(poem['paragraphs']).replace('\n', '')
                    cleaned = filter_punctuation(HanziConv.toSimplified(joined))
                    for para in cleaned.split(' '):
                        if len(para.strip()) <= 1:
                            continue
                        pys = ' '.join(np.array(pinyin(para)).flatten())
                        shard_name = str(written//400000+1)+'.txt'
                        with open(os.path.join(out_dir, shard_name), 'a') as out:
                            out.write(para+','+pys+'\n')
                        written += 1
開發者ID:zzw922cn,項目名稱:Automatic_Speech_Recognition,代碼行數:22,代碼來源:gardener.py

示例9: process_audioLabels

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def process_audioLabels(self, data_dir='/media/pony/DLdigest/data/ASR_zh/'):
        """
        Process the label files of the collected Chinese audio dataset.

        Walks ``data_dir`` (printing each visited directory) and reads every
        file whose name ends with ``label``.  The content is converted to
        Simplified Chinese and passed through ``filter_punctuation``.  Each
        space-separated piece longer than one character (after stripping)
        is appended as ``<piece>,<pinyin>`` to
        ``<self.save_dir>/audioLabels/<k>.txt``; the counter that selects
        ``k`` is incremented before each write.
        """
        out_dir = os.path.join(self.save_dir, 'audioLabels')
        check_path_exists(out_dir)
        count = 0
        for subdir, dirs, files in os.walk(data_dir):
            print(subdir)
            for filename in files:
                if not filename.endswith("label"):
                    continue
                label_path = os.path.join(subdir, filename)
                with open(label_path, 'r') as label_file:
                    raw = label_file.read()
                    cleaned = filter_punctuation(HanziConv.toSimplified(raw))
                    for chunk in cleaned.split(' '):
                        if len(chunk.strip()) <= 1:
                            continue
                        pys = ' '.join(np.array(pinyin(chunk)).flatten())
                        count += 1
                        shard_name = str(count//400000+1)+'.txt'
                        with open(os.path.join(out_dir, shard_name), 'a') as out:
                            out.write(chunk+','+pys+'\n')
開發者ID:zzw922cn,項目名稱:Automatic_Speech_Recognition,代碼行數:24,代碼來源:gardener.py

示例10: __init__

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def __init__(self, encoding_type, composing_func, embedding_size, hidden_size, num_layers=1):
        """Set up the sub-character embedding and the composing layer.

        ``composing_func`` of ``'LSTM'`` or ``'GRU'`` creates a
        bidirectional recurrent layer in ``self.composing``; any other value
        leaves ``self.composing`` unset here.  ``encoding_type`` of
        ``'wubi'`` or ``'pinyin'`` creates ``self.embedding`` sized by the
        matching ``idx2char`` table; any other value leaves it unset here.
        """
        super(SubCharComponent, self).__init__()
        self.encoding_type = encoding_type  # pinyin or wubi
        self.composing_func = composing_func  # composing function: lstm, cnn, avg, max
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Both recurrent variants take exactly the same configuration.
        rnn_kwargs = dict(input_size=embedding_size,
                          hidden_size=hidden_size,
                          num_layers=num_layers,
                          bidirectional=True)
        if self.composing_func == 'LSTM':
            self.composing = nn.LSTM(**rnn_kwargs)
        elif self.composing_func == 'GRU':
            self.composing = nn.GRU(**rnn_kwargs)

        if self.encoding_type == 'wubi':
            vocab_size = len(dict_wubi['idx2char'])
            self.embedding = nn.Embedding(vocab_size, embedding_size)
        elif self.encoding_type == 'pinyin':
            vocab_size = len(dict_pinyin['idx2char'])
            self.embedding = nn.Embedding(vocab_size, embedding_size)
開發者ID:ShannonAI,項目名稱:glyce,代碼行數:22,代碼來源:components.py

示例11: token_indexing

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def token_indexing(idx, encoding_type, return_type):
    """
    Map a word id to the character ids of its wubi or pinyin encoding.

    The word is looked up in ``dict_word['idx2word']``; ``'<eos>'`` is
    treated as ``'。'``.  The word is encoded with ``wubi`` or ``pinyin``
    (falling back to the word itself when the encoder returns nothing) and
    right-padded with ``'。'`` to exactly 8 characters.
    NOTE(review): the earlier docstring described (seq_len, batch_size)
    inputs, but this code indexes with a single ``idx`` -- confirm callers.

    :param idx: index into ``dict_word['idx2word']``
    :param encoding_type: ``'wubi'`` or ``'pinyin'``
    :param return_type: ``'tokens'`` for the character ids; any other value
        selects the padding mask
    :return: list of 8 character ids, or list of 8 booleans that are True
        for positions holding real (non-padding) characters
    :raises NotImplementedError: for any other ``encoding_type``
    :raises AssertionError: if the encoding is longer than 8 characters
    """
    c = dict_word['idx2word'][idx]
    if c == '<eos>':
        c = '。'

    # Call the encoder once per word and keep the branch-specific lookup
    # table; everything after this is shared by both encodings.
    if encoding_type == 'wubi':
        codes = wubi(c)
        encoding = codes[0] if codes else c
        char2idx = dict_wubi['char2idx']
    elif encoding_type == 'pinyin':
        readings = pinyin(c)
        encoding = readings[0][0] if readings else c
        char2idx = dict_pinyin['char2idx']
    else:
        raise NotImplementedError

    # ljust leaves encodings of 8 or more characters untouched, so an
    # over-long encoding still trips the assertion below.
    full_encoding = encoding.ljust(8, '。')
    assert len(full_encoding) == 8, full_encoding
    tokens = [char2idx[ch] for ch in full_encoding]
    length = [i < len(encoding) for i in range(len(tokens))]
    return tokens if return_type == 'tokens' else length
開發者ID:ShannonAI,項目名稱:glyce,代碼行數:27,代碼來源:components.py

示例12: get_edit_distance_close_2d_code

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def get_edit_distance_close_2d_code(a, b):
    """Return a distance between two pinyin objects ``a`` and ``b``.

    The consonant and vowel of each object are mapped through
    ``consonantMap_TwoDCode`` / ``vowelMap_TwoDCode`` and compared with
    ``get_distance_2d_code``.  The result is the smaller of (consonant
    distance + vowel distance) and ``get_sim_dis_from_hardcod_map(a, b)``,
    plus one tenth of the absolute tone difference.

    Args:
        a, b: objects exposing ``consonant``, ``vowel``, ``tone`` and
            ``toString()``; either may be ``None``.

    Returns:
        0 if either argument is ``None`` (a message is printed), otherwise
        the distance described above.

    Raises:
        Exception: if the computation fails; the original error is chained
            as the cause.
    """
    if a is None or b is None:
        # toString() must not be called on None: doing so raised
        # AttributeError, so None input could never return 0.
        shown_a = a.toString() if a is not None else None
        shown_b = b.toString() if b is not None else None
        print("Error:pinyin({},{})".format(shown_a, shown_b))
        return 0

    try:
        twoDcode_consonant_a = consonantMap_TwoDCode[a.consonant]
        twoDcode_consonant_b = consonantMap_TwoDCode[b.consonant]
        cDis = abs(get_distance_2d_code(twoDcode_consonant_a, twoDcode_consonant_b))

        twoDcode_vowel_a = vowelMap_TwoDCode[a.vowel]
        twoDcode_vowel_b = vowelMap_TwoDCode[b.vowel]
        vDis = abs(get_distance_2d_code(twoDcode_vowel_a, twoDcode_vowel_b))

        hcDis = get_sim_dis_from_hardcod_map(a, b)

        return min((cDis+vDis), hcDis) + 1.0*abs(a.tone-b.tone)/10
    except Exception as err:
        # Catch Exception only, so KeyboardInterrupt/SystemExit propagate,
        # and keep the underlying error as the cause.
        raise Exception(
            "Error pinyin {}{}".format(a.toString(), b.toString())) from err
開發者ID:System-T,項目名稱:DimSim,代碼行數:26,代碼來源:utils.py

示例13: match_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def match_pinyin(pinyin1, pinyin2):
    """Similarity score between two pinyin strings.

    Counts the positions, up to the length of the shorter string, at which
    both strings hold the same character, and divides that count by the
    length of the longer string.

    Raises:
        AssertionError: if either argument equals the empty string.
    """
    assert pinyin1 != "", "pinyin1 can not be empty"
    assert pinyin2 != "", "pinyin2 can not be empty"
    longest = max(len(pinyin1), len(pinyin2))
    # zip stops at the shorter input, which is exactly the overlap to check.
    same_position_hits = sum(
        1 for left, right in zip(pinyin1, pinyin2) if left == right
    )
    return same_position_hits/longest
開發者ID:Decalogue,項目名稱:chat,代碼行數:24,代碼來源:word2pinyin.py

示例14: jaccard_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def jaccard_pinyin(pv1, pv2, threshold=0.7):
    """Similarity score between two pinyin vectors with jaccard.

    Builds a matrix with one row per element of ``pv1`` and one column per
    element of ``pv2``, each cell being ``match_pinyin`` of the pair.  The
    matrix and ``threshold`` are handed to ``sum_cosine``, and the score is
    computed from its ``total``, ``total_dif`` and ``num_not_match``
    entries as ``total / (total + num_not_match * (1 - total_dif))``.

    The original documentation states that the score lies in [0, 1].
    """
    score_rows = [
        [match_pinyin(pinyin1, pinyin2) for pinyin2 in pv2]
        for pinyin1 in pv1
    ]
    result = sum_cosine(mat(score_rows), threshold)
    total = result["total"]
    total_dif = result["total_dif"]
    unmatched = result["num_not_match"]
    return total/(total + unmatched*(1-total_dif))
開發者ID:Decalogue,項目名稱:chat,代碼行數:25,代碼來源:word2pinyin.py

示例15: to_pinyin

# 需要導入模塊: import pypinyin [as 別名]
# 或者: from pypinyin import pinyin [as 別名]
def to_pinyin(name):
    """Concatenate all readings returned by ``pinyin(name, 0)`` into one string.

    NOTE(review): the positional ``0`` is presumably pypinyin's plain,
    tone-less style -- confirm.
    """
    pieces = []
    for readings in pinyin(name, 0):
        pieces.extend(readings)
    return ''.join(pieces)
開發者ID:tobyqin,項目名稱:kog-money,代碼行數:5,代碼來源:match.py


注:本文中的pypinyin.pinyin方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。