

Python analyse.extract_tags Method Code Examples

This article collects typical usage examples of the Python method jieba.analyse.extract_tags. If you are wondering how exactly to use analyse.extract_tags, or what it is good for, the curated code examples below may help. You can also explore further usage examples of the containing module, jieba.analyse.


The following presents 5 code examples of the analyse.extract_tags method, sorted by popularity by default.
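Before diving into the examples, here is a minimal, self-contained sketch of the method's basic usage; the sample sentence is invented for illustration:

from jieba import analyse

# Extract the top 5 keywords ranked by TF-IDF weight. With withWeight=True each
# result is a (keyword, weight) tuple; allowPOS=() applies no part-of-speech filter.
keywords = analyse.extract_tags("小明碩士畢業於中國科學院計算所", topK=5,
                                withWeight=True, allowPOS=())
for word, weight in keywords:
    print(word, weight)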

Example 1: xz_keywords

# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def xz_keywords():
    """
    Keyword extraction
    """
    # xz_text and show_wordCloud are defined elsewhere in the source file
    key_words = extract_tags(xz_text, topK=300, withWeight=True, allowPOS=())
    # Stop words
    stopwords = pd.read_csv("data/stop_words.txt", index_col=False,
                            quoting=3, sep="\n", names=['stopword'], encoding='utf-8')
    words = [word for word, weight in key_words]
    keywords_df = pd.DataFrame({'keywords': words})

    # Remove stop words
    keywords_df = keywords_df[~keywords_df.keywords.isin(stopwords.stopword.tolist())]

    # Look up the TF-IDF weight of each remaining keyword
    word_freq = []
    for word in keywords_df.keywords.tolist():
        for w, k in key_words:
            if word == w:
                word_freq.append((word, k))
    print(word_freq)
    show_wordCloud(word_freq)
Developer: jarvisqi, Project: nlp_learning, Lines of code: 23, Source: gensim_jb.py
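Note that jieba can also filter stop words during extraction itself via analyse.set_stop_words, rather than post-filtering with pandas as above. A minimal sketch, assuming the same stop-words file path as in this example:

from jieba import analyse

# Register a stop-words file once; subsequent extract_tags calls skip those words.
analyse.set_stop_words("data/stop_words.txt")
keywords = analyse.extract_tags("這裏是一段用於演示的示例文本", topK=300, withWeight=True)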

Example 2: jieba_keywords

# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def jieba_keywords():
    """
    Keyword extraction
    """
    # st_text and show_wordCloud are defined elsewhere in the source file
    key_words = extract_tags(st_text, topK=300, withWeight=True, allowPOS=())
    # Stop words
    stopwords = pd.read_csv("data/origin/stop_words.txt", index_col=False,
                            quoting=3, sep="\n", names=['stopword'], encoding='utf-8')
    words = [word for word, weight in key_words]
    keywords_df = pd.DataFrame({'keywords': words})

    # Remove stop words
    keywords_df = keywords_df[~keywords_df.keywords.isin(stopwords.stopword.tolist())]

    # Look up the TF-IDF weight of each remaining keyword
    word_freq = []
    for word in keywords_df.keywords.tolist():
        for w, k in key_words:
            if word == w:
                word_freq.append((word, k))
    print("================ after removing stop words ================")
    print(word_freq)

    show_wordCloud(word_freq)
Developer: jarvisqi, Project: nlp_learning, Lines of code: 26, Source: jieba_segment.py

Example 3: get_tag

# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def get_tag(sentence, config):
    """Get the semantic tag of a sentence.
    """
    # fill the {placeholder} fields of the sentence template from config
    iquestion = sentence.format(**config)
    try:
        keywords = analyse.extract_tags(iquestion, topK=1)
        keyword = keywords[0]
    except IndexError:
        keyword = iquestion
    tags = synonym_cut(keyword, 'wf')  # list of (word, flag) tuples
    if tags:
        tag = tags[0][1]
        if not tag:
            tag = keyword
    else:
        tag = keyword
    return tag
Developer: Decalogue, Project: chat, Lines of code: 19, Source: semantic.py

Example 4: countIDF

# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def countIDF(self, text, topK):
    '''
    text: a string; topK: number of top TF-IDF keywords whose term
    frequencies are returned, for use in similarity computation
    return: term-frequency vector
    '''
    tfidf = analyse.extract_tags

    cipin = {}  # term-frequency counts after segmentation

    fenci = jieba.cut(text)

    # count the frequency of each segmented word
    for word in fenci:
        if word not in cipin.keys():
            cipin[word] = 0
        cipin[word] += 1

    # extract the top topK keywords via TF-IDF, each with its weight
    keywords = tfidf(text, topK, withWeight=True)

    ans = []
    # cipin[keyword[0]] looks up the term frequency of each keyword
    # help(tfidf)
    for keyword in keywords:
        # print(keyword, " ", cipin[keyword[0]])
        ans.append(cipin[keyword[0]])  # term frequencies of the topK keywords

    return ans
Developer: xiaorancs, Project: text-similarity, Lines of code: 31, Source: textSimilarity.py
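As the docstring notes, these term-frequency vectors are intended for similarity computation. A minimal sketch of one common way to compare two equal-length vectors (cosine similarity); how the original project aligns vectors from two different texts is not shown in this snippet:

import math

def cosine_similarity(v1, v2):
    # Cosine similarity between two equal-length frequency vectors.
    dot = sum(a * b for a, b in zip(v1, v2))
    norm1 = math.sqrt(sum(a * a for a in v1))
    norm2 = math.sqrt(sum(b * b for b in v2))
    if norm1 == 0 or norm2 == 0:
        return 0.0
    return dot / (norm1 * norm2)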

Example 5: synonym_cut

# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def synonym_cut(sentence, pattern="wf"):
    """Cut the sentence into a synonym vector tag.

    If a word in the sentence is not found in the synonym dictionary,
    it is tagged with the default flag of the word segmentation tool.

    Args:
        pattern: 'w' - words, 'k' - single top keyword, 't' - keyword list,
            'wf' - (word, flag) pairs, 'tf' - (keyword, flag) pairs.
    """
    # strip sentence-final punctuation
    sentence = sentence.rstrip(''.join(punctuation_all))
    # strip sentence-final modal (tone) words
    sentence = sentence.rstrip(tone_words)
    synonym_vector = []
    if pattern == "w":
        synonym_vector = [item for item in jieba.cut(sentence) if item not in filter_characters]
    elif pattern == "k":
        synonym_vector = analyse.extract_tags(sentence, topK=1)
    elif pattern == "t":
        synonym_vector = analyse.extract_tags(sentence, topK=10)
    elif pattern == "wf":
        result = posseg.cut(sentence)
        # synonym_vector = [(item.word, item.flag) for item in result
        #                   if item.word not in filter_characters]
        # Modified 2017-04-27
        for item in result:
            if item.word not in filter_characters:
                if len(item.flag) < 4:
                    item.flag = list(posseg.cut(item.word))[0].flag
                synonym_vector.append((item.word, item.flag))
    elif pattern == "tf":
        result = posseg.cut(sentence)
        tags = analyse.extract_tags(sentence, topK=10)
        for item in result:
            if item.word in tags:
                synonym_vector.append((item.word, item.flag))
    return synonym_vector
Developer: Decalogue, Project: chat, Lines of code: 41, Source: semantic.py
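To see the 'tf' branch in isolation, here is a self-contained sketch; the sample sentence is invented, and the project-specific filtering through punctuation_all, tone_words, and filter_characters is omitted:

import jieba.posseg as posseg
from jieba import analyse

sentence = "自然語言處理是人工智能的一個重要方向"
# Keep only segmented words that rank among the top-10 TF-IDF keywords,
# paired with their POS flags, mirroring the 'tf' branch above.
tags = analyse.extract_tags(sentence, topK=10)
tagged = [(item.word, item.flag) for item in posseg.cut(sentence) if item.word in tags]
print(tagged)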


Note: The jieba.analyse.extract_tags examples in this article were compiled by 純淨天空 from open source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open source projects contributed by various developers; copyright in the source code remains with the original authors. For distribution and use, please refer to the corresponding project's license; do not reproduce without permission.