This article collects typical usage examples of the Python method jieba.analyse.extract_tags. If you have been wondering what exactly analyse.extract_tags does and how to call it, the curated code samples below may help. You can also explore the containing module, jieba.analyse,
for further context.
The following presents 5 code examples of the analyse.extract_tags method, sorted by popularity by default.
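Before the examples, here is a minimal sketch of the method's basic call, for orientation; the sample sentence is made up for illustration:

from jieba import analyse

text = "小明硕士毕业于中国科学院计算所，后在日本京都大学深造"
# topK caps the number of returned keywords; withWeight=True also returns
# each keyword's TF-IDF weight; allowPOS=() means no part-of-speech filter.
for keyword, weight in analyse.extract_tags(text, topK=5, withWeight=True, allowPOS=()):
    print(keyword, weight)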
Example 1: xz_keywords
# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def xz_keywords():
    """
    Keyword extraction
    """
    key_words = extract_tags(xz_text, topK=300, withWeight=True, allowPOS=())
    # Stop words
    stopwords = pd.read_csv("data/stop_words.txt", index_col=False,
                            quoting=3, sep="\n", names=['stopword'], encoding='utf-8')
    words = [word for word, weight in key_words]
    keywords_df = pd.DataFrame({'keywords': words})
    # Remove stop words
    keywords_df = keywords_df[~keywords_df.keywords.isin(stopwords.stopword.tolist())]
    word_freq = []
    for word in keywords_df.keywords.tolist():
        for w, k in key_words:
            if word == w:
                word_freq.append((word, k))
    print(word_freq)
    show_wordCloud(word_freq)
Example 2: jieba_keywords
# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def jieba_keywords():
    """
    Keyword extraction
    """
    key_words = extract_tags(st_text, topK=300, withWeight=True, allowPOS=())
    # Stop words
    stopwords = pd.read_csv("data/origin/stop_words.txt", index_col=False,
                            quoting=3, sep="\n", names=['stopword'], encoding='utf-8')
    words = [word for word, weight in key_words]
    keywords_df = pd.DataFrame({'keywords': words})
    # Remove stop words
    keywords_df = keywords_df[~keywords_df.keywords.isin(stopwords.stopword.tolist())]
    word_freq = []
    for word in keywords_df.keywords.tolist():
        for w, k in key_words:
            if word == w:
                word_freq.append((word, k))
    print("================ After removing stop words ================")
    print(word_freq)
    show_wordCloud(word_freq)
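The nested loop that looks up each surviving keyword's weight is quadratic. Since extract_tags already returns (word, weight) pairs, the filtering step in the two examples above can be sketched more directly, without pandas; a minimal variant, assuming the stop-word file holds one word per line:

from jieba.analyse import extract_tags

def keywords_without_stopwords(text, stopword_path="data/origin/stop_words.txt"):
    # Assumed file layout: one stop word per line, UTF-8 encoded.
    with open(stopword_path, encoding="utf-8") as f:
        stopwords = {line.strip() for line in f}
    key_words = extract_tags(text, topK=300, withWeight=True, allowPOS=())
    # Keep (word, weight) pairs whose word is not a stop word.
    return [(word, weight) for word, weight in key_words if word not in stopwords]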
Example 3: get_tag
# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def get_tag(sentence, config):
    """Get the semantic tag of a sentence.
    """
    iquestion = sentence.format(**config)
    try:
        keywords = analyse.extract_tags(iquestion, topK=1)
        keyword = keywords[0]
    except IndexError:
        keyword = iquestion
    tags = synonym_cut(keyword, 'wf')  # tuple list
    if tags:
        tag = tags[0][1]
        if not tag:
            tag = keyword
    else:
        tag = keyword
    return tag
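A hypothetical call, for illustration only: the template and config values below are invented, and synonym_cut is the function from Example 5.

# Hypothetical usage; '{city}' is filled in from config by sentence.format(**config).
config = {"city": "北京"}
tag = get_tag("{city}明天的天气怎么样", config)
print(tag)  # The synonym-dictionary tag of the top keyword, or the keyword itself.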
Example 4: countIDF
# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def countIDF(self, text, topK):
    '''
    text: a string; topK: number of top TF-IDF keywords whose term
    frequencies are collected, for use in similarity computation.
    return: a term-frequency vector
    '''
    tfidf = analyse.extract_tags
    cipin = {}  # term frequencies after word segmentation
    fenci = jieba.cut(text)
    # Count how often each word occurs
    for word in fenci:
        if word not in cipin.keys():
            cipin[word] = 0
        cipin[word] += 1
    # Extract the top-topK keywords by TF-IDF, each with its weight
    keywords = tfidf(text, topK, withWeight=True)
    ans = []
    for keyword in keywords:
        # print(keyword, " ", cipin[keyword[0]])
        ans.append(cipin[keyword[0]])  # frequency of each top keyword
    return ans
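The docstring says the resulting vector feeds a similarity computation. A hedged sketch of one way such vectors could be compared, assuming both texts are first projected onto a shared keyword vocabulary (countIDF alone does not align the two vectors):

import math
import jieba
from jieba import analyse

def freq_vector(text, vocab):
    # Term frequencies of `text` over a fixed, shared vocabulary.
    cipin = {}
    for word in jieba.cut(text):
        cipin[word] = cipin.get(word, 0) + 1
    return [cipin.get(word, 0) for word in vocab]

def cosine_similarity(a, b):
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(x * x for x in b))
    return dot / norm if norm else 0.0

text1, text2 = "我喜欢自然语言处理", "自然语言处理很有趣"
# Shared vocabulary: union of both texts' top TF-IDF keywords.
vocab = sorted(set(analyse.extract_tags(text1, 10)) | set(analyse.extract_tags(text2, 10)))
print(cosine_similarity(freq_vector(text1, vocab), freq_vector(text2, vocab)))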
Example 5: synonym_cut
# Required import: from jieba import analyse [as alias]
# Or: from jieba.analyse import extract_tags [as alias]
def synonym_cut(sentence, pattern="wf"):
    """Cut the sentence into a synonym vector tag.
    If a word in this sentence was not found in the synonym dictionary,
    it will be marked with the default flag of the word segmentation tool.
    Args:
        pattern: 'w'-word segmentation, 'k'-single top keyword, 't'-keyword list,
            'wf'-words with flags, 'tf'-keywords with flags.
    """
    # Strip trailing punctuation
    sentence = sentence.rstrip(''.join(punctuation_all))
    # Strip trailing modal particles
    sentence = sentence.rstrip(tone_words)
    synonym_vector = []
    if pattern == "w":
        synonym_vector = [item for item in jieba.cut(sentence) if item not in filter_characters]
    elif pattern == "k":
        synonym_vector = analyse.extract_tags(sentence, topK=1)
    elif pattern == "t":
        synonym_vector = analyse.extract_tags(sentence, topK=10)
    elif pattern == "wf":
        result = posseg.cut(sentence)
        # synonym_vector = [(item.word, item.flag) for item in result \
        #                   if item.word not in filter_characters]
        # Modified on 2017.4.27
        for item in result:
            if item.word not in filter_characters:
                if len(item.flag) < 4:
                    item.flag = list(posseg.cut(item.word))[0].flag
                synonym_vector.append((item.word, item.flag))
    elif pattern == "tf":
        result = posseg.cut(sentence)
        tags = analyse.extract_tags(sentence, topK=10)
        for item in result:
            if item.word in tags:
                synonym_vector.append((item.word, item.flag))
    return synonym_vector
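A hedged illustration of what the 'tf' branch computes, using only public jieba APIs (the project-specific filter tables such as filter_characters are not needed for this part; the sample sentence is invented):

import jieba.posseg as posseg
from jieba import analyse

sentence = "今天北京的天气真不错"
tags = analyse.extract_tags(sentence, topK=10)
# Keep (word, flag) pairs only for words TF-IDF ranks among the keywords,
# mirroring the 'tf' branch above.
pairs = [(item.word, item.flag) for item in posseg.cut(sentence) if item.word in tags]
print(pairs)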