本文整理汇总了Python中wordcloud.WordCloud.fit_words方法的典型用法代码示例。如果您正苦于以下问题:Python WordCloud.fit_words方法的具体用法?Python WordCloud.fit_words怎么用?Python WordCloud.fit_words使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类wordcloud.WordCloud的用法示例。
在下文中一共展示了WordCloud.fit_words方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: create_wordclouds
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def create_wordclouds(self, text, name_of_cloud, additional_stop_list, max_words, width, height, bigram=False):
    """Render a word cloud of ``text`` and save it as ``<name_of_cloud>.png``.

    The text is stripped of punctuation, lower-cased, filtered against the
    stop words, lemmatized and tokenized; the ``max_words`` most common
    tokens (or adjacent-token pairs) are then scaled so the top entry has
    weight 1.0 and drawn.

    Args:
        text: Raw input text.
        name_of_cloud: Output path without the ``.png`` extension.
        additional_stop_list: Extra stop words for this call only.
        max_words: Maximum number of entries to draw.
        width: Image width passed to ``WordCloud``.
        height: Image height passed to ``WordCloud``.
        bigram: When true, count pairs of adjacent tokens (joined by a
            space) instead of single tokens.

    Raises:
        ValueError: If no tokens survive the preprocessing.
    """
    text_nopunc = self.remove_punctuation(text, "", "")
    text_lower = text_nopunc.lower()
    # Copy before extending: appending to self.stopwords itself would make
    # this call's extra stop words leak into every later call.
    stop = list(self.stopwords)
    stop.extend(additional_stop_list)
    text_nostop = self.remove_stopword(text_lower, stop)
    tokens = wt(text_nostop)
    text_lem = self.lemmatize(tokens)
    tokens_lem = wt(text_lem)
    if bigram:
        counts = collections.Counter(
            first + ' ' + second for first, second in nltk.bigrams(tokens_lem)
        )
    else:
        counts = collections.Counter(tokens_lem)
    final = counts.most_common(max_words)
    if not final:
        raise ValueError("no words left to build a word cloud from")
    # most_common() sorts by descending count, so the first entry is the max.
    max_count = float(final[0][1])
    final = [(name, count / max_count) for name, count in final]
    # NOTE(review): newer wordcloud releases document the fit_words argument
    # as a dict; confirm the installed version still accepts (word, weight)
    # pairs.
    word_cloud = WordCloud(font_path="fonts/Georgia.ttf",
                           width=width, height=height, max_words=max_words, stopwords=stop)
    word_cloud.fit_words(final)
    word_cloud.to_file(name_of_cloud + ".png")
示例2: topic_word_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def topic_word_cloud(nmf, topic_idx, max_words=300, figsize=(14, 8), width=2400, height=1300, ax=None):
    ''' Draw the word cloud of a single topic on a matplotlib axis
    INPUT:
        nmf: object providing topic_word_frequency(topic_idx)
            (an NMFClustering object, per the original author)
        topic_idx: int
            Topic whose word frequencies are plotted
        max_words: int
            Upper bound on the number of words placed in the cloud
        figsize: tuple (int, int)
            Figure size, used only when no axis is supplied
        width: int
            Width handed to WordCloud
        height: int
            Height handed to WordCloud
        ax: None or matplotlib axis object
            Axis to draw on; a new figure/axis is made when omitted
    '''
    cloud = WordCloud(
        background_color='white',
        max_words=max_words,
        width=width,
        height=height,
    )
    frequencies = nmf.topic_word_frequency(topic_idx)
    cloud.fit_words(frequencies)
    # Fall back to a fresh single-axis figure when the caller gave none
    if not ax:
        ax = plt.figure(figsize=figsize).add_subplot(111)
    ax.imshow(cloud)
    ax.axis('off')
示例3: make_clouds
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def make_clouds(files, n_words=20):
    """Write one word-cloud PNG per topic of a saved LDA model.

    Images go to ``../browser/clouds/<model basename>/<topic index>.png``.
    Words are ranked per topic by the geometric mean of the two
    normalizations of ``expElogbeta`` (``sqrt(pTW * pWT)``).

    Args:
        files: Object with ``model`` (path to the saved ``LdaModel``) and
            ``replacements`` (path to a JSON bug -> id mapping) attributes.
        n_words: Number of top-ranked words drawn per topic.
    """
    # set locations
    base_model_name = os.path.splitext(os.path.basename(files.model))[0]
    output_d = '../browser/clouds/' + base_model_name + '/'
    if not os.path.exists(output_d):
        os.makedirs(output_d)

    # create wordcloud generator
    wc = WordCloud(width=1000, height=500, background_color='white')

    print('Loading model')
    model = LdaModel.load(files.model)
    beta = model.expElogbeta

    print('Normalizing by topics, and by words')
    pTW = normalize(beta, axis=0)
    pWT = normalize(beta, axis=1)

    # load bug<->id map, then invert to id<->bug (keys containing "." are
    # left out of the inverse map)
    with open(files.replacements) as fh:
        bug_to_id = json.load(fh)
    id_to_bug = {v: k for k, v in bug_to_id.items() if "." not in k}

    for i in range(len(beta)):
        # compute RAR
        t_rar = np.sqrt(pTW[i] * pWT[i])
        # indices of the n_words largest scores, best first
        top_word_ids = t_rar.argsort()[:-1 - n_words:-1]
        top_words = [model.id2word.id2token[wordid] for wordid in top_word_ids]
        # swap replacement ids back to the original bug names where known
        top_words = [id_to_bug.get(word, word) for word in top_words]
        # materialize the pairs so fit_words never receives a one-shot iterator
        wc.fit_words(list(zip(top_words, t_rar[top_word_ids])))
        wc.to_file(output_d + str(i) + '.png')
示例4: get_wordcloud_img
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def get_wordcloud_img(self, interval_id):
    """Return an in-memory JPEG of the word cloud for ``interval_id``.

    Uses the last 100 entries of ``self.get_word_frequencies(interval_id)``
    in reverse order (presumably the list is sorted ascending; verify
    against that method). The image is ``self.image_width`` wide with a
    4:3 aspect ratio, and the returned buffer is rewound to position 0.
    """
    frequencies = self.get_word_frequencies(interval_id)
    cloud_height = int(self.image_width * .75)
    cloud = WordCloud(font_path=FONT_PATH, width=self.image_width, height=cloud_height)
    tail = frequencies[-100:]
    cloud.fit_words(list(reversed(tail)))
    buffer = StringIO()
    image = cloud.to_image()
    image.save(buffer, 'JPEG', quality=70)
    # Rewind so the caller can read the image from the start.
    buffer.seek(0)
    return buffer
示例5: generate_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def generate_cloud(self, tags, sizeX, sizeY, filename=None):
    """Show a word cloud for ``tags`` and optionally save the figure.

    Args:
        tags: Mapping of word -> weight.
        sizeX: Width passed to ``WordCloud``.
        sizeY: Height passed to ``WordCloud``.
        filename: If not ``None``, the figure is saved to this path before
            it is shown.
    """
    # A concrete list of (word, weight) pairs rather than a lazy zip.
    word_weights = list(tags.items())
    wordcloud = WordCloud(width=sizeX, height=sizeY, relative_scaling=0.6)
    wordcloud.fit_words(word_weights)
    plt.figure(figsize=(20, 10), facecolor='k')
    plt.imshow(wordcloud)
    plt.axis("off")
    if filename is not None:
        plt.savefig(filename, facecolor='k', bbox_inches='tight')
    plt.show()
示例6: make_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def make_cloud(docs):
    """Display a word cloud built from ``count_words(docs)``.

    Args:
        docs: Input handed unchanged to ``count_words``; its result is
            passed straight to ``WordCloud.fit_words``.
    """
    # Imported here, as in the original, so wordcloud is only needed when
    # a cloud is actually drawn.
    from wordcloud import WordCloud

    flat_doc = count_words(docs)
    wc = WordCloud(ranks_only=True, font_path='/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf')
    wc.fit_words(flat_doc)
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
示例7: generate_wordcloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def generate_wordcloud(vocab, metric, name):
    ''' Save a white-background word cloud of vocab weighted by metric.

    Each word is paired with int(metric * 100); the figure is written to
    '../../figures/' + name.
    '''
    weighted_words = [
        (word, int(score * 100)) for word, score in zip(vocab, metric)
    ]

    # Build the cloud and render it without axes
    cloud = WordCloud(background_color="white")
    cloud.fit_words(weighted_words)
    plt.imshow(cloud)
    plt.axis('off')
    plt.savefig('../../figures/' + name)
示例8: topic_time_and_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def topic_time_and_cloud(df, topic, feature_names, nmf, title, source=False, normalize=False, freq='W', year=True, max_words=300, positivity=True, show=True):
    ''' Plot a topic's article counts over time above its word cloud
    INPUT:
        df: DataFrame with a 'source' column
        topic: pair-like; topic[0] is indexed as topic[0][:, topic[1]] to get
            the boolean mask of documents in the topic, topic[1] is the
            topic index
        feature_names: passed to topic_word_freq together with nmf.components_
        nmf: fitted model exposing components_
        title: str
            Figure suptitle
        source, normalize, freq, year: forwarded to article_count_by_time
        max_words: int
            Max number of words in the word cloud
        positivity: bool
            Whether to add the per-source sentiment bar chart
        show: bool
            Whether to call plt.show()
    OUTPUT:
        ax1: the axis holding the time series
    '''
    fig = plt.figure(figsize=(14, 8.5))
    ax1 = fig.add_axes([0.05, 0.5, 0.93, 0.41])
    article_count_by_time(df, topic=topic, source=source, normalize=normalize, freq=freq, year=year, fig=fig, label=topic_labels[topic[1]], show=False)
    ax1.xaxis.labelpad = -4
    plt.suptitle(title, fontsize=20)
    fig.text(0.05, 0.44, 'Author: Erich Wellinger', fontsize=10, alpha=0.7)
    fig.text(0.33, 0.8, 'github.com/ewellinger/election_analysis', fontsize=20, color='gray', alpha=0.5)

    outlets = [('nyt', 'NYT', '#4c72b0'), ('foxnews', 'FOX', '#c44e52'), ('npr', 'NPR', '#55a868'), ('guardian', 'GUA', '#8172b2'), ('wsj', 'WSJ', '#ccb974')]

    # Create a boolean mask for whether each document is in the topic or not
    labels_mask = topic[0][:, topic[1]]
    num_articles = labels_mask.sum()

    # Must run before the word-cloud axes are added: plt.title targets
    # whichever axes is current at this point.
    plt.title('Number of Articles in Topic: {}'.format(num_articles), x=0.4825)

    # TODO: incorporate the word_cloud function here
    if not positivity:
        ax2 = fig.add_axes([0.025, 0, 0.79, 0.43])
        wc = WordCloud(background_color='white', max_words=max_words, width=1900, height=625)
    else:
        # Count outlets with at least 5 articles in the topic; the sentiment
        # panel (ax4) grows and the cloud (ax2) shrinks with that count.
        num_sources = 0
        for outlet in outlets:
            if len(df.loc[(labels_mask) & (df['source'] == outlet[0])]) >= 5:
                num_sources += 1
        ax2 = fig.add_axes([0.025, 0, 0.712125-(num_sources*0.034425), 0.43])
        wc = WordCloud(background_color='white', max_words=max_words, width=1715-(num_sources*83), height=625)
        ax4 = fig.add_axes([0.782125-(num_sources*0.034425), 0.035, 0.034425+(num_sources*0.034425), 0.375])

    word_freq = topic_word_freq(nmf.components_, topic[1], feature_names)
    wc.fit_words(word_freq)
    ax2.imshow(wc)
    ax2.axis('off')

    ax3 = fig.add_axes([0.825, 0.01, 0.15555, 0.4])
    normalized_source_barchart(df, topic, outlets, ax3)

    if positivity:
        sentiment_source_barchart(df.loc[labels_mask], outlets, ax=ax4)
        if num_sources < 3:
            ax4.set_title('')

    if show:
        plt.show()
    return ax1
示例9: paint_clouds
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def paint_clouds(genre, cloud_words):
    '''
    Paint, show and save the word cloud for one genre (text), using at most
    cloud_words (int) words. load_frequencies is asked for 50 more words than
    the cloud needs, in case some of them do not fit. The figure is saved to
    '../data/cloud_<genre>.png' after it has been shown.
    '''
    spare_words = 50
    frequencies = load_frequencies(genre, cloud_words + spare_words)

    cloud = WordCloud(
        background_color="white",
        max_words=cloud_words,
        max_font_size=40,
        random_state=42,
    )
    cloud.fit_words(frequencies)

    figure = plt.figure()
    plt.imshow(cloud)
    plt.axis("off")
    plt.title(genre)
    plt.show()

    figure.savefig('../data/cloud_' + genre + '.png')
示例10: generate_wordcloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def generate_wordcloud(y, vocab):
    ''' Generate a simple word cloud of the vocabulary.

    y is summed along axis 0 and the i-th sum becomes the weight of the
    i-th vocab entry (presumably y is a documents x vocabulary count
    matrix; confirm with the caller). The figure is saved to
    '../../figures/vocab_wordcloud.png'.
    '''
    ingred_counts = np.sum(y, axis=0)
    # Use distinct loop names so the vocab parameter is not rebound while
    # it is being iterated over.
    word_cloud_text = [
        (str(term), int(total)) for total, term in zip(ingred_counts, vocab)
    ]

    # Generate a word cloud image
    wordcloud = WordCloud(background_color="white")
    wordcloud.fit_words(word_cloud_text)
    plt.imshow(wordcloud)
    plt.axis('off')
    plt.savefig('../../figures/vocab_wordcloud.png')
示例11: post_process
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def post_process():
    """Save grey word clouds of the first two topics as PDFs.

    Reads ``arr_1`` from ``data/avg_samples.npz`` and the vocabulary from
    ``data/vocab.pkl``, takes the 20 highest-valued entries of rows 0 and 1
    and writes ``data/topic_1.pdf`` and ``data/topic_2.pdf``.

    The PRISM comparison is not computed: its input (``phi_prism``) is
    never loaded in this function.
    """
    with np.load('data/avg_samples.npz') as theta_pb:
        phi = theta_pb['arr_1']
    print(phi.shape)

    with open('data/vocab.pkl', 'rb') as fh:
        vocab = pickle.load(fh)
    # vocab maps key -> id; invert it to look words up by column index
    inv = dict((v, k) for k, v in vocab.items())

    # Sort every row ascending and keep the last 20 columns (largest values).
    axis = 1
    index = list(np.ix_(*[np.arange(i) for i in phi.shape]))
    index[axis] = phi.argsort(axis)
    # Index with a tuple: a list of index arrays is not treated as a
    # multi-dimensional index by current NumPy.
    a = phi[tuple(index)][:, -20:]
    idx_l = index[axis][:, -20:].tolist()
    words = [[inv[i] for i in subl] for subl in idx_l]

    # (word, weight) pairs, largest weight first
    # topic 1
    freq1 = list(reversed(list(zip(words[0], list(a[0, :])))))
    # topic 2
    freq2 = list(reversed(list(zip(words[1], list(a[1, :])))))

    wc = WordCloud(background_color="white", width=400, height=400,
                   random_state=1234).fit_words(freq1)
    plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_1.pdf', format='pdf')
    plt.close()

    plt.imshow(wc.fit_words(freq2).recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_2.pdf', format='pdf')
    plt.close()
示例12: make_word_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def make_word_cloud(topic_num, max_words=1000, width=10, height=10):
    """Show the word cloud of one NMF topic and return its word weights.

    Args:
        topic_num: Row of ``post_nmf.components_`` to plot.
        max_words: Currently unused; kept for interface compatibility.
        width: Currently unused; kept for interface compatibility.
        height: Currently unused; kept for interface compatibility.

    Returns:
        A list of ``(word, weight)`` pairs in feature order, where the
        weights are the topic's component values divided by their sum.
    """
    with open(POST_NMF_PICKLE, 'rb') as fh:
        post_nmf = pickle.load(fh)
    with open(POST_TFIDF_PICKLE, 'rb') as fh:
        post_tfidf = pickle.load(fh)

    words = np.array(post_tfidf.get_feature_names())
    topic_weights = post_nmf.components_[topic_num]
    freq_sum = np.sum(topic_weights)
    frequencies = [val / freq_sum for val in topic_weights]
    # Materialize the pairs: a bare zip() iterator would already be consumed
    # by fit_words by the time it is returned.
    word_freq = list(zip(words, frequencies))

    wc = WordCloud(background_color='white')
    wc.fit_words(word_freq)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    return word_freq
示例13: topic_word_cloud
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def topic_word_cloud(self, topic_num, max_words=200, figsize=None, width=2400, height=1300, ax=None, mask_fname=None, inherit_color=False):
    ''' Create word cloud for a given topic
    INPUT:
        topic_num: int
            Topic passed to self.nmf.topic_word_frequency
        max_words: int (default 200)
            Max number of words to incorporate into the word cloud
        figsize: None or tuple (int, int)
            Size of the figure if an axis isn't passed; falls back to
            self.figsize when None
        width: int (default 2400)
        height: int (default 1300)
        ax: None or matplotlib axis object
            Axis to draw on; a new figure and axis are created when None
        mask_fname: None or str
            None if no mask is desired, otherwise the path of the image
            used as the mask
        inherit_color: bool, default False
            Indicates whether the wordcloud should inherit the colors from
            the image mask (only applies when mask_fname is given)
    '''
    if figsize is None:
        figsize = self.figsize

    # mask=None is WordCloud's "no mask" value, so one constructor call
    # covers both cases
    mask = np.array(Image.open(mask_fname)) if mask_fname else None
    wc = WordCloud(background_color='white', max_words=max_words, mask=mask, width=width, height=height)

    word_freq = self.nmf.topic_word_frequency(topic_num)
    # Fit the WordCloud object to the specific topic's word frequencies
    wc.fit_words(word_freq)

    # Create the matplotlib figure and axis if they weren't passed in,
    # honoring the figsize resolved above
    if ax is None:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)

    if mask_fname and inherit_color:
        image_colors = ImageColorGenerator(imread(mask_fname))
        wc = wc.recolor(color_func=image_colors)

    # Always draw on ax so a caller-supplied axis is respected
    ax.imshow(wc)
    ax.axis('off')
示例14: main
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
def main():
    """Build a word cloud from the comments of the first now-playing movie.

    Fetches 10 pages of comments, keeps only the characters matched by the
    CJK range pattern, segments them with jieba, removes the stop words
    listed in ``stopwords.txt``, counts the remaining segments and draws
    the 1000 most frequent ones.
    """
    # Fetch the first 10 pages of comments for the first movie
    now_playing = getNowPlayingMovie_list()
    commentList = [
        getCommentsById(now_playing[0]['id'], page) for page in range(1, 11)
    ]

    # Flatten the per-page results into a single string
    comments = ''.join(str(page_comments).strip() for page_comments in commentList)

    # Keep only runs of characters in the \u4e00-\u9fa5 range, which drops
    # punctuation and everything else
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    filterdata = pattern.findall(comments)
    cleaned_comments = ''.join(filterdata)

    # Segment the Chinese text with jieba
    segment = jieba.lcut(cleaned_comments)
    words_df = pd.DataFrame({'segment': segment})

    # Remove stop words (quoting=3 means no quoting at all)
    stopwords = pd.read_csv("stopwords.txt", index_col=False, quoting=3, sep="\t", names=['stopword'], encoding='utf-8')
    words_df = words_df[~words_df.segment.isin(stopwords.stopword)]

    # Count word frequencies. size() + reset_index(name=...) yields the
    # 'segment' / count columns without the dict-renaming form of agg(),
    # which current pandas rejects.
    words_stat = words_df.groupby('segment').size().reset_index(name="计数")
    words_stat = words_stat.sort_values(by=["计数"], ascending=False)

    # Draw the word cloud from the 1000 most frequent words
    wordcloud = WordCloud(font_path="simhei.ttf", background_color="white", max_font_size=80)
    word_frequence_list = [(row[0], row[1]) for row in words_stat.head(1000).values]
    # NOTE(review): newer wordcloud releases document the fit_words argument
    # as a dict; confirm the installed version still accepts (word, count)
    # pairs.
    wordcloud = wordcloud.fit_words(word_frequence_list)
    plt.imshow(wordcloud)
示例15: float
# 需要导入模块: from wordcloud import WordCloud [as 别名]
# 或者: from wordcloud.WordCloud import fit_words [as 别名]
count = float(count)
print("Sum: %f" % count)

# Relative frequency (count / total) of every whitelisted word, in
# descending order of count.
words = []
sql = """SELECT word, count FROM word_whitelist ORDER BY `count` DESC"""
for row_word, row_count in query(sql):
    relative = float(row_count) / count
    words.append((row_word, relative))

print("Creating cloud.")
from scipy.misc import imread
mask = imread(MASK_FILE)

# Generate the word cloud. This takes a while because the library is not
# parallelized.
wordcloud = WordCloud(
    font_path="/usr/share/fonts/truetype/msttcorefonts/Georgia.ttf",
    ranks_only=True,
    max_words=len(words),
    mask=mask,
    background_color="white",
)
wordcloud.fit_words(words)

print("Creating LOW RES image.")
wordcloud.to_file(LOW_RES)

# Now raise the scale for the high-resolution rendering.
wordcloud.scale = 12
print("Creating HI RES image.")
img = wordcloud.to_image()
img.save(HIGH_RES, dpi=(100000,100000))