

Python WordCloud.fit_words Method Code Examples

This article collects typical usage examples of the Python method wordcloud.WordCloud.fit_words. If you have been wondering exactly how WordCloud.fit_words is called, what it is for, or what real-world usage looks like, the curated code samples below should help. You can also explore further usage examples of wordcloud.WordCloud, the class this method belongs to.


The sections below present 15 code examples of the WordCloud.fit_words method, sorted by popularity by default.
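Before the examples, here is a minimal, self-contained sketch of a typical fit_words call; the word weights are made up purely for illustration. Note that the expected argument has changed across wordcloud releases: the older releases used by most examples below accept a list of (word, weight) tuples, while recent releases expect a dict mapping each word to its weight, so wrapping the pairs in dict(...) may be necessary when running these snippets today.

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Made-up relative weights, for illustration only.
frequencies = {"python": 1.0, "wordcloud": 0.8, "fit_words": 0.6, "example": 0.4}

# fit_words lays out pre-computed word weights (a thin wrapper around
# generate_from_frequencies), as opposed to generate(), which parses raw text.
wc = WordCloud(width=800, height=400, background_color="white")
wc.fit_words(frequencies)

plt.imshow(wc)
plt.axis("off")
plt.show()
wc.to_file("fit_words_demo.png")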

Example 1: create_wordclouds

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
    def create_wordclouds(self, text, name_of_cloud, additional_stop_list, max_words, width, height, bigram = False):
        text_nopunc = self.remove_punctuation(text, "", "")
        text_lower = text_nopunc.lower()
        stop = self.stopwords
        stop.extend(additional_stop_list)
        text_nostop = self.remove_stopword(text_lower, stop)
        tokens = wt(text_nostop)
        text_lem = self.lemmatize(tokens)
        tokens_lem = wt(text_lem)
        my_bigrams = nltk.bigrams(tokens_lem)
        if bigram:
            bigram_merged=list()
            for line in my_bigrams:
                bigram_merged.append(line[0]+' ' + line[1])
            counts = collections.Counter(bigram_merged)
        else:
            counts = collections.Counter(tokens_lem)
        final = counts.most_common(max_words)
        max_count = max(final, key=operator.itemgetter(1))[1]
        final = [(name, count / float(max_count)) for name, count in final]

        # tags = make_tags(final, maxsize=max_word_size)
        # create_tag_image(tags, name_of_cloud+'.png', size=(width, height), layout=3, fontname='Crimson Text', background = (255, 255, 255))

        # temp_cloud = " ".join(text for text, count in final)
        word_cloud = WordCloud(font_path="fonts/Georgia.ttf",
            width=width, height=height, max_words=max_words, stopwords=stop)
        word_cloud.fit_words(final)
        word_cloud.to_file(name_of_cloud + ".png")
Author: nus-iss-ca, Project: text-mining, Lines: 31, Source: util.py

Example 2: topic_word_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def topic_word_cloud(nmf, topic_idx, max_words=300, figsize=(14, 8), width=2400, height=1300, ax=None):
    ''' Create word cloud for a given topic
    INPUT:
        nmf: NMFClustering object
        topic_idx: int
        max_words: int
            Max number of words to incorporate into the word cloud
        figsize: tuple (int, int)
            Size of the figure if an axis isn't passed
        width: int
        height: int
        ax: None or matplotlib axis object
    '''
    wc = WordCloud(background_color='white', max_words=max_words, width=width, height=height)
    word_freq = nmf.topic_word_frequency(topic_idx)

    # Fit the WordCloud object to the specific topics word frequencies
    wc.fit_words(word_freq)

    # Create the matplotlib figure and axis if they weren't passed in
    if not ax:
        fig = plt.figure(figsize=figsize)
        ax = fig.add_subplot(111)
    ax.imshow(wc)
    ax.axis('off')
Author: jbgalvanize, Project: election_analysis, Lines: 27, Source: plots.py

Example 3: make_clouds

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def make_clouds(files, n_words=20):
    # set locations
    base_model_name = os.path.splitext(os.path.basename(files.model))[0]
    output_d = '../browser/clouds/' + base_model_name + '/'
    if not os.path.exists(output_d):
        os.makedirs(output_d)
    # create wordcloud generator
    wc = WordCloud(width=1000, height=500, background_color='white')

    print('Loading model')
    model = LdaModel.load(files.model)
    beta = model.expElogbeta

    print('Normalizing by topics, and by words')
    pTW = normalize(beta, axis=0)
    pWT = normalize(beta, axis=1)

    # load bug<->id map, then invert to id<-> bug
    bug_to_id = json.loads(open(files.replacements).read())
    id_to_bug = {v: k for k, v in bug_to_id.items() if "." not in k}

    for i in range(len(beta)):
        # compute RAR
        t_rar = np.sqrt(pTW[i] * pWT[i])
        top_word_ids = t_rar.argsort()[:-1 - n_words:-1]
        top_words = [model.id2word.id2token[wordid] for wordid in top_word_ids]
        top_words = [id_to_bug[word] if word in id_to_bug else word for word in top_words]
        wc.fit_words(zip(top_words, t_rar[top_word_ids]))  # newer wordcloud releases expect a dict, e.g. dict(zip(...))
        wc.to_file(output_d + str(i) + '.png')
Author: knights-lab, Project: bugbrowser, Lines: 31, Source: make_clouds.py

Example 4: get_wordcloud_img

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
    def get_wordcloud_img(self, interval_id):
        text_freq = self.get_word_frequencies(interval_id)
        wordcloud = WordCloud(font_path=FONT_PATH, width=self.image_width, height=int(self.image_width * .75))
        wordcloud.fit_words(list(reversed(text_freq[-100:])))
        img_io = StringIO()
        wordcloud.to_image().save(img_io, 'JPEG', quality=70)
        img_io.seek(0)
        return img_io
Author: adeze, Project: topic_space, Lines: 10, Source: app.py

Example 5: generate_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
    def generate_cloud(self, tags, sizeX, sizeY, filename=None):
        sentence = zip(tags.keys(), tags.values())
        wordcloud = WordCloud(width=sizeX, height=sizeY, relative_scaling=0.6)
        wordcloud.fit_words(sentence)
        plt.figure(figsize=(20, 10), facecolor='k')
        plt.imshow(wordcloud)
        plt.axis("off")
        if filename is not None:
            plt.savefig(filename, facecolor='k', bbox_inches='tight')
        plt.show()
Author: Nicolas99-9, Project: TERApprentissage, Lines: 12, Source: summurization3.py

Example 6: make_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def make_cloud(docs):
    flat_doc = count_words(docs)
    from wordcloud import WordCloud

    wc = WordCloud(ranks_only=True, font_path='/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf')
    wc.fit_words(flat_doc)

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
Author: oleeson, Project: bigdata-final, Lines: 14, Source: functions.py

Example 7: generate_wordcloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def generate_wordcloud(vocab, metric, name):
    ''' Generate a simple word cloud of text '''
    list_tuples = []
    for w, c in zip(vocab, metric):
        list_tuples.append((w,int(c*100)))

    # Generate a word cloud image
    wordcloud = WordCloud(background_color = "white")

    wordcloud.fit_words(list_tuples)

    plt.imshow(wordcloud)
    plt.axis('off')
    plt.savefig('../../figures/'+name)
    pass
Author: Mikelew88, Project: ChefNet, Lines: 17, Source: evaluate_models.py

Example 8: topic_time_and_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def topic_time_and_cloud(df, topic, feature_names, nmf, title, source=False, normalize=False, freq='W', year=True, max_words=300, positivity=True, show=True):
    fig = plt.figure(figsize=(14, 8.5))
    ax1 = fig.add_axes([0.05, 0.5, 0.93, 0.41])
    article_count_by_time(df, topic=topic, source=source, normalize=normalize, freq=freq, year=year, fig=fig, label=topic_labels[topic[1]], show=False)
    ax1.xaxis.labelpad = -4
    plt.suptitle(title, fontsize=20)

    fig.text(0.05, 0.44, 'Author: Erich Wellinger', fontsize=10, alpha=0.7)
    fig.text(0.33, 0.8, 'github.com/ewellinger/election_analysis', fontsize=20, color='gray', alpha=0.5)

    outlets = [('nyt', 'NYT', '#4c72b0'), ('foxnews', 'FOX', '#c44e52'), ('npr', 'NPR', '#55a868'), ('guardian', 'GUA', '#8172b2'), ('wsj', 'WSJ', '#ccb974')]

    # Create a boolean mask for whether each document is in the topic or not
    labels_mask = topic[0][:, topic[1]]
    num_articles = labels_mask.sum()
    percent_by_source = [float(len(df.loc[(labels_mask) & (df['source'] == outlet)])) / num_articles for outlet in zip(*outlets)[0]]
    normalized = [percent / np.sum(df['source'] == outlet) for percent, outlet in zip(percent_by_source, zip(*outlets)[0])]
    normalized = [percent / np.sum(normalized) for percent in normalized]

    plt.title('Number of Articles in Topic: {}'.format(num_articles), x=0.4825)

    ''' You should incorporate the word_cloud function in here!!! '''
    if not positivity:
        ax2 = fig.add_axes([0.025, 0, 0.79, 0.43])
        wc = WordCloud(background_color='white', max_words=max_words, width=1900, height=625)
    else:
        num_sources = 0
        for idx in xrange(len(outlets)):
            if len(df.loc[(labels_mask) & (df['source'] == outlets[idx][0])]) >= 5:
                num_sources += 1
        ax2 = fig.add_axes([0.025, 0, 0.712125-(num_sources*0.034425), 0.43])
        wc = WordCloud(background_color='white', max_words=max_words, width=1715-(num_sources*83), height=625)
        ax4 = fig.add_axes([0.782125-(num_sources*0.034425), 0.035, 0.034425+(num_sources*0.034425), 0.375])
    word_freq = topic_word_freq(nmf.components_, topic[1], feature_names)
    wc.fit_words(word_freq)
    ax2.imshow(wc)
    ax2.axis('off')
    ax3 = fig.add_axes([0.825, 0.01, 0.15555, 0.4])
    normalized_source_barchart(df, topic, outlets, ax3)
    if positivity:
        sentiment_source_barchart(df.loc[labels_mask], outlets, ax=ax4)
        if num_sources < 3:
            ax4.set_title('')
    if show:
        plt.show()
    return ax1
Author: jbgalvanize, Project: election_analysis, Lines: 48, Source: plots.py

Example 9: paint_clouds

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def paint_clouds(genre, cloud_words):
    '''
    For a given genre (text), paint a word cloud of at most cloud_words (int)
    words. Call the load_frequencies function to get a frequency list with 50
    more words in it than are needed for the word cloud, in case some don't fit.
    '''
    freq_list = load_frequencies(genre, cloud_words+50)
    wc = WordCloud(background_color = "white", max_words = cloud_words, \
    max_font_size = 40, random_state = 42)
    wc.fit_words(freq_list)
    fig = plt.figure()
    plt.imshow(wc)
    plt.axis("off")
    plt.title(genre)
    plt.show()
    filename = '../data/cloud_' + genre + '.png'
    fig.savefig(filename)
Author: JenniferDunne, Project: ml-from-movie-reviews, Lines: 19, Source: word_clouds.py

Example 10: generate_wordcloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def generate_wordcloud(y, vocab):
    ''' Generate a simple word cloud of text '''
    ingred_counts = np.sum(y, axis=0)

    word_cloud_text = []

    for i, vocab in zip(ingred_counts, vocab):
        word_cloud_text.append((str(vocab),int(i)))

    # Generate a word cloud image
    wordcloud = WordCloud(background_color = "white")

    wordcloud.fit_words(word_cloud_text)

    plt.imshow(wordcloud)
    plt.axis('off')
    plt.savefig('../../figures/vocab_wordcloud.png')
    pass
Author: Mikelew88, Project: ChefNet, Lines: 20, Source: vectorize_data.py

Example 11: post_process

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def post_process():

    #with open('clda_data/out_prism', 'r') as fin:
    #    phi_prism = [np.array(ast.literal_eval(line.strip())) for line in fin]
    #phi_prism = np.array(phi_prism)

    #theta_pb = np.load('/tmp/peircebayes/avg_samples.npz')
    #theta_pb = np.load('/home/rares/Desktop/peircebayes_all_no_sampling/last_sample.npz')
    theta_pb = np.load('data/avg_samples.npz')
    phi = theta_pb['arr_1']
    print phi.shape

    vocab = pickle.load(open('data/vocab.pkl', 'r'))
    inv = dict((v, k) for k, v in vocab.iteritems())

    axis = 1
    index = list(np.ix_(*[np.arange(i) for i in phi.shape]))
    index[axis] = phi.argsort(axis)
    a = phi[index][:,-20:]
    counts = np.rint(a/np.sum(a, axis=1).reshape(-1,1)*1000).tolist()
    idx_l = index[axis][:,-20:].tolist()
    words = [[inv[i] for i in subl] for subl in idx_l]
    #pprint(words)

    # NOTE: phi_prism is only defined by the commented-out loading block above,
    # so the PRISM post-processing below is commented out as well to keep the
    # script runnable.
    #index_prism = list(np.ix_(*[np.arange(i) for i in phi_prism.shape]))
    #index_prism[axis] = phi_prism.argsort(axis)
    #a_prism = phi_prism[index_prism][:,-20:]
    #idx_l_prism = index_prism[axis][:,-20:].tolist()
    #words_prism = [[inv[i] for i in subl] for subl in idx_l_prism]

    #pprint(words_prism)

    # topic 1
    freq1 = list(reversed(zip(words[0], list(a[0,:]))))
    # topic 2
    freq2 = list(reversed(zip(words[1], list(a[1,:]))))

    # topic 1
    #freq1_prism = list(reversed(zip(words_prism[19], list(a_prism[19,:]))))
    # topic 2
    #freq2_prism = list(reversed(zip(words_prism[18], list(a_prism[18,:]))))


    wc = WordCloud(background_color="white", width=400, height=400,
        random_state=1234).fit_words(freq1)

    plt.imshow(wc.recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_1.pdf', format='pdf')
    plt.close()

    plt.imshow(wc.fit_words(freq2).recolor(color_func=grey_color_func, random_state=3))
    plt.axis("off")
    plt.savefig('data/topic_2.pdf', format='pdf')
    plt.close()
Author: raresct, Project: peircebayes_experiments, Lines: 57, Source: post_process.py

Example 12: make_word_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def make_word_cloud(topic_num, max_words=1000, width=10, height=10):
    post_nmf = pickle.load( open(POST_NMF_PICKLE) )
    post_tfidf = pickle.load( open(POST_TFIDF_PICKLE) )
    words = np.array(post_tfidf.get_feature_names())
    freq_sum = np.sum(post_nmf.components_[topic_num])
    frequencies = [val / freq_sum for val in post_nmf.components_[topic_num]]
    word_freq = zip(words, frequencies)

    wc = WordCloud(background_color='white')
    wc.fit_words(word_freq)

    #fig = plt.figure(figsize=(10,10))
    #ax = fig.add_subplot(111)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    return word_freq
Author: RobertPRanney, Project: Blog_Exploration, Lines: 22, Source: helpers.py

Example 13: topic_word_cloud

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
    def topic_word_cloud(self, topic_num, max_words=200, figsize=None, width=2400, height=1300, ax=None, mask_fname=None, inherit_color=False):
        ''' Create word cloud for a given topic
        INPUT:
            topic_num: int
            max_words: int (default 200)
                Max number of words to incorporate into the word cloud
            figsize: tuple (int, int)
                Size of the figure if an axis isn't passed
            width: int (default 2400)
            height: int (default 1300)
            ax: None or matplotlib axis object
            mask_fname: None or str
                None if no mask is desired, otherwise a string providing the path the image being used as the mask
            inherit_color: bool, default False
                Indicates whether the wordcloud should inherit the colors from the image mask
        '''
        if figsize is None:
            figsize = self.figsize

        if mask_fname:
            mask = np.array(Image.open(mask_fname))
            wc = WordCloud(background_color='white', max_words=max_words, mask=mask, width=width, height=height)
        else:
            wc = WordCloud(background_color='white', max_words=max_words, width=width, height=height)
        word_freq = self.nmf.topic_word_frequency(topic_num)

        # Fit the WordCloud object to the specific topic's word frequencies
        wc.fit_words(word_freq)

        # Create the matplotlib figure and axis if they weren't passed in
        if not ax:
            fig = plt.figure(figsize=figsize)
            ax = fig.add_subplot(111)

        if mask_fname and inherit_color:
            image_colors = ImageColorGenerator(imread(mask_fname))
            plt.imshow(wc.recolor(color_func=image_colors))
            plt.axis('off')
        else:
            ax.imshow(wc)
            ax.axis('off')
Author: ewellinger, Project: election_analysis, Lines: 43, Source: Election_Plots.py

Example 14: main

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
def main():
    # Loop over the first 10 pages of comments for the first currently-showing movie
    commentList = []
    NowPlayingMovie_list = getNowPlayingMovie_list()
    for i in range(10):
        num = i + 1
        commentList_temp = getCommentsById(NowPlayingMovie_list[0]['id'], num)
        commentList.append(commentList_temp)

    # Join the list of comments into a single string
    comments = ''
    for k in range(len(commentList)):
        comments = comments + (str(commentList[k])).strip()

    # Use a regular expression to strip punctuation (keep only Chinese characters)
    pattern = re.compile(r'[\u4e00-\u9fa5]+')
    filterdata = re.findall(pattern, comments)
    cleaned_comments = ''.join(filterdata)

    # Segment the Chinese text with jieba
    segment = jieba.lcut(cleaned_comments)
    words_df=pd.DataFrame({'segment':segment})

    # Remove stopwords
    stopwords=pd.read_csv("stopwords.txt",index_col=False,quoting=3,sep="\t",names=['stopword'], encoding='utf-8')  # quoting=3 disables quoting entirely
    words_df=words_df[~words_df.segment.isin(stopwords.stopword)]

    # Count word frequencies
    words_stat=words_df.groupby(by=['segment'])['segment'].agg({"计数":numpy.size})
    words_stat=words_stat.reset_index().sort_values(by=["计数"],ascending=False)

    # Render the result as a word cloud
    wordcloud=WordCloud(font_path="simhei.ttf",background_color="white",max_font_size=80)
    word_frequence = {x[0]:x[1] for x in words_stat.head(1000).values}

    word_frequence_list = []
    for key in word_frequence:
        temp = (key,word_frequence[key])
        word_frequence_list.append(temp)

    wordcloud=wordcloud.fit_words(word_frequence_list)
    plt.imshow(wordcloud)
Author: luanchuanmalu, Project: Code, Lines: 44, Source: webcatcher1.py

Example 15: float

# Required import: from wordcloud import WordCloud [or an alias]
# Or: from wordcloud.WordCloud import fit_words [or an alias]
count = float(count)

print "Sum: %f" % count

words = []

# get the words from the whitelist and calculate their frequences
sql = """SELECT word, count FROM word_whitelist ORDER BY `count` DESC"""
for word, frequency in query(sql):
    words.append((word, float(frequency) / count))

print "Creating cloud."

from scipy.misc import imread

mask = imread(MASK_FILE)

# generate the word cloud. This takes a while because the library is not parallelized.
wordcloud = WordCloud(font_path="/usr/share/fonts/truetype/msttcorefonts/Georgia.ttf", ranks_only=True, max_words = len(words),
    mask=mask, background_color="white")
wordcloud.fit_words(words)

print "Creating LOW RES image."
wordcloud.to_file(LOW_RES)

# now let's beef up the scale
wordcloud.scale = 12

print "Creating HI RES image."
img = wordcloud.to_image()
img.save(HIGH_RES, dpi=(100000,100000))
Author: iannightingale, Project: cs109, Lines: 33, Source: wordCloud.py


Note: The wordcloud.WordCloud.fit_words examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright of the source code remains with those authors; consult each project's License before distributing or using the code. Do not reproduce this article without permission.