本文整理汇总了Python中wordcloud.STOPWORDS.union方法的典型用法代码示例。如果您正苦于以下问题：Python STOPWORDS.union方法的具体用法？Python STOPWORDS.union怎么用？Python STOPWORDS.union使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类wordcloud.STOPWORDS的用法示例。
在下文中一共展示了STOPWORDS.union方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: config_stopwords
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
def config_stopwords(self, more_stopwords=None):
    """
    (obj) -> None
    Optionally extend the stopword set stored on this object.

    When ``more_stopwords`` is given, ``self.STOPWORDS`` is set to the union
    of the imported ``STOPWORDS`` and ``more_stopwords``. When it is ``None``
    nothing is assigned.
    """
    # Nothing extra requested: leave the object untouched.
    if more_stopwords is None:
        return
    self.STOPWORDS = STOPWORDS.union(more_stopwords)
示例2: title_wordcloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
def title_wordcloud(dataFrame):
    """Draw, save and display a word cloud of the ``track_name`` column.

    All values of ``dataFrame['track_name']`` are joined with spaces and fed
    to ``WordCloud``. The image "spotify-logo.jpg" is used as the mask, and
    the result is written to "project3_wordcloud.png" before being shown.

    Relies on ``np``, ``path`` and ``plt`` being available from the
    enclosing module.
    """
    from wordcloud import WordCloud, STOPWORDS
    from PIL import Image

    # One long string holding every track title.
    titles_text = " ".join(list(dataFrame['track_name']))

    # Default stopwords plus terms that appear in titles without being
    # informative.
    ignored_words = STOPWORDS.union(
        ["feat", "Remix", "Edit", "Radio", "Version", "Mix", "Remastered"]
    )

    logo_path = path.join("spotify-logo.jpg")
    logo_mask = np.array(Image.open(logo_path))

    cloud = WordCloud(
        width=2880,
        height=1800,
        background_color="white",
        stopwords=ignored_words,
        mask=logo_mask,
    )
    rendered = cloud.generate(titles_text)

    # Plot the generated image without axes or padding, then save and show.
    plt.figure(figsize=(10, 6))
    plt.imshow(rendered)
    plt.axis("off")
    plt.tight_layout(pad=0)
    plt.savefig("project3_wordcloud.png")
    plt.show()
示例3: open
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
from wordcloud import WordCloud
from wordcloud import STOPWORDS
import matplotlib.pyplot as plt
import re
filename="/Users/chunmeiGao/Documents/Dataincubator/emailsubject.txt"
# Read the whole text. The context manager closes the file handle as soon as
# the read is done instead of leaving it to the garbage collector.
with open(filename) as subject_file:
    text = subject_file.read()
# print() with a single argument behaves the same on Python 2 and Python 3;
# the former `print text` statement is a SyntaxError on Python 3.
print(text)
# Strip reply/forward prefixes and the company name. The patterns are applied
# one after another, in this order, exactly like the original chain of calls.
for noise in ('Re:', 'RE:', 'FW:', 'Fwd:', 'Enron'):
    text = re.sub(noise, '', text)
# Extra tokens to ignore on top of wordcloud's default stopwords.
more_stopwords = {'X', 'Re', 'Fwd','ENRON','NA','FW'}
STOPWORDS = STOPWORDS.union(more_stopwords)
# Generate a word cloud image
wordcloud = WordCloud(stopwords=STOPWORDS).generate(text)
# Display the generated image:
# the matplotlib way:
plt.imshow(wordcloud)
plt.axis("off")
# take relative word frequencies into account, lower max_font_size
# NOTE(review): this second cloud is built without the extended STOPWORDS --
# confirm whether that is intentional.
wordcloud = WordCloud(max_font_size=40, relative_scaling=.5).generate(text)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
示例4: stop_words_configs
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
def stop_words_configs(self):
    """
    Extend the stopword set with a fixed list of project-specific terms.

    Sets ``self.STOPWORDS`` to the union of the imported ``STOPWORDS`` and
    the terms 'innojam', 'video' and 'cebit2014'.
    """
    self.STOPWORDS = STOPWORDS.union({'innojam', 'video', 'cebit2014'})
示例5: WordCloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
pessoa = sentenca[0]
frase = "".join(sentenca[1:])
falas.append(dict(pessoa=pessoa, frase=frase))
# Collect every sentence spoken by the 'Declarante' speaker.
# NOTE(review): .decode('utf-8') assumes each 'frase' is a byte string
# (Python 2 str) -- confirm against the code that fills `falas`.
declarante = [
    fala['frase'].decode('utf-8')
    for fala in falas
    if fala['pessoa'] == 'Declarante'
]
declarante = "".join(declarante).lower()
# Fuse "não sei" into one token so the phrase is counted as a single word.
declarante = declarante.replace(u"não sei", u"nãosei")
# Portuguese filler words to exclude from the cloud.
# Fix: a comma was missing after u'quem', so Python's implicit string-literal
# concatenation produced the single entry u'quemfui' and neither 'quem' nor
# 'fui' was actually filtered.
swords = ['que', 'eu', u'não', 'da', 'de', 'por', 'ele', u'você', u'está',
          'tem', 'um', 'uma', 'se', 'foi', u'lá', 'pra', 'para', 'vai',
          u'já', 'na', 'era', 'em', u'aí', 'minha', u'nós', 'os', 'as',
          'ou', 'essa', 'isso', 'como', 'aqui', 'pois', u'só', 'quando',
          u'então', 'muito', 'porque', 'acho', 'nem', 'mais', 'meu',
          'ser', 'estou', 'vou', 'coisa', 'tenho', 'tinha', 'ter', u'quem',
          'fui', 'mas', u'são', 'muita', 'mim', 'tudo', 'toda', 'todo',
          'deve', 'falar', 'eles', 'das']
STOPWORDS = STOPWORDS.union(swords)
wordcloud = WordCloud(width=800, height=400,
                      stopwords=STOPWORDS).generate(declarante)
wordcloud.to_file("wordcloud.png")
示例6: print
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
# Twitter API docs:
# https://dev.twitter.com/docs/api/1/get/search
#-----------------------------------------------------------------------
# Search tweets matching "modi" through the `twitter` client object, which is
# created outside this excerpt.
# NOTE(review): count=5000 is only what is requested; the API may cap the
# number of results returned per call -- confirm against the API docs.
query = twitter.search.tweets(q = "modi", count=5000) #, until='2016-01-07')
#-----------------------------------------------------------------------
# How long did this query take?
#-----------------------------------------------------------------------
print ("Search complete (%.3f seconds)" % (query["search_metadata"]["completed_in"]))
#-----------------------------------------------------------------------
# Loop through each of the results, and print its content.
# (Disabled: kept for debugging.)
#-----------------------------------------------------------------------
#for result in query["statuses"]:
# print ("(%s) @%s %s" % (result["created_at"], result["user"]["screen_name"], result["text"]))
# Make a corpus from the list of tweets: the text of every returned status,
# joined with single spaces.
status_list = [ result['text'] for result in query['statuses']]
corpus = ' '.join(status_list)
# Read in the image; its pixel array is used both as the cloud mask and as
# the source for the colour function below.
img = Image.open("modi.jpg")
#img = img.resize((980,1080), Image.ANTIALIAS)
modi_coloring = np.array(img)
image_colors = ImageColorGenerator(modi_coloring)
#hcmask = scipy.ndimage.zoom(hcmask, 2, order=3)
# Extend the default stopwords with URL fragments, retweet markers and a few
# filler words. This rebinds the module-level name STOPWORDS.
STOPWORDS = STOPWORDS.union({"http","https","t","co","rt","since","towards","now","ok","okay","tag", "amp"})
#wc = WordCloud(background_color="white", max_words=2000, mask=hcmask, stopwords=STOPWORDS)
# Build the cloud with the image as mask and colour source, then write it
# to "wc_color.png".
wc = WordCloud(font_path='cabin-sketch.bold.ttf', background_color="white", max_words=2000, mask=modi_coloring, color_func=image_colors, stopwords=STOPWORDS)
wc.generate(corpus)
wc.to_file("wc_color.png")
示例7: info
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import union [as 别名]
def info(request):
if request.method == 'GET' and 'screen_name' in request.GET:
scn = request.GET['screen_name']
if scn == "":
return render(request, 'tweets/login.html', {'message':'Enter a valid Twitter handle'})
else:
# print("start", datetime.now())
STAT_PATH = os.path.join(settings.BASE_DIR, 'tweets/static/tweets/')
handle = tweepy.OAuthHandler(settings.CONSUMER_KEY, settings.CONSUMER_SECRET)
handle.set_access_token(settings.ACCESS_TOKEN, settings.ACCESS_TOKEN_SECRET)
api = tweepy.API(handle)
try:
user = api.get_user(screen_name=scn)
except tweepy.TweepError:
return render(request, 'tweets/login.html', {'message':'Enter a valid Twitter handle'})
"""
Tweets vs Weekday graph
"""
try:
timeline = api.user_timeline(screen_name=scn, count=3200, include_rts=True)
mid = timeline[-1].id - 1
while True:
tl = api.user_timeline(screen_name=scn, count=3200, include_rts=True, max_id=mid)
if not len(tl):
break
timeline += tl
mid = tl[-1].id - 1
except tweepy.TweepError or TypeError:
return render(request, 'tweets/login.html', {'message':'Enter a valid Twitter handle'})
y = [0]*7
for tw in timeline:
d = tw.created_at.strftime("%w")
y[int(d)] += 1
x = [0,1,2,3,4,5,6]
xpoints = ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat']
plt.xticks(x, xpoints)
plt.plot(x, y, 'b-')
plt.xlabel('Days of week')
plt.ylabel('No. of tweets')
path_graph = STAT_PATH + 'graph.png'
if os.path.isfile(path_graph):
os.remove(path_graph)
plt.savefig(path_graph, dpi=300, bbox_inches='tight')
plt.clf()
"""
Tag-cloud
"""
# more stopwords
file = open(STAT_PATH + 'stopwords.txt', 'r')
more_stops = file.readlines()
for i in range(len(more_stops)):
more_stops[i] = more_stops[i].rstrip('\n')
global STOPWORDS
STOPWORDS = STOPWORDS.union(more_stops)
words = []
matrix = []
for tw in timeline:
matrix.append(tw.text.split())
words = words + tw.text.split()
long_tweet_stripd = ""
for w in words:
if w != 'RT' and not(w.startswith('http')) and not(w.startswith('@')) and not(w.startswith('#')) and not(w.lower() in STOPWORDS):
long_tweet_stripd = " ".join([long_tweet_stripd, w.lower()])
un_words = long_tweet_stripd.split()
mask = imread(STAT_PATH + 'twitter_mask.png')
wcloud = WordCloud(max_words=50, background_color='white', stopwords=STOPWORDS, mask=mask).generate(long_tweet_stripd)
# print(long_tweet_stripd)
path_wordcloud = STAT_PATH + 'wordcloud.png'
if os.path.isfile(path_wordcloud):
os.remove(path_wordcloud)
plt.imshow(wcloud)
plt.gca().invert_yaxis()
plt.axis('off')
plt.savefig(path_wordcloud, dpi=600, bbox_inches='tight')
plt.clf()
plt.close()
"""
Word co-occurences matrix
"""
all_words_use = []
for w in un_words:
try:
#.........这里部分代码省略.........