本文整理汇总了Python中wordcloud.STOPWORDS.copy方法的典型用法代码示例。如果您正苦于以下问题:Python STOPWORDS.copy方法的具体用法?Python STOPWORDS.copy怎么用?Python STOPWORDS.copy使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类wordcloud.STOPWORDS
的用法示例。
在下文中一共展示了STOPWORDS.copy方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: make_cloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def make_cloud(words, image, size=10, filename='figures/cloud.png', max_words=200, horizontal=0.8):
    """Generate a word cloud from *words*, shaped and colored by *image*.

    Args:
        words: whitespace-separated text (e.g. concatenated tweets).
        image: path to the mask/color image file.
        size: figure size in inches (square).
        filename: output path for the rendered PNG.
        max_words: maximum number of words in the cloud.
        horizontal: preferred fraction of horizontally laid-out words.
    """
    # Drop Twitter noise tokens and URLs before generating the cloud.
    # (The original only removed the noise words despite claiming URL removal.)
    my_stopwords = {'RT', 'amp', 'lt'}
    words_no_urls = ' '.join(
        word for word in words.split()
        if word not in my_stopwords and 'http' not in word
    )
    # Also register the noise words with WordCloud as a second line of defense
    # (covers case/tokenization differences in WordCloud's own processing).
    stopwords = STOPWORDS.copy()
    stopwords.update(my_stopwords)
    # Load up a logo as both mask and color source.
    logo = imread(image)
    image_colors = ImageColorGenerator(logo)
    wc = WordCloud(stopwords=stopwords, mask=logo, color_func=image_colors, scale=0.8,
                   max_words=max_words, background_color='white', random_state=42,
                   prefer_horizontal=horizontal)
    wc.generate(words_no_urls)
    plt.figure(figsize=(size, size))
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(filename)
    plt.close()  # free the figure so repeated calls don't leak memory
示例2: main
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def main(save_files=False, db_filename='../output/database.sqlite'):
    """Build a word cloud from paper titles stored in an SQLite database.

    Args:
        save_files: if True, also write the cloud to ../files/title_cloud.png.
        db_filename: path to the SQLite database containing a Papers table.
    """
    conn = sqlite3.connect(db_filename)
    try:
        c = conn.cursor()
        c.execute('SELECT * FROM Papers')
        paper_content = c.fetchall()
    finally:
        conn.close()  # close even if the query raises
    # Column 1 holds the paper title — presumably; verify against the schema.
    titles = ''.join(pc[1] for pc in paper_content)
    # A Marvin Minsky mask
    mask = np.array(Image.open("../files/minsky_mask.png"))
    wc = WordCloud(background_color="white", max_words=2000, mask=mask,
                   stopwords=STOPWORDS.copy())
    # Generate word cloud
    wc.generate(titles)
    if save_files:
        # Store to file
        wc.to_file("../files/title_cloud.png")
    # Show word cloud
    plt.imshow(wc)
    plt.axis("off")
    # Show mask
    # plt.figure()
    # plt.imshow(mask, cmap=plt.cm.gray)
    # plt.axis("off")
    plt.show()
示例3: word_cloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def word_cloud(csv_file, stopwords_path, pic_path):
    """Render a masked, image-colored Chinese word cloud from a CSV of comments.

    Args:
        csv_file: CSV base name (without extension) in the current directory;
            must contain a 'content' column.
        stopwords_path: UTF-8 text file with one stopword per line.
        pic_path: background/mask image path.
    """
    pic_name = csv_file + "_词云图.png"
    # Build the CSV path portably instead of hand-concatenating backslashes.
    csv_path = os.path.join(os.path.abspath(os.curdir), csv_file + ".csv")
    d = pd.read_csv(csv_path, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        # translate() raises AttributeError on non-string cells (e.g. NaN);
        # skip those rows.
        try:
            content.append(translate(i))
        except AttributeError:
            continue
    # Tokenize with jieba and join with spaces so WordCloud can split terms.
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for line in f:
            stopwords.add(line.strip('\n'))
    # Raw string so the Windows font path is unambiguous.
    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    # Recolor the cloud from the background image's palette.
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')
    plt.show()
    wc.to_file(pic_name)
示例4: makeWordCloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def makeWordCloud(text):
    """Generate and display a word cloud for *text* using default stopwords."""
    # Copy the defaults so any future additions don't mutate the shared set.
    stopwords = STOPWORDS.copy()
    wC = WordCloud(max_words=2000, stopwords=stopwords, margin=5,
                   random_state=1, width=1600, height=800).generate(text)
    plt.imshow(wC)
    plt.show()
示例5: plotTwiiterWordCloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def plotTwiiterWordCloud():
    """Build a word cloud from a tweet trace file (path given in sys.argv[2]).

    Each trace line is comma-separated; field 6 is the tweet text. Terms that
    appear more than once (excluding stopwords, 'rt' and URLs) are rendered
    into a masked cloud saved as 'csgo-icon-wordcloud.png'.
    """
    args = sys.argv
    blacklist = STOPWORDS.copy()
    blacklist.add('rt')
    # Strip punctuation but keep '@' (mentions) and '&'.
    punctuation = set(string.punctuation)
    punctuation.remove('@')
    punctuation.remove('&')
    dictTerms = dict()
    with open(args[2], 'r') as tracefile:
        # First pass just counts lines so tqdm can show progress.
        nLines = sum(1 for _ in tracefile)
        tracefile.seek(0)
        for line in tqdm(tracefile, total=nLines):
            try:
                tweet = line.split(', ')[6].lower()
            except IndexError:
                print('IndexError')
                continue
            for p in punctuation:
                tweet = tweet.replace(p, '')
            for t in tweet.split(' '):
                # Skip one-char tokens, URLs, and stopwords at count time.
                if len(t) > 1 and 'http' not in t and t not in blacklist:
                    dictTerms[t] = dictTerms.get(t, 0) + 1
    # Keep terms seen more than once, most frequent first.
    popularTerms = sorted(dictTerms, key=dictTerms.get, reverse=True)
    popularTerms = [p for p in popularTerms if dictTerms[p] > 1]
    print(len(popularTerms))
    # Repeat each term proportionally to its count so WordCloud weights it.
    terms = ' '.join(' '.join([p] * dictTerms[p]) for p in popularTerms)
    maskfile = 'csgo-icon'
    mask = imread(maskfile + '.jpg')
    wc = WordCloud(mask=mask, background_color='white',
                   width=1280, height=720).generate(terms)
    default_colors = wc.to_array()
    plt.figure()
    plt.imshow(default_colors)
    plt.axis('off')
    plt.savefig(maskfile + '-wordcloud.png', dpi=500, bbox_inches='tight', pad_inches=0)
    plt.show()
示例6: makeCloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def makeCloud(text, imgFile, words):
    """Render a word cloud for *text* and save it as '<imgFile>.jpeg'.

    *words* is an extra iterable of terms to exclude on top of the defaults.
    """
    banned = STOPWORDS.union(words)
    cloud = WordCloud(max_words=NUM_OF_WORDS, width=WIDTH, height=HEIGHT,
                      stopwords=banned).generate(text)
    rendered = cloud.to_image()
    rendered.show()
    rendered.save(imgFile + '.jpeg')
示例7: writeFreq
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def writeFreq(text, outFile, words):
    """Write 'word,frequency' lines for *text* into the open file *outFile*.

    *words* is an extra iterable of stopwords to exclude on top of the defaults.
    """
    excludewords = STOPWORDS.copy()
    excludewords.update(words)
    wordcloud = WordCloud(max_words=NUM_OF_WORDS, stopwords=excludewords)
    freq = wordcloud.process_text(text)
    # Newer wordcloud versions return a {word: count} dict; older versions
    # returned a list of (word, count) tuples. Handle both.
    items = freq.items() if isinstance(freq, dict) else freq
    for word, count in items:
        outFile.write(word + ',' + str(count) + '\n')
示例8: generate_word_cloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def generate_word_cloud(text, mask_filename):
    """Build a masked word cloud from *text* and return the temp PNG's path.

    *mask_filename* is resolved relative to this module's directory.
    """
    base_dir = path.dirname(__file__)
    mask = imread(path.join(base_dir, mask_filename))
    # Meeting-bot noise words on top of the default stopword set.
    stopwords = STOPWORDS.copy()
    for noise in ("info", "meetbot", "supybot"):
        stopwords.add(noise)
    cloud = WordCloud(max_words=1000, mask=mask, stopwords=stopwords,
                      margin=10, random_state=1).generate(text)
    _, tmpfilename = tempfile.mkstemp('-wordcloud.png')
    cloud.to_file(tmpfilename)
    return tmpfilename
示例9: main
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def main():
    """Render a word cloud of CVPR 2015 paper titles and save it as PNG."""
    d = os.path.dirname(__file__)
    DOC_NAME = "cvpr2015papers.txt"
    # Use a context manager so the file handle is always released.
    with open(os.path.join(d, DOC_NAME)) as f:
        text = f.read()
    # adding computer vision specific stopwords
    stopwords = STOPWORDS.copy()
    stopwords.add("image")
    wc = WordCloud(max_words=300, stopwords=stopwords, width=800, height=400)
    wc.generate(text)
    wc.to_file(os.path.join(d, "cvpr2015wordcloud.png"))
    plt.imshow(wc)
    plt.axis("off")
    plt.show()
示例10: generate_wordcloud
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def generate_wordcloud(text):
    """Render a randomly-recolored word cloud of *text*, save it to tmp.png,
    and display it."""
    def my_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        """
        To change colors change the range for random ints below:
        Hue values are between 0 and 360
        Follows rainbow:
            Red Orange Yellow Green  Blue  Indigo Violet
            0   50     100    150    200   250    300   360
        """
        hue_lower = 0
        hue_upper = 150
        # NOTE(review): saturation 500% and lightness up to 120% exceed the
        # usual HSL ranges — presumably clamped downstream; confirm intended.
        saturation = 500
        light_lower = 80
        light_upper = 120
        return "hsl(%d, %d%%, %d%%)" % (
            random.randint(hue_lower, hue_upper),
            saturation,
            random.randint(light_lower, light_upper),
        )
    stopwords = STOPWORDS.copy()
    stopwords.add("us")
    stopwords.add("one")
    stopwords.add("will")
    stopwords.add("u")
    # Random layout seed so repeated calls produce different arrangements.
    rand_num = random.randint(1, 100)
    wc = WordCloud(
        max_words=100, stopwords=stopwords, margin=10, random_state=rand_num,
        width=2000, height=1200
    ).generate(text)
    plt.figure(figsize=(32, 20), dpi=100)  # figure handle not needed
    plt.imshow(wc.recolor(color_func=my_color_func, random_state=1))
    # Save image
    outfilename = "tmp.png"
    wc.to_file(outfilename)
    plt.axis("off")
    plt.show()
示例11: makeWC
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def makeWC(theText, mask_image, mw):
    """Build and return a masked WordCloud for *theText*.

    Args:
        theText: source text.
        mask_image: mask array constraining the cloud's shape.
        mw: maximum number of words to render.
    """
    SW = STOPWORDS.copy()
    # Web/social-media chrome words plus the project's bad-word list.
    mywords = ['and', 'the', 'to', 'by', 'in', 'of', 'up',
               'Facebook', 'Twitter', 'Pinterest', 'Flickr',
               'Google', 'Instagram', 'login', 'Login', 'Log',
               'website', 'Website', 'Contact', 'contact',
               'twitter', 'Branding', 'Tweet', 'pic', 'location',
               'Details'
               ] + list(bad_words())
    # Plain update instead of a side-effecting list comprehension.
    SW.update(mywords)
    wordcloud = WordCloud(
        relative_scaling=0,
        prefer_horizontal=random.uniform(0.5, 1),
        stopwords=SW,
        background_color='black',
        max_words=mw,
        mask=mask_image
    ).generate(theText)
    return wordcloud
示例12: map
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
# Concatenate the 'body' field of every document into one text blob.
cursor = links.find({}, {"body": 1})
test = ''.join(document['body'] for document in cursor)
with codecs.open("text_mining/my_stopwords.txt", "r", encoding="utf-8") as f:
    read_data = f.readlines()
stopwords = STOPWORDS.copy()
for data in read_data:
    # Strip the trailing newline/whitespace here; the original added raw
    # lines and then wrapped the set in map(), which on Python 3 yields a
    # one-shot iterator rather than a set.
    stopwords.add(data.strip())
mask_choko = np.array(Image.open("text_mining/chokomag.png"))
wordcloud = WordCloud(stopwords=stopwords, background_color="black",
                      max_words=10000, mask=mask_choko).generate(test)
示例13: main
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def main():
    """Generate per-question and per-stakeholder word clouds from survey data.

    Reads 'alldata.pickle' (a dict mapping stakeholder -> question -> list of
    responses), aggregates response text per question and per stakeholder,
    and writes PNG clouds under clouds/.

    NOTE(review): this block uses Python 2 constructs
    (str.translate(None, ...)); it will not run unmodified on Python 3.
    """
    with open('alldata.pickle', 'rb') as fh:
        data = pickle.load(fh)
    data['wordclouds'] = {}
    stopwords = STOPWORDS.copy()
    # Question-specific stopwords: the question's own keywords would otherwise
    # dominate every cloud.
    sphere_stopwords = {'common': ['sam', 'mayu', 'mani'],
                        '1a': ['live', 'born', 'year', 'years', 'yrs', 'since', 'bangalore'],
                        '1b': ['live', 'born', 'year', 'yrs', 'moved', 'since', 'bangalore'],
                        '2a': ['balance', 'time'],
                        '2b': ['inspiration'],
                        '3': ['lifestyle', ],
                        '4a': ['bangalore', 'advantage', 'advantages'],
                        '4b': ['bangalore', 'challenge', 'challenges'],
                        '4c': ['work'],
                        '4d': ['dependency', 'dependencies', 'external'],
                        '4e': ['area', 'bangalore'],
                        '4f': ['measures'],
                        '5b': ['food', 'air'],
                        '6a': ['end', 'user', 'enduser'],
                        '6b': ['month', 'income', 'per', 'household'],
                        '6c': ['interact', 'interaction', 'end', 'user', 'enduser'],
                        '6d': ['design', 'end', 'user', 'enduser'],
                        '7a': ['quality', 'control', 'challenge'],
                        '7b': ['end', 'user', 'access', 'challenge'],
                        '7c': [],
                        '7d': [],
                        '8a': ['tool', 'tools', 'resource', 'resources'],
                        '8b': ['average', 'age', 'years', 'team', 'people'],
                        '8c': ['fund', 'funding', 'funded', 'money'],
                        '8d': ['tech', 'technology'],
                        '8e': ['office', 'location', 'work', 'space'],
                        '9a': ['skill', 'skills'],
                        '9b': ['training'],
                        '10a': ['active', 'internal', 'collaboration', 'collaborate'],
                        '10b': ['active', 'external', 'collaboration', 'collaborate'],
                        '10c': ['lead', 'leads', 'learning', 'collaboration', 'collaborate'],
                        '10d': ['part', 'formal', 'collaboration', 'collaborate', 'platform'],
                        '10e': ['culture', 'open', 'share', 'sharing', 'sector'],
                        '10f': ['share', 'shares', 'shared'],
                        '11a': ['partner', 'partners', 'partnership', 'partnerships'],
                        '11b': ['criteria', 'partner', 'partners', 'partnership', 'partnerships'],
                        '11c': ['partner', 'partners', 'partnership', 'partnerships', 'sector'],
                        '12a': ['monitoring', 'evaluation', 'method', 'methods', 'impact'],
                        '12b': ['goal', 'next', 'year', 'years'],
                        '12c': ['impact', 'studies', 'data', 'shared'],
                        '13a': ['entrepreneur'],
                        '13b': ['start', 'starting', 'startup'],
                        '13c': ['entrepreneur', 'entrepreneurs', 'interact', 'interaction'],
                        '13d': ['entrepreneur', 'entrepreneurs', 'role', 'local', 'needs'],
                        '13e': ['advantage', 'advantages', 'local', 'entrepreneur', 'entrepreneurs'],
                        '13f': ['barrier', 'barriers', 'entry', 'local', 'entrepreneur', 'entrepreneurs'],
                        '13g': ['challenge', 'challenges', 'local', 'entrepreneur', 'entrepreneurs'],
                        '13h': ['entrepreneur', 'entrepreneurs', 'fail'],
                        '13i': ['resource', 'resources', 'need', 'needed', 'strengthen', 'local', 'entrepreneur', 'entrepreneurs'],
                        '14a': ['recommend', 'stakeholder', 'stakeholders'],
                        '14b': ['map', 'visual'],
                        }
    # Filter out standalone words 2 letters or shorter
    shortword = re.compile(r'\W*\b\w{1,2}\b')
    questions = []
    # text is keyed by BOTH stakeholder codes and question codes — assumes the
    # two namespaces never collide; TODO confirm against the data.
    text = {}
    for stakeholder in data:
        if stakeholder == 'wordclouds':
            continue  # bookkeeping key added above, not a stakeholder record
        text[stakeholder] = ''
        for question in data[stakeholder]:
            if question not in questions:
                questions.append(question)
                text[question] = ''
            # Python 2 str.translate form: strips all punctuation.
            response = shortword.sub('', ' '.join(data[stakeholder][question]).lower().translate(None, string.punctuation))
            text[stakeholder] += response + ' '
            text[question] += response + ' '
    # Generate word clouds per question ('5a' is deliberately skipped).
    for question in sorted(questions):
        if question != '5a':  # was `is not`: identity comparison on a string
            try:
                s = stopwords.union(set(sphere_stopwords[question] + sphere_stopwords['common']))
                wordcloud = WordCloud(stopwords=s, width=1600, height=800, background_color='white').generate(text[question])
                wordcloud.to_file('clouds/' + question + '-' + str(len(text[question].split())) + 'words.png')
            except Exception:
                # Best-effort: report which question failed and keep going.
                print(question)
    # Generate one cloud per stakeholder.
    for stakeholder in data:
        if stakeholder == 'wordclouds':
            continue
        try:
            s = stopwords.union(set(sphere_stopwords['common']))
            wordcloud = WordCloud(stopwords=s, width=1600, height=800, background_color='white').generate(text[stakeholder])
            wordcloud.to_file('clouds/' + stakeholder + '-' + str(len(text[stakeholder].split())) + 'words.png')
        except Exception:
            print(stakeholder)
示例14: main
# 需要导入模块: from wordcloud import STOPWORDS [as 别名]
# 或者: from wordcloud.STOPWORDS import copy [as 别名]
def main():
data = {}
data['questions'] = []
data['stakeholders'] = []
data['wordclouds'] = {}
stopwords = STOPWORDS.copy()
sphere_stopwords = {'1a':['live', 'born', 'year', 'yrs', 'since'], \
'1b':['live', 'born', 'year', 'yrs', 'moved', 'since'], \
'2a':['time'], \
'2b':[], \
'3':[], \
'4a':['bangalore', 'advantage', 'advantages'], \
'4b':['bangalore', 'challenge', 'challenges'], \
'4c':['work'], \
'4d':['dependency', 'dependencies', 'external'], \
'4e':['area', 'bangalore'], \
'4f':[], \
'5b':['food', 'air'], \
'6a':['end', 'user', 'enduser'], \
'6b':['month', 'income', 'per', 'household'], \
'6c':['interact', 'interaction','end', 'user', 'enduser'], \
'6d':['design', 'end', 'user', 'enduser'], \
'7a':['quality', 'control', 'challenge'], \
'7b':['end', 'user', 'access', 'challenge'], \
'7c':[], \
'7d':[], \
'8a':['tool', 'resource', 'resource'], \
'8b':['average', 'age', 'team'], \
'8c':[], \
'8d':[], \
'8e':[], \
'9a':[], \
'9b':[], \
'10a':[], \
'10b':[], \
'10c':[], \
'10d':[], \
'10e':[], \
'10f':[], \
'11a':[], \
'11b':[], \
'11c':[], \
'12a':[], \
'12b':[], \
'12c':[], \
'13a':[], \
'13b':[], \
'13c':[], \
'13d':[], \
'13e':[], \
'13f':[], \
'13g':[], \
'13h':[], \
'13i':[], \
'14a':[], \
'14b':[], \
}
#Filter out standalone words 2 letters or shorter
shortword = re.compile(r'\W*\b\w{1,2}\b')
with open('alldata.csv') as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
stakeholder = row['Code']
data['stakeholders'].append(stakeholder)
data[stakeholder] = {}
data[stakeholder]['alltext'] = ''
for key in row:
if key != 'Code':
question = key
if question not in data['questions']:
data['questions'].append(question)
data[question] = ''
raw_response = shortword.sub('', row[key].lower().translate(None,string.punctuation))
stemmed_response = ' '.join([stem(word) for word in raw_response.split()])
data[stakeholder][question] = row[key]
data[stakeholder]['alltext'] += stemmed_response
data[stakeholder]['alltext'] += ' '
data[question] += stemmed_response
data[question] += ' '
#Generate word clouds:
for question in sorted(data['questions']):
if question is not '5a':
#Number of words per question
# print question, ':', len(data[question].split())
try:
s = stopwords.union(set(sphere_stopwords[question]))
data['wordclouds'][question] = WordCloud(stopwords=s).generate(data[question])
except:
print question
for stakeholder in data['stakeholders']:
try:
data['wordclouds'][stakeholder] = WordCloud(stopwords=stopwords).generate(data[stakeholder]['alltext'])
except:
#.........这里部分代码省略.........