

Python STOPWORDS.copy Method Code Examples

This article collects typical usage examples of the wordcloud.STOPWORDS.copy method in Python. If you are wondering how to call STOPWORDS.copy, how it is used in practice, or what real-world examples look like, the curated snippets below should help. You can also explore further usage examples of wordcloud.STOPWORDS, the set this method belongs to.


The following presents 14 code examples of the STOPWORDS.copy method, sorted by popularity by default.
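
All 14 examples below follow the same basic pattern: wordcloud.STOPWORDS is a module-level set of English stopwords, and calling .copy() returns an independent set that can be extended without mutating the shared default. The following minimal sketch illustrates that pattern; the extra stopwords ("rt", "amp") and the output filename are illustrative choices, not taken from any particular example.

from wordcloud import WordCloud, STOPWORDS

# Copy the built-in stopword set so additions stay local to this script
# instead of mutating the module-level STOPWORDS shared by every caller.
stopwords = STOPWORDS.copy()
stopwords.update({"rt", "amp"})  # illustrative extra stopwords

text = "RT amp a tiny word cloud demo with a few repeated demo words demo cloud"
wc = WordCloud(stopwords=stopwords, background_color="white").generate(text)
wc.to_file("example_cloud.png")  # illustrative output path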

Example 1: make_cloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def make_cloud(words, image, size=10, filename='figures/cloud.png', max_words=200, horizontal=0.8):

    # Remove URLs, 'RT' text, screen names, etc
    my_stopwords = ['RT', 'amp', 'lt']
    words_no_urls = ' '.join([word for word in words.split()
                              if word not in my_stopwords])

    # Add stopwords, if needed
    stopwords = STOPWORDS.copy()
    stopwords.add("RT")
    stopwords.add('amp')
    stopwords.add('lt')

    # Load up a logo as a mask & color image
    logo = imread(image)

    # Generate colors
    image_colors = ImageColorGenerator(logo)

    # Generate plot
    wc = WordCloud(stopwords=stopwords, mask=logo, color_func=image_colors, scale=0.8,
                   max_words=max_words, background_color='white', random_state=42, prefer_horizontal=horizontal)

    wc.generate(words_no_urls)

    plt.figure(figsize=(size, size))
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(filename)
Developer: dr-rodriguez, Project: conventions_2016, Lines: 31, Source: wordcloud.py

Example 2: main

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def main(save_files = False, db_filename = '../output/database.sqlite'):
    conn = sqlite3.connect(db_filename)
    c = conn.cursor()

    # Retrieve papers
    c.execute('''SELECT *
                 FROM Papers''')

    paper_content = c.fetchall()
    conn.close()

    titles = ''

    for pc in paper_content:
        titles += pc[1]

    # A Marvin Minsky mask
    mask = np.array(Image.open("../files/minsky_mask.png"))

    wc = WordCloud(background_color="white", max_words=2000, mask=mask, stopwords=STOPWORDS.copy())
    # Generate word cloud
    wc.generate(titles)
    
    if (save_files):
        # Store to file
        wc.to_file("../files/title_cloud.png")
    
    # Show word cloud
    plt.imshow(wc)
    plt.axis("off")
    # Show mask
#    plt.figure()
#    plt.imshow(mask, cmap=plt.cm.gray)
#    plt.axis("off")
    plt.show()
Developer: edintelligence, Project: nips-2015-papers, Lines: 37, Source: exploration.py

Example 3: word_cloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def word_cloud(csv_file, stopwords_path, pic_path):
    pic_name = csv_file+"_词云图.png"
    path = os.path.abspath(os.curdir)
    csv_file = path+ "\\" + csv_file + ".csv"
    csv_file = csv_file.replace('\\', '\\\\')
    d = pd.read_csv(csv_file, engine='python', encoding='utf-8')
    content = []
    for i in d['content']:
        try:
            i = translate(i)
        except AttributeError as e:
            continue
        else:
            content.append(i)
    comment_after_split = jieba.cut(str(content), cut_all=False)
    wl_space_split = " ".join(comment_after_split)
    backgroud_Image = plt.imread(pic_path)
    stopwords = STOPWORDS.copy()
    with open(stopwords_path, 'r', encoding='utf-8') as f:
        for i in f.readlines():
            stopwords.add(i.strip('\n'))

    wc = WordCloud(width=1024, height=768, background_color='white',
                   mask=backgroud_Image, font_path=r"C:\simhei.ttf",
                   stopwords=stopwords, max_font_size=400,
                   random_state=50)
    wc.generate_from_text(wl_space_split)
    img_colors = ImageColorGenerator(backgroud_Image)
    wc.recolor(color_func=img_colors)
    plt.imshow(wc)
    plt.axis('off')  
    plt.show() 
    wc.to_file(pic_name)
Developer: miaomao1989, Project: DA_projects, Lines: 36, Source: visualization_analysis.py

Example 4: makeWordCloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def makeWordCloud(text):
	#preprocess
	stopwords = STOPWORDS.copy()
#        text.replace("State","")
#        text.replace("year","")
#        text.replace("Congress","")
#        text.replace("will","")
	wC = WordCloud(max_words=2000, stopwords=stopwords, margin=5, random_state=1, width = 1600, height = 800).generate(text)
	plt.imshow(wC)
	plt.show()
Developer: kshabahang, Project: fbminer, Lines: 12, Source: mine.py

Example 5: plotTwiiterWordCloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def plotTwiiterWordCloud():
	args = sys.argv
	tracefile = open(args[2], 'r')
	nLines = sum(1 for line in tracefile)
	tracefile.seek(0)

	dictTerms = dict()
	blacklist = STOPWORDS.copy()
	blacklist.add('rt')
	punctuation = set(string.punctuation)
	punctuation.remove('@')
	punctuation.remove('&')
	# punctuation.remove('#')
	for line in tqdm(tracefile, total=nLines):
		try:
			linesplited = line.split(', ')
			tweet = linesplited[6].lower()
			for p in punctuation:
				tweet = tweet.replace(p, '')
			terms = tweet.split(' ')
			for t in terms:
				if (len(t) > 1) and 'http' not in t and (t not in blacklist):
					try:
						dictTerms[t] += 1
					except KeyError:
						dictTerms[t] = 1
		except IndexError:
			print('IndexError')
	for t in blacklist:
		try:
			del dictTerms[t]
		except KeyError:
			continue
	popularTerms = sorted(dictTerms.keys(), key=lambda w:dictTerms[w], reverse=True)
	popularTerms = [p for p in popularTerms if (dictTerms[p]) > 1]
	print(len(popularTerms))
	text = list()
	terms = ''
	for p in popularTerms:
		text.append((p, dictTerms[p]))
		for i in range(dictTerms[p]):
			terms += ' ' + p
	# print terms
	maskfile = 'csgo-icon'
	mask = imread(maskfile + '.jpg') # mask=mask
	wc = WordCloud(mask=mask, background_color='white', width=1280, height=720).generate(terms) # max_words=10000
	default_colors = wc.to_array()
	plt.figure()
	plt.imshow(default_colors)
	plt.axis('off')
	plt.savefig(maskfile + '-wordcloud.png', dpi=500, bbox_inches='tight', pad_inches=0) # bbox_inches='tight'
	plt.show()
Developer: leokassio, Project: csgo-analysis, Lines: 54, Source: csgo-analysis.py

Example 6: makeCloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def makeCloud(text, imgFile, words):
    """
    Makes a word cloud and stores it in a jpeg file
    """
    excludewords = STOPWORDS.copy()
    
    for word in words:
        excludewords.add(word)
    
    wordcloud = WordCloud(max_words=NUM_OF_WORDS, width=WIDTH, height=HEIGHT, stopwords=excludewords).generate(text)
    image = wordcloud.to_image()
    image.show()
    image.save(imgFile + '.jpeg')      
Developer: KaushikR, Project: SubredditStats, Lines: 15, Source: RandiaAnalysis.py

Example 7: writeFreq

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def writeFreq(text, outFile, words):
    """
    Writes frequencies of words into the specified file
    """

    excludewords = STOPWORDS.copy()
    
    for word in words:
        excludewords.add(word)
    
    wordcloud = WordCloud(max_words=NUM_OF_WORDS, stopwords=excludewords)
    freqList  = wordcloud.process_text(text)

    for item in freqList:
        outFile.write(item[0] + ',' + str(item[1]) + '\n')
Developer: KaushikR, Project: SubredditStats, Lines: 17, Source: RandiaAnalysis.py

Example 8: generate_word_cloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def generate_word_cloud(text, mask_filename):
    d = path.dirname(__file__)
    mask = imread(path.join(d, mask_filename))

    # adding meeting-log specific stopwords
    stopwords = STOPWORDS.copy()
    stopwords.add("info")
    stopwords.add("meetbot")
    stopwords.add("supybot")

    wc = WordCloud(max_words=1000, mask=mask, stopwords=stopwords, margin=10,
                random_state=1).generate(text)

    _, tmpfilename = tempfile.mkstemp('-wordcloud.png')
    wc.to_file(tmpfilename)
    return tmpfilename
Developer: decause, Project: word_cloud, Lines: 18, Source: mailcloud.py

Example 9: main

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def main():
    d = os.path.dirname(__file__)
    DOC_NAME = "cvpr2015papers.txt"
    text = open(os.path.join(d, DOC_NAME)).read()

    # adding computer vision specific stopwords
    stopwords = STOPWORDS.copy()
    stopwords.add("image")

    wc = WordCloud(max_words=300, stopwords=stopwords, width=800, height=400)
    wc.generate(text)
    wc.to_file(os.path.join(d, "cvpr2015wordcloud.png"))

    plt.imshow(wc)
    plt.axis("off")
    plt.show()
Developer: satojkovic, Project: cvpr2015wordcloud, Lines: 18, Source: cvpr2015wordcloud.py

Example 10: generate_wordcloud

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def generate_wordcloud(text):
    def my_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
        """
        To change colors change the range for random ints below:
        Hue values are between 0 and 360
        Follows rainbow: 
        Red Orange Yellow Green Blue Indigo Violet
         0   50  100  150  200  250  300   360
        """
        hue_lower = 0
        hue_upper = 150

        saturation = 500

        light_lower = 80
        light_upper = 120

        return "hsl(%d, %d%%, %d%%)" % (
            random.randint(hue_lower, hue_upper),
            saturation,
            random.randint(light_lower, light_upper),
        )

    stopwords = STOPWORDS.copy()
    stopwords.add("us")
    stopwords.add("one")
    stopwords.add("will")
    stopwords.add("u")

    rand_num = random.randint(1, 100)

    wc = WordCloud(
        max_words=100, stopwords=stopwords, margin=10, random_state=rand_num, width=2000, height=1200
    ).generate(text)

    fig = plt.figure(figsize=(32, 20), dpi=100)
    plt.imshow(wc.recolor(color_func=my_color_func, random_state=1))

    # Save image
    outfilename = "tmp.png"
    wc.to_file(outfilename)
    plt.axis("off")

    plt.show()
Developer: araval, Project: Twitter-Word-Cloud-Generator, Lines: 46, Source: makeWordCloud.py

Example 11: makeWC

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def makeWC(theText, mask_image, mw):
    SW = STOPWORDS.copy()
    mywords = ['and', 'the', 'to', 'by', 'in', 'of', 'up',
           'Facebook', 'Twitter', 'Pinterest', 'Flickr',
           'Google', 'Instagram', 'login', 'Login', 'Log',
           'website', 'Website', 'Contact', 'contact',
           'twitter', 'Branding', 'Tweet', 'pic', 'location',
           'Details'
           ] + list(bad_words())
    SW.update(mywords)
    wordcloud = WordCloud(
                relative_scaling=0, 
                prefer_horizontal=random.uniform(0.5, 1), 
                stopwords=SW,
                background_color='black',
                max_words=mw, 
                mask = mask_image
                ).generate(theText)
    return wordcloud
Developer: alexpkeil1, Project: twitterbot, Lines: 21, Source: wordle_pic_bot.py

Example 12: map

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
cursor = links.find({}, {"body": 1})

test = ""
for document in cursor:
    test = test + document['body']

with codecs.open("text_mining/my_stopwords.txt", "r", encoding="utf-8") as f:
    read_data = f.readlines()

stopwords = STOPWORDS.copy()
for data in read_data:
    stopwords.add(data.strip())

mask_choko = np.array(Image.open("text_mining/chokomag.png"))

wordcloud = WordCloud(stopwords=stopwords, background_color="black", max_words=10000, mask=mask_choko).generate(test)
Developer: fisheatfish, Project: mediascrap, Lines: 32, Source: draw_wordclouds.py

Example 13: main

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def main():

    data = pickle.load( open('alldata.pickle', 'rb') )
    data['wordclouds'] = {}

    stopwords = STOPWORDS.copy()
    sphere_stopwords = { 'common': ['sam', 'mayu', 'mani'],
    '1a':['live', 'born', 'year', 'years', 'yrs', 'since', 'bangalore'], \
    '1b':['live', 'born', 'year', 'yrs', 'moved', 'since', 'bangalore'], \
    '2a':['balance', 'time'], \
    '2b':['inspiration'], \
    '3':['lifestyle', ], \
    '4a':['bangalore', 'advantage', 'advantages'], \
    '4b':['bangalore', 'challenge', 'challenges'], \
    '4c':['work'], \
    '4d':['dependency', 'dependencies', 'external'], \
    '4e':['area', 'bangalore'], \
    '4f':['measures'], \
    '5b':['food', 'air'], \
    '6a':['end', 'user', 'enduser'], \
    '6b':['month', 'income', 'per', 'household'], \
    '6c':['interact', 'interaction','end', 'user', 'enduser'], \
    '6d':['design', 'end', 'user', 'enduser'], \
    '7a':['quality', 'control', 'challenge'], \
    '7b':['end', 'user', 'access', 'challenge'], \
    '7c':[], \
    '7d':[], \
    '8a':['tool', 'tools', 'resource', 'resources'], \
    '8b':['average', 'age', 'years', 'team', 'people'], \
    '8c':['fund', 'funding', 'funded', 'money'], \
    '8d':['tech', 'technology'], \
    '8e':['office', 'location', 'work', 'space'], \
    '9a':['skill', 'skills'], \
    '9b':['training'], \
    '10a':['active', 'internal', 'collaboration', 'collaborate'], \
    '10b':['active', 'external', 'collaboration', 'collaborate'], \
    '10c':['lead', 'leads', 'learning', 'collaboration', 'collaborate'], \
    '10d':['part', 'formal', 'collaboration', 'collaborate', 'platform'], \
    '10e':['culture', 'open', 'share', 'sharing', 'sector'], \
    '10f':['share', 'shares', 'shared'], \
    '11a':['partner', 'partners', 'partnership', 'partnerships'], \
    '11b':['criteria', 'partner', 'partners', 'partnership', 'partnerships'], \
    '11c':['partner', 'partners', 'partnership', 'partnerships', 'sector'], \
    '12a':['monitoring', 'evaluation', 'method', 'methods', 'impact'], \
    '12b':['goal', 'next', 'year', 'years'], \
    '12c':['impact', 'studies', 'data', 'shared'], \
    '13a':['entrepreneur'], \
    '13b':['start', 'starting', 'startup'], \
    '13c':['entrepreneur', 'entrepreneurs', 'interact', 'interaction'], \
    '13d':['entrepreneur', 'entrepreneurs', 'role', 'local', 'needs'], \
    '13e':['advantage', 'advantages', 'local', 'entrepreneur', 'entrepreneurs'], \
    '13f':['barrier', 'barriers', 'entry', 'local', 'entrepreneur', 'entrepreneurs'], \
    '13g':['challenge', 'challenges', 'local', 'entrepreneur', 'entrepreneurs'], \
    '13h':['entrepreneur', 'entrepreneurs', 'fail'], \
    '13i':['resource', 'resources', 'need', 'needed', 'strengthen', 'local', 'entrepreneur', 'entrepreneurs'], \
    '14a':['recommend', 'stakeholder', 'stakeholders'], \
    '14b':['map', 'visual'], \
    }

    #Filter out standalone words 2 letters or shorter
    shortword = re.compile(r'\W*\b\w{1,2}\b')

    questions = []
    text = {}

    for stakeholder in data:
        text[stakeholder] = ''
        for question in data[stakeholder]:
            if question not in questions:
                questions.append(question)
                text[question] = ''
            response = shortword.sub('', ' '.join(data[stakeholder][question]).lower().translate(None,string.punctuation))
            text[stakeholder] += response + ' '
            text[question] += response + ' '

    #Generate word clouds:
    for question in sorted(questions):
        if question != '5a':
            try:
                s = stopwords.union(set(sphere_stopwords[question]+sphere_stopwords['common']))
                wordcloud = WordCloud(stopwords=s, width=1600, height=800, background_color='white').generate(text[question])
                wordcloud.to_file('clouds/'+question+'-'+str(len(text[question].split()))+'words.png')
            except:
                print question

    for stakeholder in data:
        try:
            s = stopwords.union(set(sphere_stopwords['common']))
            wordcloud = WordCloud(stopwords=s, width=1600, height=800, background_color='white').generate(text[stakeholder])
            wordcloud.to_file('clouds/'+stakeholder+'-'+str(len(text[stakeholder].split()))+'words.png')
        except:
            print stakeholder
Developer: JaagaStartup, Project: project_sphere, Lines: 94, Source: wordcloudgenv4.py

Example 14: main

# Required import: from wordcloud import STOPWORDS [as alias]
# Or: from wordcloud.STOPWORDS import copy [as alias]
def main():

    data = {}
    data['questions'] = []
    data['stakeholders'] = []
    data['wordclouds'] = {}


    stopwords = STOPWORDS.copy()
    sphere_stopwords = {'1a':['live', 'born', 'year', 'yrs', 'since'], \
    '1b':['live', 'born', 'year', 'yrs', 'moved', 'since'], \
    '2a':['time'], \
    '2b':[], \
    '3':[], \
    '4a':['bangalore', 'advantage', 'advantages'], \
    '4b':['bangalore', 'challenge', 'challenges'], \
    '4c':['work'], \
    '4d':['dependency', 'dependencies', 'external'], \
    '4e':['area', 'bangalore'], \
    '4f':[], \
    '5b':['food', 'air'], \
    '6a':['end', 'user', 'enduser'], \
    '6b':['month', 'income', 'per', 'household'], \
    '6c':['interact', 'interaction','end', 'user', 'enduser'], \
    '6d':['design', 'end', 'user', 'enduser'], \
    '7a':['quality', 'control', 'challenge'], \
    '7b':['end', 'user', 'access', 'challenge'], \
    '7c':[], \
    '7d':[], \
    '8a':['tool', 'resource', 'resource'], \
    '8b':['average', 'age', 'team'], \
    '8c':[], \
    '8d':[], \
    '8e':[], \
    '9a':[], \
    '9b':[], \
    '10a':[], \
    '10b':[], \
    '10c':[], \
    '10d':[], \
    '10e':[], \
    '10f':[], \
    '11a':[], \
    '11b':[], \
    '11c':[], \
    '12a':[], \
    '12b':[], \
    '12c':[], \
    '13a':[], \
    '13b':[], \
    '13c':[], \
    '13d':[], \
    '13e':[], \
    '13f':[], \
    '13g':[], \
    '13h':[], \
    '13i':[], \
    '14a':[], \
    '14b':[], \
    }

    #Filter out standalone words 2 letters or shorter
    shortword = re.compile(r'\W*\b\w{1,2}\b')

    with open('alldata.csv') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            stakeholder = row['Code']
            data['stakeholders'].append(stakeholder)
            data[stakeholder] = {}
            data[stakeholder]['alltext'] = ''
            for key in row:
                if key != 'Code':
                    question = key
                    if question not in data['questions']:
                        data['questions'].append(question)
                        data[question] = ''
                    raw_response = shortword.sub('', row[key].lower().translate(None,string.punctuation))
                    stemmed_response = ' '.join([stem(word) for word in raw_response.split()])
                    data[stakeholder][question] = row[key]
                    data[stakeholder]['alltext'] += stemmed_response
                    data[stakeholder]['alltext'] += ' '
                    data[question] += stemmed_response
                    data[question] += ' '

    #Generate word clouds:
    for question in sorted(data['questions']):
        if question != '5a':
        #Number of words per question
        # print question, ':', len(data[question].split())
            try:
                s = stopwords.union(set(sphere_stopwords[question]))
                data['wordclouds'][question] = WordCloud(stopwords=s).generate(data[question])
            except:
                print question

    for stakeholder in data['stakeholders']:
        try:
            data['wordclouds'][stakeholder] = WordCloud(stopwords=stopwords).generate(data[stakeholder]['alltext'])
        except:
#......... remainder of this code omitted .........
Developer: JaagaStartup, Project: project_sphere, Lines: 103, Source: wordcloudgenv2.py


Note: The wordcloud.STOPWORDS.copy examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective authors, and copyright remains with those authors; consult each project's license before distributing or reusing the code. Do not reproduce this article without permission.