当前位置: 首页>>代码示例>>Python>>正文


Python stop_words.get_stop_words方法代码示例

本文整理汇总了Python中stop_words.get_stop_words方法的典型用法代码示例。如果您正苦于以下问题:Python stop_words.get_stop_words方法的具体用法?Python stop_words.get_stop_words怎么用?Python stop_words.get_stop_words使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在stop_words的用法示例。


在下文中一共展示了stop_words.get_stop_words方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: LDA_train

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def LDA_train(doc):
    """Train a 10-topic LDA model on a collection of raw documents.

    Each document is lower-cased, tokenized with the ``tokenizer`` object
    from the enclosing scope, and stripped of English stop words.  The
    remaining tokens are used to build the dictionary and bag-of-words
    corpus the model is trained on.

    Args:
        doc: Iterable of documents; each is expected to support ``.lower()``
            (documents that cannot be processed are skipped).

    Returns:
        The ``models.ldamodel.LdaModel`` trained with a single pass.
    """
    # A set gives O(1) membership tests; tokens are assumed to be hashable
    # strings as produced by the tokenizer.
    en_stop = set(get_stop_words('en'))
    red = []
    for d in doc:
        try:
            tokens = tokenizer.tokenize(d.lower())
            red.append([i for i in tokens if i not in en_stop])
        except Exception:
            # Best effort: skip documents that cannot be lower-cased or
            # tokenized, but let KeyboardInterrupt/SystemExit propagate
            # (a bare ``except:`` would have swallowed those too).
            continue
    print("Forming Dictionary.....")
    dictionary = corpora.Dictionary(red)
    print("Forming Corpus.....")
    corpus = [dictionary.doc2bow(text) for text in red]
    print("Training Model.....")
    lda = models.ldamodel.LdaModel(
        corpus, num_topics=10, id2word=dictionary, passes=1
    )
    return lda

#Returns Average Of Probability Of Word Present In LDA Model For Input Document(Returns Float): 
开发者ID:GauravBh1010tt,项目名称:DeepLearn,代码行数:22,代码来源:lex_sem_ft.py

示例2: clean_up_words

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def clean_up_words(words):
    """Normalise words and drop the ones that are stop words.

    Every word is lower-cased and passed through ``clean_word``; the result
    is kept unless it appears in the package's English stop-word list or in
    the local list of extra stop words below.

    Args:
        words: Iterable of strings.

    Returns:
        List of cleaned words, in input order, with stop words removed.
    """
    pkg_stop_words = get_stop_words('en')
    my_stop_words = [
        'the',
        'is',
        'and',
        'thisfacebooktwitteremailredditprint',
        '',
        'reply',
        'likelike',
        'likeliked',
        'comments',
        'commenting',
        '/',
        '=',
    ]

    def is_stop_word(candidate):
        # Local list first, then the package list.
        return candidate in my_stop_words or candidate in pkg_stop_words

    kept = []
    for raw in words:
        candidate = clean_word(raw.lower())
        if not is_stop_word(candidate):
            kept.append(candidate)
    return kept
开发者ID:codingforentrepreneurs,项目名称:Web-Scraping,代码行数:27,代码来源:scrape2.py

示例3: __init__

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def __init__(self):
    """Load the English stop-word list and keep it on the instance."""
    english_stop_words = get_stop_words("en")
    self.stop_words = english_stop_words
开发者ID:abhinavkashyap,项目名称:sciwing,代码行数:4,代码来源:instance_preprocessing.py

示例4: __remove_stop_words

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def __remove_stop_words(self, docs):
    """Remove English stop words from every tokenized document.

    Args:
        docs: Iterable of documents, each an iterable of tokens.

    Returns:
        A list holding, for each input document in order, the list of its
        tokens that are not English stop words.
    """
    # The stop-word list does not depend on the document, so load it once
    # instead of once per document.  A frozenset makes each membership test
    # O(1); tokens are assumed to be hashable (strings).
    en_stop = frozenset(get_stop_words('en'))
    return [[token for token in doc if token not in en_stop] for doc in docs]
开发者ID:skashyap7,项目名称:TBBTCorpus,代码行数:9,代码来源:topic_extractor.py

示例5: __init__

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def __init__(self):
    """Create empty containers and load the English stop-word list."""
    self.episodeInfo = dict()
    self.Info = list()
    self.allTranscripts = dict()
    # Missing keys start at 0.
    self.vocabulary = collections.defaultdict(int)
    self.Stopwords = get_stop_words('en')
    self.impactActors = [
        "Leonard",
        "Sheldon",
        "Penny",
        "Howard",
        "Raj",
        "Amy",
        "Bernadette",
    ]
开发者ID:skashyap7,项目名称:TBBTCorpus,代码行数:9,代码来源:preprocessing.py

示例6: clean_up_words

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def clean_up_words(words):
    """Return the cleaned, lower-cased words that are not stop words.

    A word is dropped when its cleaned form (``clean_word`` applied to the
    lower-cased word) appears in the local extra stop-word list or in the
    package's English stop-word list.

    Args:
        words: Iterable of strings.

    Returns:
        List of cleaned words in input order.
    """
    pkg_stop_words = get_stop_words('en')
    my_stop_words = ['the', 'is', 'and', 'thisfacebooktwitteremailredditprint']
    # Lazy generator: each word is cleaned right before it is filtered.
    cleaned_words = (clean_word(raw.lower()) for raw in words)
    return [
        cleaned
        for cleaned in cleaned_words
        if not (cleaned in my_stop_words or cleaned in pkg_stop_words)
    ]
开发者ID:codingforentrepreneurs,项目名称:Web-Scraping,代码行数:14,代码来源:scrape1.py

示例7: __init__

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def __init__(self, language):
    """Store the stop words for ``language`` as a set on the instance.

    Args:
        language: Language identifier passed to ``stop_words.get_stop_words``.
    """
    words = stop_words.get_stop_words(language)
    self._stop_words = set(words)
开发者ID:AmadeusITGroup,项目名称:GraphDash,代码行数:4,代码来源:nlp.py

示例8: load_stopwords

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def load_stopwords(language):
    """Return the slugified pieces of every stop word for ``language``.

    Each stop word is passed through ``slugify`` and split on ``"-"``; all
    resulting pieces are collected into one flat list.

    Args:
        language: Language identifier passed to ``get_stop_words``.

    Returns:
        Flat list of slug fragments, in stop-word order.
    """
    pieces = []
    for word in get_stop_words(language):
        pieces.extend(slugify(word).split("-"))
    return pieces
开发者ID:acl-org,项目名称:acl-anthology,代码行数:4,代码来源:index.py

示例9: test_get_stop_words

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_get_stop_words(self):
    """The 'english' list has the expected number of stop words."""
    english = get_stop_words('english')
    actual_count = len(english)
    self.assertEqual(actual_count, self.number_of_english_stop_words)
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:5,代码来源:tests.py

示例10: test_get_stop_words_language_mapping

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_get_stop_words_language_mapping(self):
    """The short code 'en' yields the same list as 'english'."""
    by_code = get_stop_words('en')
    self.assertEqual(len(by_code), self.number_of_english_stop_words)
    by_name = get_stop_words('english')
    self.assertEqual(by_code, by_name)
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:6,代码来源:tests.py

示例11: test_get_stop_words_cache

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_get_stop_words_cache(self):
    """Check what does and does not end up in ``STOP_WORDS_CACHE``.

    * 'french' is absent from the cache before the first lookup and present
      after ``get_stop_words('fr')``.
    * With ``STOP_WORDS_DIR`` pointing at a missing directory, a second
      lookup still returns the same list, which the test takes as evidence
      that the result came from the cache.
    * A failed lookup for an unavailable language leaves no cache entry.
    """
    self.assertNotIn('french', stop_words.STOP_WORDS_CACHE)
    sw = get_stop_words('fr')
    self.assertIn('french', stop_words.STOP_WORDS_CACHE)

    original_stop_words_dir = stop_words.STOP_WORDS_DIR
    stop_words.STOP_WORDS_DIR = 'not-existing-directory'
    try:
        self.assertEqual(sw, get_stop_words('french'))
    finally:
        # Always restore the module-level setting so a failure here cannot
        # break every test that runs afterwards.
        stop_words.STOP_WORDS_DIR = original_stop_words_dir

    try:
        get_stop_words('klingon')
    except StopWordError:
        # Expected for an unavailable language; any other exception is a
        # real error and should surface.
        pass
    self.assertNotIn('klingon', stop_words.STOP_WORDS_CACHE)
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:15,代码来源:tests.py

示例12: test_get_stop_words_unavailable_language

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_get_stop_words_unavailable_language(self):
    """Requesting an unavailable language raises ``StopWordError``."""
    with self.assertRaises(StopWordError):
        get_stop_words('sindarin')
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:4,代码来源:tests.py

示例13: test_get_stop_words_install_issue

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_get_stop_words_install_issue(self):
    """A missing stop-words directory makes the lookup raise ``StopWordError``."""
    original_stop_words_dir = stop_words.STOP_WORDS_DIR
    stop_words.STOP_WORDS_DIR = 'not-existing-directory'
    try:
        self.assertRaises(StopWordError, get_stop_words, 'german')
    finally:
        # Restore the module-level setting even when the assertion fails,
        # otherwise later tests would run against the bogus directory.
        stop_words.STOP_WORDS_DIR = original_stop_words_dir
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:7,代码来源:tests.py

示例14: test_filters

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def test_filters(self):
    """A registered filter is applied to the list returned by ``get_stop_words``.

    A random letter is taken from a random English stop word, a filter that
    drops every word containing that letter is registered, and the filtered
    list is checked to contain no such word.  The filter is then removed and
    ``remove_filter`` is expected to report success.
    """
    language = 'en'
    # NOTE(review): the second positional argument is presumably the
    # ``cache`` flag, disabled here - confirm against the library signature.
    before = get_stop_words(language, False)
    letter = random.choice(random.choice(before))

    def remove_letter(stopwords, language):
        return [word for word in stopwords if letter not in word]

    stop_words.add_filter(remove_letter)
    try:
        after = get_stop_words(language, False)
        for stopword in after:
            self.assertNotIn(letter, stopword)
    finally:
        # Unregister the filter even when an assertion fails, so it cannot
        # keep altering stop-word lists for later tests.
        removed = stop_words.remove_filter(remove_letter)
    self.assertTrue(removed)
开发者ID:Alir3z4,项目名称:python-stop-words,代码行数:14,代码来源:tests.py

示例15: remove_stopwords

# 需要导入模块: import stop_words [as 别名]
# 或者: from stop_words import get_stop_words [as 别名]
def remove_stopwords(tokenized_data):
    """Return the tokens that are not English stop words.

    Args:
        tokenized_data: Iterable of tokens.

    Returns:
        List of the tokens, in input order, that do not appear in the
        English stop-word list.
    """
    en_stop = get_stop_words('en')
    kept = []
    for token in tokenized_data:
        if token in en_stop:
            continue
        kept.append(token)
    return kept
开发者ID:practical-recommender-systems,项目名称:moviegeek,代码行数:8,代码来源:lda_model_calculator.py


注:本文中的stop_words.get_stop_words方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。