本文整理匯總了Python中sklearn.feature_extraction.text.ENGLISH_STOP_WORDS屬性的典型用法代碼示例。如果您正苦於以下問題:Python text.ENGLISH_STOP_WORDS屬性的具體用法?Python text.ENGLISH_STOP_WORDS怎麼用?Python text.ENGLISH_STOP_WORDS使用的例子?那麼,這裡精選的屬性代碼示例或許可以為您提供幫助。您也可以進一步了解該屬性所在類sklearn.feature_extraction.text的用法示例。
在下文中一共展示了text.ENGLISH_STOP_WORDS屬性的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: test_get_stoplisted_unigram_corpus
# 需要導入模塊: from sklearn.feature_extraction import text [as 別名]
# 或者: from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS [as 別名]
def test_get_stoplisted_unigram_corpus(self):
    """Check which terms survive ``get_stoplisted_unigram_corpus``.

    The terms of the stoplisted unigram corpus must be exactly the terms
    of the full test matrix that contain no space, contain no apostrophe,
    and are not in ``ENGLISH_STOP_WORDS``.
    """
    full_corpus = make_a_test_term_doc_matrix()
    unigram_corpus = full_corpus.get_stoplisted_unigram_corpus()
    all_terms = full_corpus.get_term_freq_df().index
    unigram_terms = unigram_corpus.get_term_freq_df().index

    # Build the expected vocabulary by dropping every term the stoplisted
    # unigram corpus is supposed to exclude.
    expected_terms = set()
    for term in all_terms:
        if ' ' in term or "'" in term or term in ENGLISH_STOP_WORDS:
            continue
        expected_terms.add(term)

    self.assertEqual(expected_terms, set(unigram_terms))
示例2: test_allow_single_quotes_in_unigrams
# 需要導入模塊: from sklearn.feature_extraction import text [as 別名]
# 或者: from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS [as 別名]
def test_allow_single_quotes_in_unigrams(self):
    """Check the unigram corpus after single quotes have been allowed.

    ``allow_single_quotes_in_unigrams`` must return an object of the same
    type as the matrix it was called on. Afterwards the stoplisted unigram
    corpus must contain exactly the terms with no space that are not in
    ``ENGLISH_STOP_WORDS``; terms containing an apostrophe are not
    filtered out of the expectation here.
    """
    corpus = make_a_test_term_doc_matrix()
    returned = corpus.allow_single_quotes_in_unigrams()
    self.assertEqual(type(returned), type(corpus))

    # NOTE(review): the unigram corpus is derived from the original
    # matrix, not from the object returned above; presumably the call
    # configures the matrix in place -- confirm against the implementation.
    unigram_corpus = corpus.get_stoplisted_unigram_corpus()
    all_terms = corpus.get_term_freq_df().index
    unigram_terms = unigram_corpus.get_term_freq_df().index

    expected_terms = set()
    for term in all_terms:
        if ' ' in term or term in ENGLISH_STOP_WORDS:
            continue
        expected_terms.add(term)

    self.assertEqual(expected_terms, set(unigram_terms))
示例3: _assert_stoplisted_minus_joe
# 需要導入模塊: from sklearn.feature_extraction import text [as 別名]
# 或者: from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS [as 別名]
def _assert_stoplisted_minus_joe(self, tdm, uni_tdm):
    """Assert that ``uni_tdm`` holds the stoplisted unigrams of ``tdm`` minus "joe".

    The expected vocabulary is every term of ``tdm`` that contains no
    space, is not "joe" (compared case-insensitively), contains no
    apostrophe, and is not in ``ENGLISH_STOP_WORDS``.

    Args:
        tdm: Matrix supplying the full vocabulary via ``get_term_freq_df``.
        uni_tdm: Matrix whose vocabulary is compared to the expectation.
    """
    all_terms = tdm.get_term_freq_df().index
    unigram_terms = uni_tdm.get_term_freq_df().index

    expected_terms = set()
    for term in all_terms:
        if (' ' in term
                or term.lower() == 'joe'
                or "'" in term
                or term in ENGLISH_STOP_WORDS):
            continue
        expected_terms.add(term)

    self.assertEqual(expected_terms, set(unigram_terms))
示例4: test_countvectorizer_stop_words
# 需要導入模塊: from sklearn.feature_extraction import text [as 別名]
# 或者: from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS [as 別名]
def test_countvectorizer_stop_words():
    """Exercise ``CountVectorizer.get_stop_words`` for each ``stop_words`` form.

    Covers the ``'english'`` shortcut, two unrecognised string names, and
    an explicit list of words.
    """
    vectorizer = CountVectorizer()

    # The 'english' shortcut must resolve to the built-in stop word list.
    vectorizer.set_params(stop_words='english')
    assert_equal(vectorizer.get_stop_words(), ENGLISH_STOP_WORDS)

    # Unrecognised names are accepted by set_params; the ValueError is
    # expected only when the stop words are actually resolved.
    for bad_name in ('_bad_str_stop_', '_bad_unicode_stop_'):
        vectorizer.set_params(stop_words=bad_name)
        assert_raises(ValueError, vectorizer.get_stop_words)

    # An explicit list is expected to come back as a set of the same words.
    custom_words = ['some', 'other', 'words']
    vectorizer.set_params(stop_words=custom_words)
    assert_equal(vectorizer.get_stop_words(), set(custom_words))
示例5: _build_stop_words
# 需要導入模塊: from sklearn.feature_extraction import text [as 別名]
# 或者: from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS [as 別名]
def _build_stop_words(self) -> Set[str]:
    """Return the stop words to use: the English defaults plus field extras.

    The result is always a freshly built, mutable ``set``. Previously the
    shared ``ENGLISH_STOP_WORDS`` constant itself was returned when the
    field supplied no extra words, so the concrete return type depended on
    the branch taken and did not match the ``Set[str]`` annotation.

    Returns:
        A new set containing every entry of ``ENGLISH_STOP_WORDS`` and,
        when ``self.field.get_vectorizer_stop_words()`` returns a truthy
        collection, those additional words as well.
    """
    additional_stop_words = self.field.get_vectorizer_stop_words()
    # Copy the shared constant so the caller gets its own mutable set.
    stop_words = set(ENGLISH_STOP_WORDS)
    if additional_stop_words:
        stop_words.update(additional_stop_words)
    return stop_words