本文整理汇总了Python中classifier.Classifier.set_idf方法的典型用法代码示例。如果您正苦于以下问题:Python Classifier.set_idf方法的具体用法?Python Classifier.set_idf怎么用?Python Classifier.set_idf使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类classifier.Classifier的用法示例。
在下文中一共展示了Classifier.set_idf方法的1个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TestClassifier
# 需要导入模块: from classifier import Classifier [as 别名]
# 或者: from classifier.Classifier import set_idf [as 别名]
class TestClassifier(unittest.TestCase):
""" Tests the Classifier class.
"""
def setUp(self):
    """Build the Classifier under test with a "french" CleanTextUtil."""
    text_util = CleanTextUtil("french")
    self.c = Classifier(text_util)
def tearDown(self):
    """Clean up after each test by calling rm_data_dir().

    NOTE(review): presumably this deletes the on-disk data directory used
    by the classifier -- confirm against the helper's definition.
    """
    rm_data_dir()
def test_add_text(self):
    """Check that add_text records one text and its dictionary words.

    Adds a single French text to the classifier, then:
      1- verifies the stored number of texts equals 1;
      2- verifies the words found in the dictionary database equal the
         expected word list (same words, same order).
    """
    flux1_text = (
        u"Comment Google classe les pages Internet "
        u"Bientôt une sphère pour remplacer souris et écrans tactiles ? "
        u"Le clip kitsch du couple présidentiel chinois"
    )
    flux1_text_wanted = [
        "bient", "chinois", "class", "clip", "comment", "coupl",
        "cran", "googl", "internet", "kitsch", "le", "pag", "pr",
        "re", "remplac", "sidentiel", "sour", "sph", "tactil"
    ]
    self.c.add_text(flux1_text)

    # The counter is converted with int() before comparing to 1.
    text_nb = int(self.c.classifier_state_db.get("text_nb"))
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(text_nb, 1)  # 1

    # gen_db yields (word, info) pairs; only the words are compared here.
    words = [word for word, _ in kc_util.gen_db(self.c.dictionary_db.cursor())]
    self.assertEqual(words, flux1_text_wanted)  # 2
def test_set_idf(self):
    """Check that set_idf updates the idf stored for dictionary words.

    Add two texts:
      1- verify the idf of the first dictionary entry equals 0.0.
    Call set_idf:
      2- verify the idf of the first dictionary entry no longer equals 0.0.
    """
    self.c.add_text("foo")
    self.c.add_text("bar")  # important for idf

    # Builtin next() works on Python 2.6+ and 3.x, unlike the Python-2-only
    # generator method .next().
    _, word_info = next(kc_util.gen_db(self.c.dictionary_db.cursor()))
    self.assertEqual(word_info.idf, 0.0)  # 1

    self.c.set_idf()

    # Re-read the first entry through a fresh cursor after the update.
    _, word_info = next(kc_util.gen_db(self.c.dictionary_db.cursor()))
    self.assertNotEqual(word_info.idf, 0.0)  # 2
def test_set_idf_tfidf_norm(self):
    """Check that set_idf and set_tfidf_norm update idf and vector norm.

    Add two texts and one vector:
      1- verify the idf of the first dictionary entry equals 0.0;
      2- verify the stored norm of the vector equals '0.0'.
    Call set_idf, then set_tfidf_norm:
      3- verify the idf no longer equals 0.0;
      4- verify the stored norm no longer equals '0.0'.
    """
    text, vector_1 = "foo", "foo_1"
    self.c.add_text(text)
    self.c.add_text("bar")  # important for idf

    # Builtin next() works on Python 2.6+ and 3.x, unlike the Python-2-only
    # generator method .next().
    _, word_info = next(kc_util.gen_db(self.c.dictionary_db.cursor()))
    self.assertEqual(word_info.idf, 0.0)  # 1

    self.c.add_vector(vector_1, text)
    # The norm is compared against the string '0.0', not a float.
    norm = self.c.vectors_norm_db.get(vector_1)
    self.assertEqual(norm, '0.0')  # 2

    self.c.set_idf()
    _, word_info = next(kc_util.gen_db(self.c.dictionary_db.cursor()))
    self.assertNotEqual(word_info.idf, 0.0)  # 3

    self.c.set_tfidf_norm()
    norm = self.c.vectors_norm_db.get(vector_1)
    self.assertNotEqual(norm, '0.0')  # 4
def test_add_vector(self):
""" Tests add_vector.
Add a text.
1- Check if there is not a vector.
Add a vector:
#.........这里部分代码省略.........