本文整理汇总了Python中twitter.Twitter.load_tweets_and_build_index方法的典型用法代码示例。如果您正苦于以下问题：Python Twitter.load_tweets_and_build_index方法的具体用法？Python Twitter.load_tweets_and_build_index怎么用？Python Twitter.load_tweets_and_build_index使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类twitter.Twitter的用法示例。
在下文中一共展示了Twitter.load_tweets_and_build_index方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: TwitterTests
# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class TwitterTests(unittest.TestCase):
def setUp(self):
self.twitter = Twitter(CUR_DIR + "/test_crossfit.tweets", CUR_DIR + "/test_stop_words.txt")
def test_data_preprocess(self):
processor = TwitterDataPreprocessor()
p_entry1 = processor.preprocess("\"There are no office hours for champions.\"—Paul Dietzel @CrossFitGames")
p_entry2 = processor.preprocess("Saturday 6-21-14 - http://t.co/ZtQWUsfal1 http://t.co/jPICqL3adi @ReebokCrossFit1")
p_entry3 = processor.preprocess("Crossfit Named - Kristan Clever Valley CrossFit : @Cleverhandz")
text1 = p_entry1[0].strip()
screen_name1 = p_entry1[1].strip()
text2 = p_entry2[0].strip()
screen_name2 = p_entry2[1].strip()
text3 = p_entry3[0].strip()
screen_name3 = p_entry3[1].strip()
self.assertEqual(text1, "there are no office hours for champions paul dietzel")
self.assertEqual(screen_name1, "crossfitgames")
self.assertEqual(text2, "saturday 6 21 14 http t co ztqwusfal1 http t co jpicql3adi")
self.assertEqual(screen_name2, "reebokcrossfit1")
self.assertEqual(text3, "crossfit named kristan clever valley crossfit")
self.assertEqual(screen_name3, "cleverhandz")
def test_twitter_data_building(self):
self.twitter.load_tweets_and_build_index()
示例2: SearcherTests
# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class SearcherTests(unittest.TestCase):
"""
Test case for SearchEngine class.
"""
def setUp(self):
"""
Setup search engine that will be subjected to the tests.
"""
self.twitter = Twitter(CUR_DIR + "/test_crossfit.tweets", CUR_DIR + "/test_stop_words.txt")
self.twitter.load_tweets_and_build_index()
self.searcher = Searcher(self.twitter.tweets, self.twitter.stop_words)
def test_indexed_doc_count(self):
self.assertEqual(self.searcher.count(), 10)
def test_existent_term_search(self):
"""
Test if search is correctly performed.
"""
results = self.searcher.search("coach")
expected_results = 3
self.assertEqual(results[0].indexable.docid, expected_results)
def test_non_existent_term_search(self):
"""
Test if search is correctly performed.
"""
expected_results = []
results = self.searcher.search("asdasdasdas")
self.assertListEqual(results, expected_results)
def test_search_result_limit(self):
"""
Test if search results can be limited.
"""
results = self.searcher.search("crossfit", 1)
expected_results = 6
self.assertEqual(results[0].indexable.docid, expected_results)
示例3: RankTests
# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class RankTests(unittest.TestCase):
"""
Test case for Index class.
"""
def setUp(self):
"""
Setup ranker that will be subjected to the tests.
"""
self.rank = Rank(sample_stop_words())
def test_sample_ranking_with_no_exceptions(self):
"""
Test if ranking is built without any exception.
"""
sample1 = Indexable(1, "this is an indexable metadata")
sample2 = Indexable(2, "this is an indexable super metadata")
sample3 = Indexable(3, "this is another indexable metadata")
self.rank.build_rank([sample1, sample2, sample3])
def test_doc_frequency_matrix_with_sample1(self):
"""
Test if document frequency matrix is correctly built.
"""
sample1 = Indexable(1, "this is an indexable metadata")
sample2 = Indexable(2, "this is an indexable super metadata")
sample3 = Indexable(3, "this is another indexable metadata")
self.rank.build_rank([sample1, sample2, sample3])
expected_vocab_indices = {"an": 2, "super": 3, "indexable": 1, "metadata": 0, "another": 4}
expected_tf = np.array([[1, 1, 1, 0, 0], [1, 1, 1, 1, 0], [1, 1, 0, 0, 1]])
self.assertEqual(self.rank.vocabulary, expected_vocab_indices)
np.testing.assert_array_equal(self.rank.ft_matrix.todense(), expected_tf)
def test_doc_frequency_matrix_with_sample2(self):
"""
Test if document frequency matrix is correctly built.
"""
sample1 = Indexable(1, "the sky is blue")
sample2 = Indexable(2, "the sun is bright")
self.rank.build_rank([sample1, sample2])
expected_vocab_indices = {"blue": 0, "sun": 2, "bright": 3, "sky": 1}
expected_tf = np.array([[1, 1, 0, 0], [0, 0, 1, 1]])
self.assertEqual(self.rank.vocabulary, expected_vocab_indices)
np.testing.assert_array_equal(self.rank.ft_matrix.todense(), expected_tf)
def test_doc_inverse_term_frequency_vector1(self):
"""
Test if document inverse term frequency vector is correctly built.
"""
sample1 = Indexable(1, "this is an indexable metadata")
sample2 = Indexable(2, "this is an indexable super metadata")
sample3 = Indexable(3, "this is another indexable metadata")
self.rank.build_rank([sample1, sample2, sample3])
expected_idf = [1.0, 1.0, 1.28768207, 1.69314718, 1.69314718]
expected_tf_idf = [
[0.52284231, 0.52284231, 0.67325467, 0, 0],
[0.39148397, 0.39148397, 0.50410689, 0.66283998, 0],
[0.45329466, 0.45329466, 0, 0, 0.76749457],
]
np.testing.assert_almost_equal(self.rank.ifd_diag_matrix.diagonal(), expected_idf, 4)
np.testing.assert_almost_equal(self.rank.tf_idf_matrix.todense(), expected_tf_idf, 4)
def test_doc_inverse_term_frequency_vector2(self):
"""
Test if document inverse term frequency vector is correctly built.
"""
sample1 = Indexable(1, "the sky is blue")
sample2 = Indexable(2, "the sun is bright")
self.rank.build_rank([sample1, sample2])
expected_idf = [1.40546511, 1.40546511, 1.40546511, 1.40546511]
expected_tf_idf = [[0.70710678, 0.70710678, 0, 0], [0, 0, 0.70710678, 0.70710678]]
np.testing.assert_almost_equal(self.rank.ifd_diag_matrix.diagonal(), expected_idf, 4)
np.testing.assert_almost_equal(self.rank.tf_idf_matrix.todense(), expected_tf_idf, 4)
def test_score_computation(self):
"""
Test if document score is correctly calculated.
"""
sample1 = Indexable(1, "the sky is blue")
self.rank.build_rank([sample1])
np.testing.assert_almost_equal(self.rank.compute_rank(0, ["blue"]), 0.707106, 5)
np.testing.assert_almost_equal(self.rank.compute_rank(0, ["sky"]), 0.7071067, 5)
np.testing.assert_almost_equal(self.rank.compute_rank(0, ["blue", "sky"]), 1.414213, 5)
def test_debug_ft_matrix(self):
self.twitter = Twitter(CUR_DIR + "/test_crossfit.tweets", CUR_DIR + "/test_stop_words.txt")
self.twitter.load_tweets_and_build_index()