Python Twitter.load_tweets_and_build_index方法代码示例

本文整理汇总了Python中twitter.Twitter.load_tweets_and_build_index方法的典型用法代码示例。如果您正苦于以下问题：Python Twitter.load_tweets_and_build_index方法的具体用法？Python Twitter.load_tweets_and_build_index怎么用？Python Twitter.load_tweets_and_build_index使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类twitter.Twitter的用法示例。

在下文中一共展示了Twitter.load_tweets_and_build_index方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: TwitterTests

# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class TwitterTests(unittest.TestCase):
    """Tests for tweet preprocessing and for building the Twitter index from fixture files."""

    def setUp(self):
        """Create the Twitter object from the fixture tweet file and stop-word file."""
        tweets_path = CUR_DIR + "/test_crossfit.tweets"
        stop_words_path = CUR_DIR + "/test_stop_words.txt"
        self.twitter = Twitter(tweets_path, stop_words_path)

    def test_data_preprocess(self):
        """
        Check the (text, screen name) pair returned by the preprocessor.

        Each raw entry is a tweet text followed by a tab and an "@name" handle.
        The expected values are lower-cased, with punctuation replaced by
        single spaces and the leading "@" dropped from the screen name.
        """
        raw_entries = (
            "\"There are no office hours for champions.\"—Paul Dietzel	@CrossFitGames",
            "Saturday 6-21-14 - http://t.co/ZtQWUsfal1 http://t.co/jPICqL3adi	@ReebokCrossFit1",
            "Crossfit Named - Kristan Clever  Valley CrossFit :	@Cleverhandz",
        )
        expected_pairs = (
            ("there are no office hours for champions paul dietzel", "crossfitgames"),
            ("saturday 6 21 14 http t co ztqwusfal1 http t co jpicql3adi", "reebokcrossfit1"),
            ("crossfit named kristan clever valley crossfit", "cleverhandz"),
        )

        preprocessor = TwitterDataPreprocessor()

        # Run every entry through the preprocessor before asserting anything.
        processed = [preprocessor.preprocess(raw) for raw in raw_entries]

        # Index 0 holds the text and index 1 the screen name; surrounding
        # whitespace is not part of the comparison.
        cleaned = [(entry[0].strip(), entry[1].strip()) for entry in processed]

        for (text, screen_name), (want_text, want_name) in zip(cleaned, expected_pairs):
            self.assertEqual(text, want_text)
            self.assertEqual(screen_name, want_name)

    def test_twitter_data_building(self):
        """Smoke test: loading the tweets and building the index must not raise."""
        self.twitter.load_tweets_and_build_index()

开发者ID:UIKit0，项目名称:simple-search-engine，代码行数:30，代码来源:test_twitter.py

示例2: SearcherTests

# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class SearcherTests(unittest.TestCase):
    """
    Test case for the Searcher class, run against an index built from the fixture tweets.
    """

    def setUp(self):
        """
        Build the Twitter index from the fixture files and create the Searcher under test.
        """
        tweets_path = CUR_DIR + "/test_crossfit.tweets"
        stop_words_path = CUR_DIR + "/test_stop_words.txt"

        self.twitter = Twitter(tweets_path, stop_words_path)
        self.twitter.load_tweets_and_build_index()

        # The searcher is fed the loaded tweets and stop words of the Twitter object.
        self.searcher = Searcher(self.twitter.tweets, self.twitter.stop_words)

    def test_indexed_doc_count(self):
        """
        Test that the searcher reports 10 indexed documents for the fixture file.
        """
        indexed_docs = self.searcher.count()

        self.assertEqual(indexed_docs, 10)

    def test_existent_term_search(self):
        """
        Test that searching a term present in the fixtures returns docid 3 as first hit.
        """
        first_hit = self.searcher.search("coach")[0]

        self.assertEqual(first_hit.indexable.docid, 3)

    def test_non_existent_term_search(self):
        """
        Test that searching a term absent from the fixtures returns an empty list.
        """
        self.assertListEqual(self.searcher.search("asdasdasdas"), [])

    def test_search_result_limit(self):
        """
        Test the first hit of a search called with a second argument of 1.

        NOTE(review): the second argument is presumably the result limit; only
        the docid of the first hit is checked here, not the number of results.
        """
        limited_hits = self.searcher.search("crossfit", 1)
        first_hit = limited_hits[0]

        self.assertEqual(first_hit.indexable.docid, 6)

开发者ID:UIKit0，项目名称:simple-search-engine，代码行数:47，代码来源:test_search.py

示例3: RankTests

# 需要导入模块: from twitter import Twitter [as 别名]
# 或者: from twitter.Twitter import load_tweets_and_build_index [as 别名]
class RankTests(unittest.TestCase):
    """
    Test case for the Rank class.
    """

    def setUp(self):
        """
        Create the ranker under test from the sample stop words.
        """
        stop_words = sample_stop_words()
        self.rank = Rank(stop_words)

    def _build_rank_from_texts(self, texts):
        """
        Wrap each text in an Indexable (ids starting at 1) and build the rank from them.
        """
        documents = [Indexable(doc_id, text) for doc_id, text in enumerate(texts, start=1)]
        self.rank.build_rank(documents)

    def _build_metadata_rank(self):
        """
        Build the rank from the three "indexable metadata" sample documents.
        """
        self._build_rank_from_texts(
            (
                "this is an indexable metadata",
                "this is an indexable super metadata",
                "this is another indexable metadata",
            )
        )

    def _build_sky_rank(self):
        """
        Build the rank from the two "sky"/"sun" sample documents.
        """
        self._build_rank_from_texts(("the sky is blue", "the sun is bright"))

    def test_sample_ranking_with_no_exceptions(self):
        """
        Smoke test: building the rank from the metadata samples must not raise.
        """
        self._build_metadata_rank()

    def test_doc_frequency_matrix_with_sample1(self):
        """
        Test the vocabulary mapping and the ft_matrix built from the metadata samples.
        """
        self._build_metadata_rank()

        # One row per document; columns follow the vocabulary indices below.
        # "this" and "is" are not expected in the vocabulary (presumably stop words).
        vocabulary = {"metadata": 0, "indexable": 1, "an": 2, "super": 3, "another": 4}
        term_counts = np.array(
            [
                [1, 1, 1, 0, 0],
                [1, 1, 1, 1, 0],
                [1, 1, 0, 0, 1],
            ]
        )

        self.assertEqual(self.rank.vocabulary, vocabulary)
        np.testing.assert_array_equal(self.rank.ft_matrix.todense(), term_counts)

    def test_doc_frequency_matrix_with_sample2(self):
        """
        Test the vocabulary mapping and the ft_matrix built from the sky/sun samples.
        """
        self._build_sky_rank()

        vocabulary = {"blue": 0, "sky": 1, "sun": 2, "bright": 3}
        term_counts = np.array(
            [
                [1, 1, 0, 0],
                [0, 0, 1, 1],
            ]
        )

        self.assertEqual(self.rank.vocabulary, vocabulary)
        np.testing.assert_array_equal(self.rank.ft_matrix.todense(), term_counts)

    def test_doc_inverse_term_frequency_vector1(self):
        """
        Test the idf diagonal and the tf-idf matrix for the metadata samples (4 decimals).
        """
        self._build_metadata_rank()

        idf_values = [1.0, 1.0, 1.28768207, 1.69314718, 1.69314718]
        tf_idf_rows = [
            [0.52284231, 0.52284231, 0.67325467, 0, 0],
            [0.39148397, 0.39148397, 0.50410689, 0.66283998, 0],
            [0.45329466, 0.45329466, 0, 0, 0.76749457],
        ]

        actual_idf = self.rank.ifd_diag_matrix.diagonal()
        np.testing.assert_almost_equal(actual_idf, idf_values, 4)

        actual_tf_idf = self.rank.tf_idf_matrix.todense()
        np.testing.assert_almost_equal(actual_tf_idf, tf_idf_rows, 4)

    def test_doc_inverse_term_frequency_vector2(self):
        """
        Test the idf diagonal and the tf-idf matrix for the sky/sun samples (4 decimals).
        """
        self._build_sky_rank()

        idf_values = [1.40546511, 1.40546511, 1.40546511, 1.40546511]
        tf_idf_rows = [
            [0.70710678, 0.70710678, 0, 0],
            [0, 0, 0.70710678, 0.70710678],
        ]

        actual_idf = self.rank.ifd_diag_matrix.diagonal()
        np.testing.assert_almost_equal(actual_idf, idf_values, 4)

        actual_tf_idf = self.rank.tf_idf_matrix.todense()
        np.testing.assert_almost_equal(actual_tf_idf, tf_idf_rows, 4)

    def test_score_computation(self):
        """
        Test the score of document 0 for single-term and two-term queries (5 decimals).
        """
        self._build_rank_from_texts(("the sky is blue",))

        # (query terms, expected score) pairs, checked in this order.
        score_cases = (
            (["blue"], 0.707106),
            (["sky"], 0.7071067),
            (["blue", "sky"], 1.414213),
        )
        for terms, expected_score in score_cases:
            np.testing.assert_almost_equal(self.rank.compute_rank(0, terms), expected_score, 5)

    def test_debug_ft_matrix(self):
        """
        Smoke test: building the Twitter index from the fixture files must not raise.
        """
        tweets_path = CUR_DIR + "/test_crossfit.tweets"
        stop_words_path = CUR_DIR + "/test_stop_words.txt"
        self.twitter = Twitter(tweets_path, stop_words_path)
        self.twitter.load_tweets_and_build_index()

开发者ID:yesnyes，项目名称:simple-search-engine，代码行数:102，代码来源:test_rank.py

注：本文中的twitter.Twitter.load_tweets_and_build_index方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。