當前位置: 首頁>>代碼示例>>Python>>正文


Python Source.build方法代碼示例

本文整理匯總了Python中newspaper.Source.build方法的典型用法代碼示例。如果您正苦於以下問題:Python Source.build方法的具體用法?Python Source.build怎麼用?Python Source.build使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類newspaper.Source的用法示例。


在下文中一共展示了Source.build方法的5個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: test_source_build

# 需要導入模塊: from newspaper import Source [as 別名]
# 或者: from newspaper.Source import build [as 別名]
    def test_source_build(self):
        """Build a ``Source`` for cnn.com and sanity-check the result.

        Asserts that the brand and description produced by ``build()`` match
        the expected values, then prints the article count and the category
        URLs so they can be inspected by eye.
        """
        DESC = """CNN.com delivers the latest breaking news and information on the latest top stories, weather, business, entertainment, politics, and more. For in-depth coverage, CNN.com provides special reports, video, audio, photo galleries, and interactive guides."""
        BRAND = "cnn"

        config = Configuration()
        config.verbose = False
        s = Source("http://cnn.com", config=config)
        s.clean_memo_cache()
        s.build()

        assert s.brand == BRAND
        assert s.description == DESC

        # The article count and category URLs are ever-changing, so they
        # cannot be asserted against fixed values. Instead they are printed,
        # which makes it just as easy to glance at them and see if they look OK.
        #
        # The single-argument parenthesized form of print behaves the same on
        # Python 2 (print statement) and Python 3 (print function).
        print("\t\tWe have %d articles currently!" % s.size())
        print("")
        print("\t\t%s categories are: %s" % (s.url, str(s.category_urls())))
開發者ID:WheresWardy,項目名稱:newspaper,代碼行數:28,代碼來源:unit_tests.py

示例2: main

# 需要導入模塊: from newspaper import Source [as 別名]
# 或者: from newspaper.Source import build [as 別名]
def _guardian_timestamp(publish_date, html):
    """Combine an article's publish date with the time shown in its dateline.

    Args:
        publish_date: The article's publish date; only the ``YYYY-MM-DD``
            prefix of ``str(publish_date)`` is used. May be ``None``.
        html: Raw article HTML, searched for the
            ``content__dateline-time`` span.

    Returns:
        A naive ``datetime`` built from the date and the dateline time, or
        ``None`` when either part is missing or cannot be parsed.
    """
    # Imported locally under an alias so this helper works regardless of
    # whether the enclosing module binds ``datetime`` to the module or to
    # the class.
    import datetime as _dt

    if publish_date is None:
        return None

    # Non-greedy so the capture stops at the first closing tag even when
    # several spans share one line of HTML.
    found = re.search(
        r'<span class="content__dateline-time">(.*?)</span>', html)
    if found is None:
        return None

    # The dateline presumably looks like "12.34 GMT": normalise the
    # separator to ":" and keep only the first whitespace-delimited token.
    tokens = found.group(1).replace(".", ":").split()
    if not tokens:
        return None

    parts = str(publish_date).split()[0].split("-")
    if len(parts) != 3:
        return None
    year, month, day = parts

    date_time = "%s/%s/%s %s" % (month, day, year, tokens[0])
    try:
        return _dt.datetime.strptime(date_time, '%m/%d/%Y %H:%M')
    except ValueError:
        return None


def main():
    """Collect Guardian world-news articles and hand them to ``newspaper_article``.

    Builds a ``Source`` for the Guardian world section, then for every
    article whose URL contains ``/world/`` downloads, parses and runs NLP on
    it, derives a publication timestamp, and passes headline, URL, text and
    date to ``newspaper_article``. Articles that fail at any step are
    reported on stdout and skipped.
    """
    source = "The Guardian"
    guardian = Source("http://www.theguardian.com/world", memoize_articles=False)
    guardian.build()

    for candidate in guardian.articles:
        url = candidate.url
        if "/world/" not in url:
            continue

        a = Article(url, language='en')
        a.download()
        # Retry the download up to ten more times until it succeeds.
        for _ in range(10):
            if a.is_downloaded:
                break
            a.download()

        try:
            a.parse()
            a.nlp()
        except Exception:
            print("Error: Not parsed/downloaded correctly.")
            continue

        date_obj = _guardian_timestamp(a.publish_date, a.html)
        if date_obj is None:
            print("Error: Could not determine publication time for %s" % url)
            continue

        # TODO: Add stuff to the DB

        try:
            record = {
                'headline': a.title,
                'url': url,
                'text': a.text,
                'date': date_obj
            }
            newspaper_article(source, record, keywords=a.keywords)
        except Exception as ex:
            print('Article could not be created due to following error')
            print(ex)
開發者ID:JessicaFu,項目名稱:CS1951aFinalProj,代碼行數:58,代碼來源:fetch_guardian.py

示例3: test_source_build

# 需要導入模塊: from newspaper import Source [as 別名]
# 或者: from newspaper.Source import build [as 別名]
    def test_source_build(self):
        """Build a ``Source`` for cnn.com and sanity-check the result.

        Asserts that the brand and description produced by ``build()`` match
        the expected values, then prints the number of articles found.
        """

        DESC = """CNN.com delivers the latest breaking news and information on the latest top stories, weather, business, entertainment, politics, and more. For in-depth coverage, CNN.com provides special reports, video, audio, photo galleries, and interactive guides."""
        BRAND = 'cnn'

        configs = Configuration()
        configs.verbose = False
        s = Source('http://cnn.com', configs=configs)
        s.clean_memo_cache()
        s.build()

        assert s.brand == BRAND
        assert s.description == DESC

        # Single-argument parenthesized print behaves the same on Python 2
        # (print statement) and Python 3 (print function).
        print('\t\tWe have %d articles currently!' % s.size())
開發者ID:michaelhood,項目名稱:newspaper,代碼行數:19,代碼來源:unit_tests.py

示例4: test_source_build

# 需要導入模塊: from newspaper import Source [as 別名]
# 或者: from newspaper.Source import build [as 別名]
    def test_source_build(self):
        """Build a ``Source`` for cnn.com against a mocked response and verify it.

        Registers the ``cnn_main_site`` mock for every URL matching
        ``.*cnn\\.com``, builds the source, and asserts the brand,
        description, article count, category URLs and feed URLs against
        fixed expected values.
        """
        DESC = ('CNN.com International delivers breaking news from across '
                'the globe and information on the latest top stories, '
                'business, sports and entertainment headlines. Follow the '
                'news as it happens through: special reports, videos, '
                'audio, photo galleries plus interactive maps and timelines.')
        CATEGORY_URLS = [
            u'http://cnn.com/ASIA', u'http://connecttheworld.blogs.cnn.com',
            u'http://cnn.com/HLN', u'http://cnn.com/MIDDLEEAST',
            u'http://cnn.com', u'http://ireport.cnn.com',
            u'http://cnn.com/video', u'http://transcripts.cnn.com',
            u'http://cnn.com/espanol',
            u'http://partners.cnn.com', u'http://www.cnn.com',
            u'http://cnn.com/US', u'http://cnn.com/EUROPE',
            u'http://cnn.com/TRAVEL', u'http://cnn.com/cnni',
            u'http://cnn.com/SPORT', u'http://cnn.com/mostpopular',
            u'http://arabic.cnn.com', u'http://cnn.com/WORLD',
            u'http://cnn.com/LATINAMERICA', u'http://us.cnn.com',
            u'http://travel.cnn.com', u'http://mexico.cnn.com',
            u'http://cnn.com/SHOWBIZ', u'http://edition.cnn.com',
            u'http://amanpour.blogs.cnn.com', u'http://money.cnn.com',
            u'http://cnn.com/tools/index.html', u'http://cnnespanol.cnn.com',
            u'http://cnn.com/CNNI', u'http://business.blogs.cnn.com',
            u'http://cnn.com/AFRICA', u'http://cnn.com/TECH',
            u'http://cnn.com/BUSINESS']
        FEEDS = [u'http://rss.cnn.com/rss/edition.rss']
        BRAND = 'cnn'

        s = Source('http://cnn.com', verbose=False, memoize_articles=False)
        # Raw string: "\." in a plain literal is an invalid escape sequence
        # that newer Python versions warn about; the pattern is unchanged.
        url_re = re.compile(r".*cnn\.com")
        mock_response_with(url_re, 'cnn_main_site')
        s.clean_memo_cache()
        s.build()

        assert s.brand == BRAND
        assert s.description == DESC
        assert s.size() == 241
        assert s.category_urls() == CATEGORY_URLS
        # TODO: A lot of the feed extraction is NOT being tested because feeds
        # are primarily extracted from the HTML of category URLs. We lose this
        # effect by just mocking CNN's main page HTML. Warning: tedious fix.
        assert s.feed_urls() == FEEDS
開發者ID:Geekking,項目名稱:newspaper,代碼行數:48,代碼來源:unit_tests.py

示例5: test_source_build

# 需要導入模塊: from newspaper import Source [as 別名]
# 或者: from newspaper.Source import build [as 別名]
    def test_source_build(self):
        """Smoke-test that a cnn.com ``Source`` can be built.

        The test passes when constructing the source, clearing its memo
        cache and calling ``build()`` all complete without raising. The
        expected description, category URLs, feeds and brand are recorded
        below for reference but are not asserted in this method.
        """
        expected_description = (
            'CNN.com International delivers breaking news from across '
            'the globe and information on the latest top stories, '
            'business, sports and entertainment headlines. Follow the '
            'news as it happens through: special reports, videos, '
            'audio, photo galleries plus interactive maps and timelines.'
        )
        expected_category_urls = [
            'http://cnn.com/ASIA',
            'http://connecttheworld.blogs.cnn.com',
            'http://cnn.com/HLN',
            'http://cnn.com/MIDDLEEAST',
            'http://cnn.com',
            'http://ireport.cnn.com',
            'http://cnn.com/video',
            'http://transcripts.cnn.com',
            'http://cnn.com/espanol',
            'http://partners.cnn.com',
            'http://www.cnn.com',
            'http://cnn.com/US',
            'http://cnn.com/EUROPE',
            'http://cnn.com/TRAVEL',
            'http://cnn.com/cnni',
            'http://cnn.com/SPORT',
            'http://cnn.com/mostpopular',
            'http://arabic.cnn.com',
            'http://cnn.com/WORLD',
            'http://cnn.com/LATINAMERICA',
            'http://us.cnn.com',
            'http://travel.cnn.com',
            'http://mexico.cnn.com',
            'http://cnn.com/SHOWBIZ',
            'http://edition.cnn.com',
            'http://amanpour.blogs.cnn.com',
            'http://money.cnn.com',
            'http://cnn.com/tools/index.html',
            'http://cnnespanol.cnn.com',
            'http://cnn.com/CNNI',
            'http://business.blogs.cnn.com',
            'http://cnn.com/AFRICA',
            'http://cnn.com/TECH',
            'http://cnn.com/BUSINESS',
        ]
        expected_feeds = ['http://rss.cnn.com/rss/edition.rss']
        expected_brand = 'cnn'

        # NOTE: response mocking is disabled here; the original call was
        #   resp = mock_response_with('http://cnn.com', 'cnn_main_site')
        source = Source(
            'http://cnn.com',
            verbose=False,
            memoize_articles=False,
        )
        source.clean_memo_cache()
        source.build()
開發者ID:erezbil,項目名稱:newspaper,代碼行數:38,代碼來源:unit_tests.py


注:本文中的newspaper.Source.build方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。