本文整理汇总了Python中newspaper.Article.has_top_image方法的典型用法代码示例。如果您正苦于以下问题：Python Article.has_top_image方法的具体用法？Python Article.has_top_image怎么用？Python Article.has_top_image使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类newspaper.Article的用法示例。
在下文中一共展示了Article.has_top_image方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: extract_article_from
# 需要导入模块: from newspaper import Article [as 别名]
# 或者: from newspaper.Article import has_top_image [as 别名]
def extract_article_from(self, url):
    """Download and parse the page at *url* and return a summary record.

    Args:
        url: Address of the article to fetch.

    Returns:
        A dict with the keys ``image``, ``title``, ``source_title``,
        ``summary`` and ``href``; ``None`` when downloading or parsing
        raises ``ArticleException``.
    """
    # Imported locally because the file's import block is not visible here.
    from html import escape

    doc = Article(url)
    try:
        doc.download()
        doc.parse()
    except ArticleException:
        print("Exception getting article from url [{}]".format(url))
        return None

    # The image URL and the article text both originate from the fetched
    # page. They are escaped before being embedded in markup, and the src
    # value is quoted, so that spaces, quotes or angle brackets in them
    # cannot break out of the tag or inject additional markup.
    image = ""
    if doc.has_top_image():
        image = '<img src="{}">'.format(escape(doc.top_image, quote=True))

    return {
        "image": image,
        "title": doc.title,
        # Placeholder value; nothing in this method fills it in.
        "source_title": "notYetSet",
        # Only the first 300 characters of the body are used. The trailing
        # "</br>" is kept exactly as existing consumers receive it.
        "summary": image + escape(doc.text[:300], quote=False) + " ...</br>",
        "href": url,
    }
示例2: get_document_json
# 需要导入模块: from newspaper import Article [as 别名]
# 或者: from newspaper.Article import has_top_image [as 别名]
def get_document_json(post):
    """Build a document record for the article a post links to.

    Parameters
    -------------
    post: dict
        post data. ``post['url']`` is fetched and analysed. ``post['image']``
        is overwritten in place with the article's top image when it equals
        ``MISSING_IMAGE`` and the article has a top image.

    Returns
    -------------
    dict: document data with the keys ``title``, ``authors``, ``created_on``,
        ``language``, ``keywords`` and ``url``. When ``ArticleException`` is
        raised while processing the article, only ``url`` is present.
    """
    url = post['url']
    try:
        article = Article(url)
        article.download()
        article.parse()
        article.nlp()

        # Only date-like values are formatted; a missing date or one that is
        # already a string is reported as None.
        published = article.publish_date
        if published is None or isinstance(published, str):
            created_on = None
        else:
            created_on = published.strftime('%Y-%m-%d')

        keywords = article.keywords
        if article.meta_lang:
            # Stop words can only be looked up when the language is known.
            # A frozenset gives constant-time membership tests.
            stopwords = frozenset(safe_get_stop_words(article.meta_lang))
            keywords = [word for word in keywords if word not in stopwords]
        # Slugify, then de-duplicate; the resulting order is unspecified.
        keywords = list({slugify(word) for word in keywords})

        document = {
            'title': article.title,
            'authors': article.authors,
            'created_on': created_on,
            'language': article.meta_lang,
            'keywords': keywords,
            'url': url,
        }

        # Side effect on the caller's dict: fill in a missing post image.
        if article.has_top_image() and post['image'] == MISSING_IMAGE:
            post['image'] = article.top_image
    except ArticleException:
        document = {
            'url': url,
        }
    return document
示例3: article_handler
# 需要导入模块: from newspaper import Article [as 别名]
# 或者: from newspaper.Article import has_top_image [as 别名]
def article_handler(url=None, nlp=False):
    """Fetch the article at *url* and return its extracted fields as a dict.

    Args:
        url: Address of the article. A falsy value is reported as an error
            and yields the all-``None`` response.
        nlp: NLP processing (which fills ``summary`` and ``keywords``) runs
            only when this is exactly ``True``.

    Returns:
        A dict with the keys ``publish_date``, ``html``, ``title``,
        ``top_image``, ``source_url``, ``images``, ``authors``, ``text``,
        ``canonical_link``, ``movies``, ``keywords`` and ``summary``. Every
        value starts as ``None``; if an exception occurs part-way, the
        fields assigned before it keep their values. ``html`` is never
        populated by this function.
    """
    response = dict.fromkeys((
        'publish_date',
        'html',
        'title',
        'top_image',
        'source_url',
        'images',
        'authors',
        'text',
        'canonical_link',
        'movies',
        'keywords',
        'summary',
    ))

    if not url:
        statsd.increment('url_analysis.empty')
        loggly.error("Cannot parse empty URL")
        return response

    try:
        article = Article(url)

        if not article.is_downloaded:
            statsd.increment('url_analysis.download')
            loggly.info("Downloading article")
            article.download()

        if not article.is_parsed:
            statsd.increment('url_analysis.parse')
            loggly.info("Parsing article")
            article.parse()

        response['title'] = article.title

        if article.has_top_image() is True:
            statsd.increment('url_analysis.get_top_image')
            loggly.info("Extracting top_image")
            response['top_image'] = article.top_image

        if nlp is True:
            statsd.increment('url_analysis.nlp_process')
            loggly.info("Doing NLP processing")
            article.nlp()
            for field in ('summary', 'keywords'):
                response[field] = getattr(article, field)

        # Response keys match the Article attribute names, so the remaining
        # fields are copied one by one in a fixed order.
        for field in (
            'movies',
            'images',
            'authors',
            'text',
            'publish_date',
            'source_url',
            'canonical_link',
        ):
            response[field] = getattr(article, field)

        statsd.increment('url_analysis.ok')
    except Exception as e:
        # Any failure is counted and logged; the partially filled response
        # is still handed back to the caller.
        statsd.increment('url_analysis.error')
        loggly.error(e)

    return response