本文整理汇总了Python中newspaper.Article.text方法的典型用法代码示例。如果您正苦于以下问题:Python Article.text方法的具体用法?Python Article.text怎么用?Python Article.text使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类newspaper.Article
的用法示例。
在下文中一共展示了Article.text方法的3个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: show_article
# Required import: from newspaper import Article
# Or: from newspaper.Article import text
def show_article():
    """Flask view: download and parse an article from a URL given in the
    query string, optionally overriding the extracted body text with
    user-supplied text before running NLP, then render the result.

    Query parameters:
        url_to_clean:  URL of the article to fetch and parse (required;
                       redirects to the index view when missing).
        text_to_clean: optional replacement text; when given it overwrites
                       ``article.text`` so keywords/summary are computed
                       from it instead of the extracted body.
    """
    url_to_clean = request.args.get('url_to_clean')
    text_to_clean = request.args.get('text_to_clean')
    # Re-enabled guard: without it, Article(None) below raises.
    if not url_to_clean:
        return redirect(url_for('index'))
    article = Article(url_to_clean)
    article.download()
    article.parse()
    try:
        html_string = ElementTree.tostring(article.clean_top_node)
    except Exception:  # narrowed from bare except; clean_top_node may be None
        html_string = "Error converting html to string."
    try:
        # Overwrite article.text with the user-provided text before NLP so
        # the summary/keywords reflect it.
        article.text = text_to_clean
        article.nlp()
    except Exception:  # narrowed from bare except; best-effort NLP
        log.error("Couldn't process with NLP")
    a = {
        'html': html_string,
        'authors': str(', '.join(article.authors)),
        'title': article.title,
        'text': article.text,
        'top_image': article.top_image,
        'videos': str(', '.join(article.movies)),
        'keywords': str(', '.join(article.keywords)),
        'summary': article.summary,
    }
    return render_template('article/index.html', article=a, url=url_to_clean)
示例2: get_article_info
# Required import: from newspaper import Article
# Or: from newspaper.Article import text
def get_article_info(memento_url, dt, uri_id, base_dir):
    """Build a newspaper ``Article`` for a memento, preferring offline data.

    Downloads/parses the article using locally cached HTML, then, when a
    cached plain-text version exists, substitutes it for the extracted
    body before running NLP.

    Args:
        memento_url: URL of the archived page (used to construct Article).
        dt, uri_id, base_dir: passed through to ``get_uri_offline_data``
            to locate the cached "html" and "txt" files.

    Returns:
        The processed ``Article`` instance.
    """
    print(memento_url)  # parenthesized: valid in both Python 2 and 3
    article = Article(memento_url)
    offline_html = get_uri_offline_data(dt, uri_id, "html", base_dir)
    article.download(offline_html)
    article.parse()
    offline_text = get_uri_offline_data(dt, uri_id, "txt", base_dir)
    if offline_text is not None:  # 'is not None' instead of '!= None'
        article.text = offline_text
    # NOTE(review): original indentation was lost; nlp() is assumed to run
    # unconditionally (it works on the parsed text either way) — confirm.
    article.nlp()
    return article
示例3: home
# Required import: from newspaper import Article
# Or: from newspaper.Article import text
def home(url):
    """Fetch, parse and NLP-process an article URL; return a JSON report.

    Args:
        url: article URL; must use the http or https scheme.

    Returns:
        A JSON string with title/authors/text, NLP results (summary,
        keywords, tags), media (top_image, images, movies) and metadata,
        or ``{"url": ..., "error": "Invalid URL"}`` for a bad scheme.
    """
    data = {}
    data['url'] = url
    # Validate the scheme before attempting any network fetch.
    if urlparse.urlparse(url).scheme not in ('http', 'https'):
        data['error'] = 'Invalid URL'
        return json.dumps(data)
    a = Article(url)
    a.download()
    a.parse()
    data['title'] = a.title
    data['authors'] = a.authors
    data['text'] = a.text
    try:
        a.nlp()
    except UnicodeDecodeError:
        # Strip non-ascii characters and retry NLP.
        a.title = to_ascii(a.title)
        a.text = to_ascii(a.text)
        a.nlp()
    # NLP
    data['summary'] = a.summary
    data['keywords'] = a.keywords
    data['tags'] = list(a.tags)
    # Media
    data['top_image'] = a.top_image
    # Fix: newspaper's `images` is a set, which json.dumps cannot
    # serialize (TypeError) — convert like `tags` above.
    data['images'] = list(a.images)
    data['movies'] = a.movies
    # Meta
    data['source_url'] = a.source_url
    data['published_date'] = a.published_date
    data['meta_img'] = a.meta_img
    data['meta_keywords'] = a.meta_keywords
    data['meta_lang'] = a.meta_lang
    return json.dumps(data)