本文整理匯總了Python中readability.Document.title方法的典型用法代碼示例。如果您正苦於以下問題:Python Document.title方法的具體用法?Python Document.title怎麼用?Python Document.title使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類readability.Document的用法示例。
在下文中一共展示了Document.title方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。
示例1: get
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def get(self):
    """Respond with the cached or freshly extracted article for ``url``.

    The ``url`` request argument is looked up in ``Webcache`` first; on a
    hit the stored record is written as the JSON response.  On a miss the
    page is downloaded, reduced to its main content with readability and
    converted to Markdown.  The result is stored in ``Webcache`` only when
    both a title and Markdown text were produced.

    Any exception raised while fetching or converting is printed and
    swallowed, so in that case this method writes no JSON response.
    """
    url = self.get_argument("url", None)
    # Example input: https://www.ifanr.com/1080409
    doc = Webcache.find_one({'url': url}, {'_id': 0})
    if doc:
        self.res = dict(doc)
        return self.write_json()
    try:
        # Use the session as a context manager so its pooled connections
        # are released even if the request or charset detection raises.
        with requests.session() as sessions:
            sessions.headers[
                'User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
            response = sessions.get(url)
            # Override the declared encoding with the detected charset
            # before the body is decoded through ``response.text``.
            response.encoding = get_charset(response)
            html = response.text
        doc = Document(html)
        title = doc.title()
        summary = doc.summary()
        markdown = html2text.html2text(summary)
        # Re-join words that html2text hyphen-wrapped across lines.
        markdown = markdown.replace('-\n', '-')
        markdown = markdown.strip()
        res = {
            'url': url,
            'title': title,
            'markdown': markdown,
        }
        # NOTE(review): the source lost its indentation; this assumes only
        # the cache write and ``self.res`` assignment are conditional and
        # the JSON response is always written -- confirm against upstream.
        if title and markdown:
            webcache = Webcache
            webcache.new(res)
            self.res = res
        self.write_json()
    except Exception as e:
        print(e)
示例2: transform
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def transform(self, row, chan):
    """Yield ``row`` enriched with the extracted title and plain text.

    ``row['response']`` is replaced by the result of ``resolve_future``;
    its ``content`` is parsed with readability, the title is stored under
    ``row['title']`` and the summary, converted by ``html2text`` with a
    body width of 160 and stripped of ``'****'`` runs, under ``row['text']``.
    ``chan`` is not used by this method.
    """
    resolved = resolve_future(row['response'])
    row['response'] = resolved
    article = Document(resolved.content)
    row['title'] = article.title()
    plain = html2text(article.summary(), bodywidth=160)
    row['text'] = plain.replace('****', '').strip()
    yield row
示例3: extract_article
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def extract_article(url, ip):
    """Extract an article's title and summary using readability.

    The page is fetched through ``get_url(url, ip)``.  Returns a
    ``(title, summary)`` tuple of unicode strings when the response status
    code is 200, and ``None`` for any other status.
    """
    response = get_url(url, ip)
    if response.status_code != 200:
        return None
    article = Document(response.content)
    # The summary is extracted before the title, as in the original order.
    summary = unicode(article.summary())
    title = unicode(article.title())
    return title, summary
示例4: view_html
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def view_html(url):
    """Print the HTML document at ``url`` as wrapped, markdown'd text.

    Relies on the author's own fork of python-readability (it calls
    ``Document.markdown()``).  If the package cannot be imported, a message
    is printed and the function returns ``None`` without fetching anything.
    Otherwise the bold title followed by the markdown body is passed
    through ``asciify`` and ``wrap`` (width 80) and printed.
    """
    try:
        from readability import Document
    except ImportError:
        print("Can't convert document: python-readability is not installed")
        return
    from contextlib import closing

    # closing() guarantees the HTTP response is closed once the body has
    # been read, instead of leaving that to garbage collection.
    with closing(urlopen(url)) as response:
        html = response.read()
    doc = Document(html)
    text = BOLD + doc.title() + RESET + "\n" + doc.markdown()
    print(wrap(asciify(text, strip_newlines=False), 80, ''))
示例5: extract_content_texts
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def extract_content_texts(name):
    """Convert the saved raw HTML articles for ``name`` into JSON files.

    Every ``*.html`` file under ``<DEFAULT_SAVE_PATH>/<name>/raw_articles``
    is parsed with readability and its title, content and summary are
    dumped to ``<DEFAULT_SAVE_PATH>/<name>/json_articles/<file>.json``.
    Files whose JSON output already exists are logged and skipped.
    """
    raw_dir = os.path.join(DEFAULT_SAVE_PATH, name, 'raw_articles')
    json_dir = os.path.join(DEFAULT_SAVE_PATH, name, 'json_articles')
    mkdir_p(json_dir)
    for html_path in glob.glob(raw_dir + '/*.html'):
        target = os.path.join(json_dir, os.path.basename(html_path) + '.json')
        if os.path.exists(target):
            logging.info('Skipping existing json data: {0}'.format(target))
            continue
        with open(html_path, 'r') as source:
            article = Document(source.read())
        # Keys are evaluated in this order: title, content, summary.
        payload = {
            'title': article.title(),
            'content': article.content(),
            'summary': article.summary(),
        }
        with open(target, 'w') as sink:
            json.dump(payload, sink)
示例6: get
# 需要導入模塊: from readability import Document [as 別名]
# 或者: from readability.Document import title [as 別名]
def get(self):
    """Create a share for the ``link`` argument and redirect to it.

    Reads the ``sharetype`` (default ``"goodlink"``) and ``link`` request
    arguments and the current user's id.  If ``Webcache`` has no record for
    the link, the page is downloaded, its title and main content are
    extracted with readability, the content is converted to Markdown and,
    when that Markdown is non-empty, the result is stored in ``Webcache``.
    If a record exists, its title is reused.

    A ``Share`` is then created from the title, share type, link and user
    id, the user's ``user_leaf`` is increased by 10 and saved, and the
    request is redirected to ``/share/<share.id>``.

    Raises:
        AssertionError: if ``link`` is empty.
    """
    sharetype = self.get_argument("sharetype", "goodlink")
    link = self.get_argument("link", '')
    user_id = self.current_user["user_id"]
    # NOTE(review): ``assert`` is stripped under ``python -O``; it is kept
    # here so the exception type seen by the framework does not change.
    assert link
    doc = Webcache.find_one({'url': link}, {'_id': 0})
    if not doc:
        # Use the session as a context manager so its pooled connections
        # are released even if the request or charset detection raises.
        with requests.session() as sessions:
            sessions.headers[
                'User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
            response = sessions.get(link)
            # Override the declared encoding with the detected charset
            # before the body is decoded through ``response.text``.
            response.encoding = get_charset(response)
            logger.info('response.encoding {}'.format(response.encoding))
            html = response.text
        doc = Document(html)
        doc_title = doc.title()
        summary = doc.summary()
        _markdown = html2text.html2text(summary)
        # Re-join words that html2text hyphen-wrapped across lines.
        _markdown = _markdown.replace('-\n', '-').strip()
        res_webcache = {
            'url': link,
            'title': doc_title,
            'markdown': _markdown,
        }
        if _markdown:
            webcache = Webcache
            webcache.new(res_webcache)
    else:
        logger.info('already')
        # Cached record: the title is read as an attribute, not called.
        doc_title = doc.title
    res = {
        'title': doc_title,
        'sharetype': sharetype,
        'link': link,
    }
    share = Share
    res['user_id'] = user_id
    share = share.new(res)
    user = User.by_sid(user_id)
    user.user_leaf += 10
    user.save()
    self.redirect("/share/" + str(share.id))