當前位置: 首頁>>代碼示例>>Python>>正文


Python Document.title方法代碼示例

本文整理匯總了Python中readability.Document.title方法的典型用法代碼示例。如果您正苦於以下問題:Python Document.title方法的具體用法?Python Document.title怎麼用?Python Document.title使用的例子?那麼,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在readability.Document的用法示例。


在下文中一共展示了Document.title方法的6個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: get

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
 def get(self):
     """Respond with the title and markdown text of the page at ``url``.

     The ``url`` request argument is looked up in ``Webcache`` first; when
     a cached document exists it is copied into ``self.res`` and
     ``self.write_json()`` is returned immediately.

     Otherwise the page is downloaded, its title and summary are extracted
     with ``readability.Document``, and the summary is converted to
     markdown with ``html2text``.  The result is stored via
     ``Webcache.new`` and exposed through ``self.res`` only when both the
     title and the markdown are non-empty; ``self.write_json()`` is called
     either way.

     Any exception raised while fetching or extracting is printed and
     swallowed, so no exception propagates out of this method from that
     part of the work.
     """
     url = self.get_argument("url", None)
     # https://www.ifanr.com/1080409
     doc = Webcache.find_one({'url': url}, {'_id': 0})
     if doc:
         self.res = dict(doc)
         return self.write_json()
     try:
         user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
         # The session serves this single request only; the ``with`` block
         # closes it (and its pooled connections) instead of leaking it.
         with requests.session() as sessions:
             sessions.headers['User-Agent'] = user_agent
             response = sessions.get(url)
         # response.encoding = 'utf-8'  # TODO
         response.encoding = get_charset(response)
         doc = Document(response.text)
         title = doc.title()
         summary = doc.summary()
         markdown = html2text.html2text(summary)
         # Re-join words that were split across lines at a hyphen.
         markdown = markdown.replace('-\n', '-')
         markdown = markdown.strip()
         res = {}
         res['url'] = url
         res['title'] = title
         res['markdown'] = markdown
         # Only cache and expose results that actually contain content.
         if title and markdown:
             webcache = Webcache
             webcache.new(res)
             self.res = res
         self.write_json()
     except Exception as e:
         print(e)
開發者ID:anwen,項目名稱:anwen,代碼行數:33,代碼來源:api_share.py

示例2: transform

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
    def transform(self, row, chan):
        """Add the readable title and plain text of a fetched page to ``row``.

        ``row['response']`` is first replaced by the result of
        ``resolve_future``.  Its ``content`` is then parsed with
        ``Document``; the document title is stored in ``row['title']`` and
        the summary, converted by ``html2text`` with a body width of 160 and
        stripped of ``'****'`` sequences and surrounding whitespace, is
        stored in ``row['text']``.

        Yields the updated ``row`` exactly once.  ``chan`` is not used here.
        """
        row['response'] = resolve_future(row['response'])

        document = Document(row['response'].content)
        row['title'] = document.title()

        readable_html = document.summary()
        plain_text = html2text(readable_html, bodywidth=160)
        row['text'] = plain_text.replace('****', '').strip()

        yield row
開發者ID:hartym,項目名稱:readtheweb,代碼行數:12,代碼來源:transformers.py

示例3: extract_article

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
def extract_article(url, ip):
    """Fetch ``url`` through ``get_url`` and extract the article with readability.

    Returns a ``(title, summary)`` tuple when the response status code is
    200, and ``None`` for any other status code.
    """
    response = get_url(url, ip)
    if response.status_code != 200:
        return None

    article = Document(response.content)
    # NOTE(review): ``unicode`` is not defined in this block; presumably the
    # Python 2 builtin -- confirm before running under Python 3.
    # The summary is extracted before the title, as in the original order.
    summary = unicode(article.summary())
    title = unicode(article.title())
    return title, summary
開發者ID:apg,項目名稱:text-please,代碼行數:13,代碼來源:textplease.py

示例4: view_html

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
def view_html(url):
    """Print the HTML document at ``url`` as wrapped, markdown'd text.

    The page is downloaded with ``urlopen``, parsed with
    ``readability.Document``, and printed as the bold title followed by the
    document's markdown, passed through ``asciify`` and ``wrap`` (width 80).

    Uses the original author's own fork of python-readability; the
    ``Document.markdown()`` call presumably comes from that fork -- confirm
    against the installed package.

    If ``readability`` cannot be imported, a message is printed and the
    function returns ``None`` without fetching anything.
    """
    try:
        from readability import Document
    except ImportError:
        print("Can't convert document: python-readability is not installed")
        return

    from contextlib import closing

    # urlopen() returns an open handle; close it as soon as the body has
    # been read instead of leaving it to the garbage collector.
    with closing(urlopen(url)) as response:
        html = response.read()

    doc = Document(html)
    text = BOLD + doc.title() + RESET + "\n" + doc.markdown()
    print(wrap(asciify(text, strip_newlines=False), 80, ''))
開發者ID:edd07,項目名稱:resh,代碼行數:14,代碼來源:view.py

示例5: extract_content_texts

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
def extract_content_texts(name):
    """Convert the saved raw HTML articles of ``name`` into JSON files.

    Every ``*.html`` file in ``<DEFAULT_SAVE_PATH>/<name>/raw_articles`` is
    parsed with ``Document`` and its title, content and summary are dumped
    to ``<DEFAULT_SAVE_PATH>/<name>/json_articles/<html file name>.json``.
    HTML files whose JSON output already exists are skipped and logged.
    """
    raw_dir = os.path.join(DEFAULT_SAVE_PATH, name, 'raw_articles')
    json_dir = os.path.join(DEFAULT_SAVE_PATH, name, 'json_articles')
    mkdir_p(json_dir)

    for html_path in glob.glob(raw_dir + '/*.html'):
        json_name = os.path.basename(html_path) + '.json'
        json_path = os.path.join(json_dir, json_name)

        # Do not redo work for articles that were already converted.
        if os.path.exists(json_path):
            logging.info('Skipping existing json data: {0}'.format(json_path))
            continue

        with open(html_path, 'r') as source:
            article = Document(source.read())
            payload = {
                'title': article.title(),
                'content': article.content(),
                'summary': article.summary(),
            }
            with open(json_path, 'w') as sink:
                json.dump(payload, sink)
開發者ID:gregjan,項目名稱:bullshit-detector,代碼行數:20,代碼來源:wbm_api.py

示例6: get

# 需要導入模塊: from readability import Document [as 別名]
# 注意: title 是 Document 類的方法,無法單獨導入;請通過實例調用,例如 Document(html).title()
 def get(self):
     """Create a share for the ``link`` request argument and redirect to it.

     Reads ``sharetype`` (default ``"goodlink"``) and ``link`` from the
     request and the user id from ``self.current_user``.  When ``Webcache``
     has no entry for the link, the page is downloaded, its title and
     summary are extracted with ``readability.Document``, the summary is
     converted to markdown with ``html2text``, and a new ``Webcache`` entry
     is stored if the markdown is non-empty.  When an entry already exists,
     its title is reused.

     A ``Share`` is then created from the title, share type, link and user
     id, the user's ``user_leaf`` is increased by 10 and saved, and the
     request is redirected to ``/share/<share id>``.

     Raises ``AssertionError`` when ``link`` is empty (only while asserts
     are enabled).
     """
     sharetype = self.get_argument("sharetype", "goodlink")
     link = self.get_argument("link", '')
     user_id = self.current_user["user_id"]
     assert link
     url = link
     doc = Webcache.find_one({'url': url}, {'_id': 0})
     if not doc:
         user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36'
         # The session serves this single request only; the ``with`` block
         # closes it (and its pooled connections) instead of leaking it.
         with requests.session() as sessions:
             sessions.headers['User-Agent'] = user_agent
             response = sessions.get(url)
         # response.encoding = 'utf-8'  # TODO
         response.encoding = get_charset(response)
         logger.info('response.encoding {}'.format(response.encoding))
         doc = Document(response.text)
         doc_title = doc.title()
         summary = doc.summary()
         _markdown = html2text.html2text(summary)
         # Re-join words that were split across lines at a hyphen.
         _markdown = _markdown.replace('-\n', '-').strip()
         res_webcache = {}
         res_webcache['url'] = url
         res_webcache['title'] = doc_title
         res_webcache['markdown'] = _markdown
         # Only cache pages that produced some readable content.
         if _markdown:
             webcache = Webcache
             webcache.new(res_webcache)
     else:
         logger.info('already')
         # NOTE(review): here ``doc`` is the cached Webcache record, not a
         # readability Document, so ``title`` is read as an attribute rather
         # than called -- confirm the record type supports attribute access.
         doc_title = doc.title
     res = {
         'title': doc_title,
         'sharetype': sharetype,
         'link': link,
     }
     share = Share
     res['user_id'] = user_id
     share = share.new(res)
     user = User.by_sid(user_id)
     # Reward the sharing user.
     user.user_leaf += 10
     user.save()
     self.redirect("/share/" + str(share.id))
開發者ID:anwen,項目名稱:anwen,代碼行數:44,代碼來源:share_by_get.py


注:本文中的readability.Document.title方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。