當前位置: 首頁>>代碼示例>>Python>>正文


Python Menu.make方法代碼示例

本文整理匯總了Python中Menu.Menu.make方法的典型用法代碼示例。如果您正苦於以下問題:Python Menu.make方法的具體用法?Python Menu.make怎麼用?Python Menu.make使用的例子?那麼,這裡精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Menu.Menu的用法示例。


在下文中一共展示了Menu.make方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於系統推薦出更棒的Python代碼示例。

示例1: __init__

# 需要導入模塊: from Menu import Menu [as 別名]
# 或者: from Menu.Menu import make [as 別名]
class ContentExtractor:

    def __init__(self, extractorUrl=None):
        self.garbageTags = ['script', 'style', 'noscript', 'form', 'input']
        self.articleContainer = None
        self.containers = {}
        self.url = ''
        self.extractorUrl = extractorUrl
        self.robotparser = robotparser.RobotFileParser()
        self.userAgentString = 'Shrinkr/0.9 (http://shrinkr.jonathanjanssens.com/about.php)'
        self.urlComponents = urlparse(self.url)
        self.head = ''
        self.allLinks = []

    def read(self, url):
        """Download url, parse it into self.soup and run clean() on it.

        The URL is first normalised by fixUrl(), then the site's robots.txt
        is fetched and consulted before the page itself is requested with
        this extractor's user agent string.

        Raises:
            RuntimeError: if robots.txt disallows this user agent for the URL.
        """
        self.url = url
        self.fixUrl()
        robotsUrl = '%s://%s/robots.txt' % (self.urlComponents.scheme, self.urlComponents.netloc)
        self.robotparser.set_url(robotsUrl)
        self.robotparser.read()
        if self.robotparser.can_fetch(self.userAgentString, self.url) is False:
            raise RuntimeError('Shrinkr is blocked by the robots.txt file for this site.')
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', self.userAgentString)]
        response = opener.open(self.url)
        try:
            # BeautifulSoup reads the whole file-like object while building
            # the tree, so the connection can be released immediately after.
            self.soup = BeautifulSoup(response)
        finally:
            response.close()
        self.clean()

    def fixUrl(self):
        if self.url[:4] != 'http':
            self.url = 'http://%s' % (self.url)
        self.urlComponents = urlparse(self.url)


    def clean(self):
        comments = self.soup.find_all(text=lambda text:isinstance(text, Comment))
        for comment in comments:
            comment.extract()
        for tag in self.garbageTags:
            for element in self.soup.find_all(tag):
                element.decompose()
        self.head = self.soup.find('head').extract()
        self.allLinks = self.soup.find_all('a', href=True)
        self.fixRelativeUrls()

    def makeMenu(self):
        """Build the menu and controls markup for the current page.

        Stores the Menu instance in self.menu, its rendered output in
        self.menuHTML, and the controls template with the page URL filled
        in as self.controlsHTML.
        """
        menu = Menu(self.allLinks, self.urlComponents.netloc, extractorUrl=self.extractorUrl)
        self.menu = menu
        menu.sortLinks()
        self.menuHTML = menu.make()
        with open('tpl/controls.tpl', 'r') as template:
            markup = template.read()
        # The placeholder spelling matches the one used in the template file.
        self.controlsHTML = markup.replace('{{ orginal_url }}', self.url)

    def extractArticle(self):
        """Score the page's tags and keep the top one in self.articleContainer.

        Every non-<p> tag returned by find_all() gets a fresh Evaluate
        object and its resulting score is recorded in self.containers.
        Afterwards self.containers becomes an OrderedDict sorted by
        ascending score, with the highest-scoring entry popped off and
        stored as self.articleContainer.
        """
        if self.extractorUrl is not None:
            self.extractLinkedArticles()
        # Visit the tags in the reverse of the order find_all() returns them.
        for tag in reversed(self.soup.find_all()):
            evaluate = Evaluate()
            if tag.name == 'p':
                # Paragraphs are evaluated but never recorded as candidates.
                evaluate.asParagraph(tag)
                continue
            for child in tag.children:
                if not isinstance(child, NavigableString):
                    evaluate.asContainer(child)
                elif len(unicode(child).strip()) > 10:
                    # Loose text only counts when longer than ten characters.
                    evaluate.asParagraph(child)
            evaluate.asContainer(tag)
            self.containers[tag] = evaluate.score
        # Ascending by score, so popitem() hands back the best candidate.
        ranked = sorted(self.containers.items(), key=lambda entry: entry[1])
        self.containers = OrderedDict(ranked)
        bestTag, _bestScore = self.containers.popitem()
        self.articleContainer = bestTag

    def getExtractedArticle(self, prependHead=True):
        """Return the extracted article as markup, with menu, CSS and JS added.

        The menu and controls are inserted into self.articleContainer, the
        stylesheet into self.head and the script at the end of the
        container. The result is the children of the container, one per
        line, preceded by the children of self.head when prependHead is
        exactly True. Each call performs the inserts again.
        """
        container = self.articleContainer
        # Menu and controls go near the top of the article container.
        container.insert(1, self.menuHTML)
        container.insert(2, self.controlsHTML)
        # Stylesheet goes into the head, script at the very end of the body.
        with open('static/shrinkr.css', 'r') as css:
            self.head.insert(2, '<style>%s</style>' % css.read())
        with open('static/shrinkr.js', 'r') as js:
            container.insert(len(container.contents), '<script>%s</script>' % js.read())
        pieces = []
        if prependHead is True:
            pieces.append(u'\n'.join(unicode(node) for node in self.head))
        pieces.append(u'\n'.join(unicode(node) for node in container))
        return u''.join(pieces)

    def getExtractedArticleText(self):
        return self.articleContainer.get_text()

    def fixRelativeUrls(self):
        for a in self.soup.find_all('a', href=True):
            if a['href'][:4] != 'http':
                if a['href'][:1] == '#' or a['href'][:1] == '?':
                    a['href'] = '%s://%s%s%s' % (self.urlComponents.scheme, self.urlComponents.netloc, self.urlComponents.path, a['href'])
                else:
                    a['href'] = '%s://%s/%s' % (self.urlComponents.scheme, self.urlComponents.netloc, a['href'])

#.........這裏部分代碼省略.........
開發者ID:gvsurenderreddy,項目名稱:shrinkr,代碼行數:103,代碼來源:ContentExtractor.py


注:本文中的Menu.Menu.make方法示例由純淨天空整理自Github/MSDocs等開源代碼及文檔管理平台,相關代碼片段篩選自各路編程大神貢獻的開源項目,源碼版權歸原作者所有,傳播和使用請參考對應項目的License;未經允許,請勿轉載。