本文整理匯總了Python中Menu.Menu.make方法的典型用法代碼示例。如果您正苦於以下問題:Python Menu.make方法的具體用法?Python Menu.make怎麽用?Python Menu.make使用的例子?那麽,這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類Menu.Menu的用法示例。
在下文中一共展示了Menu.make方法的1個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: __init__
# 需要導入模塊: from Menu import Menu [as 別名]
# 或者: from Menu.Menu import make [as 別名]
class ContentExtractor:
def __init__(self, extractorUrl=None):
self.garbageTags = ['script', 'style', 'noscript', 'form', 'input']
self.articleContainer = None
self.containers = {}
self.url = ''
self.extractorUrl = extractorUrl
self.robotparser = robotparser.RobotFileParser()
self.userAgentString = 'Shrinkr/0.9 (http://shrinkr.jonathanjanssens.com/about.php)'
self.urlComponents = urlparse(self.url)
self.head = ''
self.allLinks = []
def read(self, url):
    """Fetch *url* (honouring robots.txt) and parse it into self.soup.

    Raises RuntimeError when the site's robots.txt forbids our
    user agent from fetching the page.
    """
    self.url = url
    self.fixUrl()
    # Consult the site's robots.txt before fetching anything.
    robotsUrl = '%s://%s/robots.txt' % (self.urlComponents.scheme, self.urlComponents.netloc)
    self.robotparser.set_url(robotsUrl)
    self.robotparser.read()
    if not self.robotparser.can_fetch(self.userAgentString, self.url):
        raise RuntimeError('Shrinkr is blocked by the robots.txt file for this site.')
    # Fetch with our own User-agent header instead of urllib's default.
    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', self.userAgentString)]
    response = opener.open(self.url)
    self.soup = BeautifulSoup(response)
    self.clean()
def fixUrl(self):
if self.url[:4] != 'http':
self.url = 'http://%s' % (self.url)
self.urlComponents = urlparse(self.url)
def clean(self):
    """Sanitise self.soup in place.

    Removes HTML comment nodes and every tag listed in self.garbageTags,
    detaches the <head> element into self.head, records all anchors with
    an href into self.allLinks, and rewrites relative hrefs to absolute.
    """
    # Drop every HTML comment node.
    comments = self.soup.find_all(text=lambda text: isinstance(text, Comment))
    for comment in comments:
        comment.extract()
    # Remove script/style/etc. elements entirely.
    for tag in self.garbageTags:
        for element in self.soup.find_all(tag):
            element.decompose()
    # BUGFIX: find('head') returns None for documents without a <head>,
    # which previously raised AttributeError on .extract(); in that case
    # self.head keeps its initial value instead.
    head = self.soup.find('head')
    if head is not None:
        self.head = head.extract()
    self.allLinks = self.soup.find_all('a', href=True)
    self.fixRelativeUrls()
def makeMenu(self):
    """Build the navigation menu HTML and the reader-controls HTML."""
    self.menu = Menu(self.allLinks, self.urlComponents.netloc, extractorUrl=self.extractorUrl)
    self.menu.sortLinks()
    self.menuHTML = self.menu.make()
    # Load the controls template and substitute the page's original URL.
    # NOTE: the placeholder spelling ('orginal_url') must match the
    # template file, so it is reproduced as-is.
    with open('tpl/controls.tpl', 'r') as tpl:
        self.controlsHTML = tpl.read().replace('{{ orginal_url }}', self.url)
def extractArticle(self):
    """Score every tag in the document and pick the best article container.

    Fills self.containers with tag -> score and sets
    self.articleContainer to the highest-scoring tag.
    """
    if self.extractorUrl is not None:
        # extractLinkedArticles() is defined outside this view -- presumably
        # pre-processes linked pages; confirm against the full class.
        self.extractLinkedArticles()
    # Walk every tag in reverse document order.
    for tag in self.soup.find_all()[::-1]:
        evaluate = Evaluate()  # project-local scorer (not visible in this file)
        if tag.name == 'p':
            evaluate.asParagraph(tag)
        else:
            for child in tag.children:
                if isinstance(child, NavigableString):
                    # Bare text longer than 10 chars counts as paragraph-like.
                    text = unicode(child).strip()  # Python 2 'unicode'
                    if len(text) > 10:
                        evaluate.asParagraph(child)
                    continue  # NOTE(review): redundant -- the else branch would be skipped anyway
                else:
                    evaluate.asContainer(child)
            evaluate.asContainer(tag)
        self.containers[tag] = evaluate.score
    # Sort ascending by score so popitem() (LIFO) yields the top scorer.
    self.containers = OrderedDict(sorted(self.containers.items(), key=lambda t: t[1])) # sort based on value (ASC)
    self.articleContainer = self.containers.popitem()[0]
def getExtractedArticle(self, prependHead=True):
    """Return the extracted article serialized as an HTML string.

    prependHead -- when True, the serialized <head> children precede the
    article markup in the returned string.

    NOTE(review): mutates self.articleContainer and self.head in place,
    so calling this twice inserts the menu/controls/style/script blocks
    again -- verify callers only invoke it once per page.
    """
    article = ''
    # append menu and controls to self.articleContainer
    self.articleContainer.insert(1, self.menuHTML)
    self.articleContainer.insert(2, self.controlsHTML)
    # append js and the end and css to self.head
    with open('static/shrinkr.css', 'r') as css:
        self.head.insert(2, '<style>%s</style>' % css.read())
    with open('static/shrinkr.js', 'r') as js:
        self.articleContainer.insert(len(self.articleContainer.contents), '<script>%s</script>' % js.read())
    # Serialize: join child nodes with newlines (Python 2 'unicode').
    if prependHead is True:
        article = unicode.join(u'\n',map(unicode,self.head))
    article += unicode.join(u'\n',map(unicode,self.articleContainer))
    return article
def getExtractedArticleText(self):
return self.articleContainer.get_text()
def fixRelativeUrls(self):
for a in self.soup.find_all('a', href=True):
if a['href'][:4] != 'http':
if a['href'][:1] == '#' or a['href'][:1] == '?':
a['href'] = '%s://%s%s%s' % (self.urlComponents.scheme, self.urlComponents.netloc, self.urlComponents.path, a['href'])
else:
a['href'] = '%s://%s/%s' % (self.urlComponents.scheme, self.urlComponents.netloc, a['href'])
#.........這裏部分代碼省略.........