当前位置: 首页>>代码示例>>Python>>正文


Python CSSSelector.iter方法代码示例

本文整理汇总了Python中lxml.cssselect.CSSSelector.iter方法的典型用法代码示例。如果您正苦于以下问题：Python CSSSelector.iter方法的具体用法？Python CSSSelector.iter怎么用？Python CSSSelector.iter使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类lxml.cssselect.CSSSelector的用法示例。


在下文中一共展示了CSSSelector.iter方法的3个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: process_html

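# Required import: from lxml.cssselect import CSSSelector [as alias]
# Note: iter cannot be imported from the CSSSelector class; in the example below, iter() is called on the element the selector returns (body.iter()).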
	def process_html(self, html, path):
		"""Parse one HTML document and record its ids, classes and inline CSS.

		The ``<body>`` element is appended to ``self._bodies``. When
		``self.optimize_lookup`` is true, every ``id`` and ``class`` value
		found under ``<body>`` is added to ``self._all_ids`` and
		``self._all_classes``. The text of each non-empty ``<style>`` element
		is stored in ``self.blocks`` under the key ``(line_number, path)``,
		where ``line_number`` is the 1-based index of the first source line
		that contains the first line of that CSS.

		Args:
			html: The markup as UTF-8 encoded bytes (it is decoded with
				``.decode('utf-8')`` before parsing).
			path: Value stored as the second item of each ``self.blocks`` key.

		Raises:
			ParserError: If parsing yields no root element.
			ValueError: If the selector does not return exactly one ``<body>``
				(raised by the single-element unpacking).
		"""
		parser = etree.HTMLParser(encoding='utf-8')
		tree = etree.fromstring(html.decode('utf-8'), parser).getroottree()
		page = tree.getroot()

		if page is None:
			print(repr(html))
			raise ParserError('Could not parse the html')

		# These lines are split from the undecoded input, so the line numbers
		# refer to the document exactly as it was handed in.
		lines = html.splitlines()
		body, = CSSSelector('body')(page)
		self._bodies.append(body)
		if self.optimize_lookup:
			for each in body.iter():
				identifier = each.attrib.get('id')
				if identifier:
					self._all_ids.add(identifier)
				classes = each.attrib.get('class')
				if classes:
					for class_ in classes.split():
						self._all_classes.add(class_)

		for style in CSSSelector('style')(page):
			css = style.text
			# An empty (text is None) or whitespace-only <style> has no first
			# line to look for; skip it instead of crashing.
			if css is None or not css.strip():
				continue
			first_line = css.strip().splitlines()[0]
			# ``lines`` holds undecoded input, so compare against the UTF-8
			# encoding of the parsed text rather than mixing both kinds.
			needle = first_line.encode('utf-8')
			for i, line in enumerate(lines):
				if needle in line:
					self.blocks[(i + 1, path)] = css
					break
开发者ID:mikelsons,项目名称:css-dust-cleaner,代码行数:31,代码来源:dustcleaner.py

示例2: process_html

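# Required import: from lxml.cssselect import CSSSelector [as alias]
# Note: iter cannot be imported from the CSSSelector class; in the example below, iter() is called on the element the selector returns (body.iter()).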
    def process_html(self, html, url):
        """Parse one HTML page and collect its ids, classes and stylesheets.

        The ``<body>`` element is appended to ``self._bodies``. When
        ``self.optimize_lookup`` is true, every ``id`` and ``class`` value
        found under ``<body>`` is added to ``self._all_ids`` and
        ``self._all_classes``. Inline ``<style>`` text is stored in
        ``self.blocks`` under ``(line_number, url)``; CSS referenced by
        ``<link>`` elements is fetched with ``self.download`` and stored
        under ``(absolute_url, href)``.

        Args:
            html: The page markup as text (it is encoded to UTF-8 for the
                parser and split into lines for locating inline styles).
            url: URL of the page; passed to ``self.make_absolute_url``
                together with each link's ``href`` and used in the keys of
                inline style blocks.

        Raises:
            ParserError: If parsing yields no root element.
            ValueError: If the selector does not return exactly one ``<body>``
                (raised by the single-element unpacking).
        """
        parser = etree.HTMLParser(encoding='utf-8')
        tree = etree.fromstring(html.encode('utf-8'), parser).getroottree()
        page = tree.getroot()

        if page is None:
            print(repr(html))
            raise ParserError('Could not parse the html')

        lines = html.splitlines()
        body, = CSSSelector('body')(page)
        self._bodies.append(body)
        if self.optimize_lookup:
            for each in body.iter():
                identifier = each.attrib.get('id')
                if identifier:
                    self._all_ids.add(identifier)
                classes = each.attrib.get('class')
                if classes:
                    for class_ in classes.split():
                        self._all_classes.add(class_)

        for style in CSSSelector('style')(page):
            try:
                first_line = style.text.strip().splitlines()[0]
            except (IndexError, AttributeError):
                # IndexError: the style tag held only whitespace.
                # AttributeError: the style tag held nothing at all, so
                # ``style.text`` is None.
                continue
            for i, line in enumerate(lines):
                if line.count(first_line):
                    key = (i + 1, url)
                    self.blocks[key] = style.text
                    break

        for link in CSSSelector('link')(page):
            href = link.attrib.get('href')
            if href is None:
                # Without an href there is nothing to download; indexing
                # ``attrib['href']`` here would raise KeyError.
                continue
            if (
                link.attrib.get('rel', '') == 'stylesheet' or
                href.lower().split('?')[0].endswith('.css')
            ):
                link_url = self.make_absolute_url(url, href)
                key = (link_url, href)
                self.blocks[key] = self.download(link_url)
                if self.preserve_remote_urls:
                    self.blocks[key] = self._rewrite_urls(
                        self.blocks[key],
                        link_url
                    )
开发者ID:alanjds,项目名称:mincss,代码行数:53,代码来源:processor.py

示例3: process_html

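# Required import: from lxml.cssselect import CSSSelector [as alias]
# Note: iter cannot be imported from the CSSSelector class; in the example below, iter() is called on the element the selector returns (body.iter()).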
    def process_html(self, html, url):
        """Parse one HTML page and collect its ids, classes and stylesheets.

        The ``<body>`` element is appended to ``self._bodies``. When
        ``self.optimize_lookup`` is true, every ``id`` and ``class`` value
        found under ``<body>`` is added to ``self._all_ids`` and
        ``self._all_classes``. Inline ``<style>`` text is stored in
        ``self.blocks`` under ``(line_number, url)``; CSS referenced by
        ``<link>`` elements is fetched with ``self._download`` and stored
        under ``(absolute_url, href)``.

        Args:
            html: The page markup; handed to the parser unchanged and split
                into lines for locating inline styles.
            url: URL of the page; passed to ``self.make_absolute_url``
                together with each link's ``href`` and used in the keys of
                inline style blocks.

        Raises:
            ParserError: If parsing yields no root element.
            ValueError: If the selector does not return exactly one ``<body>``
                (raised by the single-element unpacking).
        """
        parser = etree.HTMLParser()
        tree = etree.fromstring(html, parser).getroottree()
        page = tree.getroot()

        if page is None:
            # Parenthesised form prints the same text under the Python 2
            # print statement and the Python 3 print function.
            print(repr(html))
            raise ParserError("Could not parse the html")

        lines = html.splitlines()
        body, = CSSSelector('body')(page)
        self._bodies.append(body)
        if self.optimize_lookup:
            for each in body.iter():
                identifier = each.attrib.get('id')
                if identifier:
                    self._all_ids.add(identifier)
                classes = each.attrib.get('class')
                if classes:
                    for class_ in classes.split():
                        self._all_classes.add(class_)

        for style in CSSSelector('style')(page):
            css = style.text
            # An empty (text is None) or whitespace-only <style> has no first
            # line to look for; skip it instead of crashing.
            if css is None or not css.strip():
                continue
            first_line = css.strip().splitlines()[0]
            for i, line in enumerate(lines):
                if line.count(first_line):
                    key = (i + 1, url)
                    self.blocks[key] = css
                    break

        for link in CSSSelector('link')(page):
            href = link.attrib.get('href')
            if href is None:
                # Without an href there is nothing to download; indexing
                # ``attrib['href']`` here would raise KeyError.
                continue
            if (
                link.attrib.get('rel', '') == 'stylesheet' or
                href.lower().split('?')[0].endswith('.css')
            ):
                link_url = self.make_absolute_url(url, href)
                key = (link_url, href)
                self.blocks[key] = self._download(link_url)
                if self.preserve_remote_urls:
                    self.blocks[key] = self._rewrite_urls(
                        self.blocks[key],
                        link_url
                    )
开发者ID:JHei,项目名称:mincss,代码行数:45,代码来源:processor.py


注：本文中的lxml.cssselect.CSSSelector.iter方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。