当前位置: 首页>>代码示例>>Python>>正文


Python Dot.set_simplify方法代码示例

本文整理汇总了Python中pydot.Dot.set_simplify方法的典型用法代码示例。如果您正苦于以下问题：Python Dot.set_simplify方法的具体用法是什么？Python Dot.set_simplify怎么用？有哪些Dot.set_simplify的使用例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pydot.Dot的用法示例。


在下文中一共展示了Dot.set_simplify方法的1个代码示例，示例默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: Sitemap

# Required import: from pydot import Dot [as alias]
# Note: set_simplify is a method of the Dot class; call it on a Dot instance (it cannot be imported on its own).
class Sitemap(object):
    URI_FAILURE = "failed to read URI '%s'"
    current_contents = None
    base_uri = None
    base_url = None
    current_uri = None
    current_url = None
    site_graph = None

    def __init__(self):
        self.site_dict = { 'pages': [],
                           'links': {},
                           'assets': { 'imgs': [],
                                       'scripts': [],
                                       'stylesheets': [] }
                         }

    def from_uri(self, uri, render_opts={}):
        """ sets the instance URI and renders a sitemap image
        """
        self.base_uri = uri
        self.base_url = urlparse(uri)
        if self.traverse_site( uri ):
            self.render_sitemap( render_opts )
        else:
            return self.URI_FAILURE % uri

    def build_site_graph(self):
        """Build the directed graph of the crawled pages in ``site_graph``.

        The graph is labelled with the base URI and has its simplify flag
        switched on. Every page in ``site_dict['pages']`` contributes one
        node (via ``page.to_node()``) and one edge per entry in
        ``page.attributes['links']``, running from ``page.uri`` to that link.
        """
        graph = self.site_graph = Dot(graph_type='digraph')
        graph.set_label( 'Sitemap for "%s"' % self.base_uri )
        graph.set_simplify( True )

        # First pass registers all nodes, second pass connects them.
        for page in self.site_dict['pages']:
            graph.add_node( page.to_node() )
        for page in self.site_dict['pages']:
            source = page.uri
            for target in page.attributes['links']:
                graph.add_edge( Edge(source, target) )

    def render_sitemap(self, options={}):
        if self.site_graph is None:
            self.build_site_graph()
        file_fmt = 'png'
        if 'format' in options.keys() and options['format'] is not None:
            file_fmt = options['format']
        filename = 'sitemap.%s' % file_fmt
        if 'filename' in options.keys() and options['filename'] is not None:
            filename = options['filename']
        self.site_graph.write(filename, 'dot', file_fmt)
        # with open(filename, 'w') as sitemap_img:
        #     sitemap_img.write("TODO")


    def traverse_site(self, uri_str):
        if self.validate_uri( uri_str ):
          # populate site_dict
          print "%d traversing %s" % (int(time.time()), uri_str)
          page_dict = { 'assets': { 'imgs': [],
                                    'scripts': [],
                                    'stylesheets': []
                                  },
                        'links': []
                      }
          self.current_contents = urllib.urlopen( uri_str ).read()
          self.current_uri = uri_str
          self.current_url = urlparse( uri_str )
          self.base_url = self.base_url or self.current_url
          html_doc = BeautifulSoup( self.current_contents )
          # get all scripts, stylesheets and images
          for script in html_doc.find_all('script'):
              src = script.get('src')
              if src is not None:
                  page_dict['assets']['scripts'].append( src )
          sset = set(self.site_dict['assets']['scripts'])
          pset = set(page_dict['assets']['scripts'])
          # list of unique js sources for the site
          self.site_dict['assets']['scripts'] = list( sset | pset )

          for css in html_doc.select('link[rel="stylesheet"]'):
              href = css.get('href')
              if href is not None:
                  page_dict['assets']['stylesheets'].append( href )
          sset = set(self.site_dict['assets']['stylesheets'])
          pset = set(page_dict['assets']['stylesheets'])
          # list of unique stylesheets for the site
          self.site_dict['assets']['stylesheets'] = list( sset | pset )

          for img in html_doc.select('img'):
              src = img.get('src')
              if src is not None and src not in page_dict['assets']['imgs']:
                  page_dict['assets']['imgs'].append( src )
          sset = set(self.site_dict['assets']['imgs'])
          pset = set(page_dict['assets']['imgs'])
          # list of unique images for the site
          self.site_dict['assets']['imgs'] = list( sset | pset )

          # get all internal links on the page
          for link in html_doc.select('a'):
#.........这里部分代码省略.........
开发者ID:keithwoody,项目名称:kwrawler-python,代码行数:103,代码来源:sitemap.py


注:本文中的pydot.Dot.set_simplify方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。