当前位置: 首页>>代码示例>>Python>>正文


Python Session.get_site方法代码示例

本文整理汇总了Python中session.Session.get_site方法的典型用法代码示例。如果您正苦于以下问题:Python Session.get_site方法的具体用法?Python Session.get_site怎么用?Python Session.get_site使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类session.Session的用法示例。


在下文中一共展示了Session.get_site方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: MoneyPL

# 需要导入模块: from session import Session [as 别名]
# 或者: from session.Session import get_site [as 别名]
class MoneyPL(object):
    """Scraper for the money.pl listing of companies traded on the GPW.

    Link lists scraped from the site are cached as UTF-8 text files (one
    link per line) under ``./cache`` so that later calls can skip the
    network entirely.
    """

    def __init__(self):
        # The session is created with ISO-8859-2 as its encoding.
        self.session = Session(encoding='iso-8859-2')

    def cache_exists(self, cache_file):
        """Return True if ``cache_file`` is an existing regular file."""
        return os.path.isfile(cache_file)

    def cache_read(self, cache_file):
        """Return the lines of ``cache_file`` with surrounding whitespace
        stripped.

        The file is decoded as UTF-8.
        """
        with codecs.open(cache_file, 'r', 'utf8') as src:
            return [line.strip() for line in src.readlines()]

    def cache_write(self, cache_file, lines):
        """Write ``lines`` to ``cache_file`` as UTF-8, one entry per line.

        The parent directory is created when it does not exist yet, so the
        first run on a fresh checkout does not fail for lack of ``./cache``.
        """
        cache_dir = os.path.dirname(cache_file)
        if cache_dir and not os.path.isdir(cache_dir):
            os.makedirs(cache_dir)

        with codecs.open(cache_file, 'w', 'utf8') as sink:
            for line in lines:
                # A unicode template keeps non-ASCII entries from raising
                # UnicodeEncodeError on Python 2; it is a no-op on Python 3.
                sink.write(u'{}\n'.format(line))

    def _collect_hrefs(self, page, anchors_path):
        """Return the ``href`` of every anchor matching ``anchors_path``.

        Anchors without a (non-empty) ``href`` are skipped: they carry no
        address to follow and would otherwise be cached as the text 'None'.
        """
        hrefs = (anchor.get('href') for anchor in page.findall(anchors_path))
        return [href for href in hrefs if href]

    def get_sectors_list(self):
        """Return the links to the sector pages, using the cache if present.

        On a cache miss the sector index page is fetched through the
        session, the matching anchors' ``href`` values are collected, and
        the result is written to the cache before being returned.
        """
        cache_file = './cache/sectors_list'
        if self.cache_exists(cache_file):
            return self.cache_read(cache_file)

        index_page = self.session.get_site(
            'http://www.money.pl/gielda/spolki_gpw/')
        result = self._collect_hrefs(index_page, ".//li[@class='zwin']/a")

        self.cache_write(cache_file, result)
        return result

    def get_companies_list(self):
        """Return the links to the company pages, using the cache if present.

        On a cache miss every sector link from ``get_sectors_list`` is
        fetched and the company anchors found on each page are collected,
        in sector order; the result is cached before being returned.
        """
        cache_file = './cache/companies_list'
        if self.cache_exists(cache_file):
            return self.cache_read(cache_file)

        companies_anchors_path = ".//div[@class='box lista_for']/ul/li/ul/li/a"
        result = []

        for sector_address in self.get_sectors_list():
            sector_page = self.session.get_site(sector_address)
            result.extend(
                self._collect_hrefs(sector_page, companies_anchors_path))

        self.cache_write(cache_file, result)
        return result

    def get_companies(self):
        """Yield a ``CompanySite`` for each link from ``get_companies_list``.

        Every ``CompanySite`` is given this object's session.
        """
        for company_address in self.get_companies_list():
            yield CompanySite(self.session, company_address)
开发者ID:skaras,项目名称:ztis-criminal-analyzer,代码行数:69,代码来源:moneypl.py

示例2: KRS

# 需要导入模块: from session import Session [as 别名]
# 或者: from session.Session import get_site [as 别名]

#.........这里部分代码省略.........
        if lat == 0 and lng == 0:
            print('Could not geocode address {}!'.format(address))

        data.append( (u'szer', lat) )
        data.append( (u'dlug', lng) )
        data.append( (u'adres', address) )



    #def _accept_table(self, table):
    #    pass

    # Parses a single table from an entity's page. Receives the '_Element'
    # object corresponding to the table and returns the list of data found
    # (if the table had two columns). The checks for whether the table
    # contains any interesting data at all also live here.
    # UPDATE: for now an 'acceptable' table is one that has 2 columns. That
    # is not particularly clever, so if anything more is needed, recognition
    # of the table type will go here. Rejecting -> return None.
    def _parse_table(self, table_elem):

        # Tak może wyglądać sprawdzanie poprawności tabeli:
        #if not self._accept_table( table_elem ):
        #    return None

        data = []

        for row in table_elem:
            cells = row.getchildren()
            #print('Children: {}'.format([c.text for c in cells]))
            if len(cells) == 2:
                key = cells[0].text
                val = cells[1].text
                if key and val:
                    key = self.strip_string(key)

                    if u'nazwisko' in key and u'imię' in key:
                        return []

                    data.append( (key,val) )
        return data



    # Extracts information from the page of a single KRS entity. Receives the
    # page address, returns a list of data rows.
    def get_entity(self, address):
        """Fetch one entity page and gather the data rows from its tables.

        The page at ``address`` is loaded through the session, every element
        matching ``KRS.path_data_table`` is handed to ``_parse_table``, and
        the parsed rows are concatenated in document order. The combined
        list is then passed to ``_geocode_address`` (presumably extended in
        place with location data; verify against that method) and returned.
        """
        page = self.session.get_site(address)

        rows = []
        for table_elem in page.findall(KRS.path_data_table):
            parsed = self._parse_table(table_elem)
            if parsed is None:
                # None is treated as "table rejected"; nothing to add.
                continue
            rows.extend(parsed)

        self._geocode_address(rows)
        return rows


    # Parses a KRS search-results page. Receives the page as an ElementTree,
    # returns links to the subpages that are the results of the search.
    def _parse_search_results(self, results_site):
        """Return absolute URLs for the hits on a search-results page.

        Args:
            results_site: Parsed page; its ``findall`` is queried with
                ``KRS.path_search_result`` to locate the result links.

        Returns:
            A list of strings, one per matching link, each built by
            appending the link's ``href`` to the krs-online.com.pl root.
            A real list is returned (not a lazy ``map`` object) so callers
            can take its length and index it on Python 3 as well.
        """
        links = results_site.findall(KRS.path_search_result)
        return [
            'http://www.krs-online.com.pl/{}'.format(link.get('href'))
            for link in links
        ]



    def search_for(self, look_for):
        """Search KRS for ``look_for`` and return the first hit's data.

        Args:
            look_for: Value sent as the ``look`` query parameter.

        Returns:
            Whatever ``get_entity`` returns for the first search result.

        Raises:
            RuntimeError: If the search produced no results.

        When more than one result is found a warning is printed and the
        first result is used.
        """
        results_page = self.session.get_site(
            'http://www.krs-online.com.pl/',
            params={'p': 6, 'look': look_for})

        # Materialize the results: the parser may hand back a lazy iterator
        # (e.g. map() on Python 3), which supports neither len() nor [0].
        search_results = list(self._parse_search_results(results_page))

        if not search_results:
            raise RuntimeError('Nothing found when searching in KRS for {}'
                    .format(look_for))
        if len(search_results) > 1:
            print('Warning: searching in KRS for id {} returned more than one '
                    'result!. First result will be used.'.format(look_for))

        return self.get_entity(search_results[0])



    # Looking up pages with information about entities by NIP (tax ID).
    # Returns a list of links with the search results.
    # TODO Will these be needed at all???
    def get_tax_id(self, nip):
        """Placeholder for a lookup by NIP; not implemented.

        The argument is ignored and None is returned.
        """
        return None

    def get_registry_id(self, reg_id):
        """Placeholder for a lookup by registry id; not implemented.

        The argument is ignored and None is returned.
        """
        return None
开发者ID:skaras,项目名称:ztis-criminal-analyzer,代码行数:104,代码来源:krs.py


注:本文中的session.Session.get_site方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。