本文整理汇总了 Python 中 session.Session.get_site 方法的典型用法代码示例。如果您正苦于以下问题:Python Session.get_site 方法的具体用法?Python Session.get_site 怎么用?Python Session.get_site 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 session.Session 的用法示例。
在下文中一共展示了Session.get_site方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: MoneyPL
# 需要导入模块: from session import Session [as 别名]
# 或者: from session.Session import get_site [as 别名]
class MoneyPL(object):
    """Collects addresses of sector and company pages from money.pl.

    Scraped address lists are cached as UTF-8 text files (one address per
    line) under ``./cache`` so that later calls can skip the network fetch.
    """

    def __init__(self):
        # The session is configured with the ISO-8859-2 encoding.
        self.session = Session(encoding='iso-8859-2')

    def cache_exists(self, cache_file):
        """Return True if *cache_file* exists and is a regular file."""
        return os.path.isfile(cache_file)

    def cache_read(self, cache_file):
        """Return the lines of *cache_file*, each stripped of whitespace."""
        with codecs.open(cache_file, 'r', 'utf8') as src:
            return [line.strip() for line in src.readlines()]

    def cache_write(self, cache_file, lines):
        """Write *lines* to *cache_file* as UTF-8, one entry per line.

        The parent directory is created when it is missing, so the first
        run in a fresh working directory does not fail with an IOError.
        """
        cache_dir = os.path.dirname(cache_file)
        if cache_dir and not os.path.isdir(cache_dir):
            try:
                os.makedirs(cache_dir)
            except OSError:
                # Tolerate the directory appearing between the check and
                # the call; re-raise only if it is really unavailable.
                if not os.path.isdir(cache_dir):
                    raise
        with codecs.open(cache_file, 'w', 'utf8') as sink:
            for line in lines:
                sink.write('{}\n'.format(line))

    def get_sectors_list(self):
        """Return the ``href`` values of the sector links.

        The cached list is returned when the cache file exists; otherwise
        the sector index page is fetched, parsed and the result is cached.
        """
        cache_file = './cache/sectors_list'
        if self.cache_exists(cache_file):
            return self.cache_read(cache_file)

        sectors_url = 'http://www.money.pl/gielda/spolki_gpw/'
        anchors_path = ".//li[@class='zwin']/a"

        sectors_page = self.session.get_site(sectors_url)
        result = [anchor.get('href')
                  for anchor in sectors_page.findall(anchors_path)]
        self.cache_write(cache_file, result)
        return result

    def get_companies_list(self):
        """Return the ``href`` values of company links from every sector.

        The cached list is returned when the cache file exists; otherwise
        each sector page from ``get_sectors_list`` is fetched and parsed,
        and the combined result is cached.
        """
        cache_file = './cache/companies_list'
        if self.cache_exists(cache_file):
            return self.cache_read(cache_file)

        companies_anchors_path = ".//div[@class='box lista_for']/ul/li/ul/li/a"
        result = []
        for sector_url in self.get_sectors_list():
            sector_page = self.session.get_site(sector_url)
            result.extend(
                anchor.get('href')
                for anchor in sector_page.findall(companies_anchors_path))
        self.cache_write(cache_file, result)
        return result

    def get_companies(self):
        """Yield a ``CompanySite`` for every address in the companies list."""
        for company_address in self.get_companies_list():
            yield CompanySite(self.session, company_address)
示例2: KRS
# 需要导入模块: from session import Session [as 别名]
# 或者: from session.Session import get_site [as 别名]
#.........这里部分代码省略.........
if lat == 0 and lng == 0:
print('Could not geocode address {}!'.format(address))
data.append( (u'szer', lat) )
data.append( (u'dlug', lng) )
data.append( (u'adres', address) )
#def _accept_table(self, table):
# pass
# Parses a single table from an entity's page. Receives the '_Element' object
# corresponding to the table and returns a list of the data found (if the
# table had two columns). The checks for whether the table contains any
# interesting data at all also live here.
# UPDATE: for now an 'acceptable' table is one that has 2 columns. This is
# not very clever, so if something more is needed, recognition of the table
# kind will go here. Rejecting -> return None.
def _parse_table(self, table_elem):
    """Parse one table element into a list of ``(key, value)`` pairs.

    Only rows with exactly two cells are used, and only when both cells
    have non-empty text. The key is passed through ``self.strip_string``;
    the value is stored unchanged.

    Args:
        table_elem: element whose children are the table rows.

    Returns:
        A list of ``(key, value)`` tuples. If any key contains both
        'nazwisko' and 'imię', the whole table is discarded and an empty
        list is returned (presumably these are tables listing persons --
        TODO confirm). A future table-type check could reject a table by
        returning None; this method currently never does.
    """
    data = []
    for row in table_elem:
        # list(row) replaces Element.getchildren(), which is deprecated and
        # no longer exists in xml.etree.ElementTree since Python 3.9.
        cells = list(row)
        if len(cells) != 2:
            continue
        key, val = cells[0].text, cells[1].text
        if not (key and val):
            continue
        key = self.strip_string(key)
        if u'nazwisko' in key and u'imię' in key:
            return []
        data.append((key, val))
    return data
# Extracts information from the page of a single KRS entity. Receives the page
# address, returns a list of rows with data.
def get_entity(self, address):
    """Fetch one KRS entity page and return the data rows found on it.

    Every table matched by ``KRS.path_data_table`` is parsed with
    ``self._parse_table``; tables for which the parser returns None are
    skipped. The combined list is then handed to
    ``self._geocode_address`` (which presumably appends location data in
    place -- verify against its definition) and returned.

    Args:
        address: address of the entity page to fetch.

    Returns:
        The list of rows collected from all accepted tables.
    """
    page = self.session.get_site(address)
    rows = []
    for table in page.findall(KRS.path_data_table):
        parsed = self._parse_table(table)
        if parsed is None:
            continue
        rows.extend(parsed)
    self._geocode_address(rows)
    return rows
# Parses a KRS search results page. Receives the page as an ElementTree,
# returns links to the subpages that are the results of the search.
def _parse_search_results(self, results_site):
    """Return absolute addresses of the entity pages found by a search.

    Args:
        results_site: parsed search results page; must support
            ``findall``.

    Returns:
        A list of URLs, one per element matched by
        ``KRS.path_search_result``, built from each element's ``href``.
        A real list is returned (not a lazy ``map`` object) because the
        caller needs ``len()`` and indexing, which iterators do not
        support on Python 3.
    """
    url_template = 'http://www.krs-online.com.pl/{}'
    return [url_template.format(link.get('href'))
            for link in results_site.findall(KRS.path_search_result)]
def search_for(self, look_for):
    """Search KRS for *look_for* and return the data of the first result.

    Args:
        look_for: value sent as the ``look`` query parameter.

    Returns:
        Whatever ``self.get_entity`` returns for the first result address.
        When more than one result is found a warning is printed and the
        first one is used.

    Raises:
        RuntimeError: if the search returns no results.
    """
    results_page = self.session.get_site(
        'http://www.krs-online.com.pl/',
        params={'p': 6, 'look': look_for})
    # Materialize the results: the parser may hand back a lazy iterator
    # (``map`` on Python 3), which supports neither len() nor indexing.
    addresses = list(self._parse_search_results(results_page))
    if not addresses:
        raise RuntimeError('Nothing found when searching in KRS for {}'
                           .format(look_for))
    if len(addresses) > 1:
        print('Warning: searching in KRS for id {} returned more than one '
              'result!. First result will be used.'.format(look_for))
    return self.get_entity(addresses[0])
# Looking up pages with information about entities by NIP (tax ID). Returns a
# list of links with the search results.
# TODO Will these be needed at all???
def get_tax_id(self, nip):
    """Placeholder for looking up entity pages by NIP (tax ID).

    Not implemented: the argument is ignored and None is returned.
    """
    return None
def get_registry_id(self, reg_id):
    """Placeholder, presumably for looking up entity pages by registry id.

    Not implemented: the argument is ignored and None is returned.
    """
    return None