当前位置: 首页>>代码示例>>Python>>正文


Python CanadianLegislator.add_source方法代码示例

本文整理汇总了Python中utils.CanadianLegislator.add_source方法的典型用法代码示例。如果您正苦于以下问题:Python CanadianLegislator.add_source方法的具体用法?Python CanadianLegislator.add_source怎么用?Python CanadianLegislator.add_source使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类utils.CanadianLegislator的用法示例。


在下文中一共展示了CanadianLegislator.add_source方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_people

# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
  def get_people(self):
    """Yield the mayor, then ward councillors scraped from COUNCIL_PAGE.

    The first ``<p>`` following the "MAYOR" ``<h2>`` is handed to
    ``self.scrape_mayor``. Every ``<h3>`` on the page is then treated as a
    ward heading, and at most the first two ``<p>`` siblings that follow it
    are turned into ``Legislator`` objects with role ``'Councillor'``.

    Raises:
      IndexError: if the mayor paragraph is missing, or if a councillor
        paragraph has no ``<strong>`` child or no text nodes.
    """
    page = lxmlize(COUNCIL_PAGE)

    mayor_info = page.xpath('//h2[contains(text(), "MAYOR")]//following-sibling::p')[0]
    yield self.scrape_mayor(mayor_info)

    for ward in page.xpath('//h3'):
      # Strip the leading "WARD <n> - " label so only the district name is left.
      district = re.sub(r'\AWARD \d+ - ', '', ward.text_content())

      # The original loop stopped after the paragraph at index 1 by comparing
      # against councillors[1], which raised IndexError whenever only one
      # paragraph followed the heading. Slicing keeps the same two-item cap
      # without indexing past the end of a short list.
      for councillor in ward.xpath('following-sibling::p')[:2]:
        name = councillor.xpath('./strong')[0].text_content()

        p = Legislator(name=name, post_id=district, role='Councillor')
        p.add_source(COUNCIL_PAGE)

        # The first direct text node is used as the address; the remaining
        # text nodes are scanned for phone numbers below.
        info = councillor.xpath('./text()')
        address = info.pop(0)
        p.add_contact('address', address, 'legislature')

        # Split each remaining line on U+00BB / U+00A0 and drop empty pieces
        # and pieces that start with a non-breaking space. Plain raw strings
        # replace the Python-2-only ``ur''`` prefix; the pattern text itself
        # is unchanged.
        for line in info:
          pieces = re.split(r'(\xbb)|(\xa0)', line)
          fragments = [y for y in pieces if y and not re.match(r'\xa0', y)]
          self.get_tel_numbers(fragments, p)

        email = councillor.xpath('string(./a)')
        p.add_contact('email', email, None)

        yield p
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:34,代码来源:people.py

示例2: get_people

# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
    def get_people(self):
        """Yield the mayor, then one Legislator per district councillor.

        The mayor comes from MAYOR_URL (no email is available there). The
        councillors are discovered by running a regular expression over the
        raw text of COUNCIL_PAGE, then fetching each councillor's own page
        for the name, email and photo.
        """
        # Mayor first; an email address could not be found on this page.
        mayor_page = lxmlize(MAYOR_URL)
        mayor_photo = mayor_page.xpath('string(//img/@src[contains(., "Maire")])')
        mayor_name = mayor_page.xpath('string(//td[@class="contenu"]/text()[last()])')
        mayor = Legislator(name=mayor_name, post_id=u"Trois-Rivières", role="Maire", image=mayor_photo)
        mayor.add_source(MAYOR_URL)
        yield mayor

        response = requests.get(COUNCIL_PAGE)
        # The page is rendered by JavaScript on the client, so the district
        # names and relative URLs are pulled out of the response text.
        entry_re = re.compile(r'createItemNiv3.+"District (.+?)".+(index.+)\\"')
        # Districts whose leading article is kept as part of the name.
        keeps_article = ("des Estacades", "des Plateaux", "des Terrasses", "du Sanctuaire")
        scheme_length = len("mailto:")

        for district, url_rel in entry_re.findall(response.text):
            if district not in keeps_article:
                district = re.sub("\A(?:de(?: la)?|des|du) ", "", district)

            councillor_url = urljoin(COUNCIL_PAGE, url_rel)
            councillor_page = lxmlize(councillor_url)

            councillor_name = councillor_page.xpath("string(//h2)")
            mailto_href = councillor_page.xpath('string(//a/@href[contains(., "mailto:")])')
            councillor_photo = councillor_page.xpath('string(//img/@src[contains(., "Conseiller")])')

            councillor = Legislator(
                name=councillor_name,
                post_id=district,
                role="Conseiller",
                image=councillor_photo,
            )
            councillor.add_source(councillor_url)
            # Drop the leading "mailto:" characters from the href value.
            councillor.add_contact("email", mailto_href[scheme_length:], None)
            yield councillor
开发者ID:rhymeswithcycle,项目名称:scrapers-ca,代码行数:27,代码来源:people.py

示例3: councillor_data

# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
def councillor_data(url):
  """Build a Legislator with role 'Councillor' from the profile page at url.

  Args:
    url: address of the councillor's page; it is fetched with lxmlize and
      recorded as a source alongside COUNCIL_PAGE.

  Returns:
    The Legislator, carrying address, voice and email contacts and an image.
  """
  profile = lxmlize(url)

  full_name = profile.xpath('string(//h1[@id="TitleOfPage"])')
  ward = profile.xpath('string(//h2)')
  office_address = profile.xpath('string(//div[@class="asideContent"])')
  portrait = profile.xpath('string(//div[@id="contentright"]//img[1]/@src)')
  telephone = get_phone_data(profile)

  # Emails are built with JavaScript on the page to prevent scraping, so the
  # inline script text is passed to email_js to obtain the address.
  email_address = email_js(profile.xpath('string(//span/script)'))

  councillor = Legislator(name=full_name, post_id=ward, role='Councillor')

  for source in (COUNCIL_PAGE, url):
    councillor.add_source(source)

  contacts = (
    ('address', office_address, 'legislature'),
    ('voice', telephone, 'legislature'),
    ('email', email_address, None),
  )
  for kind, value, note in contacts:
    councillor.add_contact(kind, value, note)

  councillor.image = portrait
  return councillor
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:27,代码来源:people.py

示例4: scrape_mayor

# 需要导入模块: from utils import CanadianLegislator [as 别名]
# 或者: from utils.CanadianLegislator import add_source [as 别名]
  def scrape_mayor(self, div):
    """Build the mayor's Legislator from the link element div.

    Follows div's href to the mayor's page, then follows the "Contact" link
    in that page's secondary navigation and reads address, phone, fax and
    email from the contact page.

    Args:
      div: element whose ``href`` attribute points at the mayor's page and
        whose text holds the mayor's name prefixed with "Mayor ".

    Returns:
      A Legislator with role 'Mayor' and post_id 'Greater Sudbury'.
    """
    mayor_page = lxmlize(div.attrib['href'])
    mayor_name = div.text_content().replace('Mayor ', '')

    contact_links = mayor_page.xpath('//ul[@class="navSecondary"]//a[contains(text(),"Contact")]')
    contact_url = contact_links[0].attrib['href']
    contact_page = lxmlize(contact_url)

    contact_div = contact_page.xpath('//div[@class="col"][2]')[0]

    # Keep the address from "City of Greater ..." to the end of the paragraph.
    address_text = contact_div.xpath('.//p[1]')[0].text_content()
    address = re.findall(r'(City of Greater .*)', address_text, flags=re.DOTALL)[0]

    phone = contact_div.xpath('.//p[2]')[0].text_content().replace('Phone: ', '')

    # The fax number is whatever follows the last space in the third paragraph.
    fax = contact_div.xpath('.//p[3]')[0].text_content().rsplit(' ', 1)[-1]

    # NOTE(review): this XPath starts with "//", so it searches the whole
    # document rather than only contact_div - confirm that is intended.
    email = contact_div.xpath('//a[contains(@href, "mailto:")]')[0].text_content()

    mayor = Legislator(name=mayor_name, post_id='Greater Sudbury', role='Mayor')
    mayor.add_source(COUNCIL_PAGE)
    mayor.add_source(contact_url)
    for kind, value, note in (
      ('address', address, 'legislature'),
      ('voice', phone, 'legislature'),
      ('fax', fax, 'legislature'),
      ('email', email, None),
    ):
      mayor.add_contact(kind, value, note)
    return mayor
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:28,代码来源:people.py


注:本文中的utils.CanadianLegislator.add_source方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。