当前位置: 首页>>代码示例>>Python>>正文


Python utils.lxmlize函数代码示例

本文整理汇总了Python中utils.lxmlize函数的典型用法代码示例。如果您正苦于以下问题:Python lxmlize函数的具体用法?Python lxmlize怎么用?Python lxmlize使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。


在下文中一共展示了lxmlize函数的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_people

  def get_people(self):
    """Yield a Legislator for each councillor, then one for the mayor.

    Councillors are read from the rows of the 484-wide table on
    COUNCIL_PAGE, consumed four rows at a time: a district row, a
    councillor-name row, a contact row and a fourth row that is ignored.
    The mayor's name is taken from the "... Bio" heading on MAYOR_PAGE and
    the email from the first link containing "@" on MAYOR_CONTACT_URL.

    Yields:
      Legislator: one per complete group of four rows (sourced from
      COUNCIL_PAGE), followed by the mayor (sourced from MAYOR_PAGE and
      MAYOR_CONTACT_URL). Each carries an email contact.
    """
    page = lxmlize(COUNCIL_PAGE, 'iso-8859-1')
    nodes = page.xpath('//table[@width="484"]//tr')
    for rows in chunks(nodes, 4):
      # Guard only the unpacking: the final group can hold fewer than four
      # rows. Keeping the try this narrow means a ValueError raised while
      # building a Legislator is no longer silently swallowed.
      try:
        district_row, councillor_row, contact_row, _ = rows
      except ValueError:
        break

      post_id = district_row.xpath('string(.//strong)')
      # Drop the leading "Councillor " title from the row text.
      name = councillor_row.xpath('string(.)')[len('Councillor '):]
      # TODO: phone numbers on site don't include area code. Add manually?
      #phone = contact_row.xpath('string(td[2]/text())')
      # Addresses are obfuscated on the page as "user[at]host".
      email = contact_row.xpath('string(td[4]/a)').replace('[at]', '@')

      p = Legislator(name=name, post_id=post_id, role='Councillor')
      p.add_source(COUNCIL_PAGE)
      #p.add_contact('voice', phone, 'legislature')
      p.add_contact('email', email, None)
      yield p

    mayor_page = lxmlize(MAYOR_PAGE, 'iso-8859-1')
    # The heading reads "<name> Bio"; strip the trailing " Bio".
    name = mayor_page.xpath('string(//h1[contains(., "Bio")])')[:-len(' Bio')]
    contact_page = lxmlize(MAYOR_CONTACT_URL, 'iso-8859-1')
    email = contact_page.xpath('string(//a[contains(., "@")][1])')

    p = Legislator(name=name, post_id='Halifax', role='Councillor')
    p.add_source(MAYOR_PAGE)
    p.add_source(MAYOR_CONTACT_URL)
    p.add_contact('email', email, None)
    yield p
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:30,代码来源:people.py

示例2: get_people

    def get_people(self):
        """Yield a Legislator for the mayor, then one per district councillor.

        The mayor's name and photo come from MAYOR_URL. Councillor pages are
        discovered by matching a regex against the raw text of COUNCIL_PAGE;
        each matched page is then fetched and parsed for the councillor's
        name, email and photo.

        Yields:
            Legislator: the mayor (no email contact), followed by one
            councillor per regex match, each sourced from its own page URL.
        """
        # mayor first, can't find email
        page = lxmlize(MAYOR_URL)
        photo_url = page.xpath('string(//img/@src[contains(., "Maire")])')
        name = page.xpath('string(//td[@class="contenu"]/text()[last()])')
        p = Legislator(name=name, post_id=u"Trois-Rivières", role="Maire", image=photo_url)
        p.add_source(MAYOR_URL)
        yield p

        resp = requests.get(COUNCIL_PAGE)
        # The page is rendered through JS on the client, so district names
        # and relative URLs are pulled out of the raw response text.
        page_re = re.compile(r'createItemNiv3.+"District (.+?)".+(index.+)\\"')
        for district, url_rel in page_re.findall(resp.text):
            # These four districts keep their leading article; every other
            # district has a leading "de ", "de la ", "des " or "du " removed.
            if district not in ("des Estacades", "des Plateaux", "des Terrasses", "du Sanctuaire"):
                # Raw string: "\A" is a regex anchor, not a string escape.
                district = re.sub(r"\A(?:de(?: la)?|des|du) ", "", district)

            url = urljoin(COUNCIL_PAGE, url_rel)
            page = lxmlize(url)
            name = page.xpath("string(//h2)")
            # Strip the "mailto:" scheme from the first matching href.
            email = page.xpath('string(//a/@href[contains(., "mailto:")])')[len("mailto:") :]
            photo_url = page.xpath('string(//img/@src[contains(., "Conseiller")])')
            p = Legislator(name=name, post_id=district, role="Conseiller", image=photo_url)
            p.add_source(url)
            p.add_contact("email", email, None)
            yield p
开发者ID:rhymeswithcycle,项目名称:scrapers-ca,代码行数:25,代码来源:people.py

示例3: scrape_mayor

  def scrape_mayor(self, div):
    """Build the Legislator record for the mayor.

    Follows the link in ``div`` to the mayor's page, finds the "Contact"
    entry in its secondary navigation, and reads address, phone, fax and
    email from that contact page.

    Args:
      div: element exposing ``attrib['href']`` and ``text_content()``; its
        text is the mayor's name prefixed with "Mayor ".

    Returns:
      Legislator sourced from COUNCIL_PAGE and the contact page URL, with
      address, voice, fax and email contacts.

    Raises:
      IndexError: if any of the expected elements is missing from the page.
    """
    profile_url = div.attrib['href']
    profile = lxmlize(profile_url)

    mayor_name = div.text_content().replace('Mayor ', '')
    contact_links = profile.xpath('//ul[@class="navSecondary"]//a[contains(text(),"Contact")]')
    contact_url = contact_links[0].attrib['href']
    contact_page = lxmlize(contact_url)

    contact_div = contact_page.xpath('//div[@class="col"][2]')[0]

    def first_text(expr):
      # Text content of the first node matched by expr, evaluated from contact_div.
      return contact_div.xpath(expr)[0].text_content()

    # Keep everything from "City of Greater ..." onwards, across newlines.
    address = re.findall(r'(City of Greater .*)', first_text('.//p[1]'), flags=re.DOTALL)[0]
    phone = first_text('.//p[2]').replace('Phone: ', '')
    # The fax number is the last space-separated token of the paragraph.
    fax = first_text('.//p[3]').split(' ')[-1]
    # NOTE(review): this expression starts with "//", so it matches from the
    # document root rather than only inside contact_div - confirm intended.
    email = first_text('//a[contains(@href, "mailto:")]')

    mayor = Legislator(name=mayor_name, post_id='Greater Sudbury', role='Mayor')
    for source in (COUNCIL_PAGE, contact_url):
      mayor.add_source(source)
    mayor.add_contact('address', address, 'legislature')
    mayor.add_contact('voice', phone, 'legislature')
    mayor.add_contact('fax', fax, 'legislature')
    mayor.add_contact('email', email, None)
    return mayor
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:26,代码来源:people.py

示例4: mayor_data

def mayor_data(url, name):
  """Build the Legislator record for the mayor.

  Args:
    url: address of the mayor's page; the first image inside the
      "contentcontainer" div is used as the photo, resolved against it.
    name: the mayor's name, passed straight to the Legislator.

  Returns:
    Legislator with post_id 'Regina' and role 'Mayor', sourced from
    COUNCIL_PAGE, ``url`` and MAYOR_CONTACT_URL, carrying an email contact
    and an ``image`` attribute.
  """
  profile = lxmlize(url)
  image_src = profile.xpath('string((//div[@id="contentcontainer"]//img)[1]/@src)')
  photo_url = urljoin(url, image_src)

  # The email is the text of the first link containing "@" on the contact page.
  contact_page = lxmlize(MAYOR_CONTACT_URL)
  email = contact_page.xpath('string(//a[contains(., "@")][1])')

  mayor = Legislator(name=name, post_id='Regina', role='Mayor')
  for source in (COUNCIL_PAGE, url, MAYOR_CONTACT_URL):
    mayor.add_source(source)
  mayor.add_contact('email', email, None)
  mayor.image = photo_url

  return mayor
开发者ID:fchagnon,项目名称:scrapers-ca,代码行数:15,代码来源:people.py


注:本文中的utils.lxmlize函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。