

Python pyquery.PyQuery Class Code Examples

This article collects typical usage examples of pyquery.PyQuery in Python. If you have been wondering what the PyQuery class is for, how to call it, or what real-world PyQuery code looks like, the hand-picked class examples below may help.


The following shows 15 code examples of the PyQuery class, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
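
Before diving into the examples, here is a minimal orientation sketch of typical PyQuery usage. The HTML fragment and selectors are illustrative only and are not taken from any of the projects listed below:

from pyquery import PyQuery

# Parse an HTML fragment; PyQuery also accepts a URL (url=...) or an lxml element.
doc = PyQuery('<ul><li class="first">foo</li><li>bar</li></ul>')

print(doc('li').eq(0).text())         # text of the first <li>  -> "foo"
print(doc('li.first').attr('class'))  # read an attribute       -> "first"
print(len(doc('li')))                 # number of matched nodes -> 2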

Example 1: test_render_attribute_to_document

def test_render_attribute_to_document():
    document = PyQuery('<a attribute="{ value }" data-riot-id="0"></a>')
    expression = {'expression': '{ value }', 'attribute': 'attribute', 'type': 'attribute', 'node': document}
    render_document([expression], {'value': 'value'})
    assert document.outer_html() == '<a attribute="value" data-riot-id="0" data-riot-dirty="true"></a>'
    render_document([expression], {'value': 1})
    assert document.outer_html() == '<a attribute="1" data-riot-id="0" data-riot-dirty="true"></a>'
Author: pombredanne, Project: riotpy, Lines: 7, Source: test_expression.py

Example 2: get_saml_response

def get_saml_response(response):
    tree = PyQuery(response.content)
    inputtag = tree.find('input[name="SAMLResponse"]')
    assert len(inputtag) == 1
    encoded_response = inputtag[0].get('value')
    samlresponse = base64.b64decode(encoded_response)
    return samlresponse
Author: miing, Project: mci_migo, Lines: 7, Source: tests.py

Example 3: html_to_records

def html_to_records(html):
    pq = PyQuery(html)
    rows = pq.find('table tr')
    get_row = lambda r: [cell.text for cell in r]  # works for both header (th) and data (td) cells
    headers = get_row(rows[0])
    for row in rows[1:]:
        yield dict(zip(headers, get_row(row)))
Author: texastribune, Project: aeis, Lines: 7, Source: files.py

Example 4: getResTb

def getResTb():
    html = fileworker.getHTML()
    pq = PyQuery(html)
    result = dict()
    blocks = list()
    for i in pq.items('.row.result'):
        blocks.append(i)
Author: T10TheKinOfStars, Project: crawlerforfun, Lines: 7, Source: parser.py

Example 5: parseProductPage

def parseProductPage(product, need_img_urls=False):
    """Visit the product detail page and scrape four extra fields:
       delivery, reviews, star, total_sales
    """
    if product['product_url']:
        content = fetchContent(product['product_url'], False)
        doc = PyQuery(content)
        # product['delivery'] = doc("div.cost-entries-type > p > em.value").text()
        # (shipping cost is rendered dynamically by JS and cannot be scraped here)
        product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
        product['star'] = doc('p.star-level > i').attr("class")
        product['total_sales'] = doc('p.bargain-number > a > em.value').text()
        if need_img_urls:
            url_list = get_img_urls(content)
            product['img_urls'] = ', '.join(url_list)
        else:
            product['img_urls'] = ''
        product['color'], product['size'] = '', ''
        for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'颜色':  # "color" label
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') == 'de-feature' and tdQ.text().strip() == u'尺寸':  # "size" label
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
        product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
        if not product['MOQ'] or product['MOQ'] == 0:
            product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
        if product['MOQ'] == 1:
            # print product['product_url']
            product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
            product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
            product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
            product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
            print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
Author: chenweiqiang2016, Project: cwq-crawler, Lines: 34, Source: products.py

Example 6: crawl_1688_category_tree

def crawl_1688_category_tree(wb):
    # fr = open("C:users/chenweiqiang/desktop/ye.html", "r")  # elements could not be extracted after parsing the local file with PyQuery
    h = httplib2.Http()
    response, content = h.request("https://ye.1688.com/")
#     fw = open("C:users/chenweiqiang/desktop/ye2.html", "w")
#     fw.write(content)
#     fw.close()
    ws = wb.add_sheet("ye.1688品类树")
    ws.write(0,0,"一级品类")
    ws.write(0,1,"二级品类")
    ws.write(0,2,"三级品类")
    row = 0
    doc = PyQuery(content)
    level1NodeList = doc("li.cat-box")
    for level1Node in level1NodeList:
        level1NodeQ = PyQuery(level1Node)
        level1_category = level1NodeQ('div.cat-main').text().replace(' ', '')
        level2NodeList = level1NodeQ('div.cat-sub-col > dl')  # the intermediate div[class="cat-sub "] > level is redundant in the selector
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            level2_category = level2NodeQ('dt > a').text()
            level3NodeList = level2NodeQ('dd.cat-list > ul > li > a')
            for level3Node in level3NodeList:
                level3NodeQ = PyQuery(level3Node)
                level3_category = level3NodeQ.text()
                row += 1
                ws.write(row, 0, level1_category)
                ws.write(row, 1, level2_category)
                ws.write(row, 2, level3_category)
Author: chenweiqiang2016, Project: cwq-crawler, Lines: 29, Source: crawl_categoryTree.py

Example 7: list_page

    def list_page(self, response):
        result_content = {}

        content_iter = re.finditer(r"STK && STK.pageletM && STK.pageletM.view\((?P<content>\{.*?\})\)", response.content)
        for iter in content_iter:
            ok, content = safe_loads(iter.groupdict()['content'])
            if ok and "pl_weibo_direct" == content.get("pid"):
                result_content = content
                break
        else:
            return {}

        pyquery_doc = PyQuery(result_content["html"])
        pyquery_doc.make_links_absolute(response.url)

        items = []
        for item in pyquery_doc("DIV.feed_lists>DIV.WB_cardwrap>DIV").items():
            weibo_href = item("DIV.content>DIV.feed_from>A").attr.href
            if weibo_href:
                weibo_pics = []
                for pic in item("DIV.feed_content DIV.media_box IMG").items():
                    weibo_pics.append(pic.attr.src)

                data = {
                    "content": item("DIV.feed_content P.comment_txt").text(),
                    "nickname": item("DIV.feed_content A.W_texta").attr.title,
                    "href": weibo_href,
                    "quote_nickname": item("DIV.feed_content DIV.comment DIV.comment_info A.W_texta").attr.title,
                    "quote_content": item("DIV.feed_content DIV.comment DIV.comment_info P.comment_txt").text(),
                    "pics": ''.join(weibo_pics)
                }
                self.crawl("data:,%s" % weibo_href, callback=self.detail_page, data_fetch_content=data)
Author: jttoday, Project: spider, Lines: 32, Source: weibo_weixin.py

Example 8: get_bounds

def get_bounds(scene_name):
    """Use Earth Explorer metadata to get bounds of a Scene"""
    url_code = get_metadata_code(scene_name)

    metadata = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
        )
    metadata = metadata.text()[
        metadata.text().find('G-Ring_Latitude:'):
        metadata.text().find('\n  Keywords:')
        ]
    coords = (
        metadata.replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
        )
    coords = [float(coord) for coord in coords if coord != '']
    # create a list of lists with the coordinates
    coords = [coords[i:i + 2] for i in range(0, len(coords), 2)]
    # use reverse() to change [lat, lon] to [lon, lat]
    [coord.reverse() for coord in coords]
    # repeat the first coordinate on the end of the list
    if coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
Author: ibamacsr, Project: imagery, Lines: 26, Source: utils.py

Example 9: update_forums

    def update_forums(client, group, session):
        logging.info("Updating forums list for {}".format(group))
        query = Forum.get_forum_page(client, group.gid)
        reg = regex.compile(r"^forum\.php\?mod=forumdisplay&fid=(\d+)$")

        for row in query.find("table.fl_tb>tr"):
            sub_query = PyQuery(row)
            href = sub_query.find("td").eq(1).find("a").attr("href")
            if not href:
                continue

            fid = int(reg.findall(href)[0])

            name = sub_query.find("td").eq(1).find("h2>a").clone().children().remove().end().text()
            last_update = sub_query.find("td").eq(3).find("div>cite").clone().children().remove().end().text()
            last_update = dateparse(last_update)

            existence = session.query(Forum).filter(Forum.fid == fid)
            if existence.count() == 0:
                logging.info("<Forum(fid={})> not found, creating one".format(fid))
                forum = Forum(fid=fid, name=name, updated_at=last_update, group=group, fresh=False)
                session.add(forum)
            else:
                forum = existence.one()
                if forum.updated_at != last_update:
                    logging.info("{} found, stale: against {} ".format(forum, last_update))
                    forum.updated_at = last_update
                    forum.fresh = False
                    session.add(forum)
                else:
                    logging.info("{} found, fresh".format(forum))
Author: doomsplayer, Project: dandao-spider, Lines: 31, Source: forum.py

Example 10: _enhance_text

    def _enhance_text(self):
        """
        Transforms a simplified text into a valid mail.template text.
        :return: mail.template text
        """
        self.ensure_one()
        # Parse and set back the keywords into raw template code
        html_text = PyQuery(self.simplified_text.replace('\n', ''))

        def sort_keywords(kw):
            # Replace first if/for-clauses, then var, then code
            index = kw.position
            if kw.type == 'if' or 'for' in kw.type:
                index += 2*len(self.body_html) * kw.nested_position
                # Take if and for in the appearing order in the text
                index -= kw.position
            elif kw.type == 'var':
                index += len(self.body_html)
            return index

        keywords = self.keyword_ids.sorted(sort_keywords, reverse=True)
        # Replace automatic-generated keywords
        for keyword in keywords:
            keyword_text = html_text('#' + keyword.html_id)
            keyword_text.replace_with(keyword.final_text)

        # Replace user added keywords
        template_text = html_text.html()
        for keyword in keywords.filtered(lambda k: k.type == 'code'):
            to_replace = u"[{}]".format(keyword.short_code)
            template_text = template_text.replace(to_replace, keyword.raw_code)
        final_text = PyQuery(BeautifulSoup(template_text).prettify())
        return final_text('body').html()
Author: maxime-beck, Project: compassion-modules, Lines: 33, Source: communication_revision.py

Example 11: __extract

    def __extract(self, html):
        pq = PyQuery(html).find("main#main #mainArea table")

        selector_ = "thead tr:eq(0) th"
        date_order = [PyQuery(v).text().split('\n')[0] for v in PyQuery(pq).find(selector_)][3:]
        result = {d: {} for d in date_order}

        index = 0
        total = len(PyQuery(pq).find("tbody tr"))
        while index < total:
            td = PyQuery(pq).find("tbody tr:eq(%d) td:eq(0)" % index)

            room_type = td.text().split()[0]
            rowspan = int(td.attr('rowspan'))

            for i in xrange(index, index + rowspan):
                row = PyQuery(pq).find("tbody tr:eq(%d)" % i)

                # smoking or not
                smoking = PyQuery(row).find("td.alC.alM > img").attr("alt")

                room = "%s (%s)" % (room_type, smoking)

                if row.hasClass('clubCardCell'):
                    member_type = 'member'
                else:
                    member_type = 'guest'

                for i, v in enumerate(self.__extract_price_remain(row)):
                    if room not in result[date_order[i]]:
                        result[date_order[i]][room] = {}
                    result[date_order[i]][room][member_type] = v

            index += rowspan
        return result
Author: mkfsn, Project: ToyokoInn, Lines: 35, Source: ToyokoInn.py

Example 12: get_meme_url

def get_meme_url(meme):
    gen = GENERATORS.get(meme)
    if gen:
        pq = PyQuery(url="http://memegenerator.net/%s" % gen[2])
        return pq.find('a img.large').attr('src')
    else:
        return None
Author: ojii, Project: memepy, Lines: 7, Source: meme.py

Example 13: download

def download(threadUrl):
    """
    """
    d = PyQuery(url=threadUrl, parser='soup')
    links = d('a[href^="job.php?action=download&aid="]')

    # get the value of verify
    tmp = d('script:contains("var verifyhash =")').text()
    verify = re.search(r"var verifyhash = '(.*?)'", tmp).group(1)

    total = len(links)
    d.make_links_absolute()
    for i, e in enumerate(links.items(), start=1):
        filename = e.text()
        print('%s/%s %s' % (i, total, filename))

        if not os.path.exists(os.path.join(SAVE_PATH, filename)):
            params = urlencode(
                {'check': 1, 'verify': verify, 'nowtime': int(time.time() * 1000)})
            url = '%s?%s' % (e.attr['href'], params)

            print('  fetch: ' + url)
            downDoc = PyQuery(url, headers=headers)
            # index 0 is the China Telecom mirror, index 1 is the China Mobile mirror
            downUrl = BASE_URL + downDoc('a[href^="remotedown.php"]').eq(1).attr('href')
            addToIDM(downUrl, SAVE_PATH, filename)
            time.sleep(1.5)

    wefiler_urls = checkWefiler(d)
    if wefiler_urls:
        print(wefiler_urls)
Author: fishlee, Project: DownloadHelpers, Lines: 31, Source: weiphone.py

Example 14: _split

def _split(inputfile, outputdir):
    source = open(inputfile, 'r')
    html = source.read()
    source.close()

    if not os.path.isdir(outputdir):
        os.mkdir(outputdir)

    idx_slide = 0
    idx_section = 0

    parsed = PyQuery(html)

    for section in parsed('section'):
        slide = PyQuery(section)
        if slide.has_class('stack'):
            idx_section += 1
            stack_path = os.path.join(outputdir, '%02d' % idx_section)
            os.mkdir(stack_path)
            for sub_slide in PyQuery(slide.html())('section'):
                idx_slide += 1
                _dump_slide(sub_slide, idx_slide, stack_path)
        else:
            if not slide.parent().has_class('stack'):
                idx_slide += 1
                _dump_slide(slide, idx_slide, outputdir)
Author: tiry, Project: reveal-js-tools, Lines: 26, Source: split.py

Example 15: _parse_table

    def _parse_table(self, table):

        # Initialize table
        parsed_rows = []

        # Parse table
        qtable = PyQuery(table)

        # Get headers
        headers = self._get_headers(qtable)
        if not headers:
            return

        # Get rows
        rows = qtable.find("tr")

        # Loop over rows
        for row in rows:

            # Get columns
            qrow = PyQuery(row)
            cols = qrow.find("td").map(self._get_text)[:]

            # Parse column values
            for colidx in range(len(cols)):
                col = reduce(lambda x, y: re.sub(y[0], y[1], x), self._trans, cols[colidx])
                cols[colidx] = col

            # Append parsed columns
            if cols:
                parsed_rows.append(cols)

        return {"headers": headers, "data": parsed_rows}
Author: jmcarp, Project: sciscrape, Lines: 33, Source: sciparse.py


Note: The pyquery.PyQuery class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their respective developers, and copyright remains with the original authors; consult each project's license before redistributing or reusing the code. Please do not reproduce this article without permission.