当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.attr方法代码示例

本文整理汇总了 Python 中 pyquery.PyQuery.attr 方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.attr 方法的具体用法?Python PyQuery.attr 怎么用?Python PyQuery.attr 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pyquery.PyQuery 的用法示例。


在下文中一共展示了 PyQuery.attr 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: test_calendar_tag_rendering

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
    def test_calendar_tag_rendering(self, timezone_mock):
        """Render the ``calendar`` template tag for January 2015 and verify it.

        Six events are created and the tag is rendered in English for the
        test's own ``self.app_config`` namespace. The rendered calendar table
        must expose the month/year data attributes, mark the 10th as today
        and link exactly the days 13, 15 and 22-27.

        NOTE(review): presumably ev3 (German only), ev4 (other namespace) and
        ev6 (no ``publish_at``) are the events meant to stay out of the
        output -- confirm against ``create_event``.
        """
        # Pin "now" to 2015-01-10 12:00; the test expects day 10 as "today".
        timezone_mock.now.return_value = tz_datetime(2015, 1, 10, 12)
        apphook_page = self.create_base_pages()
        other_config = EventsConfig.objects.create(namespace='other')

        event_specs = (
            dict(
                title='ev1',
                start_date=tz_datetime(2015, 1, 13),
                publish_at=tz_datetime(2015, 1, 10),
            ),
            dict(
                title='ev2',
                start_date=tz_datetime(2015, 1, 15),
                publish_at=tz_datetime(2015, 1, 10),
            ),
            dict(
                de=dict(
                    title='ev3',
                    start_date=tz_datetime(2015, 1, 16),
                    publish_at=tz_datetime(2015, 1, 10),
                ),
            ),
            dict(
                title='ev4',
                start_date=tz_datetime(2015, 1, 18),
                publish_at=tz_datetime(2015, 1, 10),
                app_config=other_config,
            ),
            dict(
                title='ev5',
                start_date=tz_datetime(2015, 1, 22),
                end_date=tz_datetime(2015, 1, 27),
                publish_at=tz_datetime(2015, 1, 10),
            ),
            dict(
                title='ev6',
                start_date=tz_datetime(2015, 1, 25),
            ),
        )
        for spec in event_specs:
            self.create_event(**spec)

        # Use the default test namespace (self.app_config) rather than a
        # hard-coded one.
        template_source = """
        {%% load aldryn_events %%}
        {%% calendar 2015 1 'en' '%s' %%}
        """ % self.app_config.namespace
        template = Template(template_source)
        with override('en'):
            rendered = template.render(SekizaiContext({}))
            table = PyQuery(rendered)('table.table-calendar')
            base_url = apphook_page.get_absolute_url()
        event_links = table.find('td.events, td.multiday-events').find('a')

        # The tag must have rendered the key calendar elements.
        self.assertEqual('1', table.attr('data-month-numeric'))
        self.assertEqual('2015', table.attr('data-year'))
        self.assertEqual('10', table.find('td.today').text())

        expected_days = (13, 15, 22, 23, 24, 25, 26, 27)
        self.assertEqual(8, event_links.length)
        for index, day in enumerate(expected_days):
            self.assertEqual(
                '{0}2015/1/{1}/'.format(base_url, day),
                event_links[index].attrib['href'],
            )
开发者ID:nephila,项目名称:aldryn-events,代码行数:62,代码来源:test_tags.py

示例2: parseProductPage

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
def parseProductPage(product, need_img_urls=False):
    """进入商品详情页, 抓取四个新字段
       delivery reviews star total_sales
    """
    if product['product_url']:
       content = fetchContent(product['product_url'], False)
       doc=PyQuery(content)
       #product['delivery'] = doc("div.cost-entries-type > p > em.value").text() 运费JS动态 解决不了
       product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
       product['star'] = doc('p.star-level > i').attr("class")
       product['total_sales'] = doc('p.bargain-number > a > em.value').text()
       if need_img_urls:
           url_list = get_img_urls(content)
           product['img_urls'] = ', '.join(url_list)
       else:
           product['img_urls'] = ''
       product['color'], product['size'] = '', ''
       for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'颜色':
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'尺寸':
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
       product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
       if not product['MOQ'] or product['MOQ'] == 0:
           product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
       if product['MOQ'] == 1:
           #print product['product_url']
           product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
           product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
           product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
           product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
           print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
开发者ID:chenweiqiang2016,项目名称:cwq-crawler,代码行数:36,代码来源:products.py

示例3: fixLinks

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
        def fixLinks(text, parser):
            d = PyQuery(bytes(bytearray(text, encoding='utf-8')), parser=parser)
            for element in d('a, link'):
                e = PyQuery(element)
                href = e.attr('href')

                if href is None:
                    continue

                print '// Drop queryString in included src'
                print 'from: ', href
                result = urlparse(href)

                if result.scheme == 'https':
                    href = href
                elif result.scheme == '':
                    href = result.path + (('#' + result.fragment) if result.fragment != '' else '')
                print 'to: ', href
  
                new_href = re.sub(r'(rss/index\.html)|(rss/?)$', 'rss/index.rss', href)
                if not abs_url_regex.search(href):
                    new_href = re.sub(r'/index\.html$', '/', new_href)

                if href != new_href:
                    e.attr('href', new_href)
                    print "\t", href, "=>", new_href

            if parser == 'html':
                return d.html(method='html').encode('utf8')
            return d.__unicode__().encode('utf8')
开发者ID:copywrite,项目名称:buster,代码行数:32,代码来源:buster.py

示例4: scrape

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
def scrape(slug, url, name, title=None):
    f = urlopen(url)
    doc = f.read()

    doc, errs = tidy_document(
        doc,
        options={
            "output-html": 1,
            #'indent':1,
            "clean": 1,
            "drop-font-tags": 1,
        },
    )
    if errs:
        # raise Exception, errs
        print errs

    doc = html5lib.parse(doc, treebuilder="lxml")  # this didn't work, but above three lines did: encoding='utf-8',
    html.xhtml_to_html(doc)
    jQuery = PyQuery([doc])

    td = jQuery("td#content")
    assert len(td) == 1

    for img in td("img"):
        # print 'img:', PyQuery (img)
        img = PyQuery(img)
        src = img.attr("src")
        # alt = img.attr('alt')

        # if src.startswith ('/image'):
        rslt = getimage(src, slug.split("/")[0])
        img.attr("src", rslt)
        if trace:
            print rslt

    # td =
    # no_fonts (td)

    # need to fix links here

    content = PyQuery(td[0])
    # content = content.html()
    content = no_namespaces(content.html())

    print slug, content[:60]  # .html()  # [:60]

    if dbteeth:
        # q, created = QuickPage.objects.get_or_create (

        qp, created = create_or_update(
            QuickPage,
            keys=dict(slug=slug),
            fields=dict(
                name=name,
                title=title if title else name,
                content=content,
                # defaults = dict (sortorder = sortorder),
            ),
        )
开发者ID:satyadevi-nyros,项目名称:eracks,代码行数:62,代码来源:scrape_pages.py

示例5: _add_nested

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
 def _add_nested(self, k, el):
     """Record nested CSS selectors for *el* and its descendants.

     Elements whose tag is listed in ``self.invalid_tags`` and elements
     rejected by ``self._is_root_body_node`` are ignored. A childless
     element contributes just its own selector; otherwise the selector is
     extended with each visited child's id/classes and every intermediate
     form is added to ``self.selectors``. ``k`` is not used.
     """
     node = Pq(el)
     if node[0].tag in self.invalid_tags:
         return
     node_id = self._format_id(node.attr('id'))
     node_classes = self._format_classes(node.attr('class'))
     selector = self._format_selector(node, node_id, node_classes)
     children = node.children()
     if not self._is_root_body_node(node):
         return
     # A leaf node only contributes its own selector.
     if not children:
         self.selectors.add(selector)
     # Walk down the tree, growing the selector with each visited child.
     # NOTE: ``children`` is rebound inside the ``for`` loop, so the next
     # ``while`` pass descends into the children of the last child visited.
     while children:
         for child in children:
             # 1. Selector accumulated so far
             self.selectors.add(selector)
             # 2. Selector extended with this child
             child = Pq(child)
             selector += self._add_id_and_classes(child)
             self.selectors.add(selector)
             # 3. Descend one level
             children = child.children()
开发者ID:christabor,项目名称:codeReflector,代码行数:29,代码来源:css_reflector.py

示例6: getTweets

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
	def getTweets(tweetCriteria):
		refreshCursor = ''
	
		results = []
	
		while True:
			json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor)
			refreshCursor = json['min_position']
			try:
				tweets = PyQuery(json['items_html'])('div.js-stream-tweet')
			except Exception, e:
				print e
				# There was either an error in the request or nothing returned
				return results
			
			
			if len(tweets) == 0:
				break
			
			for tweetHTML in tweets:
				tweetPQ = PyQuery(tweetHTML)
				tweet = models.Tweet()

				# print tweetPQ("p.js-tweet-text").text()
				
				usernameTweet = tweetPQ("span.username.js-action-profile-name b").text()
				txt = re.sub(r"[^\x00-\x7F]", "", tweetPQ("p.js-tweet-text").text()) \
					.replace('# ', '#') \
					.replace('@ ', '@') \
					.replace('www. ', 'www.') \
					.replace('/ ', '/')

				retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
				favorites = int(tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
				dateSec = int(tweetPQ("small.time span.js-short-timestamp").attr("data-time"))
				id = tweetPQ.attr("data-tweet-id")
				permalink = tweetPQ.attr("data-permalink-path")
				
				geo = ''
				geoSpan = tweetPQ('span.Tweet-geo')
				if len(geoSpan) > 0:
					geo = geoSpan.attr('title')
				
				tweet.id = id
				tweet.permalink = 'https://twitter.com' + permalink
				tweet.username = usernameTweet
				tweet.text = txt
				tweet.date = datetime.datetime.fromtimestamp(dateSec)
				tweet.retweets = retweets
				tweet.favorites = favorites
				tweet.mentions = " ".join(re.compile('(@\\w*)').findall(tweet.text))
				tweet.hashtags = " ".join(re.compile('(#\\w*)').findall(tweet.text))
				tweet.geo = geo
				
				results.append(tweet)
				
				if tweetCriteria.maxTweets > 0 and len(results) >= tweetCriteria.maxTweets:
					return results
开发者ID:aliwebir,项目名称:cagaben,代码行数:60,代码来源:TweetManager.py

示例7: _add

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
 def _add(self, k, el):
     """Record the id and class names of *el*, ignoring its children.

     The element's ``id`` (if present) is added to
     ``self.selectors['ids']`` and each class name to
     ``self.selectors['classes']``. ``k`` is not used.
     """
     el = Pq(el)
     el_id, classes = el.attr('id'), el.attr('class')
     if el_id is not None:
         self.selectors['ids'].add(el_id)
     if classes is not None:
         # split() without an argument splits on any run of whitespace, so
         # repeated spaces, tabs or newlines no longer add '' (or merged
         # tokens) as a class name.
         for _class in classes.split():
             self.selectors['classes'].add(_class)
开发者ID:christabor,项目名称:codeReflector,代码行数:11,代码来源:css_reflector.py

示例8: replace_img

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
 def replace_img(index, node):
     """Make the ``src`` of an image node absolute relative to ``base_url``.

     Nodes without a ``src`` are returned unchanged. Joining is best-effort:
     if ``urljoin_rfc`` fails, the original ``src`` is kept.

     Returns the node wrapped in ``PyQuery``.
     """
     node = PyQuery(node)
     src = node.attr('src')
     if not src:
         return node
     try:
         node.attr('src', urljoin_rfc(base_url, src))
     except Exception:
         # Deliberately ignored: a URL that cannot be joined is left as is.
         # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
         # SystemExit propagate.
         pass
     return node
开发者ID:AlloVince,项目名称:EvaCrawler,代码行数:11,代码来源:article_spider.py

示例9: _absoluteurl

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
    def _absoluteurl(x):
        """Prefix a relative ``href`` of the current element with ``/``.

        Links with no ``href`` and links starting with ``#``, ``http`` or
        ``ftp`` are left alone. ``x`` is not used.

        NOTE(review): ``this`` is not defined in this function; presumably
        PyQuery injects it when the function is used as a callback -- confirm
        at the call site.
        """
        link = PyQuery(this)
        href = link.attr('href')
        if not href:
            return
        if href.startswith(('#', 'http', 'ftp')):
            return
        link.attr('href', '/' + href)
开发者ID:oikoumene,项目名称:wcc.crawler,代码行数:11,代码来源:ewnsevenweekwater.py

示例10: __processInstagramTag

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
    def __processInstagramTag(self, i, e):
        """Replace an Instagram embed element with a plain ``<img>``.

        The short code is taken from the ``href`` of the ``<a>`` found inside
        *e* and resolved with ``self.getInstagramImageUrl``. Elements that
        have no such link, or whose link does not match the expected
        ``http://.../p/<code>/`` form, are left untouched instead of raising.
        ``i`` is not used.
        """
        obj = PyQuery(e)
        url = obj('a').attr('href')
        # attr() returns None when there is no <a href>, and re.match()
        # returns None for URLs of another shape; both used to raise here.
        match = re.match("http://.*/p/(.*)/", url) if url else None
        if match is None:
            return
        shortCode = match.group(1)
        imageUrl = self.getInstagramImageUrl(shortCode)

        newObj = PyQuery("<img />")
        newObj.attr('src', imageUrl)
        obj.replaceWith(newObj)
开发者ID:arloliu,项目名称:w4-crawler,代码行数:11,代码来源:HtmlParser.py

示例11: fixLinks

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
 def fixLinks(text):
     d = PyQuery(text, parser='html')
     for element in d('a'):
         e = PyQuery(element)
         href = e.attr('href')
         if not abs_url_regex.search(href):
             new_href = re.sub(r'/index\.html$', '/', href)
             e.attr('href', new_href)
             print "\t", href, "=>", new_href
     return d.__unicode__().encode('utf8')
开发者ID:shaunlebron,项目名称:buster,代码行数:12,代码来源:buster.py

示例12: scrape_category

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
def scrape_category (url, title):
    category_slug = slugify (title)

    try:
        f = urlopen (url)
    except ValueError:
        if trace: print 'Retrying:', url
        url = 'http://eracks.com' + url.replace (' ','%20')
        if trace: print 'As:', url
        f = urlopen (url)

    doc = html5lib.parse(f, treebuilder='lxml', namespaceHTMLElements=False)  # this didn't work, but above three lines did: encoding='utf-8',
    html.xhtml_to_html (doc)
    jQuery = PyQuery([doc])

    page_title =  jQuery ('title').text()

    if page_title.startswith ("eRacks Open Source Systems: "):
        page_title = page_title.partition ("eRacks Open Source Systems: ") [-1]

    if page_title.startswith ("eRacks "):
        page_title = page_title.partition ("eRacks ") [-1]

    content = jQuery ('td#content')
    links = content ('a')
    images = content ('img')

    for link in links:
        a = PyQuery (link)
        href = a.attr('href')
        skus = find_sku.findall (href)

        if skus:
            sku = skus [0]
            #a.attr ('href', '/%s/%s/' % (category_slug, slugify (sku)))
            a.attr ('href', '/products/%s/%s/' % (category_slug, sku))
        elif href.startswith ('/Legacy'):
            sku = slugify (href.split ('/') [-1])
            #a.attr ('href', '/%s/%s/' % (category_slug, slugify (sku)))
            a.attr ('href', '/products/%s/%s/' % (category_slug, sku))

        print 'link:', a.attr('href')

    for image in images:
        img = PyQuery (image)
        src = img.attr('src')
        newsrc = getimage (src, 'categories/' + category_slug)
        img.attr ('src', newsrc)
        print 'image:', newsrc

    description = content.html()
    if trace: print description

    if dbteeth:
        cat = Categories.objects.get (name=title)
        cat.comments = cat.comments + '\n\nScraped from Zope as of ' + str(datetime.date.today())
        cat.description = description
        cat.title = page_title
        cat.save()
        print '..saved.'
开发者ID:satyadevi-nyros,项目名称:eracks,代码行数:62,代码来源:scrape_categories.py

示例13: make_possible_feed

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
def make_possible_feed(link_element):
    """Build a feed description from one ``<link>`` element.

    Returns ``{'feed_url': ..., 'feed_title': ...}`` when the element has a
    non-empty ``href``; the title falls back to ``'Unknown'`` when the
    ``title`` attribute is missing or empty. Without an ``href`` the problem
    is logged and ``False`` is returned.
    """
    link = PyQuery(link_element)
    href = link.attr('href')
    if not href:
        log.info("Skipping malformed link element for feed, missing href")
        return False
    return {
        'feed_url': href,
        'feed_title': link.attr('title') or 'Unknown',
    }
开发者ID:dineshkummarc,项目名称:sudosocial,代码行数:13,代码来源:views.py

示例14: _append_contents

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
 def _append_contents(struct, par):
     """Recursively build an element from *struct* and append it to *par*.

     ``struct['tag']`` names the element; optional ``'attributes'`` are
     copied onto it. A ``'text'`` entry sets the element text; otherwise the
     values of an optional ``'children'`` mapping are appended recursively.
     """
     element = PyQuery('<%s />' % struct['tag'])
     if 'attributes' in struct:
         for name, value in struct['attributes'].items():
             element.attr(name, value)
     if 'text' in struct:
         element.text(struct['text'])
     elif 'children' in struct:
         # Only the child structures matter; their keys are ignored.
         for _key, child_struct in struct['children'].iteritems():
             _append_contents(child_struct, element)
     par.append(element)
开发者ID:kobotoolbox,项目名称:formpack,代码行数:14,代码来源:submission.py

示例15: replace_link

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import attr [as 别名]
        def replace_link(index, node):
            """Make a matching ``href`` absolute relative to ``base_url``.

            Nodes without an ``href`` are returned unchanged. The stripped
            link is rewritten only when ``regex`` matches it; joining is
            best-effort and a failure leaves the ``href`` as it was.

            Returns the node wrapped in ``PyQuery``.
            """
            node = PyQuery(node)
            href = node.attr('href')
            if not href:
                return node

            link = href.strip()
            if regex.match(link):
                try:
                    node.attr('href', urljoin_rfc(base_url, link))
                except Exception:
                    # Deliberately ignored: keep the original href. Catching
                    # ``Exception`` rather than everything lets
                    # KeyboardInterrupt and SystemExit propagate.
                    pass
            return node
开发者ID:AlloVince,项目名称:EvaCrawler,代码行数:14,代码来源:article_spider.py


注:本文中的pyquery.PyQuery.attr方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。