当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.text方法代码示例

本文整理汇总了Python中pyquery.PyQuery.text方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.text方法的具体用法?Python PyQuery.text怎么用?Python PyQuery.text使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyquery.PyQuery的用法示例。


在下文中一共展示了PyQuery.text方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: parseProductPage

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def parseProductPage(product, need_img_urls=False):
    """进入商品详情页, 抓取四个新字段
       delivery reviews star total_sales
    """
    if product['product_url']:
       content = fetchContent(product['product_url'], False)
       doc=PyQuery(content)
       #product['delivery'] = doc("div.cost-entries-type > p > em.value").text() 运费JS动态 解决不了
       product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
       product['star'] = doc('p.star-level > i').attr("class")
       product['total_sales'] = doc('p.bargain-number > a > em.value').text()
       if need_img_urls:
           url_list = get_img_urls(content)
           product['img_urls'] = ', '.join(url_list)
       else:
           product['img_urls'] = ''
       product['color'], product['size'] = '', ''
       for index, td in enumerate(doc('div.obj-content > table > tbody > tr > td')):
            tdQ = PyQuery(td)
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'颜色':
                product['color'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
            if tdQ.attr('class') =='de-feature' and tdQ.text().strip() == u'尺寸':
                product['size'] = PyQuery(doc('div.obj-content > table > tbody > tr > td')[index+1]).text()
       product['MOQ'] = extractNum(doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
       if not product['MOQ'] or product['MOQ'] == 0:
           product['MOQ'] = extractNum(PyQuery(doc('tr.amount').remove('td.amount-title').children('td').eq(0))('span.value').text())
       if product['MOQ'] == 1:
           #print product['product_url']
           product['sku_size'] = PyQuery(doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
           product['sku_color'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.name').text()
           product['sku_price'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.price').text()
           product['sku_amount'] = PyQuery(doc('table.table-sku > tr').eq(0))('td.count > span > em.value').text()
           print product['sku_id'], '\t', product['sku_size'], "\t", product['sku_color'], "\t", product['sku_price'], "\t", product['sku_amount']
    return product
开发者ID:chenweiqiang2016,项目名称:cwq-crawler,代码行数:36,代码来源:products.py

示例2: get_bounds

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def get_bounds(scene_name):
    """Use Earth Explorer metadata to get the bounds of a Scene.

    The metadata page for ``scene_name`` is fetched, and the numbers found
    between the 'G-Ring_Latitude:' marker and the 'Keywords:' section are
    read as alternating latitude / longitude values.

    :param scene_name: scene identifier, also passed to ``get_metadata_code``.
    :return: list of ``[lon, lat]`` pairs; the first pair is repeated at the
        end when the ring is not already closed. Empty when no numbers are
        found between the markers.
    :raises ValueError: if the G-Ring section cannot be located, or a value
        in it is not a number.
    """
    url_code = get_metadata_code(scene_name)

    page = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
        )
    # Extract the text once instead of calling .text() for every boundary.
    text = page.text()
    start = text.find('G-Ring_Latitude:')
    end = text.find('\n  Keywords:')
    if start == -1 or end == -1:
        # find() returns -1 on a miss, which would silently slice garbage.
        raise ValueError(
            'G-Ring coordinates not found in metadata of %s' % scene_name
            )

    tokens = (
        text[start:end]
        .replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
        )
    values = [float(token) for token in tokens if token != '']
    # group the values two by two and flip each [lat, lon] into [lon, lat]
    coords = [values[i:i + 2][::-1] for i in range(0, len(values), 2)]
    # repeat the first coordinate on the end of the list
    if coords and coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
开发者ID:ibamacsr,项目名称:imagery,代码行数:28,代码来源:utils.py

示例3: get_year_of_nianjian

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def get_year_of_nianjian(self):
     """Create a folder per link on the index page and crawl each in a thread.

     Fetches ``self.url``, then for every ``.list_h li a`` link creates a
     folder under ``self.root`` named after the link text and queues a
     ``YearPage`` thread for it. Links are handled in batches of five; each
     batch is started and fully joined before the next one begins. A link
     whose folder cannot be created is reported and skipped.
     """
     req = urllib2.Request(self.url, None, self.req_header)
     response = urllib2.urlopen(req)
     rawdata = myutils.ungzip(response)
     # Parse the page once and reuse the selection for every link.
     links = PyQuery(rawdata.decode('utf-8'))(".list_h li a")
     year_num = len(links)
     batch_size = 5
     for start in range(0, year_num, batch_size):
         for i in range(start, min(start + batch_size, year_num)):
             li = PyQuery(links[i])
             folder = myutils.filenameCheck(li.text())
             folder = os.path.join(self.root, folder)
             try:
                 os.mkdir(folder)
             except Exception:
                 print("%s created error" % folder)
                 continue
             href = "http://tongji.cnki.net/kns55/Navi/" + li.attr("href")
             # NOTE(review): the thread gets the raw link text, not the
             # sanitized ``folder`` name created above - confirm intended.
             self.threads.append(YearPage(os.path.join(self.root, li.text()), href))
         for t in self.threads:
             if not t.isAlive():
                 t.start()
         # Wait for the whole batch, not just the last thread, so that at
         # most one batch is running at any time.
         for t in self.threads:
             t.join()
         self.threads = []
开发者ID:dequn,项目名称:CNKIEXCEL,代码行数:30,代码来源:NianJian.py

示例4: extract_content

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def extract_content():
    """ 抓取需要的文章题目和链接 """
    for tryurl in to_tryurl_list(): 
        complete_page = PyQuery(tryurl) 

        article_titles = complete_page('.content .unit h1')
        if not article_titles:
            break
        for article_title in article_titles:
            article_title = PyQuery(article_title)
            sub_content = (article_title.text(), article_title.attr("href"))
            print article_title.text(), article_title.find("a").attr('href')
开发者ID:acmerfight,项目名称:python_news,代码行数:14,代码来源:extract_csdn.py

示例5: _append_contents

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def _append_contents(struct, par):
     """Build a node from ``struct`` and append it to ``par``.

     ``struct`` must provide 'tag'. The optional 'attributes' mapping is
     copied onto the node. If 'text' is present it becomes the node text;
     otherwise each value of the optional 'children' mapping is appended
     recursively as a child node (the children's keys are ignored).
     """
     tag = struct['tag']
     _node = PyQuery('<%s />' % tag)
     if 'attributes' in struct:
         for key, value in struct['attributes'].items():
             _node.attr(key, value)
     if 'text' in struct:
         _node.text(struct['text'])
     elif 'children' in struct:
         # Only the child structures matter; .values() also works on Python 3.
         for child in struct['children'].values():
             _append_contents(child, _node)
     par.append(_node)
开发者ID:kobotoolbox,项目名称:formpack,代码行数:14,代码来源:submission.py

示例6: process_place

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def _collect_links(anchors):
    """Return [{'name': utf-8 text, 'link': href}, ...] for the given <a> elements."""
    links = []
    for element in anchors:
        anchor = PyQuery(element)
        links.append({'name': anchor.text().encode('utf-8'),
                      'link': anchor.attr('href')})
    return links


def process_place(link, marker):
    """Scrape one place page and fill ``marker`` with what is found there.

    Sets ``title``, ``icon``, ``address`` and ``site`` on ``marker``, hands
    the second address line to ``process_position``, and stores ``phone``,
    ``time``, ``beers``, ``beerCountries`` and ``beerSorts`` on
    ``marker['objects'][0]``.

    :param link: URL of the place page.
    :param marker: dict to update; ``marker['objects'][0]`` must exist.
    :return: the same ``marker`` dict.
    """
    response = urllib2.urlopen(link)
    try:
        page = PyQuery(response.read())
    finally:
        # Always release the connection, even if parsing fails.
        response.close()

    post_body = page('.post_body')
    marker['title'] = post_body('h1').text().encode('utf-8')
    marker['icon'] = post_body('img:first').attr('src')
    addresses = post_body('td:eq(1)').html().split('<br/>')
    # The ADDRESS / PHONES / WORK_TIME constants are cut off the front of the text.
    marker['address'] = PyQuery(addresses[0]).text().encode('utf-8')[len(ADDRESS):]
    process_position(PyQuery(addresses[1]).text().encode('utf-8'), marker)
    marker['objects'][0]['phone'] = post_body('td:eq(2)').text().encode('utf-8')[len(PHONES):]
    marker['objects'][0]['time'] = post_body('td:eq(3)').text().encode('utf-8')[len(WORK_TIME):]

    site = post_body('a.inv').attr('href')
    if site is None:
        # No site link on the page: keep going with an empty value.
        marker['site'] = ''
        print('Error on site getting: %s' % link)
    else:
        if URL in site:
            site = site[len(URL):]
        marker['site'] = site

    coment_details = page('.coment_details')
    details = marker['objects'][0]
    details['beers'] = _collect_links(
        coment_details('.coment_content>div:eq(0)')('td:odd')('a'))
    details['beerCountries'] = _collect_links(
        coment_details('.coment_content>div:eq(1)')('td:odd')('a'))
    details['beerSorts'] = _collect_links(
        coment_details('.coment_content>p:eq(2)')('a'))

    return marker
开发者ID:tbicr,项目名称:BeerNav,代码行数:51,代码来源:generate_fixtures.py

示例7: fetch_main

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def fetch_main(url=TARGET_SITE):
    """Return ``[text, absolute_url]`` for each link in the page's fourth table.

    :param url: page to fetch; also used as the prefix for each link's href.
    :return: list of ``[link_text, url + href]`` pairs in reverse page order.
    """
    contents = []
    # Fetch the page the caller asked for, not unconditionally TARGET_SITE.
    d = PQ(url=url)
    for a in d("table:eq(3) a"):
        ele = PQ(a)
        contents.append([ele.text(), "".join((url, ele.attr('href')))])
    return contents[::-1]
开发者ID:caiknife,项目名称:test-python-project,代码行数:9,代码来源:main.py

示例8: _main

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def _main():
    """Append redirects from old MT archive URLs to the matching WP posts.

    Titles are collected from the MT archive page (``MT_ARCHIVES_URL``) and
    from the WP export file (``WP_ARCHIVES_FILE_PATH``). For every title found
    in both, a ``Redirect permanent`` line is appended to
    ``BASE_DIR/tmp/.htaccess``, followed by four fixed feed/index redirects.
    """
    # u'<title>':'<url>' sets
    mt_pages = {}
    wp_pages = {}

    # MT
    response = requests.get(MT_ARCHIVES_URL)
    document = PyQuery(response.content)
    for archive in document('#pagebody .archive-list a'):
        archive = PyQuery(archive)
        mt_pages[archive.text()] = archive.attr('href')

    # WP
    with open(WP_ARCHIVES_FILE_PATH, 'r') as fh:
        document = PyQuery(fh.read(), parser='xml')
    for item in document('channel item'):
        item = PyQuery(item)
        wp_pages[item('title').text()] = item('link').text()

    # Create .htaccess ('a' mode: lines are appended to any existing file)
    with open(BASE_DIR + '/tmp/.htaccess', 'a') as fh:
        for title, href in mt_pages.items():
            if title in wp_pages:
                fh.write('Redirect permanent %s %s\n' % (
                    # Redirect sources are paths, so strip the old host.
                    re.sub(r'http://kjirou\.sakura\.ne\.jp', '', href),
                    wp_pages[title],
                ))
        fh.write('Redirect permanent /mt/index.xml http://blog.kjirou.net/feed\n')
        fh.write('Redirect permanent /mt/atom.xml http://blog.kjirou.net/feed\n')
        fh.write('Redirect permanent /mt/archives.html http://blog.kjirou.net\n')
        fh.write('Redirect permanent /mt http://blog.kjirou.net\n')
开发者ID:kjirou,项目名称:goodbye_mt3,代码行数:35,代码来源:create_htaccess.py

示例9: extract_variables

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def extract_variables(doc, data_source):
    """List every variable row of ``doc`` with the samples it is available in.

    :param doc: PyQuery document holding the header row
        (``tr.abbrevHeader.grayHeader``) and the ``tr.variables`` rows.
    :param data_source: one of 'usa', 'international' or 'cps'; selects the
        CSS class that marks a sample column in the header.
    :return: list of ``{'name': ..., 'availability': [...]}`` dicts, one per
        variable row; a sample is listed unless its cell text is '.'.
    """
    # Header pass: find the variable-name column and the sample columns.
    css_by_source = {'usa': 'sampleColumn',
                     'international': 'shortCountryHeader',
                     'cps': 'shortCountryHeader'}
    sample_css_class = css_by_source[data_source]

    variable_column = None
    sample_columns = {}
    header_cells = doc('tr.abbrevHeader.grayHeader:first > th')
    for position, cell in enumerate(header_cells):
        header = PyQuery(cell)
        if variable_column is None and header.hasClass('mnemonicColumn'):
            # Only the first mnemonic column is taken as the name column.
            variable_column = position
            continue
        if header.hasClass(sample_css_class):
            sample_columns[position] = header.text()

    # Body pass: one entry per variable row.
    variable_info = []
    for row in doc('tr.variables'):
        cells = PyQuery(row)('td')
        name = PyQuery(cells[variable_column]).text().strip()
        availability = []
        for position, sample in sample_columns.items():
            if PyQuery(cells[position]).text().strip() != '.':
                availability.append(sample.strip())
        variable_info.append({'name': name,
                              'availability': availability})

    return variable_info
开发者ID:americanist,项目名称:IPUMS_Codebooks,代码行数:28,代码来源:build_var_lists.py

示例10: prn_txt_for_sec

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def prn_txt_for_sec(index, node):
    """Print the UTF-8 encoded text of ``node`` when it contains more than one <td>.

    ``index`` is accepted but not used.
    """
    row = PyQuery(node)
    text = row.text()
    if len(row('td')) <= 1:
        return
    print(text.encode('utf-8'))
开发者ID:appanp,项目名称:ceqna,代码行数:9,代码来源:html_to_mm.py

示例11: test_user_with_permisions_has_money_report_options

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def test_user_with_permisions_has_money_report_options(self):
     """Links inside table#project-report-table mention both 'ODT' and 'CSV'."""
     # NOTE(review): the test name says "with permissions" but the fixture
     # used is ``no_perms_user`` - confirm this is intended.
     username = self.no_perms_user.user.username
     response = self.get_response(view="eff_report", args=[username])
     links_text = PyQuery(response.content)("table#project-report-table a").text()
     for label in ("ODT", "CSV"):
         self.assertIn(label, links_text)
开发者ID:juannorris,项目名称:eff,代码行数:9,代码来源:testUserReportsPerms.py

示例12: test_user_with_permisions_has_detailed_report_options

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def test_user_with_permisions_has_detailed_report_options(self):
     """Links inside div#detailed-report mention both 'ODT' and 'CSV'."""
     # NOTE(review): the test name says "with permissions" but the fixture
     # used is ``no_perms_user`` - confirm this is intended.
     username = self.no_perms_user.user.username
     response = self.get_response(view="eff_report", args=[username])
     page = PyQuery(response.content)
     links_text = page("div#detailed-report a").text()
     for label in ("ODT", "CSV"):
         self.assertIn(label, links_text)
开发者ID:juannorris,项目名称:eff,代码行数:9,代码来源:testUserReportsPerms.py

示例13: test_follower_has_detailed_report_options_for_followed_users

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def test_follower_has_detailed_report_options_for_followed_users(self):
     """The report of the first watched user links to 'ODT' and 'CSV' in div#detailed-report."""
     followed = self.watcher.watches.all()[0]
     response = self.get_response(view="eff_report", args=[followed.username])
     page = PyQuery(response.content)
     links_text = page("div#detailed-report a").text()
     for label in ("ODT", "CSV"):
         self.assertIn(label, links_text)
开发者ID:juannorris,项目名称:eff,代码行数:9,代码来源:testUserReportsPerms.py

示例14: parseCategories

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
 def parseCategories(self, homepage_content):
     """Build the second-level category list from the homepage navigation.

     :param homepage_content: HTML of the homepage.
     :return: list of objects created by ``self.newCategory(name, url,
         [level1_name])``, one per usable second-level link.
     """
     doc = PyQuery(homepage_content)
     # The Gifts and Brands <li> have data-id 0 and 1 respectively; keep the
     # rest. This is a string comparison; a missing data-id counts as '' and
     # is therefore dropped as well.
     level1NodeList = doc('ul.js-flyout-nav > li').filter(
         lambda i, this: (PyQuery(this).attr('data-id') or '') > '1')
     # The set of <div>s does not change while iterating, so select it once.
     allDivs = doc('div')
     categoryList = []
     for level1Node in level1NodeList:
         level1NodeQ = PyQuery(level1Node)
         level1Name = level1NodeQ.children('a').text()
         level1Id = level1NodeQ.attr('data-id')
         # Second-level links sit in the <div> whose data-cat-id matches.
         level2NodeList = allDivs.filter(
             lambda i, this: PyQuery(this).attr('data-cat-id') == level1Id
         ).children('a')
         for level2Node in level2NodeList:
             level2NodeQ = PyQuery(level2Node)
             # Skip placeholder anchors such as <a class="" href="" data-title=""/>
             if not level2NodeQ.attr('class') or not level2NodeQ.text():
                 continue
             categoryInfo = self.newCategory(level2NodeQ.text(), 'http://www.backcountry.com' + level2NodeQ.attr('href'), [level1Name])
             categoryList.append(categoryInfo)
     return categoryList
开发者ID:chenweiqiang2016,项目名称:cwq-crawler,代码行数:18,代码来源:backcountry.py

示例15: __extract

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
    def __extract(self, html):
        """Parse the table in ``html`` into a nested dict.

        :param html: page markup containing ``main#main #mainArea table``.
        :return: ``{date: {room: {member_type: value}}}``, where ``date``
            comes from the table header, ``room`` is "<room type> (<alt text
            of the row's smoking icon>)", ``member_type`` is 'member' for
            rows with class ``clubCardCell`` and 'guest' otherwise, and
            ``value`` is what ``__extract_price_remain`` yields for that
            row and column.
        """
        table = PyQuery(html).find("main#main #mainArea table")

        # Keep the first text line of each header cell and skip the first
        # three cells (presumably the non-date columns - TODO confirm).
        headers = table.find("thead tr:eq(0) th")
        date_order = [PyQuery(th).text().split('\n')[0] for th in headers][3:]
        result = {d: {} for d in date_order}

        index = 0
        total = len(table.find("tbody tr"))
        while index < total:
            td = table.find("tbody tr:eq(%d) td:eq(0)" % index)

            room_type = td.text().split()[0]
            # A cell without a rowspan attribute spans one row (HTML default);
            # never advance by less than one row so the loop always ends.
            rowspan = max(int(td.attr('rowspan') or 1), 1)

            for row_index in range(index, index + rowspan):
                row = table.find("tbody tr:eq(%d)" % row_index)

                # smoking or not
                smoking = row.find("td.alC.alM > img").attr("alt")

                room = "%s (%s)" % (room_type, smoking)

                if row.hasClass('clubCardCell'):
                    member_type = 'member'
                else:
                    member_type = 'guest'

                # A separate index name keeps the row counter above intact.
                for col, v in enumerate(self.__extract_price_remain(row)):
                    result[date_order[col]].setdefault(room, {})[member_type] = v

            index += rowspan
        return result
开发者ID:mkfsn,项目名称:ToyokoInn,代码行数:37,代码来源:ToyokoInn.py


注:本文中的pyquery.PyQuery.text方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。