本文整理汇总了Python中pyquery.PyQuery.text方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.text方法的具体用法?Python PyQuery.text怎么用?Python PyQuery.text使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyquery.PyQuery的用法示例。
在下文中一共展示了PyQuery.text方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parseProductPage
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def parseProductPage(product, need_img_urls=False):
    """Scrape extra fields from a product detail page into ``product``.

    When ``product['product_url']`` is falsy the dict is returned untouched.
    Otherwise the page is fetched with ``fetchContent`` and these keys are
    set: ``reviews``, ``star``, ``total_sales``, ``img_urls``, ``color``,
    ``size`` and ``MOQ``.  When the MOQ equals 1 the keys ``sku_size``,
    ``sku_color``, ``sku_price`` and ``sku_amount`` are set as well and a
    tab-separated summary line is printed (this reads ``product['sku_id']``,
    which must already be present in that case).

    Args:
        product: dict describing one product; it is mutated in place.
        need_img_urls: when true, ``img_urls`` becomes the ``', '``-joined
            result of ``get_img_urls``; otherwise it is the empty string.

    Returns:
        The same ``product`` dict that was passed in.
    """
    if not product['product_url']:
        return product

    content = fetchContent(product['product_url'], False)
    doc = PyQuery(content)

    # The shipping cost ("delivery") is filled in by JavaScript on the page,
    # so it cannot be read from the fetched HTML and is not extracted.
    product['reviews'] = doc('p.satisfaction-number > a > em.value').text()
    product['star'] = doc('p.star-level > i').attr("class")
    product['total_sales'] = doc('p.bargain-number > a > em.value').text()

    if need_img_urls:
        product['img_urls'] = ', '.join(get_img_urls(content))
    else:
        product['img_urls'] = ''

    product['color'], product['size'] = '', ''
    # Query the attribute table once; a value cell follows its label cell.
    cells = doc('div.obj-content > table > tbody > tr > td')
    cell_count = len(cells)
    for index, td in enumerate(cells):
        label_cell = PyQuery(td)
        if label_cell.attr('class') != 'de-feature':
            continue
        if index + 1 >= cell_count:
            # A label in the very last cell has no value cell after it.
            continue
        label = label_cell.text().strip()
        if label == u'颜色':
            product['color'] = PyQuery(cells[index + 1]).text()
        elif label == u'尺寸':
            product['size'] = PyQuery(cells[index + 1]).text()

    product['MOQ'] = extractNum(
        doc('tr.amount > td.ladder-1-1 > span.value').text().replace(u"≥", ""))
    if not product['MOQ']:
        # Fall back to the first non-title cell of the amount row.  Note that
        # ``remove`` drops the title cell from the parsed document itself.
        first_amount_cell = PyQuery(
            doc('tr.amount').remove('td.amount-title').children('td').eq(0))
        product['MOQ'] = extractNum(first_amount_cell('span.value').text())

    if product['MOQ'] == 1:
        product['sku_size'] = PyQuery(
            doc('div.unit-detail-spec-operator').eq(0))('span.text').text()
        first_sku_row = PyQuery(doc('table.table-sku > tr').eq(0))
        product['sku_color'] = first_sku_row('td.name').text()
        product['sku_price'] = first_sku_row('td.price').text()
        product['sku_amount'] = first_sku_row('td.count > span > em.value').text()
        # Single pre-formatted argument so the line parses both as a print
        # statement and as a print() call.
        print("%s \t%s \t%s \t%s \t%s" % (
            product['sku_id'], product['sku_size'], product['sku_color'],
            product['sku_price'], product['sku_amount']))
    return product
示例2: get_bounds
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def get_bounds(scene_name):
    """Use Earth Explorer metadata to get the bounds of a scene.

    The FGDC metadata page for ``scene_name`` is loaded, and the text between
    the first ``G-Ring_Latitude:`` marker and the ``Keywords:`` marker is
    parsed as alternating latitude / longitude values.

    Args:
        scene_name: scene identifier, used both for ``get_metadata_code`` and
            in the metadata URL.

    Returns:
        A list of ``[lon, lat]`` pairs.  If the first and last pair differ,
        the first pair is appended again at the end.  An empty list is
        returned when no coordinates are found.

    Raises:
        ValueError: if a non-empty line in the section is not a float.
    """
    url_code = get_metadata_code(scene_name)
    page = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
    )
    # Render the page text once instead of once per lookup.
    text = page.text()
    section = text[text.find('G-Ring_Latitude:'):text.find('\n Keywords:')]
    raw_values = (
        section.replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
    )
    numbers = [float(value) for value in raw_values if value != '']
    # Group consecutive values into [lat, lon] pairs and flip each pair to
    # [lon, lat].
    coords = [numbers[i:i + 2][::-1] for i in range(0, len(numbers), 2)]
    # Repeat the first coordinate on the end of the list; the emptiness check
    # avoids an IndexError when nothing was parsed.
    if coords and coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
示例3: get_year_of_nianjian
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def get_year_of_nianjian(self):
    """Create one folder per year link and run a ``YearPage`` worker for each.

    Fetches ``self.url`` with ``self.req_header``, un-gzips the response and
    reads every ``.list_h li a`` link.  Links are handled in groups of at
    most five: a folder named after the sanitised link text is created under
    ``self.root`` and, if that succeeds, a ``YearPage`` is queued in
    ``self.threads``.  After each group the queued workers are run and
    ``self.threads`` is reset to an empty list.
    """
    req = urllib2.Request(self.url, None, self.req_header)
    response = urllib2.urlopen(req)
    rawdata = myutils.ungzip(response)
    # Decode and parse the page once rather than once per link.
    year_links = PyQuery(rawdata.decode('utf-8'))(".list_h li a")
    year_num = len(year_links)
    i = 0
    while i < year_num:
        batch_end = min(i + 5, year_num)
        while i < batch_end:
            li = PyQuery(year_links[i])
            i += 1
            folder = os.path.join(self.root, myutils.filenameCheck(li.text()))
            try:
                os.mkdir(folder)
            except Exception:
                # Best effort: a folder that cannot be created is reported
                # and its year is skipped.
                print("%s created error" % (folder,))
                continue
            href = "http://tongji.cnki.net/kns55/Navi/" + li.attr("href")
            # NOTE(review): the worker receives the unsanitised link text,
            # while the folder above uses the sanitised name - confirm that
            # YearPage expects this.
            self.threads.append(
                YearPage(os.path.join(self.root, li.text()), href))
        for t in self.threads:
            if not t.isAlive():
                t.start()
                # NOTE(review): the original nesting of join() was lost; it
                # is kept next to start(), which runs workers one at a time.
                # Confirm whether the batch was meant to run concurrently.
                t.join()
        self.threads = []
示例4: extract_content
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def extract_content():
    """Print the title and link of every article on the candidate pages.

    Walks the URLs from ``to_tryurl_list()``, loads each one with PyQuery and
    prints ``<title> <href>`` for every ``.content .unit h1`` heading, where
    the href comes from the ``<a>`` inside the heading.  Iteration stops at
    the first page that has no such heading.
    """
    for tryurl in to_tryurl_list():
        complete_page = PyQuery(tryurl)
        article_titles = complete_page('.content .unit h1')
        if not article_titles:
            break
        for heading_element in article_titles:
            heading = PyQuery(heading_element)
            # Single pre-formatted argument so the line parses both as a
            # print statement and as a print() call.
            print("%s %s" % (heading.text(), heading.find("a").attr('href')))
示例5: _append_contents
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def _append_contents(struct, par):
    """Build an element from ``struct`` and append it to ``par``.

    Args:
        struct: mapping with a ``'tag'`` key and optional ``'attributes'``
            (mapping of attribute name to value), ``'text'`` and
            ``'children'`` (mapping whose values are nested structs).
            ``'children'`` is only used when ``'text'`` is absent.
        par: PyQuery object that receives the new element via ``append``.
    """
    node = PyQuery('<%s />' % struct['tag'])
    if 'attributes' in struct:
        for key, value in struct['attributes'].items():
            node.attr(key, value)
    if 'text' in struct:
        node.text(struct['text'])
    elif 'children' in struct:
        # The keys of the children mapping are not used, only the structs.
        for child in struct['children'].values():
            _append_contents(child, node)
    par.append(node)
示例6: process_place
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def _anchor_records(anchors):
    """Return a ``{'name': ..., 'link': ...}`` dict for each anchor element."""
    records = []
    for element in anchors:
        anchor = PyQuery(element)
        records.append({
            'name': anchor.text().encode('utf-8'),
            'link': anchor.attr('href'),
        })
    return records


def process_place(link, marker):
    """Fill ``marker`` with the details scraped from the place page ``link``.

    Sets ``title``, ``icon``, ``address`` and ``site`` on ``marker``, passes
    the second address line to ``process_position``, and sets ``phone``,
    ``time``, ``beers``, ``beerCountries`` and ``beerSorts`` on
    ``marker['objects'][0]``.  The address, phone and time values have the
    first ``len(ADDRESS)`` / ``len(PHONES)`` / ``len(WORK_TIME)`` bytes of
    their UTF-8 text dropped.

    Args:
        link: URL of the place page.
        marker: dict to update; ``marker['objects'][0]`` must already exist.

    Returns:
        The same ``marker`` dict.
    """
    response = urllib2.urlopen(link)
    try:
        page = PyQuery(response.read())
    finally:
        # Release the connection even if reading or parsing fails.
        response.close()

    post_body = page('.post_body')
    marker['title'] = post_body('h1').text().encode('utf-8')
    marker['icon'] = post_body('img:first').attr('src')

    addresses = post_body('td:eq(1)').html().split('<br/>')
    marker['address'] = PyQuery(addresses[0]).text().encode('utf-8')[len(ADDRESS):]
    process_position(PyQuery(addresses[1]).text().encode('utf-8'), marker)

    details = marker['objects'][0]
    details['phone'] = post_body('td:eq(2)').text().encode('utf-8')[len(PHONES):]
    details['time'] = post_body('td:eq(3)').text().encode('utf-8')[len(WORK_TIME):]

    try:
        site = post_body('a.inv').attr('href')
        if URL in site:
            site = site[len(URL):]
        marker['site'] = site
    except Exception:
        # Best effort: a missing link makes ``attr`` return None, so the
        # ``in`` test raises; the site is then recorded as empty.
        marker['site'] = ''
        print('Error on site getting: %s' % link)

    coment_details = page('.coment_details')
    details['beers'] = _anchor_records(
        coment_details('.coment_content>div:eq(0)')('td:odd')('a'))
    details['beerCountries'] = _anchor_records(
        coment_details('.coment_content>div:eq(1)')('td:odd')('a'))
    details['beerSorts'] = _anchor_records(
        coment_details('.coment_content>p:eq(2)')('a'))
    return marker
示例7: fetch_main
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def fetch_main(url=TARGET_SITE):
    """Return ``[text, absolute_link]`` pairs for the links in the 4th table.

    Args:
        url: page to load; also used as the prefix joined in front of each
            link's ``href``.  Defaults to ``TARGET_SITE``.

    Returns:
        A list of ``[link_text, url + href]`` lists in reverse document
        order.
    """
    # Load the page that was asked for.  The original always fetched
    # TARGET_SITE here, even when a different ``url`` was passed in.
    d = PQ(url=url)
    contents = []
    for a in d("table:eq(3) a"):
        ele = PQ(a)
        contents.append([ele.text(), "".join((url, ele.attr('href')))])
    return contents[::-1]
示例8: _main
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def _main():
    """Append MT-to-WP redirect rules to ``BASE_DIR/tmp/.htaccess``.

    Collects ``title -> href`` from the archive list at ``MT_ARCHIVES_URL``
    and ``title -> link`` from the XML export at ``WP_ARCHIVES_FILE_PATH``.
    One ``Redirect permanent`` line is written for each title present in
    both, followed by four fixed redirects for the feed and index paths.
    """
    # u'<title>': '<url>' sets
    mt_pages = {}
    wp_pages = {}

    # MT
    request = requests.get(MT_ARCHIVES_URL)
    document = PyQuery(request.content)
    for archive in document('#pagebody .archive-list a'):
        archive = PyQuery(archive)
        mt_pages[archive.text()] = archive.attr('href')

    # WP
    with open(WP_ARCHIVES_FILE_PATH, 'r') as fh:
        document = PyQuery(fh.read(), parser='xml')
    for item in document('channel item'):
        item = PyQuery(item)
        wp_pages[item('title').text()] = item('link').text()

    # Create .htaccess; the ``with`` block makes sure the rules are flushed
    # and the handle is closed.
    with open(BASE_DIR + '/tmp/.htaccess', 'a') as fh:
        for title, href in mt_pages.items():
            if title in wp_pages:
                fh.write('Redirect permanent %s %s\n' % (
                    re.sub(r'http://kjirou\.sakura\.ne\.jp', '', href),
                    wp_pages[title],
                ))
        fh.write('Redirect permanent /mt/index.xml http://blog.kjirou.net/feed\n')
        fh.write('Redirect permanent /mt/atom.xml http://blog.kjirou.net/feed\n')
        fh.write('Redirect permanent /mt/archives.html http://blog.kjirou.net\n')
        fh.write('Redirect permanent /mt http://blog.kjirou.net\n')
示例9: extract_variables
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def extract_variables(doc, data_source):
    """List every variable row with the samples in which it is available.

    Args:
        doc: callable taking a CSS selector and returning the matching
            elements (a PyQuery document).
        data_source: one of ``'usa'``, ``'international'`` or ``'cps'``;
            selects the CSS class that marks a sample column in the header.

    Returns:
        A list of ``{'name': ..., 'availability': [...]}`` dicts, one per
        ``tr.variables`` row.  ``availability`` holds the stripped header
        text of each sample column whose cell text is not ``'.'``, in column
        order.

    Raises:
        KeyError: if ``data_source`` is not one of the known sources.
    """
    # Extract header information.
    sample_css_class = {'usa': 'sampleColumn',
                        'international': 'shortCountryHeader',
                        'cps': 'shortCountryHeader'}[data_source]
    # (column index, header text) pairs.  A list keeps them in column order
    # instead of relying on dict iteration order.
    sample_columns = []
    variable_column = None
    header_cells = doc('tr.abbrevHeader.grayHeader:first > th')
    for position, header_element in enumerate(header_cells):
        header = PyQuery(header_element)
        if variable_column is None and header.hasClass('mnemonicColumn'):
            variable_column = position
        elif header.hasClass(sample_css_class):
            sample_columns.append((position, header.text()))

    # Extract actual values for the variables.
    variable_info = []
    for row in doc('tr.variables'):
        columns = PyQuery(row)('td')
        variable_name = PyQuery(columns[variable_column]).text().strip()
        availability = [label.strip()
                        for position, label in sample_columns
                        if PyQuery(columns[position]).text().strip() != '.']
        variable_info.append({'name': variable_name,
                              'availability': availability})
    return variable_info
示例10: prn_txt_for_sec
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def prn_txt_for_sec(index, node):
    """Print the UTF-8 encoded text of ``node`` if it has more than one ``td``.

    ``index`` is accepted but not used.
    """
    element = PyQuery(node)
    row_text = element.text()
    if len(element('td')) <= 1:
        return
    print(row_text.encode('utf-8'))
示例11: test_user_with_permisions_has_money_report_options
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def test_user_with_permisions_has_money_report_options(self):
    # The eff_report page must offer ODT and CSV links inside the project
    # report table.
    # NOTE(review): the name says "with permisions" but the fixture used is
    # ``self.no_perms_user`` - confirm which one is intended.
    username = self.no_perms_user.user.username
    response = self.get_response(view="eff_report", args=[username])
    links = PyQuery(response.content)("table#project-report-table a")
    link_text = links.text()
    for export_format in ("ODT", "CSV"):
        self.assertIn(export_format, link_text)
示例12: test_user_with_permisions_has_detailed_report_options
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def test_user_with_permisions_has_detailed_report_options(self):
    # The eff_report page must offer ODT and CSV links inside the detailed
    # report section.
    # NOTE(review): the name says "with permisions" but the fixture used is
    # ``self.no_perms_user`` - confirm which one is intended.
    username = self.no_perms_user.user.username
    response = self.get_response(view="eff_report", args=[username])
    links = PyQuery(response.content)("div#detailed-report a")
    link_text = links.text()
    for export_format in ("ODT", "CSV"):
        self.assertIn(export_format, link_text)
示例13: test_follower_has_detailed_report_options_for_followed_users
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def test_follower_has_detailed_report_options_for_followed_users(self):
    # The eff_report page of the first user in ``self.watcher.watches`` must
    # offer ODT and CSV links inside the detailed report section.
    followed = self.watcher.watches.all()[0]
    response = self.get_response(view="eff_report", args=[followed.username])
    links = PyQuery(response.content)("div#detailed-report a")
    link_text = links.text()
    for export_format in ("ODT", "CSV"):
        self.assertIn(export_format, link_text)
示例14: parseCategories
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def parseCategories(self, homepage_content):
    """Build the second-level category list from the homepage markup.

    For each first-level ``<li>`` under ``ul.js-flyout-nav`` whose
    ``data-id`` compares greater than ``'1'`` (a string comparison), the
    ``<a>`` children of every ``<div>`` with a matching ``data-cat-id`` are
    turned into categories via ``self.newCategory``.

    Args:
        homepage_content: homepage markup accepted by ``PyQuery``.

    Returns:
        A list of the objects returned by ``self.newCategory(name, url,
        [level1_name])``.
    """
    doc = PyQuery(homepage_content)
    # The "Gifts" and "Brands" <li> have data-id 0 and 1 respectively and
    # are skipped.  Both filters take ``this`` as an explicit argument
    # rather than relying on pyquery injecting it as a global.
    level1NodeList = doc('ul.js-flyout-nav > li').filter(
        lambda i, this: PyQuery(this).attr('data-id') > '1')
    # The document is not modified below, so query the divs only once.
    allDivs = doc('div')
    categoryList = []
    for level1Node in level1NodeList:
        level1NodeQ = PyQuery(level1Node)
        level1Name = level1NodeQ.children('a').text()
        level1Id = level1NodeQ.attr('data-id')
        level2NodeList = allDivs.filter(
            lambda i, this: PyQuery(this).attr('data-cat-id') == level1Id
        ).children('a')
        for level2Node in level2NodeList:
            level2NodeQ = PyQuery(level2Node)
            # Skip placeholders such as <a class="" href="" data-title=""/>.
            if not level2NodeQ.attr('class') or not level2NodeQ.text():
                continue
            categoryInfo = self.newCategory(
                level2NodeQ.text(),
                'http://www.backcountry.com' + level2NodeQ.attr('href'),
                [level1Name])
            categoryList.append(categoryInfo)
    return categoryList
示例15: __extract
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import text [as 别名]
def __extract(self, html):
    """Parse the availability table into ``{date: {room: {member_type: v}}}``.

    Dates are the first text line of each header cell in the first ``thead``
    row, skipping the first three columns.  Body rows are read in groups:
    the first cell of a group gives the room type and its ``rowspan`` gives
    the number of rows in the group.  Each row adds the values from
    ``self.__extract_price_remain(row)`` under ``"<room type> (<img alt>)"``
    and under ``'member'`` (row has class ``clubCardCell``) or ``'guest'``.

    Args:
        html: page markup accepted by ``PyQuery``.

    Returns:
        A dict keyed by date, then room label, then member type.
    """
    table = PyQuery(html).find("main#main #mainArea table")
    header_cells = table.find("thead tr:eq(0) th")
    date_order = [PyQuery(cell).text().split('\n')[0] for cell in header_cells][3:]
    result = {d: {} for d in date_order}

    index = 0
    total = len(table.find("tbody tr"))
    while index < total:
        td = table.find("tbody tr:eq(%d) td:eq(0)" % index)
        room_type = td.text().split()[0]
        # A cell without a rowspan attribute spans one row; clamping to at
        # least 1 also guarantees that the loop advances.
        rowspan = max(int(td.attr('rowspan') or 1), 1)
        for row_index in range(index, index + rowspan):
            row = table.find("tbody tr:eq(%d)" % row_index)
            # smoking or not
            smoking = row.find("td.alC.alM > img").attr("alt")
            room = "%s (%s)" % (room_type, smoking)
            member_type = 'member' if row.hasClass('clubCardCell') else 'guest'
            for column, value in enumerate(self.__extract_price_remain(row)):
                rooms_for_date = result[date_order[column]]
                rooms_for_date.setdefault(room, {})[member_type] = value
        index += rowspan
    return result