This article collects typical code examples of the pyquery.PyQuery.find method in Python. If you are unsure how to use PyQuery.find, or are looking for concrete usage examples, the curated samples below may help. You can also browse further usage examples for the containing class, pyquery.PyQuery.
The following presents 15 code examples of the PyQuery.find method, ordered by popularity by default.
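Before the examples, here is a minimal self-contained sketch of the basic pattern most of them rely on: build a PyQuery document, call .find() with a CSS selector, then read text or attributes from the result. The HTML snippet and selector below are illustrative assumptions, not taken from any of the examples that follow.
# A minimal PyQuery.find sketch; the HTML and selector are made up for illustration.
from pyquery import PyQuery

doc = PyQuery('<ul><li class="item">a</li><li class="item">b</li></ul>')
items = doc.find('li.item')          # PyQuery object wrapping every matching element
print(items.length)                  # 2
print(items.eq(0).text())            # 'a'
for li in items:                     # iterating yields raw lxml elements,
    print(PyQuery(li).text())        # so re-wrap each one to keep using PyQuery methods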
Example 1: test_calendar_tag_rendering
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def test_calendar_tag_rendering(self, timezone_mock):
    timezone_mock.now.return_value = tz_datetime(2015, 1, 10, 12)
    page_with_apphook = self.create_base_pages()
    other_config = EventsConfig.objects.create(namespace='other')
    self.create_event(
        title='ev1',
        start_date=tz_datetime(2015, 1, 13),
        publish_at=tz_datetime(2015, 1, 10)
    )
    self.create_event(
        title='ev2',
        start_date=tz_datetime(2015, 1, 15),
        publish_at=tz_datetime(2015, 1, 10)
    )
    self.create_event(
        de=dict(
            title='ev3',
            start_date=tz_datetime(2015, 1, 16),
            publish_at=tz_datetime(2015, 1, 10)
        )
    )
    self.create_event(
        title='ev4',
        start_date=tz_datetime(2015, 1, 18),
        publish_at=tz_datetime(2015, 1, 10),
        app_config=other_config
    )
    self.create_event(
        title='ev5',
        start_date=tz_datetime(2015, 1, 22),
        end_date=tz_datetime(2015, 1, 27),
        publish_at=tz_datetime(2015, 1, 10)
    )
    self.create_event(
        title='ev6',
        start_date=tz_datetime(2015, 1, 25),
    )
    # use the test's default self.app_config namespace instead of
    # hard-coding it
    template_str = """
        {%% load aldryn_events %%}
        {%% calendar 2015 1 'en' '%s' %%}
    """ % self.app_config.namespace
    t = Template(template_str)
    with override('en'):
        html = t.render(SekizaiContext({}))
    table = PyQuery(html)('table.table-calendar')
    page_url_en = page_with_apphook.get_absolute_url()
    links = table.find('td.events, td.multiday-events').find('a')
    # check that the tag rendered the important elements
    self.assertEqual('1', table.attr('data-month-numeric'))
    self.assertEqual('2015', table.attr('data-year'))
    self.assertEqual('10', table.find('td.today').text())
    self.assertEqual(8, links.length)  # 13, 15, 22, 23, 24, 25, 26, 27
    expected_days = (13, 15, 22, 23, 24, 25, 26, 27)
    for position, day in enumerate(expected_days):
        event_url = '{0}2015/1/{1}/'.format(page_url_en, day)
        rendered_url = links[position].attrib['href']
        self.assertEqual(event_url, rendered_url)
Example 2: get_old_fashion_comments
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def get_old_fashion_comments(answer_url):
    aid = comment_list_id(answer_url)
    comment_box_link = 'http://www.zhihu.com/node/AnswerCommentBoxV2?params=%7B%22answer_id%22%3A%22{}%22%2C%22load_all%22%3Atrue%7D'.format(aid)  # | log
    # log('comments: ' + comment_box_link)
    r = old_client._session.get(comment_box_link)
    # print(str(r.content))
    doc = PyQuery(str(r.content, encoding='utf-8'))
    comments = []
    for div in doc.find('div.zm-item-comment'):
        div = PyQuery(div)
        cid = div.attr('data-id')
        vote_count = int(div.find('span.like-num').find('em').text())
        content = div.find('div.zm-comment-content').html()
        author_text = div.find('div.zm-comment-hd').text().replace('\n', ' ')
        if ' 回复 ' in author_text:
            author, reply_to = author_text.split(' 回复 ')
        else:
            author, reply_to = author_text, None
        comment = OldFashionComment(cid=cid,
                                    vote_count=vote_count,
                                    content=content,
                                    author=OldFashionAuthor(author),
                                    reply_to=OldFashionAuthor(reply_to) if reply_to else None)
        comments.append(comment)
    return comments
Example 3: update_forums
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def update_forums(client, group, session):
    logging.info("Updating forums list for {}".format(group))
    query = Forum.get_forum_page(client, group.gid)
    reg = regex.compile(r"^forum\.php\?mod=forumdisplay&fid=(\d+)$")
    for row in query.find("table.fl_tb>tr"):
        sub_query = PyQuery(row)
        href = sub_query.find("td").eq(1).find("a").attr("href")
        if not href:
            continue
        fid = int(reg.findall(href)[0])
        # clone().children().remove().end().text() keeps only the element's own text
        name = sub_query.find("td").eq(1).find("h2>a").clone().children().remove().end().text()
        last_update = sub_query.find("td").eq(3).find("div>cite").clone().children().remove().end().text()
        last_update = dateparse(last_update)
        existence = session.query(Forum).filter(Forum.fid == fid)
        if existence.count() == 0:
            logging.info("<Forum(fid={})> not found, creating one".format(fid))
            forum = Forum(fid=fid, name=name, updated_at=last_update, group=group, fresh=False)
            session.add(forum)
        else:
            forum = existence.one()
            if forum.updated_at != last_update:
                logging.info("{} found, stale: against {}".format(forum, last_update))
                forum.updated_at = last_update
                forum.fresh = False
                session.add(forum)
            else:
                logging.info("{} found, fresh".format(forum))
Example 4: get_phonetic_symbol
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def get_phonetic_symbol(text):
    data = {"keyfrom": "deskdict.mini", "q": text, "doctype": "xml", "xmlVersion": 8.2,
            "client": "deskdict", "id": "cee84504d9984f1b2", "vendor": "unknown",
            "in": "YoudaoDict", "appVer": "5.4.46.5554", "appZengqiang": 0, "le": "eng", "LTH": 40}
    ret = requests.get("http://dict.youdao.com/search", params=data).text
    if isinstance(ret, unicode):
        ret = ret.encode('utf-8')
    pq = PyQuery(ret, parser="xml")
    phonetic_symbol = pq.find('usphone').text()
    phonetic_type = _("US")
    try:
        if phonetic_symbol == '':
            phonetic_symbol = pq.find('ukphone').text()
            phonetic_type = _("UK")
    except:
        pass
    if phonetic_symbol == '' or phonetic_symbol is None or phonetic_symbol.isspace():
        return ""
    else:
        if isinstance(phonetic_type, unicode):
            phonetic_type = phonetic_type.encode('utf-8')
        if isinstance(phonetic_symbol, unicode):
            phonetic_symbol = phonetic_symbol.encode('utf-8')
        return "[%s] %s" % (phonetic_type, phonetic_symbol)
Example 5: _parse_table
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def _parse_table(self, table):
    # Initialize table
    parsed_rows = []
    # Parse table
    qtable = PyQuery(table)
    # Get headers
    headers = self._get_headers(qtable)
    if not headers:
        return
    # Get rows
    rows = qtable.find("tr")
    # Loop over rows
    for row in rows:
        # Get columns
        qrow = PyQuery(row)
        cols = qrow.find("td").map(self._get_text)[:]
        # Parse column values
        for colidx in range(len(cols)):
            col = reduce(lambda x, y: re.sub(y[0], y[1], x), self._trans, cols[colidx])
            cols[colidx] = col
        # Append parsed columns
        if cols:
            parsed_rows.append(cols)
    return {"headers": headers, "data": parsed_rows}
Example 6: get_url
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def get_url(url):
    response = requests.get(url)
    doc = PyQuery(response.text)
    for article in doc('article'):
        h = PyQuery(article)
        print h.find('h1.entry-title').text().encode('utf-8')
        print h.find('div.entry-content p').text().encode('utf-8')
Example 7: process_chapter
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def process_chapter(chapter_path, index_path, enable_stem):
    # characters removed before indexing
    remove_chars = ['.', ',', ';', '?', '!', '-', u'–', u'―', u'—', u'~', ':', '"', ')', '(', '[', ']', '/', '\\', "'s", u'’s', "'", u'‘', u'’', u'“', u'”', u'¿', '*', '<', '>', '&', '{', '}']
    restricted_words = ['a', 'and', 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'beyond', 'but', 'by', 'despite', 'down', 'during', 'except', 'for', 'from', 'in', 'inside', 'into', 'like', 'near', 'of', 'off', 'on', 'onto', 'out', 'outside', 'over', 'past', 'since', 'the', 'through', 'throughout', 'till', 'to', 'toward', 'under', 'underneath', 'until', 'up', 'upon', 'with', 'within', 'without']
    # create jQuery-style object from the chapter HTML
    html = open(chapter_path, 'r').read()
    jquery = PyQuery(html)
    print jquery.find('.chapter').attr('data-osis')
    # find all verses, remove all notes and verse numbers
    verses = jquery('span.verse')
    verses.find('span.note').remove()
    verses.find('span.cf').remove()
    verses.find('.v-num').remove()
    for verse in verses:
        v = PyQuery(verse)
        osis = v.attr('data-osis')
        text = v.text()
        # remove punctuation
        for s in remove_chars:
            text = text.replace(s, '')
        words = text.split(' ')
        for word in words:
            word = word.strip().lower()
            # there's got to be a cleaner way to do this in Python
            is_restricted = True
            try:
                restricted_words.index(word)
            except:
                is_restricted = False
            if word != '' and not is_restricted and not word.isnumeric():
                # stemmer?
                if enable_stem:
                    word = stem(word)
                word_path = index_path + word + '.json'
                # check for file
                if os.path.exists(word_path):
                    f = open(word_path, 'a')
                    f.write(',"' + osis + '"')
                    f.close()
                else:
                    f = open(word_path, 'a')
                    f.write('["' + osis + '"')
                    f.close()
Example 8: getFullInfo
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def getFullInfo(self, interval=0.5):
    self.getBasicInfo()
    # collCount
    if self.INFO['shopType'] in ['7', '1', '3', '4']:
        regxrs = re.findall(
            r'J_SCollCount\"\s+data\-info\=\"param\=(.+?)\&',
            self.content)
        if regxrs:
            params = {'keys': regxrs[0], 't': '%.0f' % (time.time() * 1000),
                      'callback': 'TShop.setShopStat'}
            domain = 'http://count.tbcdn.cn/counter3'
            tUrl = domain + '?' + '&'.join([k + '=' + v for (k, v) in params.items()])
            r = request(tUrl)
            self.INFO['collCount'] = str(re.findall(r'\"\S+\"\:(\d+)', r.text, re.S)[0])
        else:
            if PyQuery:
                pyjq_obj = PyQuery(self.content.decode(self.res.encoding))
                data_info = (pyjq_obj.find('dl.collect-num dt') or pyjq_obj.find('.collect-num span')).attr('data-info')
                count_url = re.sub(
                    r'param=(?P<param>.+?)\&countUrl=(?P<count>.+?)\&.+',
                    r'\g<count>?callback=jsonp357&t=%d&keys=\g<param>' % (time.time() * 1000),
                    data_info,
                    re.S)
                collCount = re.sub(
                    r'.+\:\s*(?P<coll>\d+).+',
                    r'\g<coll>',
                    request(count_url).content)
                if collCount:
                    self.INFO['collCount'] = int(collCount)
            else:
                coll_url = 'http://favorite.taobao.com/collect_item_relation.htm?itemtype=0&itemNumid=%s' % self.INFO['shopId']
                try:
                    res = request(coll_url)
                    pyjq_obj = PyQuery(res.content.decode(res.encoding))
                    self.INFO['collCount'] = pyjq_obj.find("div.add-fav-msg strong").html().strip()
                except:
                    pass
    time.sleep(interval)
    # itemAmount
    if self.INFO['shopType'] == '2':
        tUrl = self.INFO['shopLink'] + '?search=y&orderType=_hotsell'
        r = request(tUrl)
        try:
            self.INFO['itemAmount'] = \
                str(re.findall(r'\<div\s+class\=\"search\-result\"\s*\>.+?(\d+)', r.text, re.S)[0])
        except:
            pass
        time.sleep(interval)
    return self.INFO
Example 9: test_device_elements
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def test_device_elements(self):
    response = self.client.get(self.url)
    self.assertEqual(response.status_code, 200)
    tree = PyQuery(response.content)
    radios = tree.find('input[type="radio"]')
    self.assertEqual(len(radios), 2)
    tree = PyQuery(response.content)
    checkboxes = tree.find('input[type="checkbox"]')
    self.assertEqual(len(checkboxes), 1)
Example 10: test_initial_post
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def test_initial_post(self):
    mock_request = self._get_post_request(type='generic')
    response, MockDevice = self._post_device_addition(mock_request)
    self.assertEqual(response.status_code, 200)
    self.assertFalse(MockDevice.objects.create.called)
    tree = PyQuery(response.content)
    self.assertEqual(len(tree.find('.error')), 0)
    device_name = tree.find('input[type=text][name=name]')
    self.assertEqual(len(device_name), 1)
    self.assertEqual(device_name.val(), 'Authentication device')
Example 11: parse
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def parse(self, response):
    html = Pq(response.body)
    job = items.OnetJob()
    job['url'] = response.url
    job['alt_title'] = html.find('[class="titleb"]').text()
    job['job_sample'] = html.find(
        'p:contains("Sample of reported job titles:")').text()
    job['summary'] = html.find(
        '#realcontent').find('p:eq(0)').text()
    job['job_sample'] = job['job_sample'].replace(
        'Sample of reported job titles:', '').split(', ')
    job['tasks'] = self._list(html, '.section_Tasks .moreinfo')
    job['tools'] = self._list(
        html, '.section_ToolsTechnology .moreinfo:first')
    job['technology'] = self._list(
        html, '.section_ToolsTechnology .moreinfo:last')
    job['knowledge'] = self._list(html, '.section_Knowledge .moreinfo')
    job['skills'] = self._list(html, '.section_Skills .moreinfo')
    job['abilities'] = self._list(html, '.section_Abilities .moreinfo')
    job['work_activities'] = {
        'basic': self._list(html, '.section_WorkActivities .moreinfo'),
        'detailed': self._list(
            html, '.section_DetailedWorkActivities .moreinfo'),
    }
    job['work_context'] = self._list(
        html, '.section_WorkContext .moreinfo')
    job['job_zone'] = self._table(html, '#content table:first')
    job['education'] = self._table(html, '#content table:eq(1)')
    job['interests'] = self._list(html, None, custom=html.find(
        '[name="Interests"]').siblings('.moreinfo:first'))
    job['work_styles'] = self._list(
        html, '.section_WorkStyles .moreinfo')
    job['interests'] = self._list(html, None, custom=html.find(
        '[name="WorkValues"]').siblings('.moreinfo:eq(1)'))
    job['related_occupations'] = self._table(
        html, '.section_RelatedOccupations table')
    job['wages_employment'] = self._table(
        html, '[summary="Wages & Employment Trends information'
        ' for this occupation"]')
    job['job_openings'] = ''
    job['additional_info'] = ''
    return job
Example 12: _fetch_mdn_page
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def _fetch_mdn_page(url):
    data = bleach.clean(_get_page(url), attributes=ALLOWED_ATTRIBUTES,
                        tags=ALLOWED_TAGS, strip_comments=False)
    root = PyQuery(data)
    toc = root.find('#article-nav div.page-toc ol')[0]
    content = root.find('#pageText')[0]
    toc.set('id', 'mdn-toc')
    content.set('id', 'mdn-content')
    return (etree.tostring(toc, pretty_print=True),
            etree.tostring(content, pretty_print=True))
Example 13: parse
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def parse(self, response):
    category = items.ONetCategory()
    html = Pq(response.body)
    category['url'] = response.url
    category['name'] = html.find('.reportdesc:eq(0)').text().replace(
        'Save Table ( XLS / CSV )', '')
    category['id'] = response.url.replace('{}?i'.format(
        self.root_url), '').replace(
        '&g=Go', '').replace('=', '').replace('.', '')
    category['bls_url'] = html.find(
        'div.reportdesc a:first').attr('href')
    category['occupation_data'] = self._extract_occupations(html)
    return category
Example 14: feed
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def feed(self, data, sentence):
    d = PyQuery(data)
    sets = d(".sentences_set")
    for s in sets:
        s = PyQuery(s)
        if s.find(".mainSentence .sentenceContent a").text().strip() == sentence:
            structure = s.find(".mainSentence .sentenceContent .romanization.furigana").text()
            translations = s.find(".translations:first") \
                .find(".sentence > img[title='English']") \
                .parent().find(".sentenceContent > a") \
                .map(lambda i, o: o.text)
            return (structure, translations)
    return (None, None)
Example 15: get_meme_url
# Required import: from pyquery import PyQuery [as alias]
# Or: from pyquery.PyQuery import find [as alias]
def get_meme_url(meme):
    gen = GENERATORS.get(meme)
    if gen:
        pq = PyQuery(url="http://memegenerator.net/%s" % gen[2])
        return pq.find('a img.large').attr('src')
    else:
        return None