本文整理汇总了Python中pyquery.PyQuery方法的典型用法代码示例。如果您正苦于以下问题:Python pyquery.PyQuery方法的具体用法?Python pyquery.PyQuery怎么用?Python pyquery.PyQuery使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyquery
的用法示例。
在下文中一共展示了pyquery.PyQuery方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: check_paste
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def check_paste(self, paste_id):
    """Fetch a pastebin paste and test its body against the configured regexes.

    :param paste_id: pastebin paste identifier, appended to ``self.PASTEBIN_URL``
    :return: True if any regex in ``self.regexes`` matched, False otherwise
    """
    paste_url = self.PASTEBIN_URL + paste_id
    try:
        paste_txt = PyQuery(url=paste_url)('#paste_code').text()
        for regex, file, directory in self.regexes:
            if re.match(regex, paste_txt, re.IGNORECASE):
                Logger().log('Found a matching paste: ' + paste_url + ' (' + file + ')', True, 'CYAN')
                self.save_result(paste_url, paste_id, file, directory)
                return True
        Logger().log('Not matching paste: ' + paste_url)
    except KeyboardInterrupt:
        raise
    except Exception:
        # Narrowed from a bare `except:` so SystemExit/GeneratorExit are no
        # longer swallowed; fetch/parse failures stay best-effort (log + False).
        Logger().log('Error reading paste (probably a 404 or encoding issue).', True, 'YELLOW')
    return False
示例2: get_ligands
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get_ligands(self, identifier):
    """Retrieve the ligand list for a PDB entry.

    :param identifier: a valid PDB identifier (e.g., 4HHB)
    :return: xml document describing the ligands; it can be parsed
        further with ``pyquery.PyQuery`` if needed.
    """
    query = {'structureId': identifier}
    return self.services.http_get("rest/ligandInfo", frmt='xml', params=query)
示例3: pyquery
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def pyquery(self):
    """
    Parse the response body into a `PyQuery <http://pyquery.org/>`_ object.

    Only meaningful for HTML and XML responses; any other content-type
    raises AttributeError.
    """
    ctype = self.content_type
    if 'html' not in ctype and 'xml' not in ctype:
        raise AttributeError(
            "Not an HTML or XML response body (content-type: %s)"
            % ctype)
    try:
        from pyquery import PyQuery
    except ImportError:  # pragma: no cover
        raise ImportError(
            "You must have PyQuery installed to use response.pyquery")
    return PyQuery(self.testbody)
示例4: fetch_note_list
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def fetch_note_list(self):
    """Crawl the user's notes index, following pagination, and collect
    each note's ``data-url`` attribute.

    :return: list of note URLs (may be empty if the first fetch fails)
    """
    notes = []
    page_url = self.account.user.alt + 'notes'
    while True:
        response = self.fetch_url_content(page_url)
        if not response:
            break
        dom = PyQuery(response.text)
        for container in dom('#content .article>.note-container'):
            notes.append(PyQuery(container).attr('data-url'))
        next_link = dom('#content .article>.paginator>.next>a')
        if not next_link:
            break
        page_url = next_link.attr('href')
    return notes
示例5: get
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get(self, douban_id):
    """Render a note page for the given douban id.

    Looks up the note and its comments, rewrites embedded iframes into
    external-video links, and tags all anchors as external links.
    Raises 404 when the note does not exist.
    """
    try:
        note = db.Note.get(db.Note.douban_id == douban_id)
        history = db.NoteHistorical.select().where(db.NoteHistorical.id == note.id)
    except db.Note.DoesNotExist:
        raise tornado.web.HTTPError(404)
    comments = db.Comment.select().join(db.User).where(
        db.Comment.target_type == 'note',
        db.Comment.target_douban_id == note.douban_id
    )
    content = PyQuery(note.content)
    iframe = content('iframe')
    # Replace embedded iframes with a plain link to the external video.
    iframe.before('<p class="title"><a href="{0}" class="external-link">站外视频</a></p>'.format(iframe.attr('src')))
    iframe.remove()
    content('a').add_class('external-link')
    self.render('note.html', note=note, comments=comments, content=content)
示例6: get_url_and_date
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get_url_and_date(I: str, O: str, id_data_output_path: str) -> None:
    '''
    Get image url and date.

    Parses a saved getchu query-result HTML page, writes one soft.phtml URL
    per matched id into `O`, and writes "<id> <date>" pairs into
    `id_data_output_path`.

    :param I: path of the input HTML file to parse
    :param O: output path for the generated soft.phtml URLs, one per line
    :param id_data_output_path: output path for "<id> <date>" pairs
    :return: None
    '''
    with open(I, encoding='utf-8') as fin:
        doc = pyquery.PyQuery(fin.read())
    # NOTE(review): .attr('id', 'query_result_main') with TWO arguments SETS
    # the id attribute on the selection (it is not an id selector) — confirm
    # this is intentional and not meant to be doc('#query_result_main').
    table = doc.attr('id', 'query_result_main')('tbody')
    id_data = []
    with open(O, 'w', encoding='utf-8') as fout:
        for line in table.items():
            for tr in line('tr').items():
                # Ids and dates are scraped from the row's visible text via
                # module-level ID_PATTERN / DATA_PATTERN regexes.
                lst = re.findall(ID_PATTERN, tr.text())
                data = re.findall(DATA_PATTERN, tr.text())
                if len(lst) == 0:
                    # Row without an id (e.g. header/filler) — skip it.
                    continue
                fout.write('http://www.getchu.com/soft.phtml?id={}&gc=gc\n'.format(lst[-1]))
                id_data.append([lst[-1], data[-1]])
    with open(id_data_output_path, 'w', encoding='utf-8') as fout:
        for each in id_data:
            fout.write('{} {}\n'.format(each[0], each[1]))
示例7: parse_reviews_and_users
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def parse_reviews_and_users(self, response):
    """Extract review and user items from a Tabelog review-list page.

    Returns a retry Request when the page is not a Tabelog page,
    otherwise a flat list of review and user objects.
    """
    if not self.is_tabelog(response):
        # Not the expected page — re-fetch the same URL without dedup filtering.
        return Request(url=response.url, dont_filter=True)
    page = PyQuery(response.body)
    business_id = int(re.findall(r'[a-z]+/A\d{4}/A\d{6}/(\d+)/dtlrvwlst/', response.url)[0])
    items = []
    for node in page('div.rvw-item'):
        uid = self._extract_user_id(node)
        review = self._generate_review(node, business_id, uid)
        if review:
            items.append(review)
        user = self._generate_user(node, uid)
        if user:
            items.append(user)
    return items
示例8: query_album_cover_from_xiami
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def query_album_cover_from_xiami(artist_name, album_name):
    """Search xiami.com for an album cover image URL.

    :param artist_name: artist name used in the search query
    :param album_name: album name used in the search query
    :return: cover URL string on success; False when not found or on a
        parse error; None when there is no network connection (the mixed
        None/False returns are kept for caller compatibility).
    """
    if not is_network_connected():
        return None
    if not artist_name and not album_name:
        return False
    xiami_album_search_url = 'http://www.xiami.com/search/album?key=' + artist_name + '+' + album_name
    html = public_curl.get(xiami_album_search_url)
    try:
        search_result_object = PyQuery(html)
        album_info_element = search_result_object(
            'div.albumBlock_list div.album_item100_block p.cover a.CDcover100 img')
        info_href_attr = album_info_element.attr('src')
        if not info_href_attr:
            return False
        # "_1" appears to be the thumbnail size suffix; "_2" selects a
        # larger cover image — site-specific convention.
        return info_href_attr.replace("_1", "_2")
    except Exception:
        # Narrowed from a bare `except:` so interpreter-exit exceptions are
        # not swallowed; parse failures remain best-effort.
        return False
示例9: getOriginalArticalNums
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def getOriginalArticalNums(self, proxies):
    """Fetch the blog main page and parse the number of original posts.

    :param proxies: proxies mapping passed straight through to requests.get
    :return: the article count as int, or 0 when the page fetch fails
    """
    main_response = requests.get(self.blogurl, proxies=proxies)
    # Only parse when the fetch succeeded (HTTP 200).
    if main_response.status_code == 200:
        print('获取成功')
        self.main_html = main_response.text
        main_doc = pq(self.main_html)
        mainpage_str = main_doc.text()  # page text with markup stripped
        origin_position = mainpage_str.index('原创')
        # The post count follows the SECOND occurrence of the label.
        end_position = mainpage_str.index('原创', origin_position + 1)
        self.blog_nums = ''
        # Collect the digit characters (starting 3 chars past the label)
        # until the first whitespace ends the number.
        for num in range(3, 10):
            if mainpage_str[end_position + num].isspace():
                break
            self.blog_nums += mainpage_str[end_position + num]
        print(type(str(self.blog_nums)))
        return int(self.blog_nums)
    else:
        print('爬取失败')
        return 0
示例10: get_seasons_for_league
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get_seasons_for_league(self, main_league_results_url):
    """
    Params:
        (str) main_league_results_url e.g. https://www.oddsportal.com/hockey/usa/nhl/results/
    Returns:
        (list) urls to each season for given league; empty when the
        league results page could not be loaded
    """
    seasons = []
    logger.info('Getting all seasons for league via %s', main_league_results_url)
    if not self.go_to_link(main_league_results_url):
        logger.error('League results URL loaded unsuccessfully %s', main_league_results_url)
        # Nothing to process further — hand back the empty list.
        return seasons
    document = pyquery(self.get_html_source())
    links = document.find('div.main-menu2.main-menu-gray > ul.main-filter > li > span > strong > a')
    logger.info('Extracted links to %d seasons', len(links))
    for link in links:
        season = Season(link.text)
        # Seed the season's URL list with the root season page.
        season.urls.append(self.base_url + link.attrib['href'])
        seasons.append(season)
    return seasons
示例11: test_report_movement_redirect
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def test_report_movement_redirect(self):
    """
    A report row's slug is rendered as an <a> element; following that
    element's href must land on the change form of the matching
    transaction, with slug and doc_type agreeing with the report row.
    :return:
    """
    self.client.login(username='super', password='secret')
    report_url = reverse('ra_admin:report', args=('client', 'clientdetailedstatement'))
    report_response = self.client.get(report_url,
                                      data={'client_id': self.client1.pk},
                                      HTTP_X_REQUESTED_WITH='XMLHttpRequest')
    first_row = report_response.json()['data'][0]
    link = pq(first_row['slug'])
    response = self.client.get(link.attr('href'), follow=True)
    self.assertEqual(response.status_code, 200)
    instance = response.context['original']
    self.assertEqual(instance.slug, link.text())
    self.assertEqual(instance.doc_type, first_row['doc_type'])
示例12: get_attribute
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get_attribute(self, attribute_id):
    """ Returns the attribute object for the given attribute id.

    Args:
        attribute_id (str): the attribute guid

    Returns:
        Attribute: Attribute object for this guid

    Raises:
        MstrClientException: if no attribute id is supplied
    """
    if not attribute_id:
        raise MstrClientException("You must provide an attribute id")
    response = self._request({
        'taskId': 'getAttributeForms',
        'attributeID': attribute_id,
        'sessionState': self._session,
    })
    doc = pq(response)
    return Attribute(doc('dssid')[0].text, doc('n')[0].text)
示例13: fetch_urls
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def fetch_urls(self, queue, quantity):
    """Drain `queue` of listing-page URLs and scrape proxies from each page
    into ``self.result_arr`` until `quantity` entries are collected.

    :param queue: queue of listing-page URLs to scrape
    :param quantity: target number of proxies to collect
    """
    while not queue.empty():
        url = queue.get()
        html = self.s.get(url, headers=self.headers).text
        pq = PyQuery(html)
        size = pq.find('tbody tr').size()
        for index in range(size):
            item = pq.find('tbody tr').eq(index)
            # Columns: 0 = ip, 1 = port, 3 = protocol type — assumed from
            # the scraped site's table layout; TODO confirm.
            ip = item.find('td').eq(0).text()
            port = item.find('td').eq(1).text()
            _type = item.find('td').eq(3).text()
            self.result_arr.append({
                str(_type).lower(): '{0}://{1}:{2}'.format(str(_type).lower(), ip, port)
            })
        # NOTE(review): source formatting was flattened — this break is placed
        # at while-body level (stop fetching further pages once enough proxies
        # were gathered); confirm against the original indentation.
        if len(self.result_arr) >= quantity:
            break
示例14: get_coin_id
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
def get_coin_id(coin_code):
    """
    This method fetches the name(id) of currency from the given code

    :param coin_code: coin code of a cryptocurrency e.g. btc
    :return: coin-id for the a cryptocurrency on the coinmarketcap.com
    :raises InvalidCoinCode: when the code is not listed on the site
    """
    # The former `except Exception as e: raise e` wrapper was a no-op
    # (it re-raised everything unchanged) and has been removed.
    url = "https://coinmarketcap.com/all/views/all/"
    raw_data = pq(get_url_data(url).text)
    code = coin_code.upper()
    # Skip the first <tr>: it is the table header row.
    for _row in raw_data("tr")[1:]:
        symbol = _row.cssselect("td.text-left.col-symbol")[0].text_content()
        # Strips a 3-char prefix from the row's first attribute value to get
        # the coin id — site-specific markup; TODO confirm prefix format.
        coin_id = _row.values()[0][3:]
        if symbol == code:
            return coin_id
    raise InvalidCoinCode("'{}' coin code is unavailable on coinmarketcap.com".format(coin_code))
示例15: getTypesL2
# 需要导入模块: import pyquery [as 别名]
# 或者: from pyquery import PyQuery [as 别名]
async def getTypesL2(target, types, href):
    """
    Fetch a level-2 category listing page and register pagination tasks.

    Fix: the body uses ``await`` and ``async with``, which is a SyntaxError
    inside a plain ``def`` — the function must be declared ``async def``.

    :param target: dict populated with {name: {'url', 'nums', 'UA_list'}}
    :param types: level-1 category name used as the task-key prefix
    :param href: URL of the level-2 listing page
    """
    loger.info(colored(f'fetching {href}', 'yellow'))
    resp = await spiderSession.get(href)
    async with trio.open_nursery() as nursery:
        for item in jq(resp.text)("body > div.content-base > section > div > table > tbody > tr").items():
            name = item(
                'td:nth-child(1)>a').text().strip().replace(' ', '_').lower()
            target[name] = {}
            url = urljoin(href, item('td:nth-child(1)>a').attr('href'))
            nums = int(item('td:nth-child(2)').text().strip())
            target[name]['url'] = url
            target[name]['nums'] = nums
            target[name]['UA_list'] = []
            # One task per listing page, keyed "types__name__<url><page>".
            for page in range(1, math.ceil(nums/PERPAGE)+1):
                TASKS.add('__'.join([
                    types,
                    name,
                    f"{url}{page}"
                ]))