本文整理汇总了Python中pyquery.pyquery.PyQuery类的典型用法代码示例。如果您正苦于以下问题:Python PyQuery类的具体用法?Python PyQuery怎么用?Python PyQuery使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了PyQuery类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
async def parse(self, input_text, *k, **kk):
    """Build an episode-list dict for a supported page URL.

    Returns an empty list when the URL is not supported by this
    parser; otherwise a dict with keys data/more/title/total/type/caption.
    """
    if not await self._check_support(input_text):
        return []
    html_text = await get_url_service.get_url_async(input_text)
    html = PyQuery(html_text)
    # Preferred title source: the page's main heading link.
    title = html('h1.main_title > a').text()
    if not title:
        # Fall back to the breadcrumb anchor that links back to this page.
        for anchor in html('div.crumb-item > a'):
            anchor = PyQuery(anchor)
            if anchor.attr('href') in input_text:
                title = anchor.text()
    if not title:
        # Last resort: the <title> text up to the first dash.
        # AttributeError covers match1 returning None (no <title> found).
        try:
            title = match1(html_text, '<title>([^<]+)').split('-')[0]
        except AttributeError:
            pass
    result = {
        "data": await self._get_list_info_api(html_text),
        "more": False,
        "title": title,
        "total": 0,
        "type": "list",
        "caption": "271视频全集",
    }
    return result
示例2: extract_data
def extract_data(text):
    """Append the page's 'p.data' text to the global accumulator and
    return the '.nextState' element's value (the next-page token)."""
    global total_data
    doc = PyQuery(text)
    total_data += doc.find('p.data').text()
    return doc.find('.nextState').attr('value')
示例3: parse
def parse(self, input_text, *k, **kk):
    """Scrape a LeTV listing page into a playlist dict."""
    page = PyQuery(get_url(input_text))
    anchors = page("div.gut > div.listTab > div.listPic > div.list > dl.w120 > dt > a")
    list_title = page("div.gut > div.listTab > div.listPic > div.tab:first-child > p.p1 > i").text()
    result = {
        "data": [],
        "more": False,
        "title": list_title,
        "total": len(anchors),
        "type": "list",
        "caption": "乐视视频全集",
    }
    for anchor in anchors:
        anchor = PyQuery(anchor)
        # The episode name lives on the thumbnail's title attribute.
        ep_title = anchor("a > img").attr("title")
        result["data"].append({
            "name": ep_title,
            "no": ep_title,
            "subtitle": ep_title,
            "url": anchor.attr("href"),
        })
    return result
示例4: detail_page
def detail_page(self, response):
    """Parse an estate detail page and queue its amenities sub-page.

    Returns a row [type_code, url, json_payload, timestamp] suitable
    for storage.
    """
    text = response.text.replace(' ', '')
    doc = PyQuery(text)
    # Per-section distribution: section title -> its <ul> text.
    fenbu = {
        section.find('.field-righttit').text(): section.find('ul').text()
        for section in doc.find(".right-border div").items()
    }
    # Label/value pairs: the gray label (colon stripped) maps to the
    # parent's text with the label removed.
    basic_info = {
        label.text().replace(u':', "").strip():
            label.parent().text().replace(label.text(), "").strip()
        for label in doc.find('.fc-gray').items()
    }
    # <dt>label</dt><dd>value</dd> pairs from the "other info" list.
    other_info = {
        dt.text().replace(u':', ''): dt.next().text()
        for dt in doc.find('.xiaoqu-otherinfo dt').items()
    }
    info_temp = {
        'base': response.save,
        'sell_rent_info': fenbu,
        'basic_info': basic_info,
        'other_info': other_info
    }
    self.crawl(response.url + 'amenities/', callback=self.amenities_page,
               save=info_temp, retries=100)
    return [
        2,
        response.url,
        json.dumps(info_temp),
        time.strftime('%Y-%m-%d %X', time.localtime())
    ]
示例5: urlHandle
def urlHandle(self, input_text):
    """Fetch the page and return the href of its child <a> element."""
    doc = PyQuery(common.getUrl(input_text))
    anchor = PyQuery(doc.children('a'))
    target = anchor.attr("href")
    print('urlHandle:"' + input_text + '"-->"' + target + '"')
    return target
示例6: Parse_le
def Parse_le(self, input_text):
    """Collect links from LeTV's front-page category list (dt.d_tit).

    Only absolute links matching http://www.le.com/*.html are kept.
    Returns a "collection" dict whose "total" equals the number of
    accepted entries.
    """
    html = PyQuery(get_url(input_text))
    items = html('dt.d_tit')
    data = {
        "data": [],
        "more": False,
        "title": "LETV",
        "total": 0,
        "type": "collection"
    }
    for item in items:
        a = PyQuery(item).children('a')
        url = a.attr('href')
        if url is None:
            continue
        # Raw string: '\.' in a plain literal is an invalid escape
        # (DeprecationWarning on Python 3.6+); the pattern is unchanged.
        if not re.match(r'^http://www\.le\.com/.+\.html', url):
            continue
        text = a.text()
        data["data"].append({
            "name": text,
            "no": text,
            "subtitle": text,
            "url": url,
            "caption": "首页地址列表"
        })
    # Replaces the original hand-maintained counter.
    data["total"] = len(data["data"])
    return data
示例7: url_handle
async def url_handle(self, input_text):
    """Resolve a redirect page: fetch it and return its child <a> href."""
    page = await get_url_service.get_url_async(input_text)
    anchor = PyQuery(PyQuery(page).children('a'))
    return anchor.attr("href")
示例8: onSuccess
def onSuccess(self, tid, context, response, headers):
    """Extract bbs article links from the listing page, store each as a
    new task row, then delegate to the base Spider.onSuccess."""
    resp = PyQuery(response)
    for h3 in resp.find("h3 a"):
        url = "http://dev.open.taobao.com/bbs/" + h3.attrib['href']
        # Single-argument print() is valid under both Python 2 and 3,
        # unlike the original 'print h3.text' statement form.
        print(h3.text)
        Spider.executeSql(
            self,
            "insert into task (task_type,url,status,http_code,task_context) values('topbbs文章',%s,0,-1,%s)",
            (url, h3.text))
    Spider.onSuccess(self, tid, context, response, headers)
示例9: __getPageAllLink
def __getPageAllLink(self, p):
    """Collect fresh listing links from one result page and crawl them.

    Listings posted within the last 7 days (or minutes/hours ago) are
    fetched via getContent; older ones stop the scan for kind 1/2.
    Returns True when the page was full (more pages likely follow),
    False otherwise.
    """
    # kind 1/2 pages use the "div.house" layout, the rest "div.qiuzu li".
    if self.kind == "1" or self.kind == "2":
        lis = PyQuery(p)("div.house")
    else:
        lis = PyQuery(p)("div.qiuzu li")
    links = []
    for li in lis:
        if self.kind == "2" or self.kind == "1":
            tm = PyQuery(li)("p.time").text()
            tm = tm and tm.replace("个人", "") or ""
            link = self.baseurl + PyQuery(li)("p.housetitle a").attr("href")
        else:
            tm = PyQuery(li)("span.li5").text()
            link = self.baseurl + PyQuery(li)("span.li2 a").attr("href")
            if self.kind == "4":
                # Skip shared-rental entries for kind 4.
                if PyQuery(li)("span.li1").text() == "合租 ":
                    continue
        if u"天" in tm:
            # "N天" (N days ago): accept only listings under 8 days old;
            # the list is newest-first, so an older one ends the scan.
            s = tm.find(u"天")
            tm = tm[:s]
            if int(tm) < 8:
                links.append(link)
            else:
                break
        elif u"小时" in tm:
            # "hours ago" — always fresh enough.
            links.append(link)
        elif u"分钟" in tm:
            # "minutes ago" — always fresh enough.
            links.append(link)
        else:
            continue
        if 1:  # NOTE(review): originally 'not checkPath(homepath, self.folder, link)' — dedup check disabled
            LinkLog.info("%s|%s" % (self.kind, link))
            try:
                getContent(link, self.citycode, self.kind)
            except Exception as e:
                # 'except Exception as e' / print() replace the
                # Python-2-only 'except Exception,e: print "..."' forms.
                print("ganji getContent Exception %s" % e)
            time.sleep(int(self.st))
    # A full page (30 items for kind 1/2, 35 otherwise) suggests
    # more pages remain.
    if self.kind == "1" or self.kind == "2":
        return len(links) == 30
    else:
        return len(links) == 35
示例10: url_handle
def url_handle(self, input_text):
    """Fetch the page and return the href of its child <a> element."""
    page = PyQuery(get_url(input_text))
    anchor = PyQuery(page.children('a'))
    target = anchor.attr("href")
    logging.debug('urlHandle:"' + input_text + '"-->"' + target + '"')
    return target
示例11: parse_html_page
def parse_html_page(self):
    """Populate torrent metadata fields from the cached HTML page.

    Reads self.html_page and fills info_hash, title, category,
    subcategory, language, cover_url, small_description, description,
    torrent_url and torrent_size.
    """
    pq = PyQuery(self.html_page)
    main_table = pq('#mainBody > table.coltable')

    def find_row(text):
        # The cell following the first-column cell whose text matches
        # exactly; None when no row matches.
        for c in main_table.find('td:first-child').items():
            if c.text() == text:
                # next(...) works on Python 3 generators, unlike the
                # original Python-2-only generator.next() call.
                return next(c.nextAll().items())

    def find_row_text(text, default=''):
        row = find_row(text)
        if row:
            return row.text()
        return default

    def find_row_html(text, default=''):
        row = find_row(text)
        if row:
            return row.html()
        return default

    self.info_hash = find_row_text('Info hash')
    self.title = pq.find('#mainBody > h1').text()
    self.category, self.subcategory = find_row_text('Type').split(' - ', 1)
    self.language = find_row_text('Language')
    self.cover_url = find_row('Picture:').find('img').attr('src')
    self.small_description = find_row_html('Small Description')
    self.description = find_row_html('Description')
    self.torrent_url = find_row('Download').find('a#dlNormal').attr('href')
    size_string = find_row_text('Size')
    # Raw string keeps the regex escapes literal, e.g.
    # "703.39 MB (737,628,160 bytes)" -> 737628160.
    match = re.match(r'.* \((?P<size>\d+(,\d\d\d)*) bytes\)', size_string)
    self.torrent_size = int(match.group('size').replace(',', ''))
示例12: Parse_v
def Parse_v(self, input_text):
    """From a video page, follow the datainfo-navlist anchors to the
    album/library page and delegate parsing to self.Parse."""
    print(input_text)
    nav = PyQuery(PyQuery(common.getUrl(input_text))("#datainfo-navlist"))
    for link in nav.children('a'):
        href = PyQuery(link).attr("href")
        if re.search('www.iqiyi.com/(a_|lib/m)', href):
            return self.Parse(href)
示例13: parse
async def parse(self, input_text, *k, **kk):
    """Scrape a QQ video episode page into a playlist dict."""
    page = PyQuery(await get_url_service.get_url_async(input_text))
    # Title comes from the first itemprop="name" meta tag, if any.
    title = ""
    for meta in page('meta[itemprop="name"]'):
        title = PyQuery(meta).attr("content")
        break
    result = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "list",
        "caption": "QQ视频全集"
    }
    for anchor in page(".mod_episode a"):
        anchor = PyQuery(anchor)
        episode_title = ""
        for span in PyQuery(anchor("span")):
            span = PyQuery(span)
            if span.attr("itemprop") == "episodeNumber":
                # Episode number span -> "第N集".
                episode_title = "第%s集" % span.text()
            elif span.has_class("mark_v"):
                # VIP/preview badge: append the badge image's alt text.
                episode_title += span.children("img").attr("alt")
        result["data"].append({
            "name": episode_title,
            "no": episode_title,
            "subtitle": episode_title,
            "url": anchor.attr("href")
        })
    result["total"] = len(result["data"])
    return result
示例14: parse
async def parse(self, input_text, *k, **kk):
    """Collect every video in a Youku album list.

    The album id is taken from the page URL, then the paged JSONP
    items endpoint is polled (page 1, 2, ...) until the API stops
    answering success, accumulating one entry per episode link.
    """
    html = PyQuery(await get_url_service.get_url_async(input_text))
    title = html("div.pl-title").attr("title")
    # Raw string for the regex escapes (\d, \.).
    list_id = re.search(
        r'https?://list.youku.com/albumlist/show/id_(\d+)\.html',
        input_text).group(1)
    ep = 'https://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=a'
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection",
        "caption": "优酷视频全集"
    }
    # The original fetched page 1 separately before the loop and
    # discarded the result; the loop below starts at page 1, so that
    # redundant request is dropped.
    page_num = 1
    while True:
        # BUGFIX: the original wrote
        #   await get_url_service.get_url_async(new_url)[14:-2]
        # which subscripts the coroutine before awaiting it
        # (TypeError: 'coroutine' object is not subscriptable).
        raw = await get_url_service.get_url_async(ep.format(list_id, page_num))
        # Strip the JSONP wrapper "a(" ... ");" before decoding.
        info = json.loads(raw[14:-2])
        if not (info.get("error", None) == 1
                and info.get("message", None) == "success"):
            break
        new_html = info.get("html", None)
        if not new_html:
            break
        new_html = PyQuery(new_html)
        for item in new_html("a[target='video'][data-from='2-1']"):
            item = PyQuery(item)
            ep_title = item.attr("title")
            data["data"].append({
                "name": ep_title,
                "no": ep_title,
                "subtitle": ep_title,
                "url": "http:" + item.attr("href"),
            })
        page_num += 1
    data["total"] = len(data["data"])
    return data
示例15: __initPageNum
def __initPageNum(self):
    """Read the page count from the first listing page and derive the
    remaining page numbers to crawl.

    Processes page 1 via __getPageAllLink; when it signals more pages,
    sets self.pn to [2 .. pg] inclusive.
    """
    initurl = "%s/%s/&act=personal&options=" % (self.baseUrl, self.urlpath)
    req = urllib2.Request(initurl, None, self.header)
    p = self.br.open(req).read()
    pg = PyQuery(p)("div#houses div.fl")
    # Search once instead of the original's duplicated re.search call.
    m = re.search(r'(\d+)', pg.text())
    if m:
        pg = m.group(1)
    r = self.__getPageAllLink(p)
    if not r:
        return
    # Pages 2..pg inclusive; page 1 was just handled above.
    self.pn = list(range(2, int(pg) + 1))
    # print() call is valid on both Python 2 and 3, unlike 'print ""'.
    print("")