This page collects typical usage examples of the Python method pyquery.pyquery.PyQuery.attr. If you are wondering what PyQuery.attr does, how to call it, or what real code using it looks like, the curated examples below may help. You can also explore further usage examples of the containing class, pyquery.pyquery.PyQuery.
The following shows 15 code examples of the PyQuery.attr method, sorted by popularity by default.
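Before the collected examples, a minimal, self-contained sketch of the method itself may help: attr(name) reads an attribute from the first matched element, and attr(name, value) sets it on the matched elements. The fragment below runs against an inline HTML string of its own and is not taken from any of the examples:

from pyquery import PyQuery

doc = PyQuery('<div><a href="https://example.com" title="demo">link</a></div>')
link = doc('a')
print(link.attr('href'))       # read an attribute: "https://example.com"
link.attr('title', 'renamed')  # write an attribute on the matched element
print(link.attr('title'))      # "renamed"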
Example 1: parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
async def parse(self, input_text, *k, **kk):
    html = await get_url_service.get_url_async(input_text)
    html = PyQuery(html)
    title = ""
    for meta in html('meta[itemprop="name"]'):
        meta = PyQuery(meta)
        title = meta.attr("content")
        break
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "list",
        "caption": "QQ视频全集"
    }
    for a in html(".mod_episode a"):
        a = PyQuery(a)
        _title = ""
        for span in PyQuery(a("span")):
            span = PyQuery(span)
            if span.attr("itemprop") == "episodeNumber":
                _title = "第%s集" % span.text()
            elif span.has_class("mark_v"):
                _title += span.children("img").attr("alt")
        info = {
            "name": _title,
            "no": _title,
            "subtitle": _title,
            "url": a.attr("href")
        }
        data["data"].append(info)
    data["total"] = len(data["data"])
    return data
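A detail worth calling out in Example 1, since it recurs in almost every example below: iterating over a PyQuery selection yields bare lxml elements, so each element is wrapped in PyQuery again before .attr() or .text() can be called on it. A standalone sketch of that idiom against a made-up fragment (not the QQ page the example scrapes):

from pyquery import PyQuery

html = PyQuery('<ul><li><a href="/ep1">第1集</a></li><li><a href="/ep2">第2集</a></li></ul>')
for a in html('li a'):   # iteration yields lxml elements, not PyQuery objects
    a = PyQuery(a)       # re-wrap so .attr() and .text() are available
    print(a.attr('href'), a.text())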
Example 2: parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
async def parse(self, input_text, *k, **kk):
    html = await get_url_service.get_url_async(input_text)
    html = PyQuery(html)
    p_title = html("div.pl-title")
    title = p_title.attr("title")
    list_id = re.search(r'https?://list.youku.com/albumlist/show/id_(\d+)\.html', input_text).group(1)
    ep = 'https://list.youku.com/albumlist/items?id={}&page={}&size=20&ascending=1&callback=a'
    first_u = ep.format(list_id, 1)
    xhr_page = await get_url_service.get_url_async(first_u)
    json_data = json.loads(xhr_page[14:-2])  # strip the JSONP "callback(...)" wrapper
    # print(json_data)
    # video_cnt = json_data['data']['total']
    xhr_html = json_data['html']
    # print(xhr_html)
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection",
        "caption": "优酷视频全集"
    }
    last_num = 1
    while True:
        new_url = ep.format(list_id, last_num)
        # await the response first, then strip the JSONP wrapper
        json_data = (await get_url_service.get_url_async(new_url))[14:-2]
        info = json.loads(json_data)
        if info.get("error", None) == 1 and info.get("message", None) == "success":
            new_html = info.get("html", None)
            if new_html:
                new_html = PyQuery(new_html)
                items = new_html("a[target='video'][data-from='2-1']")
                for item in items:
                    item = PyQuery(item)
                    url = "http:" + item.attr("href")
                    title = item.attr("title")
                    info = {
                        "name": title,
                        "no": title,
                        "subtitle": title,
                        "url": url
                    }
                    data["data"].append(info)
                last_num += 1
            else:
                break
        else:
            break
    data["total"] = len(data["data"])
    # print(data)
    return data
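Example 2 strips the JSONP wrapper produced by callback=a with a fixed slice ([14:-2]), which breaks silently if the wrapper ever changes length. A more tolerant alternative, offered only as a sketch under the assumption that the endpoint really returns callback(...) responses, is to cut at the outermost parentheses:

import json
import re

def strip_jsonp(text):
    # pull out whatever sits inside the outermost callback parentheses
    match = re.search(r'\((.*)\)', text, re.S)
    if match is None:
        raise ValueError("not a JSONP response")
    return json.loads(match.group(1))

print(strip_jsonp('/**/a({"error": 1, "message": "success", "html": "<li>...</li>"});'))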
Example 3: Parse_le
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def Parse_le(self, input_text):
    html = PyQuery(get_url(input_text))
    items = html('dt.d_tit')
    title = "LETV"
    i = 0
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": i,
        "type": "collection"
    }
    for item in items:
        a = PyQuery(item).children('a')
        name = a.text()
        no = a.text()
        subtitle = a.text()
        url = a.attr('href')
        if url is None:
            continue
        if not re.match(r'^http://www\.le\.com/.+\.html', url):
            continue
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "caption": "首页地址列表"
        }
        data["data"].append(info)
        i = i + 1
    total = i
    data["total"] = total
    return data
Example 4: url_handle
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def url_handle(self, input_text):
    html = PyQuery(get_url(input_text))
    a = html.children('a')
    a = PyQuery(a)
    url = a.attr("href")
    logging.debug('urlHandle:"' + input_text + '"-->"' + url + '"')
    return url
Example 5: url_handle
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
async def url_handle(self, input_text):
    html = await get_url_service.get_url_async(input_text)
    html = PyQuery(html)
    a = html.children('a')
    a = PyQuery(a)
    url = a.attr("href")
    return url
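Examples 4 and 5 do the same thing, one synchronously and one with async/await: resolve a wrapper page to the href of its single child link. Since get_url_service is project-specific, the core selection step can be tried on its own against a literal page (the URL below is made up for illustration):

from pyquery import PyQuery

page = '<div><a href="http://v.example.com/cover/abc.html">watch</a></div>'
html = PyQuery(page)
a = PyQuery(html.children('a'))
print(a.attr('href'))  # http://v.example.com/cover/abc.html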
Example 6: get_list_info_html
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def get_list_info_html(html):
    # print("get_list_info_html")
    data = []
    album_items = html('ul.site-piclist').children('li')
    for album_item in album_items:
        album_item = PyQuery(album_item)
        site_piclist_info = PyQuery(album_item.children('div.site-piclist_info'))
        site_piclist_info_title = PyQuery(site_piclist_info.children('p.site-piclist_info_title'))
        site_piclist_info_title_a = PyQuery(site_piclist_info_title.children('a'))
        site_piclist_info_title_fs12 = PyQuery(site_piclist_info.children('p.fs12'))
        site_piclist_info_title_fs12_a = PyQuery(site_piclist_info_title_fs12.children('a'))
        no = site_piclist_info_title_a.text()
        # if re.search("预告", no):
        #     continue
        name = site_piclist_info_title_fs12_a.text()
        url = site_piclist_info_title_fs12_a.attr('href')
        if url is None:
            continue
        subtitle = site_piclist_info_title_fs12_a.text()
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url
        }
        data.append(info)
    return data
Example 7: urlHandle
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def urlHandle(self, input_text):
    html = PyQuery(common.getUrl(input_text))
    a = html.children('a')
    a = PyQuery(a)
    url = a.attr("href")
    print('urlHandle:"' + input_text + '"-->"' + url + '"')
    return url
Example 8: parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
async def parse(self, input_text, *k, **kk):
    if not await self._check_support(input_text):
        return []
    html_text = await get_url_service.get_url_async(input_text)
    html = PyQuery(html_text)
    title = html('h1.main_title > a').text()
    if not title:
        for a in html('div.crumb-item > a'):
            a = PyQuery(a)
            if a.attr('href') in input_text:
                title = a.text()
    if not title:
        try:
            title = match1(html_text, '<title>([^<]+)').split('-')[0]
        except AttributeError:
            pass
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "list",
        "caption": "271视频全集"
    }
    data["data"] = await self._get_list_info_api(html_text)
    return data
Example 9: parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def parse(self, input_text, *k, **kk):
    html2 = get_url(input_text)
    html2 = PyQuery(html2)
    w120 = html2("div.gut > div.listTab > div.listPic > div.list > dl.w120 > dt > a")
    total = len(w120)
    title = html2("div.gut > div.listTab > div.listPic > div.tab:first-child > p.p1 > i").text()
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": total,
        "type": "list",
        "caption": "乐视视频全集"
    }
    for i in w120:
        i = PyQuery(i)
        url = i.attr("href")
        title = i("a > img").attr("title")
        info = {
            "name": title,
            "no": title,
            "subtitle": title,
            "url": url
        }
        data["data"].append(info)
    return data
Example 10: Parse_v
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def Parse_v(self, input_text):
    print(input_text)
    html = PyQuery(common.getUrl(input_text))
    datainfo_navlist = PyQuery(html("#datainfo-navlist"))
    for a in datainfo_navlist.children('a'):
        a = PyQuery(a)
        url = a.attr("href")
        if re.search('www.iqiyi.com/(a_|lib/m)', url):
            return self.Parse(url)
Example 11: Parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def Parse(self, input_text):
    html = PyQuery(self.getUrl(input_text))
    items = html('a')
    title = html('title').text()
    i = 0
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": i,
        "type": "collection"
    }
    for item in items:
        a = PyQuery(item)
        name = a.attr('title')
        if name is None:
            name = a.text()
        no = name
        subtitle = name
        url = a.attr('href')
        if url is None:
            continue
        if name is None or name == "":
            continue
        if not re.match(r'(^(http|https)://.+\.(shtml|html))|(^(http|https)://.+/video/)', url):
            continue
        if re.search(r'(list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com)', url):
            continue
        if re.search(r'(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))', no):
            continue
        unsure = False
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "unsure": unsure
        }
        data["data"].append(info)
        i = i + 1
    total = i
    data["total"] = total
    return data
Example 12: serializeArray
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def serializeArray(form):
    form = PyQuery(form)
    if not form.is_('form'):
        return []
    source = form.find('input, select, textarea')
    data = []
    for input in source:
        input = PyQuery(input)
        if input.is_('[disabled]') or not input.is_('[name]'):
            continue
        if input.is_('[type=checkbox]') and not input.is_('[checked]'):
            continue
        data.append((input.attr('name'), input.val()))
    return data
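To see what Example 12 produces, it can be fed a small literal form. The field names and values below are invented, and the expected output assumes a reasonably recent pyquery whose .val() handles text inputs, checkboxes, and selects:

from pyquery import PyQuery

form = PyQuery(
    '<form>'
    '<input type="text" name="q" value="pyquery" />'
    '<input type="checkbox" name="safe" checked="checked" value="on" />'
    '<input type="checkbox" name="beta" value="on" />'
    '<select name="lang"><option value="py" selected="selected">py</option></select>'
    '<input type="text" value="skipped: no name attribute" />'
    '</form>'
)
print(serializeArray(form))
# expected: [('q', 'pyquery'), ('safe', 'on'), ('lang', 'py')]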
Example 13: Parse_a
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
# ......... (the beginning of this example is omitted by the source site) .........
one['url'] = v['vUrl']
# get more info
one['vid'] = v['vid']
one['time_s'] = v['timeLength']
one['tvid'] = v['tvId']
out.append(one)
# get video info done
return out

# get info from js API port
info2 = get_info_from_js_port(html_text)
# replace vlist with js port data
vlist = []
for i in info2:
    one = {}
    one['no'] = i['no']
    one['subtitle'] = i['subtitle']
    one['url'] = i['url']
    vlist.append(one)
# done
return vlist

def get_list_info_html(html):
    # print("get_list_info_html")
    data = []
    album_items = html('ul.site-piclist').children('li')
    for album_item in album_items:
        album_item = PyQuery(album_item)
        site_piclist_info = PyQuery(album_item.children('div.site-piclist_info'))
        site_piclist_info_title = PyQuery(site_piclist_info.children('p.site-piclist_info_title'))
        site_piclist_info_title_a = PyQuery(site_piclist_info_title.children('a'))
        site_piclist_info_title_fs12 = PyQuery(site_piclist_info.children('p.fs12'))
        site_piclist_info_title_fs12_a = PyQuery(site_piclist_info_title_fs12.children('a'))
        no = site_piclist_info_title_a.text()
        # if re.search("预告", no):
        #     continue
        name = site_piclist_info_title_fs12_a.text()
        url = site_piclist_info_title_fs12_a.attr('href')
        if url is None:
            continue
        subtitle = site_piclist_info_title_fs12_a.text()
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url
        }
        data.append(info)
    return data

# print("2" + input_text)
def run(queue, get_list_info, html_text):
    try:
        result = get_list_info(html_text)
        if result != []:
            queue.put(result)
    except Exception as e:
        # import traceback
        # traceback.print_exc()
        print(e)

html_text = common.getUrl(input_text)
html = PyQuery(html_text)
title = html('h1.main_title').children('a').text()
for a in html('div.crumb-item').children('a'):
    a = PyQuery(a)
    if a.attr('href') in input_text:
        title = a.text()
i = 0
data = {
    "data": [],
    "more": False,
    "title": title,
    "total": i,
    "type": "list",
    "caption": "271视频全集"
}
results = []
parser_threads = []
q_results = queue.Queue()
parser_threads.append(threading.Thread(target=run, args=(q_results, get_list_info_api1, html_text)))
parser_threads.append(threading.Thread(target=run, args=(q_results, get_list_info_api2, html_text)))
for parser_thread in parser_threads:
    parser_thread.start()
for parser_thread in parser_threads:
    parser_thread.join()
# take the first non-empty result produced by the API extractors
while not q_results.empty():
    data["data"] = q_results.get()
    break
# fall back to scraping the HTML list if both API extractors came back empty
if data["data"] == []:
    try:
        data["data"] = get_list_info_html(html)
    except Exception as e:
        # import traceback
        # traceback.print_exc()
        print(e)
data["total"] = len(data["data"])
return data
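Example 13 races two API-based extractors in threads and only falls back to scraping the HTML if neither produced anything. The skeleton of that pattern, detached from the site-specific parsing (the two extractor functions below are stand-ins, not the real get_list_info_api1/get_list_info_api2):

import queue
import threading

def run(q, get_list_info, html_text):
    # same role as the nested run() above: push a non-empty result onto the queue
    try:
        result = get_list_info(html_text)
        if result:
            q.put(result)
    except Exception as e:
        print(e)

api1 = lambda text: []                                            # pretend this extractor found nothing
api2 = lambda text: [{"no": "1", "url": "http://example.com/1"}]  # pretend this one succeeded

q_results = queue.Queue()
threads = [threading.Thread(target=run, args=(q_results, f, "<html>...</html>")) for f in (api1, api2)]
for t in threads:
    t.start()
for t in threads:
    t.join()

data = q_results.get() if not q_results.empty() else []  # first successful extractor wins
print(data)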
Example 14: parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def parse(self, input_text, *k, **kk):
    global TWICE_PARSE_TIMEOUT
    html = PyQuery(get_url(input_text))
    items = html('a')
    title = html('title').text()
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection"
    }
    urls = []
    for item in items:
        a = PyQuery(item)
        name = a.attr('title')
        if name is None:
            name = a.text()
        no = name
        subtitle = name
        url = a.attr('href')
        if url is None:
            continue
        if name is None or name == "":
            continue
        if re.match(r'^(http|https|ftp)://.+\.(mp4|mkv|ts|avi)', url):
            url = 'direct:' + url
        if not re.match(r'(^(http|https)://.+\.(shtml|html|mp4|mkv|ts|avi))|(^(http|https)://.+/video/)', url):
            continue
        if re.search(
                r'[^\?](list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com|www.iqiyi.com/lib/s_|www.iqiyi.com/dv/|top.iqiyi.com)',
                url):
            continue
        if re.search(r'(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))', no):
            continue
        unsure = False
        # drop duplicate URLs
        for temp in urls:
            if temp == str(url):
                # print("remove:" + url)
                url = None
                break
        if url is None:
            continue
        urls.append(url)
        if re.search('(www.iqiyi.com/a_)|(www.le.com/comic)', url):
            unsure = True
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "unsure": unsure
        }
        data["data"].append(info)
    if self.TWICE_PARSE:
        try:
            from .. import main
        except Exception as e:
            import main

        def runlist_parser(queue, url, pool):
            try:
                result = main.parse(url, types="list", parsers_name=["iqiyilistparser.IQiYiAListParser",
                                                                     "iqiyilistparser.IQiYiLibMListParser",
                                                                     "iqiyilistparser.IQiYiVListParser"],
                                    pool=pool)[0]
                if (result is not None) and (result != []) and (result["data"] is not None) and (
                        result["data"] != []):
                    queue.put({"result": result, "url": url})
            except IndexError:
                pass
            except Exception as e:
                # continue
                logging.exception("twice parse %s failed" % url)
                # import traceback
                # traceback.print_exc()

        pool = WorkerPool(20)
        parser_threads = []
        parse_urls = []
        t_results = []
        q_results = Queue()
        with WorkerPool() as pool:
            for url in urls:
                pool.spawn(runlist_parser, q_results, url, pool)
            pool.join(timeout=self.TWICE_PARSE_TIMEOUT)
        while not q_results.empty():
            t_results.append(q_results.get())
        oldddata = data["data"]
        data["data"] = []
        for t_result in t_results:
            parse_urls.append(t_result["url"])
            for tdata in t_result["result"]["data"]:
                tdata["no"] = t_result["result"]["title"] + " " + tdata["no"]
            data["data"].extend(t_result["result"]["data"])
# ......... (the rest of this example is omitted by the source site) .........
Example 15: Parse
# Required import: from pyquery.pyquery import PyQuery [as alias]
# Or: from pyquery.pyquery.PyQuery import attr [as alias]
def Parse(self, input_text, types=None):
    if (types is not None) and ("collection" not in types):
        return
    html = PyQuery(common.getUrl(input_text))
    items = html('a')
    title = html('title').text()
    data = {
        "data": [],
        "more": False,
        "title": title,
        "total": 0,
        "type": "collection"
    }
    urls = []
    for item in items:
        a = PyQuery(item)
        name = a.attr('title')
        if name is None:
            name = a.text()
        no = name
        subtitle = name
        url = a.attr('href')
        if url is None:
            continue
        if name is None or name == "":
            continue
        if re.match(r'^(http|https|ftp)://.+\.(mp4|mkv|ts|avi)', url):
            url = 'direct:' + url
        if not re.match(r'(^(http|https)://.+\.(shtml|html|mp4|mkv|ts|avi))|(^(http|https)://.+/video/)', url):
            continue
        if re.search(r'(list|mall|about|help|shop|map|vip|faq|support|download|copyright|contract|product|tencent|upload|common|index.html|v.qq.com/u/|open.baidu.com|www.iqiyi.com/lib/s_|www.iqiyi.com/dv/|top.iqiyi.com)', url):
            continue
        if re.search(r'(下载|播 放|播放|投诉|评论|(\d{1,2}:\d{1,2}))', no):
            continue
        unsure = False
        for temp in urls:
            if temp == str(url):
                # print("remove:" + url)
                url = None
                break
        if url is None:
            continue
        urls.append(url)
        if re.search('(www.iqiyi.com/a_)|(www.le.com/comic)', url):
            unsure = True
        info = {
            "name": name,
            "no": no,
            "subtitle": subtitle,
            "url": url,
            "unsure": unsure
        }
        data["data"].append(info)
    if self.TWICE_PARSE:
        try:
            from . import listparser
        except Exception as e:
            import listparser
        try:
            from .. import run
        except Exception as e:
            import run

        def runlist_parser(queue, parser, url):
            url2 = urlHandle(url)
            try:
                result = parser.Parse(url2)
                if (result is not None) and (result != []) and (result["data"] is not None) and (result["data"] != []):
                    queue.put({"result": result, "url": url})
            except Exception as e:
                # continue
                print(e)
                # import traceback
                # traceback.print_exc()

        list_parser = listparser.ListParser()
        urlHandle = run.urlHandle
        parser_threads = []
        parse_urls = []
        t_results = []
        q_results = queue.Queue()
        for url in urls:
            for filter in list_parser.getfilters():
                if re.search(filter, url):
                    parser_threads.append(threading.Thread(target=runlist_parser, args=(q_results, list_parser, url)))
        for parser_thread in parser_threads:
            parser_thread.start()
        for parser_thread in parser_threads:
            parser_thread.join()
        while not q_results.empty():
            t_results.append(q_results.get())
        oldddata = data["data"]
        data["data"] = []
        for t_result in t_results:
            parse_urls.append(t_result["url"])
            for tdata in t_result["result"]["data"]:
# ......... (the rest of this example is omitted by the source site) .........