本文整理汇总了Python中pyquery.PyQuery.split方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.split方法的具体用法?Python PyQuery.split怎么用?Python PyQuery.split使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyquery.PyQuery的用法示例。
在下文中一共展示了PyQuery.split方法的13个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_user_with_permisions_follows_everyone
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def test_user_with_permisions_follows_everyone(self):
    """Check the names listed in the table against the expected usernames.

    The rendered names are read from the ``td.name`` cells of
    ``table#queryTable``. The expected set is every user in
    ``self.watcher.watches``, plus ``self.no_perms_user.user``, plus the
    literal name ``"watcher"``.
    """
    response = self.get_response()
    query = PyQuery(response.content)
    query = query("table#queryTable td.name").text()
    expected_users = list(self.watcher.watches.all()) + [self.no_perms_user.user]
    # Build a real list: on Python 3 ``map`` returns an iterator, which has
    # no ``append`` method.
    names = [user.username for user in expected_users]
    names.append("watcher")
    self.assertEqual(set(query.split()), set(names))
示例2: test_followed_users_shows_correctly
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def test_followed_users_shows_correctly(self):
    """Check the names listed in the table against the watched users.

    The rendered names are read from the ``td.name`` cells of
    ``table#queryTable``. The expected set is every user in
    ``self.watcher.watches`` plus the literal name ``"watcher"``.
    """
    response = self.get_response()
    query = PyQuery(response.content)
    query = query("table#queryTable td.name").text()
    # Build a real list: on Python 3 ``map`` returns an iterator, which has
    # no ``append`` method.
    names = [user.username for user in self.watcher.watches.all()]
    names.append("watcher")
    self.assertEqual(set(query.split()), set(names))
示例3: get_questions
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def get_questions():
    """Return the question ids linked from ``BASE_URL``, sorted numerically.

    Within a session, ``login(s, register=True)`` is called and ``BASE_URL``
    is fetched through ``browse``. For each ``div.grid-question`` element the
    ``href`` of its parent is inspected; when it contains ``/play/`` the text
    after the last ``/play/`` is collected as the question id.

    Returns:
        The collected ids as a list, sorted with ``key=int``.
    """
    with session() as s:
        login(s, register=True)
        r = browse(s, BASE_URL)
        pq = PyQuery(r)
        questions = []
        for question_div in pq('div.grid-question'):
            link = PyQuery(question_div).parent().attr('href')
            # attr() yields None when the parent carries no href; skip those
            # elements instead of failing on the substring test.
            if link and '/play/' in link:
                questions.append(link.split('/play/')[-1])
        return sorted(questions, key=int)
示例4: get_cloud_rate
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def get_cloud_rate(scene_name):
    """Return the cloud cover value of a scene as a float.

    The value comes from the scene's ``<scene_name>_MTL.txt`` file under
    ``settings.MEDIA_ROOT`` when that file exists. Otherwise it is parsed
    from the text of the EarthExplorer metadata page for the scene.
    """
    satellite = 'L%s' % scene_name[2]
    mtl_file = join(settings.MEDIA_ROOT, satellite, scene_name, scene_name + '_MTL.txt')

    if not isfile(mtl_file):
        # No local MTL file: fall back to the online metadata page.
        code = get_metadata_code(scene_name)
        page = PyQuery(
            'http://earthexplorer.usgs.gov/metadata/%s/%s/' % (code, scene_name)
        )
        page_text = page.text()
        # Keep the text from the "Cloud Cover " label onwards; the value is
        # the third space-separated token of that tail.
        tail = page_text[page_text.find('Cloud Cover '):]
        return float(tail.split(' ')[2])

    with open(mtl_file, 'r') as handle:
        mtl_lines = handle.readlines()
    # Every line mentioning CLOUD_COVER is parsed; the first one wins.
    cloud_values = [
        float(entry.split(' = ')[-1])
        for entry in mtl_lines
        if 'CLOUD_COVER' in entry
    ]
    return cloud_values[0]
示例5: get_info_5
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def get_info_5(file_html):
    """
    Yield one record dict per ``.name`` element found under ``.normal-fir``.

    Each record starts from a fixed set of keys, is filled from the row's
    anchor, company title and "key: value" list items, and is finally merged
    with the dict returned by ``get_contact`` for the row's company id.
    After the last row the number of rows is printed.
    """
    page = Pq(file_html)
    rows = page('.normal-fir').find('.name')
    for row in rows:
        row_pq = Pq(row)
        anchor = row_pq.find('a')
        record = {
            u'公司编号': anchor.attr('company_id'),
            u'公司名称': '',
            u'公司链接': '',
            u'职位名称': anchor.text(),
            u'职位链接': anchor.attr('href'),
            u'薪资待遇': '',
            u'工作地点': '',
            u'工作经验': '',
            u'最低学历': '',
            u'招聘人数': '',
            u'公司规模': '',
        }
        record[u'公司名称'] = row_pq.find('.s-tit14.fl').text()
        for entry in row_pq.find('.s-butt.s-bb1 ul li'):
            pieces = Pq(entry).text().split(': ')
            # Only "key: value" items contribute; the label becomes the key.
            if len(pieces) >= 2:
                record[pieces[0]] = pieces[1]
        # Fetch the company's contact details and merge them into the record.
        contact = get_contact(record[u'公司编号'])
        yield dict(record, **contact)
    print('单页共 %s 条记录' % len(rows))
示例6: parseDetail
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def parseDetail(url, place):
    """Fill ``place`` in place with fields scraped from the page at ``url``.

    Failures while reading the description, or the contact phone / website,
    are logged through ``logger.error`` and do not abort the function.
    Country names are taken from the module-level ``inputCountryJson``.
    Nothing is returned.
    """
    logger.info(url)
    detail = PyQuery(url)

    title = detail(".dest_toptitle > div > div > p").remove('span')
    place["pNmEng"] = title.text().strip()
    try:
        place["pDesc"] = detail(".toggle_l:first > .text_style").html().strip()
    except Exception as err:
        logger.error(err)

    # Earlier approach, kept for reference: parse the coordinates from the map image URL.
    # mapSrc = qDetail(".s_sight_map > a > img").attr('src').split('%7C')[1].split('&')[0]
    # place["lng"] = mapSrc.split(',')[1]
    # place["lat"] = mapSrc.split(',')[0]
    place["lng"] = detail("#Lon").val()
    place["lat"] = detail("#Lat").val()

    # Only the first three anchors of the first .s_sight_con are used.
    type_anchors = detail(".s_sight_con:first > a")
    place["viewTypes"] = []
    for anchor in type_anchors[:3]:
        anchor_pq = PyQuery(anchor)
        href = anchor_pq.attr("href")
        code_id = href.split("/")[-1].split(".")[0].replace("s", "")
        place["viewTypes"].append({
            "codeId": code_id,
            "codeName": anchor_pq.text(),
        })

    try:
        place["contactTel"] = PyQuery(detail(".s_sight_con")[2]).text().strip()
        place["website"] = PyQuery(detail(".s_sight_con")[3])("a").text()
    except Exception as err:
        logger.error(err)

    # Concatenate the outer HTML of every <dd> following the matching <dt>.
    place["openHours"] = ""
    for dd in detail("dt:contains('开放时间')").nextAll("dd"):
        place["openHours"] += PyQuery(dd).outerHtml()
    place["expense"] = ""
    for dd in detail("dt:contains('门票信息')").nextAll("dd"):
        place["expense"] += PyQuery(dd).outerHtml()

    # ID used when fetching images; #JS_DistrictId holds the same value.
    place["districtid"] = detail("#ctmdistrict").val()
    # ID used when fetching images.
    place["resourceid"] = detail("#wentClickID").attr("dataresource-cat")
    # Image count, used when fetching images.
    photo_label = detail(".r_text").text()
    place["totalImgCount"] = photo_label.replace("全部", "").replace("张照片", "")

    place["countryEngName"] = inputCountryJson["eName"]
    # place["countryChnName"] = PyQuery(qDetail("i.arrow")[1]).parent('a').text()  # country name in Chinese
    place["countryChnName"] = inputCountryJson["cName"]  # country name in Chinese
    place["cityEngName"] = detail("#EName").val()  # city name in English
示例7: PyQuery
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
from subprocess import call
# Download the first .dmg linked from the Vagrant downloads page into
# ~/Downloads, unless a file with that name is already there.
url = 'http://www.vagrantup.com/downloads.html'
request = requests.get(url)
links = PyQuery(request.text)

# Look for the first anchor whose href ends in "dmg".
foi = False
remotefilename = None
for link in links('a'):
    href = PyQuery(link).attr('href')
    # Anchors without an href yield None; skip them instead of crashing
    # when slicing.
    if href and href[-3:] == 'dmg':
        remotefilename = href
        foi = True
        break

if foi:
    filename = remotefilename.split('/')[-1]
    locfile = os.environ['HOME'] + '/Downloads/' + filename
    if os.path.isfile(locfile):
        print(locfile + ' is already downloaded')
    else:
        stream = requests.get(remotefilename, stream=True)
        with open(locfile, 'wb') as fd:
            print("getting " + filename)
            count = 0
            for chunk in stream.iter_content(4096):
                count += 1
                # Progress indicator: one dot per 100 chunks.
                if count % 100 == 0:
                    sys.stdout.write('.')
                fd.write(chunk)
        print('done')
        print('Vagrant dmg file downloaded to ' + os.environ['HOME'] + '/Downloads')
示例8: test_not_billiable_projects_not_shown
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def test_not_billiable_projects_not_shown(self):
    """'FakeProject3' must not appear among the names in the projects table."""
    page = PyQuery(self.test_client.get(reverse('client_projects')).content)
    shown_names = page('table#queryTable td.name').text().split()
    self.assertNotIn('FakeProject3', set(shown_names))
示例9: test_billing_types_for_each_project_shown
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def test_billing_types_for_each_project_shown(self):
    """The ``td.type`` cells must contain exactly 'HOUR' and 'FIXED'."""
    page = PyQuery(self.test_client.get(reverse('client_projects')).content)
    shown_types = page('table#queryTable td.type').text().split()
    self.assertEqual({'HOUR', 'FIXED'}, set(shown_types))
示例10: test_all_billiable_projects_shown
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def test_all_billiable_projects_shown(self):
    """The ``td.name`` cells must contain exactly 'FakeProject1' and 'FakeProject2'."""
    page = PyQuery(self.test_client.get(reverse('client_projects')).content)
    shown_names = page('table#queryTable td.name').text().split()
    self.assertEqual({'FakeProject1', 'FakeProject2'}, set(shown_names))
示例11: scan_proxy
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
def scan_proxy():
    """
    Scrape the proxy list page and append one JSON line per proxy to ``proxy.json``.

    Rows are the ``<tr style="text-align: center;">`` elements under
    ``tbody``. A row is skipped when its first cell is empty, when the
    cleaned IP does not have four dot-separated parts, when the second cell
    has fewer than two class names, or when the port key is not in
    ``PortDict``. Accepted rows are written into the shared ``ProsyItem``
    mapping, serialised with ``json.dumps`` and appended through
    ``html.save_file``.

    :return: None
    """
    import requests
    from pyquery import PyQuery as Pq
    source_site = 'http://ip.qiaodm.com/'
    header = {
        'Host': 'ip.qiaodm.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
    }
    s = requests.session()
    # Fetch the page. The headers above were previously built but never
    # sent; pass them with the request.
    file_html = s.get(source_site, headers=header).content
    text_pq = Pq(file_html)
    tr_list = text_pq('tbody').find('tr[style="text-align: center;"]')
    print('单页共 %s 条记录' % len(tr_list))
    for tr_item in tr_list:
        td_list = Pq(tr_item).find('td')
        field_list = [Pq(td_item).text() for td_item in td_list]

        # The IP cell needs special handling: work on its raw inner HTML.
        ip = td_list.eq(0).html()
        if ip is None:
            # No usable first cell in this row.
            continue
        # Remove the interfering markup (<p> elements styled display:none).
        ip = html.replace_html(ip, r'<p style="display:none;"/>')
        ip = html.replace_html(ip, r'<p style="display: none;"/>')
        ip = html.replace_html(ip, r'<p style=.*?display:.*?none;.*?>.*?</p>')
        # Strip the remaining tags.
        ip = html.strip_html(ip)
        # Filter out anything that does not have four dot-separated parts.
        if len(ip.split('.')) != 4:
            continue

        # The port is looked up from the second class name of the second cell.
        class_names = (td_list.eq(1).attr('class') or '').split()
        if len(class_names) < 2:
            continue
        port_key = class_names[1]
        if port_key not in PortDict:
            print('发现新端口: %s' % port_key)
            continue
        port = PortDict[port_key]

        ProsyItem['Ip'] = ip.replace(' ', '')
        ProsyItem['Port'] = port
        ProsyItem['Type'] = field_list[2].strip()
        ProsyItem['AnonymousDegree'] = field_list[3].strip()
        ProsyItem['Area'] = field_list[4].strip()
        ProsyItem['Speed'] = field_list[5].strip()
        ProsyItem['ScanTime'] = field_list[6].strip()
        proxy_item = json.dumps(ProsyItem, ensure_ascii=False)
        html.save_file('proxy.json', proxy_item + '\n', 'a')
示例12: PyQuery
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
# Parse the payload returned by myutils.ungzip as UTF-8 HTML and walk the
# ".TreeList li" entries, creating one folder per entry under self.root.
payload = myutils.ungzip(response)
tree = PyQuery(payload.decode('utf-8'))
for item in tree(".TreeList li"):
    anchor = PyQuery(item)("a")
    self.pfolder = myutils.filenameCheck(anchor.text())
    # Append "_2" until the folder name is unused under self.root.
    while os.path.exists(os.path.join(self.root, self.pfolder)):
        self.pfolder += "_2"
    target_dir = os.path.join(self.root, self.pfolder)
    try:
        os.mkdir(target_dir)
    except Exception:
        print("%s created error" % target_dir)
    else:
        # The onclick attribute looks like a call "name(a, b, c, d)"; take
        # the comma-separated arguments and drop spaces and single quotes.
        call_args = anchor.attr('onclick').split('(')[1].strip(')').split(',')
        unquoted = [arg.strip().strip("'") for arg in call_args]
        param = {
            "id": unquoted[0],
            "code": unquoted[1] + "?",
            "type": unquoted[2],
            "fileid": unquoted[3],
        }
        self.get_child_catalog(param)
        time.sleep(1)
self.deal_error()
def get_child_catalog(self,param):
    """Build the request for the GetChildCatalog endpoint.

    ``param`` is url-encoded and passed as the request data, and
    ``self.req_header`` is passed as the request headers.

    NOTE(review): the visible excerpt stops after ``req`` is constructed;
    sending the request and handling the response are not shown here.
    """
    url = "http://tongji.cnki.net/kns55/Navi/GetChildCatalog.aspx"
    req = urllib2.Request(url,urllib.urlencode(param),self.req_header)
    # print req.get_full_url()
示例13: PyQuery
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import split [as 别名]
from pprint import pprint
'''
how to use
python3 getLineAllCities.py http://you.ctrip.com/sitelist/asia120001.html ./output/cities/asia
python3 getLineAllCities.py http://you.ctrip.com/sitelist/europe120002.html ./output/cities/europe
python3 getLineAllCities.py http://you.ctrip.com/sitelist/northamerica120004.html ./output/cities/northamerica
python3 getLineAllCities.py http://you.ctrip.com/sitelist/southamerica120005.html ./output/cities/southamerica
python3 getLineAllCities.py http://you.ctrip.com/sitelist/oceania120003.html ./output/cities/oceania
python3 getLineAllCities.py http://you.ctrip.com/sitelist/africa120006.html ./output/cities/africa
python3 getLineAllCities.py http://you.ctrip.com/sitelist/nanji120481.html ./output/cities/nanji
todo 南極的place要獨立去抓,因為南極沒有國家
todo 有些國家不在這7個洲裡,需再確認是否要全抓
'''
'''
Main
'''
# argv[1] is the continent site-list URL, argv[2] the output directory.
targetUrl = sys.argv[1]
outputDirectory = sys.argv[2]
qList = PyQuery(targetUrl)
for element in qList('.normalbox')('li > a'):
    href = PyQuery(element).attr('href')
    if not href:
        # attr() yields None for anchors without an href; the .replace()
        # below would fail on it, so skip such anchors.
        continue
    countryUrl = href.replace('/place', '/countrysightlist')
    # The JSON file is named after the third "/"-separated part of the
    # link, without ".html".
    targetJson = outputDirectory + "/" + countryUrl.split('/')[2].replace('.html', '') + ".json"
    countryUrl = "http://you.ctrip.com" + countryUrl
    pprint(countryUrl)
    pprint(targetJson)
    getCtripAllCity.main(countryUrl, targetJson)