本文整理汇总了 Python 中 pyquery.PyQuery.replace 方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.replace 方法的具体用法?Python PyQuery.replace 怎么用?Python PyQuery.replace 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pyquery.PyQuery 的用法示例。
在下文中一共展示了PyQuery.replace方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: get_bounds
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: 示例中调用的 replace 是 Python 字符串 (str) 的内置方法, 并非 PyQuery 的方法, 无需另行导入
def get_bounds(scene_name):
    """Use Earth Explorer metadata to get the bounds of a scene.

    Loads the metadata page for ``scene_name`` and parses the numbers
    listed between the ``G-Ring_Latitude:`` and ``Keywords:`` markers of
    the page text.

    Args:
        scene_name: Scene identifier; used both to look up the metadata
            code and to build the metadata URL.

    Returns:
        A list of ``[lon, lat]`` float pairs. The first pair is repeated
        at the end when the listing does not already end with it. An
        empty list is returned when the section contains no numbers.

    Raises:
        ValueError: If the G-Ring section cannot be located in the page
            text, or if a line inside it is not a valid float.
    """
    url_code = get_metadata_code(scene_name)
    page = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
    )
    # Extract the text once and reuse it for both searches and the slice.
    text = page.text()

    start = text.find('G-Ring_Latitude:')
    end = text.find('\n Keywords:')
    # str.find() returns -1 on a miss; using -1 as a slice bound would
    # silently select the wrong part of the page, so fail loudly instead.
    if start == -1 or end == -1 or end < start:
        raise ValueError(
            'G-Ring coordinates not found in metadata for %s' % scene_name
        )

    lines = (
        text[start:end]
        .replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
    )
    values = [float(line) for line in lines if line != '']

    # Group the flat list into pairs and flip each [lat, lon] pair to
    # [lon, lat].
    coords = [values[i:i + 2][::-1] for i in range(0, len(values), 2)]

    # Repeat the first coordinate at the end of the list; the emptiness
    # check avoids an IndexError when no numbers were found.
    if coords and coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
示例2: get_img_urls
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: 示例中调用的 replace 是 Python 字符串 (str) 的内置方法, 并非 PyQuery 的方法, 无需另行导入
def get_img_urls(content):
    """Collect image URLs from a page and from its lazily loaded description.

    Args:
        content: HTML of the page. A falsy value (``None``, ``''``)
            yields an empty result without any parsing.

    Returns:
        A list of URL strings: first the ``src`` of every image matched
        by the thumbnail selector (with ``60x60`` rewritten to
        ``400x400``), then the ``src`` of every ``<img>`` tag found in
        the description document, skipping URLs that contain ``gif``.
        If the page has no description link, or nothing could be fetched
        from it, only the first group is returned.
    """
    if not content:
        return []

    doc = PyQuery(content)
    url_list = []

    for node in doc('li.tab-trigger > div.vertical-img > a.box-img > img'):
        url = PyQuery(node).attr('src')
        if not url:
            continue
        # str.replace() is a no-op when the marker is absent, so no find()
        # pre-check is needed; this also covers a marker at index 0.
        url_list.append(url.replace('60x60', '400x400'))

    link_url = doc('div#desc-lazyload-container').attr('data-tfs-url')
    if not link_url:
        return url_list

    desc_content = fetchPageWithUrl(link_url)
    if not desc_content:
        # Nothing was fetched; re.findall() would raise on a None value.
        return url_list

    for tag in re.findall(r'<img[^<>]*>', desc_content):
        desc_url = PyQuery(tag)('img').attr('src')
        if not desc_url:
            continue
        # Drop the escaped quotes (\") that surround the attribute value.
        desc_url = desc_url.replace('\\"', '')
        # Skip URLs left empty by the cleanup, and GIF images.
        if not desc_url or 'gif' in desc_url:
            continue
        url_list.append(desc_url)
    return url_list
示例3: scan_proxy
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: 示例中调用的 replace 是 Python 字符串 (str) 的内置方法, 并非 PyQuery 的方法, 无需另行导入
def scan_proxy():
    """Scan the proxy listing page and append every usable row to proxy.json.

    Fetches ``http://ip.qiaodm.com/``, walks the centred table rows and,
    for each row with a well-formed IP and a known port class, fills the
    module-level ``ProsyItem`` mapping and appends it as one JSON line to
    ``proxy.json`` through ``html.save_file``.

    Rows are skipped (not fatal) when the IP does not have four
    dot-separated parts, when the port cell has no second CSS class, when
    that class is not a key of ``PortDict``, or when the row has fewer
    than seven cells.

    NOTE(review): ``html``, ``PortDict``, ``ProsyItem`` and ``json`` are
    expected to be available at module level; they are not defined in
    this function.

    :return: None
    """
    import requests
    from pyquery import PyQuery as Pq

    source_site = 'http://ip.qiaodm.com/'
    header = {
        'Host': 'ip.qiaodm.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
    }

    # Fetch the page, sending the browser-like headers prepared above,
    # and release the session's connections once the body has been read.
    s = requests.session()
    try:
        file_html = s.get(source_site, headers=header).content
    finally:
        s.close()

    text_pq = Pq(file_html)
    tr_list = text_pq('tbody').find('tr[style="text-align: center;"]')
    # print(...) with one parenthesised argument behaves the same as the
    # Python 2 print statement.
    print('单页共 %s 条记录' % len(tr_list))

    for tr_item in tr_list:
        td_list = Pq(tr_item).find('td')
        field_list = [Pq(td_item).text() for td_item in td_list]
        # Cells 0..6 are read below; skip a short row instead of crashing.
        if len(field_list) < 7:
            continue

        # The IP cell needs special handling: take its raw HTML.
        ip = td_list.eq(0).html()
        # Remove the decoy markup (hidden <p> elements) ...
        ip = html.replace_html(ip, r'<p style="display:none;"/>')
        ip = html.replace_html(ip, r'<p style="display: none;"/>')
        ip = html.replace_html(ip, r'<p style=.*?display:.*?none;.*?>.*?</p>')
        # ... then strip the remaining tags.
        ip = html.strip_html(ip)
        # Filter out malformed IP addresses.
        if len(ip.split('.')) != 4:
            continue

        # The port is encoded in the second CSS class of the port cell.
        port_classes = (td_list.eq(1).attr('class') or '').split()
        if len(port_classes) < 2:
            continue
        port_key = port_classes[1]
        if port_key not in PortDict:
            print('发现新端口: %s' % port_key)
            continue

        ProsyItem['Ip'] = ip.replace(' ', '')
        ProsyItem['Port'] = PortDict[port_key]
        ProsyItem['Type'] = field_list[2].strip()
        ProsyItem['AnonymousDegree'] = field_list[3].strip()
        ProsyItem['Area'] = field_list[4].strip()
        ProsyItem['Speed'] = field_list[5].strip()
        ProsyItem['ScanTime'] = field_list[6].strip()

        proxy_item = json.dumps(ProsyItem, ensure_ascii=False)
        html.save_file('proxy.json', proxy_item + '\n', 'a')
示例4: PyQuery
# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: 示例中调用的 replace 是 Python 字符串 (str) 的内置方法, 并非 PyQuery 的方法, 无需另行导入
server.sendmail(mailFrom, rcptToList, message.as_string())
server.quit()
if '__main__' == __name__:
    # Script entry point: for every <novel> entry in the config file, look
    # for the thread whose title contains the next expected chapter number,
    # mail that chapter to the entry's recipients, and advance the stored
    # chapter counter. The updated config is written back at the end.
    configFile = 'config.cfg'
    novels = PyQuery(filename=configFile)
    message = ''
    for novel in novels('novel'):
        entry = PyQuery(novel)
        name = entry('name').text()
        url = entry('url').text()
        prefix = entry('prefix').text()
        # Named `chapter` rather than `next` so the builtin next() is not
        # shadowed at module level.
        chapter = int(entry('next').text())
        rcptToList = [PyQuery(addr).text() for addr in entry('emails>email')]
        print(rcptToList)

        listing = PyQuery(url=url)
        wanted = number2chinese(chapter)
        nextUrl = None
        for link in listing('div.threadlist_title.pull_left.j_th_tit.member_thread_title_frs > a.j_th_tit'):
            # An element's .text is None when it has no leading text.
            if (link.text or '').find(wanted) != -1:
                nextUrl = prefix + PyQuery(link).attr('href')
                break
        if not nextUrl:
            continue

        text = PyQuery(url=nextUrl)('cc:first > div:first').html()
        if text is None:
            # Chapter body not found on the page: leave the counter
            # untouched so the same chapter is retried on the next run.
            continue
        text = text.replace(u'<br/>', '\n').strip()
        # The subject names the chapter that was actually matched, i.e.
        # the counter value before it is advanced.
        subject = name + u' ' + u'第' + unicode(str(chapter)) + u'章'
        # NOTE(review): the sender address looks like a scrubbed
        # placeholder - confirm the real value.
        send_mail('[email protected]', rcptToList, subject.encode('utf8'), text.encode('utf8'))
        # Advance the stored counter only after the mail has been sent.
        entry('next').text(str(chapter + 1))

    # Persist the updated counters; `with` guarantees the file is closed.
    with open(configFile, 'wt') as config:
        config.write(str(novels))