当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.replace方法代码示例

本文整理汇总了Python中pyquery.PyQuery.replace方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.replace方法的具体用法?Python PyQuery.replace怎么用?Python PyQuery.replace使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyquery.PyQuery的用法示例。


在下文中一共展示了PyQuery.replace方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: get_bounds

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: "from pyquery.PyQuery import replace" 并不是有效的导入语句 —— PyQuery 是一个类, 且没有 replace 方法; 本示例中调用的 replace 是 str 的方法
def get_bounds(scene_name):
    """Use Earth Explorer FGDC metadata to get the bounds of a scene.

    The metadata page for ``scene_name`` is fetched, and the text between the
    first ``G-Ring_Latitude:`` label and the ``Keywords:`` section is parsed
    as a flat sequence of numbers, which are paired up two at a time.

    :param scene_name: scene identifier; also passed to
        ``get_metadata_code`` to build the metadata URL.
    :return: list of ``[lon, lat]`` pairs (the page lists latitude first, so
        each pair is flipped). The first pair is repeated at the end when the
        ring is not already closed.
    :raises IndexError: if no numeric values are found in the section.
    :raises ValueError: if a non-numeric line ends up in the section.
    """
    url_code = get_metadata_code(scene_name)

    page = PyQuery(
        'http://earthexplorer.usgs.gov/fgdc/%s/%s/' % (url_code, scene_name)
        )
    # Render the page text once and reuse it for both lookups and the slice.
    text = page.text()
    section = text[
        text.find('G-Ring_Latitude:'):
        text.find('\n  Keywords:')
        ]
    lines = (
        section.replace(' ', '')
        .replace('G-Ring_Latitude:', '')
        .replace('G-Ring_Longitude:', '')
        .split('\n')
        )
    values = [float(line) for line in lines if line != '']
    # Pair consecutive values as [lat, lon], flipping each pair to [lon, lat].
    coords = [values[i:i + 2][::-1] for i in range(0, len(values), 2)]
    # Repeat the first coordinate at the end so the ring is closed.
    if coords[0] != coords[-1]:
        coords.append(coords[0])
    return coords
开发者ID:ibamacsr,项目名称:imagery,代码行数:28,代码来源:utils.py

示例2: get_img_urls

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: "from pyquery.PyQuery import replace" 并不是有效的导入语句 —— PyQuery 是一个类, 且没有 replace 方法; 本示例中调用的 replace 是 str 的方法
def get_img_urls(content):
    """Collect image URLs from a product page and its lazily loaded description.

    Two sources are combined, in this order:

    1. Thumbnail ``<img>`` tags in the tab strip. Only ``src`` values that
       contain ``60x60`` are kept, rewritten to ``400x400``.
    2. ``<img>`` tags found in the description document whose URL is read
       from the ``data-tfs-url`` attribute of ``div#desc-lazyload-container``.
       URLs containing ``gif`` are skipped.

    :param content: HTML of the product page; a falsy value yields ``[]``.
    :return: list of image URL strings.
    """
    if not content:
        return []
    url_list = []
    doc = PyQuery(content)
    thumbnails = doc('li.tab-trigger > div.vertical-img > a.box-img > img')
    for node in thumbnails:
        url = PyQuery(node).attr('src')
        # ``in`` also catches a marker at index 0, which ``find(...) > 0`` missed.
        if url and '60x60' in url:
            url_list.append(url.replace('60x60', '400x400'))

    link_url = doc('div#desc-lazyload-container').attr('data-tfs-url')
    if not link_url:
        return url_list
    desc_content = fetchPageWithUrl(link_url)
    # NOTE(review): presumably the fetch helper can return None/'' on failure;
    # without this guard re.findall would raise TypeError on None.
    if not desc_content:
        return url_list
    # The original pattern ended in a lazy ``.*?`` that always matched nothing,
    # so it is dropped here without changing the matches.
    for tag in re.findall(r'<img[^<>]*>', desc_content):
        desc_url = PyQuery(tag)('img').attr('src')
        if not desc_url:
            continue
        # Strip the escaped quotes (\") that surround the URL in the payload.
        desc_url = desc_url.replace('\\"', '')
        # Skip URLs left empty after stripping, and GIF images.
        if not desc_url or 'gif' in desc_url:
            continue
        url_list.append(desc_url)
    return url_list
开发者ID:chenweiqiang2016,项目名称:cwq-crawler,代码行数:36,代码来源:pics_1688.py

示例3: scan_proxy

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: "from pyquery.PyQuery import replace" 并不是有效的导入语句 —— PyQuery 是一个类, 且没有 replace 方法; 本示例中调用的 replace 是 str 的方法
def scan_proxy():
    """
    Scan the proxy listing page and append every usable proxy to ``proxy.json``.

    Each table row is turned into one record: the fields are written into the
    ``ProsyItem`` mapping (defined outside this function), serialized with
    ``json.dumps`` and appended as one line through ``html.save_file``.
    Rows are skipped when the cleaned IP does not consist of four
    dot-separated parts, or when the port class name is not a key of
    ``PortDict``.

    :return: None
    """
    import requests
    from pyquery import PyQuery as Pq

    source_site = 'http://ip.qiaodm.com/'

    header = {
        'Host': 'ip.qiaodm.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36'
    }

    s = requests.session()
    # Fetch the page. The headers above were previously built but never sent.
    file_html = s.get(source_site, headers=header).content

    text_pq = Pq(file_html)
    tr_list = text_pq('tbody').find('tr[style="text-align: center;"]')
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('单页共 %s 条记录' % len(tr_list))
    for tr_item in tr_list:
        td_list = Pq(tr_item).find('td')
        # Plain text of every cell, in column order.
        field_list = [Pq(td_item).text() for td_item in td_list]

        # The IP address cell needs special handling.
        ip = td_list.eq(0).html()
        # Remove the hidden (display: none) <p> elements mixed into the cell.
        # NOTE(review): presumably html.replace_html deletes regex matches.
        ip = html.replace_html(ip, r'<p style="display:none;"/>')
        ip = html.replace_html(ip, r'<p style="display: none;"/>')
        ip = html.replace_html(ip, r'<p style=.*?display:.*?none;.*?>.*?</p>')
        # Strip the remaining tags.
        ip = html.strip_html(ip)
        # Filter out anything that is not shaped like a dotted-quad address.
        if len(ip.split('.')) != 4:
            continue

        # The port is encoded as the second class name on the port cell.
        port_key = td_list.eq(1).attr('class').split()[1]
        if port_key not in PortDict:
            print('发现新端口: %s' % port_key)
            continue
        port = PortDict[port_key]

        ProsyItem['Ip'] = ip.replace(' ', '')
        ProsyItem['Port'] = port
        ProsyItem['Type'] = field_list[2].strip()
        ProsyItem['AnonymousDegree'] = field_list[3].strip()
        ProsyItem['Area'] = field_list[4].strip()
        ProsyItem['Speed'] = field_list[5].strip()
        ProsyItem['ScanTime'] = field_list[6].strip()
        proxy_item = json.dumps(ProsyItem, ensure_ascii=False)
        html.save_file('proxy.json', proxy_item + '\n', 'a')
开发者ID:gasongjian,项目名称:python,代码行数:74,代码来源:proxy.py

示例4: PyQuery

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: "from pyquery.PyQuery import replace" 并不是有效的导入语句 —— PyQuery 是一个类, 且没有 replace 方法; 本示例中调用的 replace 是 str 的方法
	server.sendmail(mailFrom, rcptToList, message.as_string())
	server.quit()

# Script entry point: for every <novel> entry in the config file, look for
# the thread whose title contains the wanted chapter number, mail that
# chapter to the entry's recipients and advance the stored chapter counter.
# The updated config is written back once all entries are processed.
if '__main__' == __name__:
	configFile = 'config.cfg'
	novels = PyQuery(filename = configFile)
	message = ''
	for novel in novels('novel'):
		entry = PyQuery(novel)
		name = entry('name').text()
		url = entry('url').text()
		prefix = entry('prefix').text()
		# Named ``chapter`` instead of ``next`` to avoid shadowing the builtin.
		chapter = int(entry('next').text())
		rcptToList = [PyQuery(addr).text() for addr in entry('emails>email')]
		print(rcptToList)
		html = PyQuery(url = url)
		nextUrl = None
		wanted = number2chinese(chapter)
		for i in html('div.threadlist_title.pull_left.j_th_tit.member_thread_title_frs > a.j_th_tit'):
			# lxml reports text=None for an anchor with no leading text.
			if i.text and wanted in i.text:
				nextUrl = prefix + PyQuery(i).attr('href')
				break
		if nextUrl:
			# Store the following chapter number for the next run.
			entry('next').text(str(chapter + 1))
			text = PyQuery(url=nextUrl)('cc:first > div:first').html()
			text = text.replace(u'<br/>', '\n').strip()
			# The subject names the chapter actually being sent; it was
			# previously built after the increment and so was one too high.
			subject = name + u' ' + u'第'+unicode(str(chapter))+u'章'
			# NOTE(review): the sender address below looks like an e-mail
			# obfuscation placeholder — confirm the real value.
			send_mail('[email protected]', rcptToList, subject.encode('utf8'), text.encode('utf8'))
	# Close the config file deterministically instead of leaking the handle.
	with open(configFile, 'wt') as configHandle:
		configHandle.write(str(novels))
开发者ID:niu2x,项目名称:novelpush,代码行数:32,代码来源:a.py


注:本文中的pyquery.PyQuery.replace方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。