当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.replaceWith方法代码示例

本文整理汇总了Python中pyquery.PyQuery.replaceWith方法的典型用法代码示例。如果您正苦于以下问题：Python PyQuery.replaceWith方法的具体用法？Python PyQuery.replaceWith怎么用？Python PyQuery.replaceWith使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyquery.PyQuery的用法示例。


在下文中一共展示了PyQuery.replaceWith方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: no_fonts

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
def no_fonts (pq):
    """Intended to unwrap ``<font>`` elements in *pq*; currently disabled.

    The function raises unconditionally on entry, so none of the unwrapping
    code below the ``raise`` ever runs. Original author's note: lxml etree and
    PyQuery objects get confused, nested ``<font>`` elements are not removed,
    and the two passes below only go two levels deep.

    Args:
        pq: Value handed to the ``PyQuery`` constructor.

    Returns:
        The ``PyQuery`` object after both passes (unreachable at present).

    Raises:
        Exception: Always, with the message "yuk - it's a mess, use tidy!".
    """
    # Call-style ``raise`` parses on both Python 2 and Python 3; the original
    # ``raise Exception, "..."`` form is a SyntaxError on Python 3.
    raise Exception("yuk - it's a mess, use tidy!")

    # ---- Unreachable: kept as a record of the abandoned approach ----
    pq = PyQuery (pq)

    # First pass: replace each <font> element with its inner HTML.
    for font in pq ('font'):
        font = PyQuery (font)
        font.replaceWith (font.html())
        # Single-argument print with %-formatting emits the same text under
        # the Python 2 print statement and the Python 3 print function.
        print ('font replaced: %s' % (font [:60],))

    # Second pass: catches <font> elements still present after the first pass.
    for font in pq ('font'):
        font = PyQuery (font)
        font.replaceWith (font.html())
        print ('font 2 replaced: %s' % (font [:60],))

    return pq
开发者ID:satyadevi-nyros,项目名称:eracks,代码行数:30,代码来源:scrape_utils.py

示例2: __processInstagramTag

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
    def __processInstagramTag(self, i, e):
        """Replace an Instagram embed element with a plain ``<img>`` element.

        The short code is extracted from the ``href`` of the ``<a>`` found
        inside the element and passed to ``self.getInstagramImageUrl`` to
        obtain the image URL.

        Args:
            i: Not used by this method (presumably the index supplied by an
                ``each()``-style iteration -- confirm against the caller).
            e: Element to wrap in ``PyQuery`` and replace.

        Returns:
            None. The element is left untouched when it has no usable link.
        """
        obj = PyQuery(e)
        url = obj('a').attr('href')
        if not url:
            # No <a>, or an <a> without href: re.match(None) would raise.
            return

        # Accept https as well as http; re.match returns None on no match,
        # so guard before calling .group().
        match = re.match(r"https?://.*/p/(.*)/", url)
        if match is None:
            return

        imageUrl = self.getInstagramImageUrl(match.group(1))

        newObj = PyQuery("<img />")
        newObj.attr('src', imageUrl)
        obj.replaceWith(newObj)
开发者ID:arloliu,项目名称:w4-crawler,代码行数:11,代码来源:HtmlParser.py

示例3: __processImageTag

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
    def __processImageTag(self, i, e):
        """Replace an image element with a clean ``<img>``, or drop it if hidden.

        An element whose inline ``style`` contains ``display: none`` is removed
        from the document. Otherwise a fresh ``<img>`` is built whose ``src``
        comes from the element's ``rel:bf_image_src`` attribute, with the
        original ``style``, width and height copied over.

        Args:
            i: Not used by this method (presumably the index supplied by an
                ``each()``-style iteration -- confirm against the caller).
            e: Element to wrap in ``PyQuery`` and replace.

        Returns:
            None.
        """
        obj = PyQuery(e)
        style = obj.attr('style')

        # Hidden images are dropped rather than converted.
        if style is not None and 'display: none' in style:
            obj.remove()
            return

        newObj = PyQuery("<img />")
        newObj.attr('src', obj.attr('rel:bf_image_src'))
        newObj.attr('style', style)
        newObj.width(obj.width())
        newObj.height(obj.height())
        obj.replaceWith(newObj)
开发者ID:arloliu,项目名称:w4-crawler,代码行数:16,代码来源:HtmlParser.py

示例4: clean_body

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
def clean_body(body):
	"""Flatten an HTML body fragment: unwrap paragraphs, remap alignment
	classes, turn ``<br>`` variants into two-space line breaks and trim the
	result.

	Args:
		body: HTML fragment as a string; it is wrapped in ``<body>`` tags
			before parsing.

	Returns:
		The cleaned markup as a string. Runs of three or more newlines are
		collapsed to two, leading/trailing newlines and carriage returns are
		removed, and leading tabs are removed.
	"""
	html = PyQuery('<body>' + body + '</body>')

	for p in html('p'):
		p = PyQuery(p)
		# html() can return None for an element with no content; without the
		# fallback the literal text "None" would be written into the output.
		p.replaceWith('\n\n%s\n\n' % (p.html() or ''))

	html('.alignright').addClass('pull-right').removeClass('alignright')
	html('.alignleft').addClass('pull-left').removeClass('alignleft')
	# NOTE(review): these float-styled elements receive alignleft/alignright
	# *after* the rename above, so they keep the old class names. Order is
	# preserved from the original -- confirm whether that is intended.
	html('[style="float: left;"]').removeAttr('style').addClass('alignleft')
	html('[style="float: right;"]').removeAttr('style').addClass('alignright')

	# html() can return None when the wrapper has no content; fall back to an
	# empty string so the string operations below do not fail.
	body = html.html() or ''

	for br in ('<br />', '<br/>', '<br>'):
		body = body.replace(br, '  \n')

	body = body.replace('\r\n', '\n')
	body = body.replace('\n\r', '\n')

	# Repeat until stable: a single pass can leave new triples behind.
	while '\n\n\n' in body:
		body = body.replace('\n\n\n', '\n\n')

	# Same order as the original trimming: newlines on both ends, then
	# carriage returns on both ends, then leading tabs only.
	return body.strip('\n').strip('\r').lstrip('\t')
开发者ID:iamsteadman,项目名称:bambu-tools,代码行数:47,代码来源:xmlrpc.py

示例5: sanitise

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
	def sanitise(self, text, markdown = True):
		"""Strip unwanted links, elements and attributes from *text*.

		Args:
			text: Input text. When *markdown* is true it is first passed
				through ``md`` (presumably a Markdown-to-HTML renderer --
				confirm) before being parsed.
			markdown: When true, the cleaned HTML is passed through
				``HTML2Text().handle`` before being returned.

		Returns:
			The cleaned outer HTML, or the ``HTML2Text`` rendering of it when
			*markdown* is true.
		"""
		source = md(text) if markdown else text
		tree = PyQuery(source)

		# Links whose href starts with "javascript:" are replaced by their text.
		for anchor in tree.find('a[href^="javascript:"]'):
			wrapped = PyQuery(anchor)
			wrapped.replaceWith(wrapped.text())

		# Remove every element matched by an entry of UNCLEAN_TAGS.
		for tag_selector in UNCLEAN_TAGS:
			tree.find(tag_selector).remove()

		# Strip each attribute named in UNCLEAN_ATTRS wherever it occurs.
		for attr_name in UNCLEAN_ATTRS:
			tree.find('[%s]' % attr_name).removeAttr(attr_name)

		cleaned = tree.outerHtml()
		if not markdown:
			return cleaned

		converter = HTML2Text()
		return converter.handle(cleaned)
开发者ID:iamsteadman,项目名称:social-comments,代码行数:24,代码来源:__init__.py

示例6: remove_link

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)
 def remove_link(index, node):
     """Replace *node* in its document with its own text content.

     Args:
         index: Not used by this function.
         node: Element to wrap in ``PyQuery`` and replace.

     Returns:
         The ``PyQuery`` wrapper created around *node*.
     """
     wrapped = PyQuery(node)
     plain_text = wrapped.text()
     wrapped.replaceWith(plain_text)
     return wrapped
开发者ID:AlloVince,项目名称:EvaCrawler,代码行数:6,代码来源:article_spider.py

示例7: handle

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 注意: replaceWith 是 PyQuery 类的实例方法, 无法通过 import 单独导入; 请先创建 PyQuery 对象, 再调用 obj.replaceWith(...)

#.........这里部分代码省略.........
						
						if subkind != 'attachment' or subparent_id != id:
							continue
						
						s, d, p, a, q, f = urlparse(suburl)
						d, s, filename = p.rpartition('/')
						
						try:
							attachment = post.attachments.get(
								title = subtitle or filename
							)
						except Attachment.DoesNotExist:
							print '- Downloading %s' % filename

							response = requests.get(suburl)
							handle, tmp = mkstemp(
								path.splitext(filename)[-1]
							)

							write(handle, response.content)
							close(handle)
							
							attachment = Attachment(
								title = subtitle or filename,
								file = File(open(tmp, 'r'), name = filename),
								content_type = content_type,
								object_id = post.pk
							)
						
							if '_thumbnail_id' in postmeta[id]:
								if unicode(postmeta[id]['_thumbnail_id']) == unicode(subid):
									attachment.featured = True
						
							attachment.save()
							remove(tmp)
						
						if post.body:
							html = PyQuery('<body>' + post.body + '</body>')
							for a in html(
								'a[href="%(url)s"], [src="%(url)s"]' % {
									'url': suburl
								}
							):
								a = PyQuery(a)
								a.replaceWith('\n\n[attachment %d]\n\n' % ai)
							
							post.body = html.html()
						
						ai += 1
					
					if post.body:
						html = PyQuery('<body>' + post.body + '</body>')
						for a in html('a[href]'):
							href = a.get('href')
							if href.startswith(link):
								href = href.replace(link, 'http://%s' % site.domain)
							
							a = PyQuery(a)
						
						for p in html('p'):
							p = PyQuery(p)
							p.replaceWith('\n\n%s\n\n' % p.html())
						
						html('.alignright').addClass('pull-right').removeClass('alignright')
						html('.alignleft').addClass('pull-left').removeClass('alignleft')
						
						while '\n\n\n' in post.body:
							post.body = post.body.replace('\n\n\n', '\n\n')
						
						while '\r\r\r' in post.body:
							post.body = post.body.replace('\r\r\r', '\r\r')
						
						post.body = html.html()
						post.body = post.body.replace('<br />', '  \n')
						post.body = post.body.replace('<br/>', '  \n')
						post.body = post.body.replace('<br>', '  \n')
						
						while post.body.startswith('\n'):
							post.body = post.body[1:]
						
						while post.body.endswith('\n'):
							post.body = post.body[:-1]
						
						while post.body.startswith('\r'):
							post.body = post.body[1:]
						
						while post.body.endswith('\r'):
							post.body = post.body[:-1]
						
						while post.body.startswith('\t'):
							post.body = post.body[1:]
						
						post.body = post.body.strip()
					
					post.save()
				
				transaction.commit()
			except:
				transaction.rollback()
				raise
开发者ID:iamsteadman,项目名称:bambu-tools,代码行数:104,代码来源:wpimport.py


注:本文中的pyquery.PyQuery.replaceWith方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。