当前位置: 首页>>代码示例>>Python>>正文


Python PyQuery.remove方法代码示例

本文整理汇总了Python中pyquery.PyQuery.remove方法的典型用法代码示例。如果您正苦于以下问题:Python PyQuery.remove方法的具体用法?Python PyQuery.remove怎么用?Python PyQuery.remove使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pyquery.PyQuery的用法示例。


在下文中一共展示了PyQuery.remove方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: render_document

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def render_document(vnode, expressions, context):
    """Evaluate each parsed expression against *context* and update the document.

    Expressions whose cached (string) value is unchanged are skipped. Three
    expression types are handled: 'each' (re-renders a looped template node
    per item of the evaluation), 'markup' (stores the JSON-serialised value
    in a 'markup' attribute) and 'attribute' (sets a plain node attribute).

    :param vnode: opaque widget/node handle forwarded to connect_signals
    :param expressions: iterable of expression dicts produced by
        parse_document_expressions; mutated in place ('value', 'parent',
        'node' keys are updated)
    :param context: dict or object whose attributes provide evaluation names
    """
    for expression in expressions:
        evaluation = evaluate_expression(expression, context)
        node = expression.get('node')
        # Skip re-rendering when the cached string value is unchanged.
        if isinstance(expression.get('value'), basestring) and expression.get('value') == evaluation:
            continue
        expression['value'] = evaluation

        if expression.get('type') == 'each':
            # Cache the parent on first render so later renders can still
            # find the loop's insertion point after the children are removed.
            if expression.get('parent'):
                parent = expression.get('parent')
            else:
                parent = node.parent()
                expression['parent'] = parent
            riot_id = node.attr['data-riot-id']
            original_children = parent.children('[data-riot-id="%s"]' % riot_id)
            # 0. add placeholder (anchor that marks where clones are inserted)
            placeholder = PyQuery('<text></text>')
            placeholder.insertBefore(original_children.eq(0))
            # 1. remove children (keep a detached clone as the template node)
            original_node = original_children.clone()
            original_children.remove()
            expression['node'] = original_node
            # 2. insert children
            # NOTE(review): a single loopcontext dict is shared by every
            # iteration and mutated below, so every entry appended to
            # expressions_col references the *same* (final) context —
            # confirm per-item signal contexts are not expected here.
            loopcontext = {}
            loopcontext.update(context if isinstance(context, dict) else vars(context))
            expressions_col = []
            for loop_index, item in enumerate(evaluation):
                loopcontext.update(item if isinstance(item, dict) else vars(item))
                loopcontext['loopindex'] = loop_index
                child_node = PyQuery(expression.get('impl'))
                child_node.attr['data-riot-loopindex'] = str(loop_index)
                # NOTE(review): this rebinds the name 'expressions' that the
                # outer for-loop iterates; iteration itself is unaffected in
                # Python, but the shadowing is fragile.
                expressions = parse_document_expressions(child_node)
                expressions_col.append((expressions, loopcontext))
                render_document(vnode, expressions, loopcontext)
                child_node.insertBefore(placeholder)
            # 3. remove placeholder
            if len(evaluation) == 0:
                # Empty loop: keep the placeholder (tagged with the riot id)
                # so a later non-empty render has an anchor to insert before.
                placeholder.attr['data-riot-id'] = str(riot_id)
            else:
                placeholder.remove()
            mark_dirty(parent)
            generate_widget(parent)
            for expressions, loopcontext in expressions_col:
                connect_signals(vnode, expressions, loopcontext)
            continue
        if expression.get('type') == 'markup':
            # Serialise the value into an attribute; the node body is cleared
            # and the widget layer presumably reads 'markup' — TODO confirm.
            node.attr['markup'] = json.dumps(evaluation)
            node.html('')
            mark_dirty(node)
            continue
        if expression.get('type') == 'attribute':
            attribute = expression.get('attribute')
            node.attr[attribute] = str(evaluation)
            mark_dirty(node)
            continue
开发者ID:pombredanne,项目名称:riotpy,代码行数:58,代码来源:expression.py

示例2: __processImageTag

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
    def __processImageTag(self, i, e):
        """Replace a lazy-loaded image placeholder with a plain <img> tag.

        The element is dropped entirely when it is hidden with an inline
        ``display: none`` style; otherwise a fresh <img> is built whose real
        source URL comes from the crawler-specific 'rel:bf_image_src'
        attribute, preserving style and dimensions.

        :param i: element index supplied by PyQuery's each() (unused)
        :param e: raw DOM element being processed
        """
        obj = PyQuery(e)
        style = obj.attr('style')

        # Hidden images carry no visible content; remove them outright.
        # (idiom fix: `is not None` / `in` instead of `!= None` / find() != -1)
        if style is not None and 'display: none' in style:
            obj.remove()
            return

        newObj = PyQuery("<img />")
        newObj.attr('src', obj.attr('rel:bf_image_src'))
        newObj.attr('style', obj.attr('style'))
        newObj.width(obj.width())
        newObj.height(obj.height())
        obj.replaceWith(newObj)
开发者ID:arloliu,项目名称:w4-crawler,代码行数:16,代码来源:HtmlParser.py

示例3: sanitize_description

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def sanitize_description(value):
    """Strip play-metadata markup from *value* and return the description text.

    Removes metadata spans, play counters, <time> and <strong> tags, then
    keeps only the text after the last opening <span>, with its closing tag
    and surrounding whitespace dropped.

    :param value: HTML fragment (string or parsed document)
    :returns: cleaned plain-ish text, or "" when nothing remains
    """
    cleaned = PyQuery(value)
    # One removal per selector suffices; the original removed
    # 'span.playMetaText' twice, which was redundant (remove() mutates
    # the document in place and returns the same object).
    for selector in ('span.playMetaText', 'span.playCount', 'time', 'strong'):
        cleaned.remove(selector)

    desc = cleaned.html()

    if desc is None:
        return ""

    # [-1] replaces the original [-1:][0] — same element, simpler spelling.
    return desc.split('<span>')[-1].replace('</span>', '').strip()
开发者ID:Pehrsons,项目名称:SvtCrawler,代码行数:15,代码来源:__init__.py

示例4: extract

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
    def extract(self):
        """Parse self.html (a Sina news page) into a ContentItem.

        Strips HTML comments, locates the article body via known container
        selectors, removes boilerplate (scripts, styles, iframes, a known ad
        div, inputs), then extracts title, images and content.

        :returns: populated ContentItem (title, content, image_urls, ...)
        """
        # Drop HTML comments before parsing.
        self.html = re.sub('<!--.*?-->', '', self.html)
        doc = PyQuery(self.html)
        content_node = doc('div.kb_zw')
        if not content_node:
#            content_node = doc('div.zw_text')
            # Fallback: pull the body out via the scrapy selector instead.
            content_node = PyQuery(self.hxs.select("//div[@class = 'zw_text']").extract()[0])
        
        # Remove boilerplate and ad markup from the article body.
        content_node.remove('script')
        content_node.remove('style')
        content_node.remove('iframe')
        content_node.remove('div[style = "float:left; width:303px; height:250px; display:inline; margin:10px 10px 10px 10px;"]')
        content_node.remove('input')
        
        

        item = ContentItem()
        # Title fallbacks: bold centred table cell, then known title containers.
        item['title'] = self.title = doc('td[align = "center"]')('b').text()
        if item['title'] == None:
            item['title'] = self.title = doc('div.zw_bt').text()
        if item['title'] == None:
            item['title'] = self.title = doc('h1.zw_title').text()
        
        
        item['release_time'] = ''
        
        item['source'] = u"新浪"
        item['author'] = ''
        item['pic_url'] = ''

        imgs = content_node('img')
        image_urls = []
        for img in imgs:
            # NOTE(review): if 'src' is missing, `".gif" in None` raises
            # TypeError before the `not img.get('src')` guard below runs —
            # the two checks look out of order; confirm inputs always have src.
            if ".gif" in img.get('src'):
                continue
            if not img.get('src'):
                continue
            else:
                # Put the image on its own line and collect its absolute URL.
                imgs.eq(imgs.index(img)).before('<br>')
                imgs.eq(imgs.index(img)).append('<br>')
                image_urls.append(self.getRealURI(img.get('src')))
        item['image_urls'] = image_urls

        content = content_node.__unicode__()
        item['content'] = self.content = content
        return item
开发者ID:hw20686832,项目名称:iCrawler,代码行数:48,代码来源:zj_a006.py

示例5: parseNextPageUrl

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
 def parseNextPageUrl(self, category_page_content):
     """Return the href of the pagination anchor labelled 'next', or None.

     Inner <span> elements are stripped from each anchor before its text is
     compared, so decorative markup cannot hide the label.
     """
     anchors = PyQuery(category_page_content)("span#view_47 > a")
     for anchor in anchors:
         candidate = PyQuery(anchor)
         label = candidate.remove('span').text().strip().lower()
         if label == 'next':
             return candidate.attr('href').strip()
     return None
开发者ID:chenweiqiang2016,项目名称:cwq-crawler,代码行数:10,代码来源:wayfair.py

示例6: sanitize_html2

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def sanitize_html2(value):
    """Strip play-metadata markup from *value*.

    NOTE(review): the trailing ``[-1:]`` slice is never indexed, so this
    returns a one-element *list* rather than a string. Callers appear to
    depend on that, so the behaviour is preserved here — confirm it is
    intentional.
    """
    soup = PyQuery(value)
    # Mirrors the original call sequence exactly, including the repeated
    # 'span.playMetaText' removal (a no-op the second time).
    for selector in ("span.playMetaText", "span.playMetaText", "time", "strong"):
        soup.remove(selector)

    return soup.html().split("<span>")[-1:]
开发者ID:peppelorum,项目名称:WeLovePublicService-VHS,代码行数:10,代码来源:span.py

示例7: sanitize_description

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def sanitize_description(value):
    """Strip play-metadata markup from *value* and return the description text.

    Removes metadata spans, <time> and <strong> tags, then keeps only the
    text after the last opening <span>, with its closing tag dropped.

    :param value: HTML fragment (string or parsed document)
    :returns: cleaned text, or "" when nothing remains after cleaning
    """
    cleaned = PyQuery(value)
    # One removal per selector suffices; the original removed
    # 'span.playMetaText' twice, which was redundant.
    for selector in ('span.playMetaText', 'time', 'strong'):
        cleaned.remove(selector)

    desc = cleaned.html()
    if desc is None:
        # pyquery's html() yields None for an empty document; the original
        # would have crashed on .split() here.
        return ""

    # [-1] replaces the original [-1:][0] — same element, simpler spelling.
    return desc.split('<span>')[-1].replace('</span>', '')
开发者ID:peppelorum,项目名称:SVT-oppetarkiv-crawler,代码行数:10,代码来源:__init__.py

示例8: feed

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def feed(request, get_feed=get_feed):
    """Build an RSS response containing only items that match a known show.

    Fetches the upstream feed XML, drops every <item> whose <title> does
    not match a show recorded in the shows database, and returns the
    filtered document as an RSS response.
    """
    with shows_db() as shows:
        known_shows = shows.values()

    document = PyQuery(get_feed(), parser="xml")

    for raw_item in document("item"):
        entry = PyQuery(raw_item)
        match = detect_show(known_shows, entry.find("title").text())
        if not match:
            entry.remove()
            continue
        name, episode = match
        # TODO: Record episode in the feed so that future versions of this episod will be ignored

    response = Response()
    response.content_type = "application/rss+xml"
    response.ubody = unicode(document)
    response.cache_control = "no-cache"
    return response
开发者ID:ericmoritz,项目名称:tvservice,代码行数:23,代码来源:tvservice.py

示例9: extract

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
    def extract(self):
        """Parse self.html (a 17173 forum thread page) into a ContentItem.

        Strips HTML comments, takes the first topic's divs as the content,
        removes rating/affix/boilerplate blocks, collects image URLs, and
        extracts title, post time and author.

        :returns: populated ContentItem
        """
        # Drop HTML comments before parsing.
        self.html = re.sub('<!--.*?-->', '', self.html)
        doc = PyQuery(self.html)
        content_node = doc('.firstTopic')('div')
        content_node.remove('script')
        content_node.remove('.rate')
        content_node.remove('.affixContent')
        content_node.remove('.thread_gold')
        
        
        item = ContentItem()
        imgs = content_node('.p14')('img')
        img_all = []
        for img in imgs:
            # Skip animated/spacer gifs; keep everything else.
            if".gif" in img.get('src'):
                continue
            else:  
                # Put the image on its own line and collect its absolute URL.
                imgs.eq(imgs.index(img)).append('<br>')
                imgs.eq(imgs.index(img)).before('<br>')
                img_all.append(self.getRealURI(img.get('src')))
        item['image_urls'] = img_all
        
        item['title'] = self.title = doc('#thread_title').text()
        content = content_node('.p14').__unicode__()
        content = PyQuery(content)
        # Clear inline styles on all divs inside the content.
        del_style = content('div')
        for d in del_style:
            if d.get('style'):
                del_style.eq(del_style.index(d)).attr['style'] = ''
                
        content.remove('dl.rate_list')
        content.remove('span[style = "font-size:12px"]')
        content.remove('dl.rate')
        item['content'] = self.content = content.__unicode__()
        
        # Post time: match a '20xx...xx' timestamp inside the .postTime text.
        release_time=doc('.firstTopic')('.postTime').text()
        ob=re.compile(u'20\d\d.*\d\d')
        release_time=ob.findall(release_time)
        
        # NOTE(review): release_time[0] raises IndexError when no timestamp
        # matched — confirm upstream pages always carry one.
        item['release_time'] = release_time[0]
#        item['release_switch_time'] = self.release_switch_time = time.mktime(time.strptime(release_time[0],u'%Y-%m-%d %H:%M:%S'))
        item['source'] = u"17173论坛"
        item['author'] = doc('.th1').eq(0).text()
        item['pic_url'] = ''
        
        return item
开发者ID:hw20686832,项目名称:iCrawler,代码行数:48,代码来源:lw_tz.py

示例10: process

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
    def process( self ):
        """Fetch the current job's URL, harvest links and titles, strip
        boilerplate, and persist the page.

        Side effects: populates self.discovered_urls, self.basic_content_type,
        self.current_response, self.dom, self.titles, and appends progress
        lines to self.message_stack; calls self.save() on acceptable pages.
        """
        self.discovered_urls = set()
        self.basic_content_type = "text/html"
        self.message_stack = [ "-" * 80 ]

        # resolve the address
        uri = urlparse( self.current_job[ 'url' ] )
        answers = dns.resolver.query( uri.hostname, 'A' )
        for answer in answers:
            self.message_stack.append( "DNS) %s" % answer )

        try:
            self.current_response = requests.get( self.current_job[ 'url' ], stream=True )
            self.basic_content_type = self.current_response.headers[ 'content-type' ].split( ";" )[ 0 ]
        # NOTE(review): bare except swallows every failure (DNS, timeout,
        # missing content-type header) indistinguishably — consider
        # catching requests.RequestException / KeyError explicitly.
        except:
            self.current_response = None
            self.basic_content_type = None

        if self.current_response:
            # Log the redirect chain, then the final URL and response metadata.
            for r in self.current_response.history:
                self.message_stack.append( "-URL (%s) %s" % ( r.status_code, r.url ) )

            self.message_stack.append( "+URL (%s) %s" % ( self.current_response.status_code, self.current_response.url ) )
            self.message_stack.append( "BASIC CONTENT-TYPE) %s" % self.basic_content_type )
            self.message_stack.append( "CONTENT TYPE) %s" % self.current_response.headers['content-type'] )
            self.message_stack.append( "ENCODING) %s" % self.current_response.encoding )

            if self.basic_content_type in ACCEPTABLE_CONTENT_TYPES:
                # we need to handle the odd, but real case of the mystery <? palantir_blog_list('sidebar') ?> tag
                # tidy_response_text = re.sub( "<\?.*?\?>", "", self.current_response.text )
                tidy_response_text = re.sub( "<\?.*?\?>", "", self.current_response.text )
                tidy_response_text = re.sub( "<!--.*?-->", "", tidy_response_text )

                self.dom = PyQuery( tidy_response_text, parser='html' )

                self.titles = [ safe_str( title.text ) for title in self.dom("title") ]

                # Collect absolute, fragment-free outbound URLs from anchors.
                for a in self.dom('a'):
                    a = PyQuery(a)
                    new_url = PyQuery(a).attr.href
                    if new_url != None:
                        new_url = urldefrag( urljoin( self.current_response.url, new_url ) )[0]
                        self.discovered_urls.add( new_url )

                self.message_stack.append( "DISCOVERED) %s" % len( self.discovered_urls ) )

                # BOILERPIPE
                for excluded_tag in BOILERPIPE_REMOVE_TAGS:
                    self.dom( excluded_tag ).after( "\n" )
                    self.dom.remove( excluded_tag )

                # remove tags with style="display:none"
                # http://www.microsoft.com/en-us/legal/intellectualproperty/copyright/default.aspx          
                display_none_pattern = re.compile( "display: ?none" )

                for x in self.dom("*"):
                    try:
                        tag = PyQuery(x)
                        if not tag.attr("style") == None:
                            if re.match( display_none_pattern, tag.attr("style") ):
                                tag.remove()
                    except Exception as inst:
                        print type(inst)
                        print inst.args
                        print inst

                self.save()
            else:
                self.message_stack.append( "DISCARDED" )
        else:
            self.message_stack.append( "NO RESPONSE" )
开发者ID:johnjansen,项目名称:Magpie,代码行数:73,代码来源:agent.py

示例11: getTweets

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
	def getTweets(tweetCriteria, receiveBuffer=None, bufferLength=100, proxy=None):
		"""Scrape tweets matching *tweetCriteria* from Twitter's search timeline.

		Pages through the JSON endpoint via TweetManager.getJsonReponse until
		no more items arrive or tweetCriteria.maxTweets is reached, parsing
		each tweet's HTML into a models.Tweet.

		:param tweetCriteria: criteria object (query, maxTweets, ...)
		:param receiveBuffer: optional callback invoked with batches of tweets
		:param bufferLength: batch size for receiveBuffer flushes
		:param proxy: optional proxy forwarded to the HTTP layer
		:returns: list of models.Tweet
		"""
		refreshCursor = ''
	
		results = []
		resultsAux = []
		cookieJar = http.cookiejar.CookieJar()

		active = True

		while active:
			# An empty items_html means no more pages are available.
			json = TweetManager.getJsonReponse(tweetCriteria, refreshCursor, cookieJar, proxy)
			if len(json['items_html'].strip()) == 0:
				break

			refreshCursor = json['min_position']
			scrapedTweets = PyQuery(json['items_html'])
			#Remove incomplete tweets withheld by Twitter Guidelines
			scrapedTweets.remove('div.withheld-tweet')
			tweets = scrapedTweets('div.js-stream-tweet')
			
			if len(tweets) == 0:
				break
			
			for tweetHTML in tweets:
				tweetPQ = PyQuery(tweetHTML)
				tweet = models.Tweet()
				
				usernameTweet = tweetPQ("span.username.js-action-profile-name b").text()
				# Collapse whitespace; undo the space Twitter injects after # and @.
				txt = re.sub(r"\s+", " ", tweetPQ("p.js-tweet-text").text().replace('# ', '#').replace('@ ', '@'))
				retweets = int(tweetPQ("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
				favorites = int(tweetPQ("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
				dateSec = int(tweetPQ("small.time span.js-short-timestamp").attr("data-time"))
				id = tweetPQ.attr("data-tweet-id")
				permalink = tweetPQ.attr("data-permalink-path")
				user_id = int(tweetPQ("a.js-user-profile-link").attr("data-user-id"))
				
				geo = ''
				geoSpan = tweetPQ('span.Tweet-geo')
				if len(geoSpan) > 0:
					geo = geoSpan.attr('title')
				# Expanded URLs, when the anchor carries one.
				urls = []
				for link in tweetPQ("a"):
					try:
						urls.append((link.attrib["data-expanded-url"]))
					except KeyError:
						pass
				tweet.id = id
				tweet.permalink = 'https://twitter.com' + permalink
				tweet.username = usernameTweet
				
				tweet.text = txt
				tweet.date = datetime.datetime.fromtimestamp(dateSec)
				tweet.formatted_date = datetime.datetime.fromtimestamp(dateSec).strftime("%a %b %d %X +0000 %Y")
				tweet.retweets = retweets
				tweet.favorites = favorites
				tweet.mentions = " ".join(re.compile('(@\\w*)').findall(tweet.text))
				tweet.hashtags = " ".join(re.compile('(#\\w*)').findall(tweet.text))
				tweet.geo = geo
				tweet.urls = ",".join(urls)
				tweet.author_id = user_id
				
				results.append(tweet)
				resultsAux.append(tweet)
				
				# Flush a full batch to the caller-supplied callback.
				if receiveBuffer and len(resultsAux) >= bufferLength:
					receiveBuffer(resultsAux)
					resultsAux = []
				
				if tweetCriteria.maxTweets > 0 and len(results) >= tweetCriteria.maxTweets:
					active = False
					break
					
		
		# Flush any remaining partial batch.
		if receiveBuffer and len(resultsAux) > 0:
			receiveBuffer(resultsAux)
		
		return results
开发者ID:ggquinones,项目名称:HighImpactEventSentimentAnalysis,代码行数:79,代码来源:TweetManager.py

示例12: HTMLGenerator

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
class HTMLGenerator(object):
    """HTML Generator
    """

    def __init__(self):
        # Maximum worker processes used when rendering tree nodes in parallel.
        self.MAX_WORKERS = 4
        # Job-count threshold above which rendering is split across workers.
        self.MULTIPROCESS_BOUND = 20

    def load_tree_template(self):
        """Load the tree page and tree node HTML templates from disk.

        Populates self.template (whole page), self.node_template (single
        node), and caches the node template's inner HTML in
        self.node_template_html for reuse by worker processes.
        """
        template_dir = os.path.join(os.path.dirname(__file__), 'template')
        with open(os.path.join(template_dir, 'tree_template.html')) as f:
            self.template = PyQuery(f.read(), parser='html')
        with open(os.path.join(template_dir, 'tree_node_template.html')) as f:
            self.node_template = PyQuery(f.read(), parser='html')
            self.node_template_html = self.node_template.html()

    def import_js(self, js_ids):
        """Point the template's <script> tags at the bundled JS files.

        :param js_ids: dict mapping a script element id (with leading '#')
                       to a JS file name, e.g. {"#script_jquery": "jquery.min.js"}
        """
        base_dir = os.path.dirname(__file__)

        for script_id in js_ids:
            src = "%s/bin/js/%s" % (base_dir, js_ids[script_id])
            self.template(script_id).attr("src", src)
            # lxml collapses an empty <script></script> into <script/>;
            # give the tag a body so that cannot happen.
            self.template(script_id).html("var _lxml = 0;")

    def generate_tree_structure_HTML(self, root_node, output):
        """Generate a html file with tree structure.

        Loads the templates, wires in the JS assets, fills the page header
        with the root node's name/type/doc, renders the tree (unless the
        root has no visible children), then writes the HTML to *output*.

        :param root_node: RDirNode root of the module
        :param output: Output html file
        """

        # Init
        self.load_tree_template()
        self.tree_nodes = []
        self.max_layer = 0

        self.import_js({
            # script_id : js_file_name
            "#script_jquery": "jquery.min.js",
            "#script_rdir_tree": "rdir_tree.js"
        })
        self.template('#header_name').html(root_node.name)
        self.template('#header_type').html(" &lt;%s&gt;" % root_node.type)

        # Escape whitespace so the doc renders verbatim in HTML.
        header_doc = root_node.doc.replace('\t', '&nbsp;' * 4) \
            .replace(' ', '&nbsp;').replace('\n', '<br/>').strip()
        if len(header_doc) > 0:
            self.template('#header_doc').html(header_doc + '<br/>')
        else:
            # No docstring: drop the header block entirely.
            self.template.remove('#header_doc')
        self.template('title').html(root_node.name)

        # Recur
        if len(root_node.list_children()) == 0:
            # self._add_node_to_HTML("No visible children methods or members.",
            #                        "If you see this, that means this object has nothing else to show.",
            #                        "404",
            #                        0)
            pass
        else:
            self.render_tree_html(root_node)


        # Render html: populate the layer-depth <select> up to max_layer.
        for i in xrange(self.max_layer + 1):
            self.template("#choose_layer").append(
                "<option value='%d'>%d</option>" % (i, i)
            )

        self.template('#wrapper').append("\n".join(self.tree_nodes))

        # Write to file
        with open(output, 'w') as f:
            f.write(self.template.html())


    def render_tree_html(self, root_node):
        """ Render the node html. Use multiprocessing to speed up if needed.
        :param root_node: RDirNode root of the module
        """
        job_list = self.get_job_list(root_node)
        job_size = len(job_list)

        if job_size > self.MULTIPROCESS_BOUND:
            jobs_list = Util.split_jobs(job_list, self.MAX_WORKERS)
        else:
            jobs_list = [job_list]
        pool = multiprocessing.Pool(processes=self.MAX_WORKERS)

        result = []
        html = self.node_template.html()
        for jobs in jobs_list:
            if len(jobs) > 0:
                result.append(pool.apply_async(parse_tree_node_worker, (html, jobs)))

        # pool.close()
#.........这里部分代码省略.........
开发者ID:Lhfcws,项目名称:rdir,代码行数:103,代码来源:generate_page.py

示例13: extract

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
    def extract(self):
        """Parse self.html (a QQ/Tencent news page) into a ContentItem.

        Strips HTML comments, locates the article body via a cascade of
        known container selectors, removes ads and boilerplate, then
        extracts title, release time (with several page-layout fallbacks)
        and image URLs.

        :returns: populated ContentItem
        """
        # Drop HTML comments before parsing.
        self.html = re.sub('<!--.*?-->', '', self.html)
        doc = PyQuery(self.html)
        doc.remove('div#tipswindow')
        # Body container fallbacks, in order of page-layout generations.
        content_node = doc('div#Cnt-Main-Article-QQ')
        if not content_node:
            content_node = doc('div#ArticleCnt')
        if not content_node:
            content_node = doc('div#textContent')
        if not content_node:
            content_node = doc('#content')
        if not content_node:
            content_node = doc('div[id = "qnews-content"]')
            
        # Strip scripts, styles and a long list of known ad/widget blocks.
        content_node.remove('script')
        content_node.remove('style')
        content_node.remove('iframe')
        content_node.remove('div.adpip_Aritcle_QQ')
        content_node.remove('table#picInPic')
        content_node.remove('div.dayuw_ad')
        content_node.remove('div.tJieHot_')
        content_node.remove('div.b_new_mod')
        content_node.remove('div#awh_sports')
        content_node.remove('div[id = "photo-warp"]')
        content_node.remove('div#MorePic')
        content_node.remove('div#cmenu')
        content_node.remove('div#flashCff')
        content_node.remove('div#contTxt')
        content_node.remove('div#PGViframe')
        content_node.remove('div#Reading')
        content_node.remove('span[style = "BACKGROUND-COLOR: navy; COLOR: white"]')
        content_node.remove('img[width="592"][height="100"]')

        content = content_node.__unicode__()

        item = ContentItem()
        
        # Title fallbacks: h1, then the old-layout title div, then h2.
        item['title'] = self.title = doc('h1').text()
        if not item['title']:
            item['title'] = self.title = doc('div#ArticleTit').text()
        if not item['title']:
            item['title'] = self.title = doc('h2').text()
            
        item['content'] = self.content = content
        
        # Release time: .pubTime first; otherwise scan a series of known
        # info containers and regex-match a timestamp out of the text.
        item['release_time'] = self.release_time = doc('span.pubTime').text()
        p = re.compile(u"(20\d\d.*\d\d:\d\d)")

        if not self.release_time:
            self.release_time = doc('div[class = "info"]').text()
            if self.release_time == None:
                self.release_time = doc('div[id = "ArtFrom"]').text()
            if self.release_time == None:
                self.release_time = doc('div[class = "pubtime"]').text()
            if self.release_time == None:
                self.release_time = doc('span[id= "Freleasetime"]').text()
            if self.release_time == None:
                self.release_time = doc('td.xborderb1').eq(1).text()
                # Oldest layout only carries a date, not a time.
                p = re.compile(u"(20.*-\d\d)")

                
            # NOTE(review): p.search() returns None when no timestamp matches,
            # which makes .group() raise AttributeError — confirm all layouts
            # are covered.
            item['release_time'] = self.release_time = p.search(self.release_time).group()
        #item['release_switch_time'] = time.mktime(time.strptime(self.release_time,time_s))
            
        item['source'] = u"腾讯"
        item['author'] = ''
        item['pic_url'] = ''

        imgs = content_node('img')
        image_urls = []
        for img in imgs:
            # NOTE(review): if 'src' is missing, `".gif" in None` raises
            # TypeError before the `not img.get('src')` guard below.
            if ".gif" in img.get('src'):
                continue
            if not img.get('src'):
                continue
            else:
                imgs.eq(imgs.index(img)).before('<br>')
                image_urls.append(self.getRealURI(img.get('src')))
        item['image_urls'] = image_urls

        return item

示例14: extract_content

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
 def extract_content(self, html):
     """Return the outer HTML of the node matched by ``content_css_selector``.

     Nodes matching ``should_remove_css_selector`` (when set) are removed
     from the parsed document first; the removal mutates the shared lxml
     tree, so it also affects the already-selected content node.

     :param html: raw HTML string, possibly carrying xmlns attributes that
                  would break PyQuery's CSS selectors
     """
     # Strip namespace declarations so CSS selectors work on the document.
     html = re.sub(r'xmlns="[^"]+"', "", html)
     doc = PyQuery(html)
     content_node = doc.find(self.content_css_selector)
     # Explicit conditional instead of the original's `cond and f()`
     # short-circuit side-effect trick — same behaviour, clearer intent.
     if self.should_remove_css_selector:
         doc.remove(self.should_remove_css_selector)
     return content_node.outer_html()
开发者ID:christofferchen,项目名称:my-python-scripts,代码行数:8,代码来源:EpubBuilder.py

示例15: faltantes

# 需要导入模块: from pyquery import PyQuery [as 别名]
# 或者: from pyquery.PyQuery import remove [as 别名]
def faltantes():
    """Return a list aligned with get_all_links(): the URL at each index that
    still needs downloading, or None for indices already fetched.

    An entry counts as fetched when a local file named '<n>.md' exists,
    where n is the link's 1-based position.
    """
    already_fetched = {int(name.split('.')[0]) - 1 for name in glob.glob('*.md')}
    links = get_all_links()
    return [None if idx in already_fetched else url
            for idx, url in enumerate(links)]


for did, url in enumerate(faltantes()):
    if not url:
        continue
    try:
        d = PyQuery(url=url, headers=headers)

        # cleanups
        d.remove('ul.actions, #fb-root, script, div[style="clear:both"]')
        for cf in d('.clearfix'):
            if d(cf).text() == "":
                d(cf).remove()

        fecha = d('dd.published').text()
        d('.article-info').before(u'<p>[{}]</p>'.format(fecha))
        d.remove('.article-info')

        # no link in the title
        titulo = d('.item-page h2 a').text().decode('utf8')
        d('.item-page h2').text(titulo)

        # clean html content
        discurso = d('.item-page').html()
        import ipdb;ipdb.set_trace()
开发者ID:leomartinez,项目名称:discursos_cfk,代码行数:32,代码来源:scrapper.py


注:本文中的pyquery.PyQuery.remove方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。