当前位置: 首页>>代码示例>>Python>>正文


Python parsel.Selector方法代码示例

本文整理汇总了Python中parsel.Selector方法的典型用法代码示例。如果您正苦于以下问题:Python parsel.Selector方法的具体用法?Python parsel.Selector怎么用?Python parsel.Selector使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在parsel的用法示例。


在下文中一共展示了parsel.Selector方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: scrape

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def scrape():
    """Print every quote from the JS-driven quotes.toscrape.com listing.

    A PhantomJS browser loads the listing, and each page it holds is parsed
    with ``parsel.Selector``. Every ``div.quote`` element is printed as a
    dict with the keys ``text``, ``author`` and ``tags``. After a page is
    printed the ``li.next > a`` link is clicked; the loop ends as soon as
    that link can no longer be found.

    The browser is always shut down on exit, including when an error
    interrupts the crawl.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get('http://quotes.toscrape.com/js-onclick')
        while True:
            # Parse whatever markup the browser currently holds.
            sel = parsel.Selector(text=driver.page_source)
            for quote in sel.css('div.quote'):
                print({
                    'text': quote.css('span.text::text').extract_first(),
                    'author': quote.css('span small::text').extract_first(),
                    'tags': quote.css('div.tags a.tag::text').extract(),
                })
            # Only the lookup is guarded: a missing link means the last page.
            try:
                next_button = driver.find_element_by_css_selector('li.next > a')
            except NoSuchElementException:
                break
            next_button.click()
    finally:
        # Without this the headless browser process outlives the function.
        driver.quit()
开发者ID:scrapinghub,项目名称:scrapy-training,代码行数:18,代码来源:spider_4_standalone_selenium.py

示例2: parse

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse(self, id):
    """Build a field dictionary for one stored project page.

    The document whose ``_id`` equals ``id`` is read from
    ``MONGO_CLIENT['ppp']['proj_text']`` and its ``text`` field is parsed
    with ``Selector``. Two value lists are then extracted through
    ``self.parse_value``: one with ``self.re_time_x`` and one with
    ``self.details_x``.

    The first two values of the first list are stored as the title and the
    (cleaned) time string. The second list is scanned for the known
    keyword labels: the value that follows a label is stored under that
    label, or ``'null'`` when the label is the last entry or is directly
    followed by another label. If a label occurs more than once, the last
    occurrence wins.

    Args:
        id: Value matched against the ``_id`` field of the stored document.

    Returns:
        dict: The collected fields.

    Raises:
        IndexError: If fewer than two values are extracted with
            ``self.re_time_x``.
    """
    coll = MONGO_CLIENT['ppp']['proj_text']
    resp = coll.find_one({'_id': id})['text']
    hxs = Selector(text=resp)
    title_re_time = list(self.parse_value(hxs, self.re_time_x))
    details = list(self.parse_value(hxs, self.details_x))
    title = title_re_time[0]
    re_time = title_re_time[1]
    # NOTE(review): these literals contain only '?' placeholders and repeat
    # each other, which suggests the original non-ASCII text was lost in an
    # encoding round-trip -- confirm against the upstream project.
    keywords = ['????', '????', '?????', '????', '????', '????', '??????/??', '?????', '????']
    proj_dict = {}
    proj_dict['????'] = title
    proj_dict['??????'] = re_time.replace('???????', '')
    for keyword in keywords:
        for position, cell in enumerate(details):
            if cell != keyword:
                continue
            # Slicing never raises, so a label in the last slot simply
            # yields an empty "following" list instead of an IndexError.
            following = details[position + 1:position + 2]
            if following and following[0] not in keywords:
                proj_dict[keyword] = following[0]
            else:
                proj_dict[keyword] = 'null'
    return proj_dict
开发者ID:HughWen,项目名称:wen_spiders,代码行数:27,代码来源:PPPSpider.py

示例3: find_matches

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def find_matches(self, sel):
    """
    Generator over the rows of the ``gb-matches`` table in a parsel.Selector.

    Every ``<tr>`` of that table is converted with ``self._find_match``.
    When the resulting item has a falsy ``'time_secs'`` value (treated as a
    live match), the page at ``item['url']`` is downloaded and the first
    iframe ``src`` found next to an "english" stream link is stored, after
    ``clean_stream_url``, under ``item['stream']``.

    :param sel: selector for the page listing the matches
    :returns: yields one item per table row
    """
    stream_xpath = ("//div[@class='matches-streams']"
                    "/span[.//a[re:test(text(),'english', 'i')]]"
                    "//iframe/@src")
    for row in sel.xpath("//table[@id='gb-matches']//tr"):
        item = self._find_match(row)
        # Populate stream data if match is live
        if not item['time_secs']:
            detail_page = Selector(text=requests.get(item['url']).text)
            item['stream'] = detail_page.xpath(stream_xpath).extract_first()
            item['stream'] = clean_stream_url(item['stream'])
        yield item
开发者ID:Granitosaurus,项目名称:ggmt,代码行数:19,代码来源:matchticker.py

示例4: parse_tags

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse_tags(id_get):
    """Return the tag record for ``id_get``, fetching it when not stored yet.

    ``tags_coll`` is queried once for a document whose ``id`` equals
    ``id_get``; a truthy result is returned as-is. Otherwise the URL
    ``API % id_get`` is requested, the first value extracted with ``tag_x``
    is printed, and a new ``{'id': ..., 'tags': ...}`` record is passed to
    ``save_tags`` and returned.

    Args:
        id_get: Identifier used both for the lookup and to build the URL.

    Returns:
        The stored document, or the newly built ``dict``.

    Raises:
        IndexError: If ``parse_value`` yields no value for ``tag_x``.
    """
    # One lookup is enough: reuse its result instead of querying again.
    cached = tags_coll.find_one({'id': id_get})
    if cached:
        return cached

    url = API % id_get
    resp = requests.get(url, headers=headers, timeout=10)
    hxs = Selector(text=resp.text)
    tags = parse_value(hxs, tag_x)[0]
    # Call form works on Python 2 and 3; the bare statement is 2-only.
    print(tags)
    item = {
        'id': id_get,
        'tags': tags,
    }
    save_tags(id_get, item)
    return item
开发者ID:HughWen,项目名称:database_project,代码行数:18,代码来源:getContent.py

示例5: parse_words

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse_words(self, url):
    """Fetch ``url`` and return the values matched by ``self.words_x``.

    The page is requested through ``self.p_get``, parsed with ``Selector``
    and handed to ``self.parse_value``; the result is materialised as a list.
    """
    page = Selector(text=self.p_get(url).text)
    return list(self.parse_value(page, self.words_x))
开发者ID:HughWen,项目名称:wen_spiders,代码行数:7,代码来源:WordsSpider.py

示例6: parse

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse(self):
    """Crawl the word lists and dump every word to ``oxford_words.txt``.

    Runs in three phases:

    1. ``self.sublist_url`` receives ``self.base_url`` plus everything
       ``self.sublist_x`` matches on the base page.
    2. Each sub-list URL is requested (after a one-second sleep) and the
       values matched by ``self.page_x`` are appended to ``self.page_url``
       unless already present.
    3. The words of every page URL, then of every sub-list URL, are
       written one per line.
    """
    self.sublist_url.append(self.base_url)

    # build the sublist_url
    print('building the sublist_url...')
    base_page = Selector(text=self.p_get(self.base_url).text)
    self.sublist_url += self.parse_value(base_page, self.sublist_x)

    # build the page_url
    print('building the page_url...')
    for sublist in self.sublist_url:
        time.sleep(1)
        listing = Selector(text=self.p_get(sublist).text)
        for link in self.parse_value(listing, self.page_x):
            if link in self.page_url:
                continue
            self.page_url.append(link)

    # parse the word and write to file
    print('parsing the words and writing to file...')
    with open('oxford_words.txt', 'w') as words_f:
        # First the words of page_url, then the words of sublist_url.
        for url_group in (self.page_url, self.sublist_url):
            for url in url_group:
                words = self.parse_words(url)
                print(url)
                print(str(len(words)) + 'words')
                for word in words:
                    words_f.write(word + '\n')
开发者ID:HughWen,项目名称:wen_spiders,代码行数:40,代码来源:WordsSpider.py

示例7: select

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def select(self, css):
    """
    Select ops from the graph using css-like selectors. The available selectors
    and corresponding op attributes are:
        - element: Op type
        - id: Op name
        - class: Op label
        - attribute: Any key-value pair from op metadata
        - hierarchy: Scopes provide op hierarchy

    The graph is serialised with ``self._to_xml()``, queried with parsel, and
    each hit is mapped back through ``self._selector_to_op``; hits that map
    to ``None`` are dropped.

    Arguments:
        css (str): A css selector string

    Returns:
        list of ops

    Examples:
        # Get all ops with the "bias" label
        subgraph.select(".bias")

        # Get the op named "conv_filter"
        subgraph.select("#conv_filter")

        # Get the "bias" ops within Affine layers
        subgraph.select("Affine .bias")

        # Get all TensorValueOps
        subgraph.select("TensorValueOp")

        # Get all ops from timestep 3 in an RNN (ie with metadata "recurrent_step=3")
        subgraph.select("[recurrent_step=3]")
    """
    hits = parsel.Selector(self._to_xml()).css(css)
    candidates = (self._selector_to_op(hit) for hit in hits)
    return [op for op in candidates if op is not None]
开发者ID:NervanaSystems,项目名称:ngraph,代码行数:42,代码来源:graph.py

示例8: download_matches

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def download_matches(self):
    """
    Downloads the page at ``self.game_url`` and extracts its matches.

    :return: the result of ``self.find_matches`` for the parsed page
    :raises ConnectionRefusedError: if the response status is not 200
    """
    response = requests.get(self.game_url)
    status = response.status_code
    if status == 200:
        return self.find_matches(Selector(text=response.text))
    raise ConnectionRefusedError('Got response error {}'.format(status))
开发者ID:Granitosaurus,项目名称:ggmt,代码行数:12,代码来源:matchticker.py

示例9: download_history

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def download_history(self):
    """
    Downloads the ``gosubet`` page below ``self.game_url`` and extracts the
    recent matches from it.

    :return: the result of ``self.find_history`` for the parsed page
    :raises ConnectionRefusedError: if the response status is not 200
    """
    response = requests.get('{}/gosubet'.format(self.game_url))
    status = response.status_code
    if status == 200:
        return self.find_history(Selector(text=response.text))
    raise ConnectionRefusedError('Got response error {}'.format(status))
开发者ID:Granitosaurus,项目名称:ggmt,代码行数:12,代码来源:matchticker.py

示例10: find_history

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def find_history(self, sel):
    """
    Generator over the recent matches found in a parsel.Selector object.

    Looks up every ``<tr>`` under the parent of an ``<h2>`` whose text
    contains "Recent" and converts each row with ``self._find_match``.

    :param sel: selector for the page listing the matches
    :returns: yields one converted item per row
    """
    rows = sel.xpath("//h2[contains(text(),'Recent')]/..//tr")
    yield from (self._find_match(row) for row in rows)
开发者ID:Granitosaurus,项目名称:ggmt,代码行数:11,代码来源:matchticker.py

示例11: _test_match

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def _test_match(self, game):
    """Check ``GosuTicker.find_matches`` against the stored fixture of ``game``.

    Loads ``match_<game>.html`` and ``match_<game>.json`` from the ``tests``
    package resources, runs ``find_matches`` on the HTML and asserts that
    the JSON dump of the result equals the stored JSON text exactly.
    """
    ticker = GosuTicker(game)
    page_html = pkg_resources.resource_string(
        'tests', f'/html/match_{game}.html').decode('utf-8')
    expected_json = pkg_resources.resource_string(
        'tests', f'/html/match_{game}.json').decode('utf-8')
    found = list(ticker.find_matches(Selector(text=page_html)))
    assert json.dumps(found) == expected_json
开发者ID:Granitosaurus,项目名称:ggmt,代码行数:9,代码来源:test_matchticker.py

示例12: get_version

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def get_version(cls, entry, *matchers):
    """Return the first version extracted from the entry's response body.

    Args:
        entry: Mapping whose ``['response']['content']['text']`` holds the
            markup to inspect.
        *matchers: ``(xpath, regexp)`` pairs, tried in order. The first
            value selected by ``xpath`` is passed with ``regexp`` to
            ``extract_version``.

    Returns:
        The first truthy result of ``extract_version``, or ``None`` when no
        matcher produces one.
    """
    page = Selector(text=entry['response']['content']['text'])
    for xpath, regexp in matchers:
        value = page.xpath(xpath).extract_first()
        if value:
            version = extract_version(value, regexp)
            if version:
                return version
    return None
开发者ID:alertot,项目名称:detectem,代码行数:14,代码来源:matchers.py

示例13: check_presence

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def check_presence(cls, entry, *matchers):
    """Tell whether any XPath in ``matchers`` selects something in the body.

    Args:
        entry: Mapping whose ``['response']['content']['text']`` holds the
            markup to inspect.
        *matchers: XPath expressions, evaluated in order until one yields a
            truthy selection.

    Returns:
        bool: ``True`` on the first truthy selection, otherwise ``False``.
    """
    page = Selector(text=entry['response']['content']['text'])
    return any(page.xpath(xpath) for xpath in matchers)
开发者ID:alertot,项目名称:detectem,代码行数:12,代码来源:matchers.py

示例14: get_module_name

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def get_module_name(cls, entry, *matchers):
    """Return the first module name extracted from the entry's response body.

    Args:
        entry: Mapping whose ``['response']['content']['text']`` holds the
            markup to inspect.
        *matchers: ``(xpath, regexp)`` pairs, tried in order. The first
            value selected by ``xpath`` is passed with ``regexp`` to
            ``extract_name``.

    Returns:
        The first truthy result of ``extract_name``, or ``None`` when no
        matcher produces one.
    """
    page = Selector(text=entry['response']['content']['text'])
    for xpath, regexp in matchers:
        value = page.xpath(xpath).extract_first()
        if value:
            name = extract_name(value, regexp)
            if name:
                return name
    return None
开发者ID:alertot,项目名称:detectem,代码行数:14,代码来源:matchers.py

示例15: __init__

# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def __init__(self, text, type='html'):
    """Wrap ``text`` in a ``Selector`` and start with an empty ``set`` attribute.

    Args:
        text: Passed positionally as the first argument of ``Selector``.
        type: Forwarded as ``Selector``'s ``type`` keyword; defaults to
            ``'html'``.
    """
    # `type` shadows the builtin, but it is part of the public signature.
    self.sel = Selector(text, type=type)
    self.set = set()
开发者ID:lymlhhj123,项目名称:scrapy_redis_splash_spider,代码行数:5,代码来源:items.py


注:本文中的parsel.Selector方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。