本文整理汇总了Python中parsel.Selector类的典型用法代码示例。如果您正苦于以下问题:Python parsel.Selector的具体用法?Python parsel.Selector怎么用?Python parsel.Selector使用的例子?那么恭喜您,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块parsel的用法示例。
在下文中一共展示了parsel.Selector类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: scrape
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def scrape():
    """Print every quote from the paginated quotes.toscrape.com JS demo.

    A PhantomJS browser loads the page, the rendered HTML is parsed with
    parsel, and each quote is printed as a dict with the keys ``text``,
    ``author`` and ``tags``. The "next" link is clicked until it can no
    longer be found, at which point the loop ends.

    The browser is always shut down on exit, including when an error
    interrupts the scrape, so no PhantomJS process is left behind.
    """
    driver = webdriver.PhantomJS()
    try:
        driver.get('http://quotes.toscrape.com/js-onclick')
        while True:
            # Parse the HTML as currently rendered by the browser.
            sel = parsel.Selector(text=driver.page_source)
            for quote in sel.css('div.quote'):
                print({
                    'text': quote.css('span.text::text').extract_first(),
                    'author': quote.css('span small::text').extract_first(),
                    'tags': quote.css('div.tags a.tag::text').extract(),
                })
            try:
                next_button = driver.find_element_by_css_selector('li.next > a')
                next_button.click()
            except NoSuchElementException:
                # No "next" link: the last page has been processed.
                break
    finally:
        # Release the browser process even if scraping failed part-way.
        driver.quit()
示例2: parse
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse(self, id):
    """Build a field dictionary for the stored project page with ``_id == id``.

    The page text is read from the ``ppp.proj_text`` Mongo collection and
    parsed with a Selector. ``self.parse_value`` is applied twice: with
    ``self.re_time_x`` to obtain the title (first value) and the time string
    (second value), and with ``self.details_x`` to obtain a flat list of
    detail strings. For every entry of ``details`` that equals a keyword, the
    entry that follows it is stored under that keyword; ``'null'`` is stored
    instead when the following entry is itself a keyword or does not exist.

    NOTE(review): the keyword and key literals below consist only of ``?``
    characters, which looks like text damaged by an encoding problem; several
    of them are identical and therefore share one dictionary key. They are
    reproduced unchanged here -- restore them from the original source.

    :param id: value of the ``_id`` field of the stored document
    :returns: dict mapping field labels to the extracted strings
    """
    coll = MONGO_CLIENT['ppp']['proj_text']
    resp = coll.find_one({'_id': id})['text']
    hxs = Selector(text=resp)
    title_re_time = list(self.parse_value(hxs, self.re_time_x))
    details = list(self.parse_value(hxs, self.details_x))
    title = title_re_time[0]
    re_time = title_re_time[1]
    keywords = ['????', '????', '?????', '????', '????', '????', '??????/??', '?????', '????']
    proj_dict = {}
    proj_dict['????'] = title
    proj_dict['??????'] = re_time.replace('???????', '')
    last_index = len(details) - 1
    for keyword in keywords:
        for index, label in enumerate(details):
            if label != keyword:
                continue
            # A label only has a value when something follows it and that
            # something is not another label.
            has_value = index < last_index and details[index + 1] not in keywords
            proj_dict[keyword] = details[index + 1] if has_value else 'null'
    return proj_dict
示例3: find_matches
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def find_matches(self, sel):
    """
    Generator over the live and upcoming matches in a parsel.Selector object.

    Every row of the ``gb-matches`` table is converted with
    ``self._find_match``. When the resulting item has a falsy ``time_secs``
    (treated as a live match), the page at ``item['url']`` is downloaded and
    the ``src`` of the first iframe whose stream link text matches
    "english" (case-insensitive) is cleaned and stored in ``item['stream']``.

    :returns: yields the items produced by ``self._find_match``
    """
    stream_query = ("//div[@class='matches-streams']"
                    "/span[.//a[re:test(text(),'english', 'i')]]"
                    "//iframe/@src")
    for row in sel.xpath("//table[@id='gb-matches']//tr"):
        item = self._find_match(row)
        is_live = not item['time_secs']
        if is_live:
            # Stream details are only on the match's own page.
            detail_page = requests.get(item['url'])
            detail_sel = Selector(text=detail_page.text)
            item['stream'] = detail_sel.xpath(stream_query).extract_first()
            item['stream'] = clean_stream_url(item['stream'])
        yield item
示例4: parse_tags
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse_tags(id_get):
    """Return the tag record for ``id_get``, fetching and storing it on a miss.

    The ``tags_coll`` collection is consulted first and a stored record is
    returned as-is. Otherwise the page at ``API % id_get`` is downloaded, the
    first value produced by ``parse_value`` for ``tag_x`` is taken as the
    tags, and the new record is printed, saved via ``save_tags`` and returned.

    :param id_get: identifier substituted into the ``API`` URL template
    :returns: dict with the keys ``id`` and ``tags`` on a miss, or the stored
        document on a hit
    """
    # Single lookup: the result is reused instead of querying a second time.
    cached = tags_coll.find_one({'id': id_get})
    if cached:
        return cached

    url = API % id_get
    resp = requests.get(url, headers=headers, timeout=10)
    hxs = Selector(text=resp.text)
    tags = parse_value(hxs, tag_x)[0]
    print(tags)
    item = {
        'id': id_get,
        'tags': tags,
    }
    save_tags(id_get, item)
    return item
示例5: parse_words
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse_words(self, url):
    """Fetch ``url`` and return, as a list, the values matched by ``self.words_x``.

    The page is downloaded with ``self.p_get`` and its text is parsed with a
    Selector before ``self.parse_value`` is applied.
    """
    page = self.p_get(url)
    document = Selector(text=page.text)
    return list(self.parse_value(document, self.words_x))
示例6: parse
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def parse(self):
    """Crawl the word lists and write every word to ``oxford_words.txt``.

    Runs in three stages:

    1. ``self.base_url`` plus every value extracted from it with
       ``self.sublist_x`` are collected in ``self.sublist_url``.
    2. Each sub-list URL is fetched (one second apart) and the values
       extracted with ``self.page_x`` are added to ``self.page_url``,
       skipping ones already present.
    3. The words of every page URL, then of every sub-list URL, are written
       to the output file, one word per line.

    Progress is reported with ``print``.
    """
    self.sublist_url.append(self.base_url)

    # Stage 1: build the sublist_url.
    print('building the sublist_url...')
    resp = self.p_get(self.base_url)
    hxs = Selector(text=resp.text)
    self.sublist_url += self.parse_value(hxs, self.sublist_x)

    # Stage 2: build the page_url.
    print('building the page_url...')
    for url in self.sublist_url:
        time.sleep(1)  # pause between requests
        resp0 = self.p_get(url)
        hxs = Selector(text=resp0.text)
        for inner in self.parse_value(hxs, self.page_x):
            if inner not in self.page_url:
                self.page_url.append(inner)

    # Stage 3: parse the words and write them to file.
    print('parsing the words and writing to file...')
    # Explicit encoding so the output does not depend on the platform default.
    with open('oxford_words.txt', 'w', encoding='utf-8') as words_f:
        # Words of the page URLs first, then words of the sub-list URLs.
        for url_group in (self.page_url, self.sublist_url):
            for url in url_group:
                word_list = self.parse_words(url)
                print(url)
                print(str(len(word_list)) + 'words')
                words_f.writelines(word + '\n' for word in word_list)
示例7: select
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def select(self, css):
    """
    Select ops from the graph using css-like selectors.

    The graph is rendered with ``self._to_xml()`` and queried with parsel.
    Each matching node is mapped back through ``self._selector_to_op``;
    nodes for which that returns ``None`` are left out.

    The available selectors and corresponding op attributes are:
        - element: Op type
        - id: Op name
        - class: Op label
        - attribute: Any key-value pair from op metadata
        - hierarchy: Scopes provide op hierarchy

    Arguments:
        css (str): A css selector string

    Returns:
        list of ops

    Examples:
        # Get all ops with the "bias" label
        subgraph.select(".bias")

        # Get the op named "conv_filter"
        subgraph.select("#conv_filter")

        # Get the "bias" ops within Affine layers
        subgraph.select("Affine .bias")

        # Get all TensorValueOps
        subgraph.select("TensorValueOp")

        # Get all ops from timestep 3 in an RNN (ie with metadata "recurrent_step=3")
        subgraph.select("[recurrent_step=3]")
    """
    document = parsel.Selector(self._to_xml())
    candidates = (self._selector_to_op(node) for node in document.css(css))
    return [op for op in candidates if op is not None]
示例8: download_matches
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def download_matches(self):
    """
    Download the game page and search it for live and upcoming matches.

    :return: the result of ``self.find_matches`` for the downloaded page
    :raises ConnectionRefusedError: if the response status is not 200
    """
    response = requests.get(self.game_url)
    if response.status_code == 200:
        return self.find_matches(Selector(text=response.text))
    raise ConnectionRefusedError('Got response error {}'.format(response.status_code))
示例9: download_history
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def download_history(self):
    """
    Download the ``gosubet`` page of the game and search it for recent matches.

    :return: the result of ``self.find_history`` for the downloaded page
    :raises ConnectionRefusedError: if the response status is not 200
    """
    history_url = '{}/gosubet'.format(self.game_url)
    response = requests.get(history_url)
    if response.status_code == 200:
        return self.find_history(Selector(text=response.text))
    raise ConnectionRefusedError('Got response error {}'.format(response.status_code))
示例10: find_history
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def find_history(self, sel):
    """
    Generator over the recent matches in a parsel.Selector object.

    Rows are taken from the parent element of the ``h2`` heading whose text
    contains "Recent", and each one is converted with ``self._find_match``.

    :returns: yields the items produced by ``self._find_match``
    """
    for row in sel.xpath("//h2[contains(text(),'Recent')]/..//tr"):
        yield self._find_match(row)
示例11: _test_match
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def _test_match(self, game):
    """Check that matches parsed from the stored HTML equal the stored JSON.

    Loads ``match_<game>.html`` and ``match_<game>.json`` from the ``tests``
    package, runs ``GosuTicker.find_matches`` on the HTML and asserts that
    the JSON dump of the result is identical to the expected file content.
    """
    ticker = GosuTicker(game)
    html_bytes = pkg_resources.resource_string('tests', f'/html/match_{game}.html')
    html = html_bytes.decode('utf-8')
    expected_bytes = pkg_resources.resource_string('tests', f'/html/match_{game}.json')
    expected = expected_bytes.decode('utf-8')
    found = list(ticker.find_matches(Selector(text=html)))
    assert json.dumps(found) == expected
示例12: get_version
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def get_version(cls, entry, *matchers):
    """Return the first version found in the entry's response body.

    Each matcher is an ``(xpath, regexp)`` pair. The first value selected by
    the XPath is passed, together with the regexp, to ``extract_version``;
    the first truthy result is returned. Matchers whose XPath yields nothing
    are skipped. ``None`` is returned when no matcher produces a version.
    """
    document = Selector(text=entry['response']['content']['text'])
    for query, pattern in matchers:
        candidate = document.xpath(query).extract_first()
        found = extract_version(candidate, pattern) if candidate else None
        if found:
            return found
    return None
示例13: check_presence
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def check_presence(cls, entry, *matchers):
    """Tell whether any XPath in ``matchers`` selects something in the body.

    The body is read from ``entry['response']['content']['text']``. Returns
    ``True`` as soon as one XPath gives a non-empty selection, ``False`` if
    none does.
    """
    document = Selector(text=entry['response']['content']['text'])
    return any(document.xpath(query) for query in matchers)
示例14: get_module_name
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def get_module_name(cls, entry, *matchers):
    """Return the first name found in the entry's response body.

    Each matcher is an ``(xpath, regexp)`` pair. The first value selected by
    the XPath is passed, together with the regexp, to ``extract_name``; the
    first truthy result is returned. Matchers whose XPath yields nothing are
    skipped. ``None`` is returned when no matcher produces a name.
    """
    document = Selector(text=entry['response']['content']['text'])
    for query, pattern in matchers:
        candidate = document.xpath(query).extract_first()
        found = extract_name(candidate, pattern) if candidate else None
        if found:
            return found
    return None
示例15: __init__
# 需要导入模块: import parsel [as 别名]
# 或者: from parsel import Selector [as 别名]
def __init__(self, text, type='html'):
    """Parse ``text`` into a Selector and start with an empty ``set``.

    ``type`` is forwarded to ``Selector`` unchanged and defaults to
    ``'html'``.
    """
    parsed = Selector(text=text, type=type)
    self.sel = parsed
    self.set = set()