本文整理汇总了Python中scrapy.loader.ItemLoader.default_input_processor方法的典型用法代码示例。如果您正苦于以下问题:Python ItemLoader.default_input_processor方法的具体用法?Python ItemLoader.default_input_processor怎么用?Python ItemLoader.default_input_processor使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类scrapy.loader.ItemLoader的用法示例。
在下文中一共展示了ItemLoader.default_input_processor方法的7个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: parse
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse(self, response):
    """Dispatch seminar pages: follow list links, or scrape seminar rows.

    If the current URL is a seminar-list index page, every link inside the
    intern-list block is followed back into this same callback; otherwise
    each table row is loaded into a SeminarItem.
    """
    if re.search('/displaySeminarList/', response.url):
        # Index page: enqueue every linked page for re-parsing.
        for href in response.xpath('//div[@class="internList splitEntry"]//@href').extract():
            yield scrapy.Request(response.urljoin(href), self.parse)
    else:
        # The company id is carried in the query string of the page URL.
        corp_id = parse_qs(urlparse(response.url).query)['corpId']
        # NOTE: the former enumerate() index was unused and has been dropped.
        for row in response.xpath(self.seminar_list_xpath):
            loader = ItemLoader(SeminarItem(), row)
            loader.default_input_processor = MapCompose(unicode.strip)
            loader.default_output_processor = Join()
            loader.add_value('companyid', corp_id)
            loader.add_xpath('name', '//div[@id="headerWrap"]//h3/text()')
            # Raw strings: the originals relied on invalid escape sequences
            # like '\d' in plain string literals.
            loader.add_xpath('date', './/td[@class="date"]/text()',
                             re=r'\d+\/\d+\/\d+')
            loader.add_xpath('time', './/td[@class="time"]/text()')
            loader.add_xpath('area', './/td[@class="area"]/text()')
            loader.add_xpath('place', './/td[@class="place"]/text()')
            # Latitude / longitude are embedded in a 'mycom_loc|N,E' link.
            loader.add_xpath('loc_n', './/td[@class="place"]//a',
                             re=r'mycom_loc\|(\d+\/\d+\/\d+\.\d+)\,\d+\/\d+\/\d+\.\d+')
            loader.add_xpath('loc_e', './/td[@class="place"]//a',
                             re=r'mycom_loc\|\d+\/\d+\/\d+\.\d+\,(\d+\/\d+\/\d+\.\d+)')
            loader.add_xpath('target', './/td[@class="target"]/text()')
            yield loader.load_item()
示例2: get_player_info
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def get_player_info(self, response):
    """Build an NFL_Player_2015 item from a player profile page.

    Jersey number and position share a single <li>; they are split apart
    here, falling back to empty strings when the node is absent.
    """
    loader = ItemLoader(item=NFL_Player_2015(), response=response)
    loader.default_input_processor = MapCompose(unicode.strip)
    loader.default_output_processor = Join()
    # extract() always returns a list; index it only after confirming it is
    # non-empty. The original first line did extract()[0] unconditionally,
    # which raised IndexError on missing nodes before the fallback could run.
    number_and_position = response.xpath(
        '//*[@id="content"]/div[3]/div[2]/div[3]/ul[1]/li[1]/text()').extract()
    if number_and_position:
        parts = number_and_position[0].split()
        number = parts[0]
        position = parts[1]
    else:
        number = ''
        position = ''
    loader.add_value('number', number)
    loader.add_value('position', position)
    loader.add_xpath('name', '//*[@id="content"]/div[3]/div[2]/h1/text()')
    loader.add_xpath('team', '//*[@id="content"]/div[3]/div[2]/div[3]/ul[1]/li[3]/a/text()')
    yield loader.load_item()
示例3: parse_depth_chart
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse_depth_chart(self, response):
    """Yield an NFL_Team_2015 item holding the team's division and name."""
    team_loader = ItemLoader(item=NFL_Team_2015(), response=response)
    team_loader.default_input_processor = MapCompose(unicode.strip)
    team_loader.default_output_processor = Join()
    # Both fields come from the sub-branding header block.
    field_xpaths = (
        ("division", '//*[@id="sub-branding"]/div[2]/text()'),
        ("name", '//*[@id="sub-branding"]/h2/a/b/text()'),
    )
    for field_name, field_xpath in field_xpaths:
        team_loader.add_xpath(field_name, field_xpath)
    yield team_loader.load_item()
示例4: parse_auction_item
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse_auction_item(self, response):
    """Populate an AuctionItems item from the auction_item_fields mapping."""
    item_loader = ItemLoader(AuctionItems(), response=response)
    # Split on whitespace, then strip escape characters from each piece.
    item_loader.default_input_processor = MapCompose(
        lambda value: value.split(), replace_escape_chars)
    item_loader.default_output_processor = Join()
    for field_name, field_xpath in auction_item_fields.iteritems():
        item_loader.add_xpath(field_name, field_xpath)
    yield item_loader.load_item()
示例5: parse_bids
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse_bids(self, response):
    """Yield one BidItems item per bid row on the page.

    Each row gets its own loader scoped to that row's selector; field
    xpaths come from the shared auction_bid_fields mapping.
    """
    selector = Selector(response)
    # .xpath() replaces the long-deprecated Selector.select() alias,
    # matching the other examples in this file.
    for bid in selector.xpath(self.bid_list_xpath):
        loader = ItemLoader(BidItems(), selector=bid)
        loader.default_input_processor = MapCompose(lambda v: v.split(), replace_escape_chars)
        loader.default_output_processor = Join()
        for field, xpath in auction_bid_fields.iteritems():
            loader.add_xpath(field, xpath)
        yield loader.load_item()
示例6: parse
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse(self, response):
def strip_dollar(x):
return x.strip('$')
self.driver.get(response.url)
try:
WebDriverWait(self.driver, 15).until(
EC.presence_of_element_located(
(By.XPATH,
'//*[@id="depart-container"]/div[2]/div[1]/div/[@style="width: 0%;"]')))
except TimeoutException:
print 'Page load time out'
pass
while True:
try:
try:
WebDriverWait(self.driver, 15).until(
EC.presence_of_element_located(
(By.XPATH,
'//*[@id="depart-container"]/div/div/div/button')))
except TimeoutException:
break
next = self.driver.find_element_by_xpath(
'//*[@id="depart-container"]/div/div/div/button')
next.click()
except ElementNotVisibleException:
break
for trips in Selector(
text=self.driver.page_source).xpath(self.trips_list_xpath):
loader = ItemLoader(BusTrip(), selector=trips)
loader.default_input_processor = MapCompose(unicode.strip)
loader.default_output_processor = Join()
loader.price_in = MapCompose(strip_dollar)
for field, xpath in self.item_fields.iteritems():
loader.add_xpath(field, xpath)
dateoftrip = str(response.url).split("/")[-1]
loader.add_value('dateoftrip', dateoftrip.decode('unicode-escape'))
yield loader.load_item()
示例7: parse_answers
# 需要导入模块: from scrapy.loader import ItemLoader [as 别名]
# 或者: from scrapy.loader.ItemLoader import default_input_processor [as 别名]
def parse_answers(self, response):
    """Yield a cleaned ZhihuAnswer item for every answer on the page."""
    sel = Selector(response)
    for answer_sel in sel.xpath(self.answers_list_xpath):
        answer_loader = ItemLoader(item=ZhihuAnswer(), selector=answer_sel)
        answer_loader.default_input_processor = MapCompose(unicode.strip)
        answer_loader.default_output_processor = Join()
        for field_name, field_xpath in self.item_fields.iteritems():
            answer_loader.add_xpath(field_name, field_xpath)
        item = answer_loader.load_item()
        # Escape non-ASCII characters in the answer body as HTML entities.
        item["answer"] = item["answer"].encode('ascii', 'xmlcharrefreplace')
        # Same for the summary image markup, defaulting to an empty string.
        if "summary_img" in item:
            item["summary_img"] = item["summary_img"].encode('ascii', 'xmlcharrefreplace')
        else:
            item['summary_img'] = ""
        # Vote counts arrive as text; store them as integers.
        item["vote"] = int(item["vote"])
        # Anonymous answers carry no author field.
        if "author" not in item:
            item["author"] = u'匿名用户'
        # Turn relative links into absolute zhihu.com URLs.
        item["question_link"] = u"http://www.zhihu.com" + item["question_link"]
        if "author_link" in item:
            item["author_link"] = u"http://www.zhihu.com" + item["author_link"]
        else:
            item["author_link"] = ""
        # Record the scrape date.
        item["date"] = date.today()
        yield item