本文整理汇总了Python中scrapy.item.Item类的典型用法代码示例。如果您正苦于以下问题：Python item.Item类的具体用法？Python item.Item怎么用？Python item.Item使用的例子？那么，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块scrapy.item的用法示例。
在下文中一共展示了item.Item类的4个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from scrapy import item [as 别名]
# 或者: from scrapy.item import Item [as 别名]
def main():
    """Run the EuroPython spider inside an in-process Scrapy crawler.

    A receiver is connected to the ``item_passed`` signal so that every item
    it is handed gets printed. The crawler is configured with a fixed
    browser-like ``USER_AGENT`` and with ``LOG_ENABLED`` switched off, then
    started; "STARTING ENGINE" / "ENGINE STOPPED" are printed around the
    ``start()`` call.
    """

    def catch_item(sender, item, **kwargs):
        """Print each item delivered through the signal dispatcher."""
        print("Item extracted:", item)

    # Set up the signal hook that catches scraped items.
    # NOTE(review): ``item_passed`` looks like a legacy signal name; current
    # Scrapy documentation lists ``item_scraped`` -- confirm before renaming.
    dispatcher.connect(catch_item, signal=signals.item_passed)

    settings = Settings()
    settings.set("USER_AGENT", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36")
    settings.set("LOG_ENABLED", False)

    # Set up the crawler.
    from scrapy.crawler import CrawlerProcess

    crawler = CrawlerProcess(settings)

    # Register the spider for the crawler. ``crawl()`` expects the spider
    # *class* (or a Crawler object) and instantiates it itself; handing it an
    # already-built instance is rejected with ValueError by newer Scrapy
    # releases, so the class is passed rather than ``EuropythonSpyder()``.
    crawler.crawl(EuropythonSpyder)

    # Start Scrapy.
    print("STARTING ENGINE")
    crawler.start()  # start the crawler, which runs the spider registered above
    print("ENGINE STOPPED")
开发者ID:PacktPublishing,项目名称:Learning-Python-Networking-Second-Edition,代码行数:25,代码来源:EuropythonSpyder.py
示例2: binary_check
# 需要导入模块: from scrapy import item [as 别名]
# 或者: from scrapy.item import Item [as 别名]
def binary_check(fx_obj, cb_obj, encoding):
    """Convert ``fx_obj`` so that it mirrors the byte-typed parts of ``cb_obj``.

    ``cb_obj`` serves as the type template and ``fx_obj`` is walked alongside
    it:

    * ``dict`` / ``Item`` template: every value of ``fx_obj`` is converted
      recursively against ``cb_obj[key]`` and collected in a new dict.
    * ``list`` template: elements are converted pairwise (``zip`` stops at
      the shorter of the two sequences) into a new list.
    * ``Request`` template: the ``'headers'`` and ``'body'`` entries of
      ``fx_obj`` are replaced in place with ``to_bytes`` results.
    * byte-string template: ``fx_obj`` is encoded with ``encoding``.

    Returns the converted object; when no rule applies ``fx_obj`` is
    returned as it was passed in.
    """
    # The checks are deliberately independent ``if`` statements, evaluated in
    # this order, each working on the result of the previous one.
    if isinstance(cb_obj, (dict, Item)):
        converted_map = {}
        for name, fx_value in fx_obj.items():
            converted_map[name] = binary_check(fx_value, cb_obj[name], encoding)
        fx_obj = converted_map

    if isinstance(cb_obj, list):
        converted_seq = []
        for fx_element, cb_element in zip(fx_obj, cb_obj):
            converted_seq.append(binary_check(fx_element, cb_element, encoding))
        fx_obj = converted_seq

    if isinstance(cb_obj, Request):
        byte_headers = {}
        for header_name, header_values in fx_obj['headers'].items():
            byte_name = to_bytes(header_name, encoding)
            byte_headers[byte_name] = [
                to_bytes(entry, encoding) for entry in header_values
            ]
        # Only swap the headers in once every entry converted successfully.
        fx_obj['headers'] = byte_headers
        fx_obj['body'] = to_bytes(fx_obj['body'], encoding)

    if isinstance(cb_obj, six.binary_type):
        fx_obj = fx_obj.encode(encoding)

    return fx_obj
示例3: another_process_item
# 需要导入模块: from scrapy import item [as 别名]
# 或者: from scrapy.item import Item [as 别名]
def another_process_item(self, result, item, info):
    """Custom ``process_item`` step that also handles a ``Request`` result.

    * An ``Item`` result is returned unchanged.
    * A ``Request`` result is handed to ``self._process_request``; the
      resulting deferreds are gathered in a ``DeferredList`` that first
      calls ``self.item_completed`` and then this method again, each with
      ``item`` and ``info``. The value of that last ``addCallback`` call
      is returned.

    Any other result type fails the assertion below; if assertions are
    disabled, ``NofilesDrop`` is raised instead.
    """
    assert isinstance(result, (Item, Request)), (
        "WoaiduBookFile pipeline' item_completed must return Item or Request, got %s"
        % (type(result))
    )

    if isinstance(result, Item):
        return result

    if not isinstance(result, Request):
        raise NofilesDrop

    pending = []
    for request in arg_to_iter(result):
        pending.append(self._process_request(request, info))

    deferred_list = DeferredList(pending, consumeErrors=1)
    deferred_list.addCallback(self.item_completed, item, info)
    # XXX: Because of this re-entry one item may be returned several times,
    # depending on how often the download URL failed. That is acceptable:
    # once an error is raised the item is no longer processed by further
    # pipeline components, and when every URL download has failed the item
    # whose book_file or book_file_url is empty can simply be dropped.
    return deferred_list.addCallback(self.another_process_item, item, info)
示例4: _get_item_field_attr
# 需要导入模块: from scrapy import item [as 别名]
# 或者: from scrapy.item import Item [as 别名]
def _get_item_field_attr(self, field_name, key, default=None):
    """Return entry ``key`` of the ``field_name`` field declared on ``self.item``.

    ``default`` is returned when ``self.item`` is not an ``Item`` or when
    the field has no ``key`` entry. The ``fields[field_name]`` lookup itself
    is not guarded, so whatever it raises for an unknown ``field_name``
    propagates to the caller.
    """
    if not isinstance(self.item, Item):
        return default
    field_meta = self.item.fields[field_name]
    return field_meta.get(key, default)