This article collects typical usage examples of the Python method scrapy.utils.serialize.ScrapyJSONEncoder.encode. If you have been wondering what exactly ScrapyJSONEncoder.encode does, how to call it, or what real code using it looks like, the curated examples below may help. You can also read further about the containing class, scrapy.utils.serialize.ScrapyJSONEncoder.
Below are 15 code examples of ScrapyJSONEncoder.encode, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
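Before the examples, a minimal sketch of what the method does. ScrapyJSONEncoder extends the standard json.JSONEncoder with support for objects the stock json module rejects, such as datetimes, Decimals, Requests and Responses (the exact output formats are asserted in Example 1 below):

import datetime
from decimal import Decimal
from scrapy.utils.serialize import ScrapyJSONEncoder

encoder = ScrapyJSONEncoder()
# Unsupported-by-default types are serialized as strings:
print(encoder.encode({'when': datetime.date(2010, 1, 2), 'price': Decimal('1000.12')}))
# -> {"when": "2010-01-02", "price": "1000.12"}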
Example 1: JsonEncoderTestCase
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class JsonEncoderTestCase(unittest.TestCase):

    def setUp(self):
        self.encoder = ScrapyJSONEncoder()

    def test_encode_decode(self):
        dt = datetime.datetime(2010, 1, 2, 10, 11, 12)
        dts = "2010-01-02 10:11:12"
        d = datetime.date(2010, 1, 2)
        ds = "2010-01-02"
        t = datetime.time(10, 11, 12)
        ts = "10:11:12"
        dec = Decimal("1000.12")
        decs = "1000.12"
        for input, output in [('foo', 'foo'), (d, ds), (t, ts), (dt, dts),
                              (dec, decs), (['foo', d], ['foo', ds])]:
            self.assertEqual(self.encoder.encode(input), json.dumps(output))

    def test_encode_deferred(self):
        self.assertIn('Deferred', self.encoder.encode(defer.Deferred()))

    def test_encode_request(self):
        r = Request("http://www.example.com/lala")
        rs = self.encoder.encode(r)
        self.assertIn(r.method, rs)
        self.assertIn(r.url, rs)

    def test_encode_response(self):
        r = Response("http://www.example.com/lala")
        rs = self.encoder.encode(r)
        self.assertIn(r.url, rs)
        self.assertIn(str(r.status), rs)
Example 2: DockerhubExtension
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class DockerhubExtension(object):

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def __init__(self, crawler):
        self.crawler = crawler
        self.job_path = crawler.settings.get('JOB_PATH')
        if not self.job_path:
            raise NotConfigured('no JOB_PATH set')

        self.json_encoder = ScrapyJSONEncoder()
        # dump job info every 5 seconds, and once more when the spider closes
        self.looping_call = LoopingCall(self.store_job_info)
        self.looping_call.start(5)
        crawler.signals.connect(self.store_job_info,
                                signal=signals.spider_closed)

    def store_job_info(self):
        with open(self.job_path, 'w') as f:
            stats = self.crawler.stats.get_stats()
            job_info = {
                'stats': stats
            }
            job_info_json = self.json_encoder.encode(job_info)
            f.write(job_info_json)
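An extension like this only runs if it is enabled in the project settings. A minimal sketch, assuming the class lives in a hypothetical myproject.extensions module:

# settings.py (module path is an assumption for illustration)
EXTENSIONS = {
    'myproject.extensions.DockerhubExtension': 500,
}
JOB_PATH = '/tmp/job_info.json'  # custom setting read by the extension above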
Example 3: RedisPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisPipeline(object):
    """Pushes serialized item into a redis list/queue"""

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        # serialize and push off the reactor thread, since redis-py blocks
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "%s:items" % spider.name
Example 4: RedisPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisPipeline(object):
    """Pushes serialized item into a redis list/queue"""

    def __init__(self, host, port, queue_type):
        self.server = redis.Redis(host, port)
        self.encoder = ScrapyJSONEncoder()
        self.queue_type = queue_type

    @classmethod
    def from_settings(cls, settings):
        host = settings.get('REDIS_HOST', 'localhost')
        port = settings.get('REDIS_PORT', 6379)
        queue_type = settings.get('QUEUE_TYPE', 'FIFO')
        return cls(host, port, queue_type)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(dict(item))
        # push once: head of the list for LIFO, tail for FIFO
        if self.queue_type == 'LIFO':
            self.server.lpush(key, data)
        else:
            self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "%s:items" % spider.name
Example 5: RedisPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisPipeline(object):
    """Pushes serialized item into a scrapy_redis list/queue"""

    def __init__(self, host, port):
        self.server = redis.Redis(host, port)
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        host = settings.get("REDIS_HOST", "localhost")
        port = settings.get("REDIS_PORT", 6379)
        return cls(host, port)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(dict(item))
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns scrapy_redis key based on given spider"""
        return "%s:items" % spider.name
Example 6: JsonItemExporter
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class JsonItemExporter(BaseItemExporter):

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file
        # There is a small difference between the behaviour of JsonItemExporter.indent
        # and ScrapyJSONEncoder.indent: ScrapyJSONEncoder.indent=None is needed to prevent
        # the addition of newlines everywhere.
        json_indent = self.indent if self.indent is not None and self.indent > 0 else None
        kwargs.setdefault('indent', json_indent)
        kwargs.setdefault('ensure_ascii', not self.encoding)
        self.encoder = ScrapyJSONEncoder(**kwargs)
        self.first_item = True

    def _beautify_newline(self):
        if self.indent is not None:
            self.file.write(b'\n')

    def start_exporting(self):
        self.file.write(b"[")
        self._beautify_newline()

    def finish_exporting(self):
        self._beautify_newline()
        self.file.write(b"]")

    def export_item(self, item):
        if self.first_item:
            self.first_item = False
        else:
            self.file.write(b',')
            self._beautify_newline()
        itemdict = dict(self._get_serialized_fields(item))
        data = self.encoder.encode(itemdict)
        self.file.write(to_bytes(data, self.encoding))
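A short usage sketch for this exporter. The file must be opened in binary mode (or be a bytes buffer) because the exporter writes bytes, and dict items are assumed to be accepted, as they are in recent Scrapy versions:

from io import BytesIO

f = BytesIO()
exporter = JsonItemExporter(f, indent=2)
exporter.start_exporting()
exporter.export_item({'name': 'foo'})
exporter.export_item({'name': 'bar'})
exporter.finish_exporting()
print(f.getvalue().decode('utf-8'))  # a JSON array with one object per item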
Example 7: RedisPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisPipeline(object):
    """
    Pushes serialized item into a redis.
    Specific for SocialSpiders
    """

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        # encode() already returns a text string, so it can be stored directly
        self.server.set(key, data)
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "{}_{}".format(spider.name, item['search_name'])
Example 8: RabbitMQPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RabbitMQPipeline(object):
    """Pushes serialized item into a RabbitMQ list/queue"""

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.basic_publish(exchange='',
                                  routing_key=key,
                                  body=data)
        return item

    def item_key(self, item, spider):
        """Returns RabbitMQ key based on given spider"""
        return "%s:items" % spider.name
Example 9: RabbitMQPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RabbitMQPipeline(object):
    """Pushes serialized item into a RabbitMQ list/queue"""

    def __init__(self, server, exchange_name):
        self.server = server
        self.exchange_name = exchange_name
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server, redis_server = connection.from_settings(settings)
        exchange_name = settings.get('RABBITMQ_EXCHANGE_NAME', EXCHANGE_NAME)
        return cls(server, exchange_name)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.basic_publish(exchange=self.exchange_name,
                                  routing_key=key,
                                  body=data)
        return item

    def item_key(self, item, spider):
        """Returns RabbitMQ key based on given spider"""
        return "%s:items" % spider.name
Example 10: RedisStoragePipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisStoragePipeline(object):

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        data = self.encoder.encode(item)
        # route list and detail items to different redis keys
        if isinstance(item, GubaPostListItem):
            key = self.item_key_list(item, spider)
        elif isinstance(item, GubaPostDetailItem):
            key = self.item_key_detail(item, spider)
        self.server.rpush(key, data)
        return item

    def item_key_list(self, item, spider):
        stock_id = item['stock_id']
        return "%s:list_items" % stock_id

    def item_key_detail(self, item, spider):
        stock_id = item['stock_id']
        return "%s:detail_items" % stock_id
Example 11: RedisPipeline
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class RedisPipeline(object):

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        return "%s:items" % spider.name
Example 12: HadoopExporter
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class HadoopExporter(BaseItemExporter):

    def __init__(self, hadoop, **kwargs):
        #self.con = file_write.Connection()
        #self.con.connect(hadoop.ip, hadoop.port)
        self.encoder = ScrapyJSONEncoder(**kwargs)
        #self.seq = file_write.SeqFileSaver(self.con, '/common/crawler/%s/' % hadoop.username.replace(".", "/"),
        #                                   1, '%s' % hadoop.username.replace(".", "_"))
        self.encoding = 'utf-8'
        self.fields_to_export = None
        self.export_empty_fields = False
        self.writer = SeqWriter(os.path.join(Utils.settings['SEQFILE_DIR'], hadoop.username.replace(".", "/")),
                                hadoop.username.replace(".", "_"))

    def close_file(self):
        print("close")
        self.writer.close()
        #self.seq.set_is_end()
        #self.con.close()

    def start_exporting(self):
        pass

    def finish_exporting(self):
        pass

    def export_item(self, item):
        value = self.encoder.encode(dict(self._get_serialized_fields(item)))
        # key the record by the item's 'key' field, falling back to its url
        self.writer.writeData(
            item['key'] if 'key' in item else item['url'],
            value
        )
Example 13: process_item
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
def process_item(self, item, spider):
    url = "http://localhost:9200/articles/%s" % (item["publication"].lower())
    encoder = ScrapyJSONEncoder()
    json_body = encoder.encode(item)
    resp = requests.post(url, data=json_body)
    log.msg("Item added to elasticSearch node. Response: " + resp.text)
    return item
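The URL here follows the pre-7.x Elasticsearch convention of POSTing to /<index>/<type>; on Elasticsearch 7 and later, where mapping types were removed, the equivalent endpoint would be http://localhost:9200/articles/_doc. Note also that log.msg is the old Scrapy logging API; current code would use the standard logging module instead.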
Example 14: JsonLinesItemExporter
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class JsonLinesItemExporter(BaseItemExporter):

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file
        self.encoder = ScrapyJSONEncoder(**kwargs)

    def export_item(self, item):
        itemdict = dict(self._get_serialized_fields(item))
        self.file.write(to_bytes(self.encoder.encode(itemdict) + "\n"))
Example 15: JsonLinesItemExporter
# Required import: from scrapy.utils.serialize import ScrapyJSONEncoder [as alias]
# Or: from scrapy.utils.serialize.ScrapyJSONEncoder import encode [as alias]
class JsonLinesItemExporter(BaseItemExporter):

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file
        kwargs.setdefault('ensure_ascii', not self.encoding)
        self.encoder = ScrapyJSONEncoder(**kwargs)

    def export_item(self, item):
        itemdict = dict(self._get_serialized_fields(item))
        data = self.encoder.encode(itemdict) + '\n'
        self.file.write(to_bytes(data, self.encoding))
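A short usage sketch for this JSON-lines exporter, mirroring the one given for Example 6 (again assuming dict items are accepted and the file is a bytes sink):

from io import BytesIO

f = BytesIO()
exporter = JsonLinesItemExporter(f)
exporter.export_item({'name': 'foo', 'price': 10})
exporter.export_item({'name': 'bar', 'price': 20})
print(f.getvalue().decode('utf-8'))
# {"name": "foo", "price": 10}
# {"name": "bar", "price": 20}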