

Python serialize.ScrapyJSONEncoder Class Code Examples

This article collects typical usage examples of the ScrapyJSONEncoder class from scrapy.utils.serialize in Python. If you are wondering what the ScrapyJSONEncoder class does, how to use it, or what it looks like in real code, the hand-picked class examples below may help.


The following presents 15 code examples of the ScrapyJSONEncoder class, sorted by popularity by default.
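
Before the project excerpts, here is a minimal, self-contained sketch (not taken from any of the projects cited below) of what ScrapyJSONEncoder adds on top of the standard json.JSONEncoder: values that plain JSON cannot handle, such as datetime, date, time and Decimal, as well as Scrapy Request/Response objects, are serialized to readable strings instead of raising TypeError.

import datetime
from decimal import Decimal

from scrapy.http import Request
from scrapy.utils.serialize import ScrapyJSONEncoder

encoder = ScrapyJSONEncoder()

# datetime and Decimal values are rendered as plain strings
print(encoder.encode({'when': datetime.datetime(2010, 1, 2, 10, 11, 12),
                      'price': Decimal('1000.12')}))
# {"when": "2010-01-02 10:11:12", "price": "1000.12"}

# Request and Response objects are rendered as a short textual representation
print(encoder.encode(Request("http://www.example.com/lala")))

The 15 excerpts below wire this same encoder into unit tests, item exporters, and Redis/RabbitMQ/Elasticsearch pipelines.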

Example 1: JsonEncoderTestCase

class JsonEncoderTestCase(unittest.TestCase):

    def setUp(self):
        self.encoder = ScrapyJSONEncoder()

    def test_encode_decode(self):
        dt = datetime.datetime(2010, 1, 2, 10, 11, 12)
        dts = "2010-01-02 10:11:12"
        d = datetime.date(2010, 1, 2)
        ds = "2010-01-02"
        t = datetime.time(10, 11, 12)
        ts = "10:11:12"
        dec = Decimal("1000.12")
        decs = "1000.12"

        for input, output in [('foo', 'foo'), (d, ds), (t, ts), (dt, dts),
                              (dec, decs), (['foo', d], ['foo', ds])]:
            self.assertEqual(self.encoder.encode(input), json.dumps(output))

    def test_encode_deferred(self):
        self.assertIn('Deferred', self.encoder.encode(defer.Deferred()))

    def test_encode_request(self):
        r = Request("http://www.example.com/lala")
        rs = self.encoder.encode(r)
        self.assertIn(r.method, rs)
        self.assertIn(r.url, rs)

    def test_encode_response(self):
        r = Response("http://www.example.com/lala")
        rs = self.encoder.encode(r)
        self.assertIn(r.url, rs)
        self.assertIn(str(r.status), rs)
Author: 01-, Project: scrapy, Lines: 33, Source: test_utils_serialize.py

Example 2: process_item

    def process_item(self, item, spider):
        url = "http://localhost:9200/articles/%s" % (item["publication"].lower())
        encoder = ScrapyJSONEncoder()
        json_body = encoder.encode(item)
        resp = requests.post(url, data=json_body)
        log.msg("Item added to elasticSearch node. Response: " + resp.text)

        return item
Author: cornjuliox, Project: powersearch_backend, Lines: 8, Source: pipelines.py

Example 3: DockerhubExtension

class DockerhubExtension(object):

    @classmethod
    def from_crawler(cls, crawler):
        return cls(crawler)

    def __init__(self, crawler):
        self.crawler = crawler
        self.job_path = crawler.settings.get('JOB_PATH')
        if not self.job_path:
            raise NotConfigured('no JOB_PATH set')

        self.json_encoder = ScrapyJSONEncoder()
        self.looping_call = LoopingCall(self.store_job_info)
        self.looping_call.start(5)
        crawler.signals.connect(self.store_job_info,
                                signal=signals.spider_closed)

    def store_job_info(self):
        with open(self.job_path, 'w') as f:
            stats = self.crawler.stats.get_stats()
            job_info = {
                'stats': stats
            }
            job_info_json = self.json_encoder.encode(job_info)
            f.write(job_info_json)
Author: Huskyeder, Project: scrapy-dockerhub, Lines: 26, Source: extension.py

Example 4: JsonItemExporter

class JsonItemExporter(BaseItemExporter):

    def __init__(self, file, **kwargs):
        self._configure(kwargs, dont_fail=True)
        self.file = file
        # there is a small difference between the behaviour of JsonItemExporter.indent
        # and ScrapyJSONEncoder.indent. ScrapyJSONEncoder.indent=None is needed to prevent
        # the addition of newlines everywhere
        json_indent = self.indent if self.indent is not None and self.indent > 0 else None
        kwargs.setdefault('indent', json_indent)
        kwargs.setdefault('ensure_ascii', not self.encoding)
        self.encoder = ScrapyJSONEncoder(**kwargs)
        self.first_item = True

    def _beautify_newline(self):
        if self.indent is not None:
            self.file.write(b'\n')

    def start_exporting(self):
        self.file.write(b"[")
        self._beautify_newline()

    def finish_exporting(self):
        self._beautify_newline()
        self.file.write(b"]")

    def export_item(self, item):
        if self.first_item:
            self.first_item = False
        else:
            self.file.write(b',')
            self._beautify_newline()
        itemdict = dict(self._get_serialized_fields(item))
        data = self.encoder.encode(itemdict)
        self.file.write(to_bytes(data, self.encoding))
Author: ArturGaspar, Project: scrapy, Lines: 35, Source: exporters.py

Example 5: HadoopExporter

class HadoopExporter(BaseItemExporter):
    def __init__(self, hadoop, **kwargs):
        #self.con = file_write.Connection()
        #self.con.connect(hadoop.ip, hadoop.port)
        self.encoder = ScrapyJSONEncoder(**kwargs)
        #self.seq = file_write.SeqFileSaver(self.con, '/common/crawler/%s/' % hadoop.username.replace(".", "/"),
        #                                   1, '%s' % hadoop.username.replace(".", "_"))
        self.encoding = 'utf-8'
        self.fields_to_export = None
        self.export_empty_fields = False
        self.writer = SeqWriter(os.path.join(Utils.settings['SEQFILE_DIR'], hadoop.username.replace(".", "/")),
                                hadoop.username.replace(".", "_"))

    def close_file(self):
        print "close"
        self.writer.close()
        #self.seq.set_is_end()
        #self.con.close()

    def start_exporting(self):
        pass

    def finish_exporting(self):
        pass

    def export_item(self, item):
        value = self.encoder.encode(dict(self._get_serialized_fields(item)))
        self.writer.writeData(
            item['key'] if 'key' in item else item['url'],
            value
        )
Author: xunyuw, Project: iFlyQA, Lines: 31, Source: base_exporter.py

Example 6: RabbitMQPipeline

class RabbitMQPipeline(object):
    """Pushes serialized item into a RabbitMQ list/queue"""

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.basic_publish(exchange='',
                                  routing_key=key,
                                  body=data)
        return item

    def item_key(self, item, spider):
        """Returns RabbitMQ key based on given spider"""
        return "%s:items" % spider.name
Author: openslack, Project: openslack-crawler, Lines: 30, Source: rabbitmq.py

Example 7: RedisPipeline

class RedisPipeline(object):
    """Pushes serialized item into a redis list/queue"""

    def __init__(self, host, port, queue_type):
        self.server = redis.Redis(host, port)
        self.encoder = ScrapyJSONEncoder()
        self.queue_type = queue_type

    @classmethod
    def from_settings(cls, settings):
        host = settings.get('REDIS_HOST', 'localhost')
        port = settings.get('REDIS_PORT', 6379)
        queue_type = settings.get('QUEUE_TYPE', 'FIFO')
        return cls(host, port, queue_type)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(dict(item))
        # push to the head of the list for LIFO queues, to the tail for FIFO
        if self.queue_type == 'LIFO':
            self.server.lpush(key, data)
        else:
            self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "%s:items" % spider.name
Author: darthbear, Project: scrapy-redis, Lines: 31, Source: pipelines.py

Example 8: __init__

    def __init__(self, host, port, db, queue_name, store_id):
        self.encoder = ScrapyJSONEncoder()
        self.store_id = store_id
        self.queue_name = queue_name

        self.server = redis.Redis(host, port, db)
        self.queue = rq.Queue(queue_name, connection=self.server)
Author: dboyliao, Project: scraper, Lines: 7, Source: items_rq.py

Example 9: RedisPipeline

class RedisPipeline(object):
    """Pushes serialized item into a scrapy_redis list/queue"""

    def __init__(self, host, port):
        self.server = redis.Redis(host, port)
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        host = settings.get("REDIS_HOST", "localhost")
        port = settings.get("REDIS_PORT", 6379)
        return cls(host, port)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(dict(item))
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns scrapy_redis key based on given spider"""
        return "%s:items" % spider.name
Author: echobfy, Project: weiboSearchCrawler, Lines: 25, Source: pipelines.py

Example 10: RedisPipeline

class RedisPipeline(object):

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        return "%s:items" % spider.name
Author: mezhou887, Project: scrapysystem, Lines: 26, Source: pipelines.py

Example 11: RedisStoragePipeline

class RedisStoragePipeline(object):
    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        data = self.encoder.encode(item)
        if isinstance(item, GubaPostListItem):
            key = self.item_key_list(item, spider)
        if isinstance(item, GubaPostDetailItem):
            key = self.item_key_detail(item, spider)
        self.server.rpush(key, data)

        return item

    def item_key_list(self, item, spider):
        stock_id = item['stock_id']
        return "%s:list_items" % stock_id

    def item_key_detail(self, item, spider):
        stock_id = item['stock_id']
        return "%s:detail_items" % stock_id
Author: jselabzxl, Project: scrapy_guba_redis, Lines: 34, Source: pipelines.py

Example 12: RedisPipeline

class RedisPipeline(object):
    """
    Pushes serialized item into a redis.
    Specific for SocialSpiders
    """

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.set(key, data.decode('utf-8'))
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "{}_{}".format(spider.name, item['search_name'])
Author: huokedu, Project: social_scraper, Lines: 31, Source: pipelines.py

Example 13: RedisPipeline

class RedisPipeline(object):
    """Pushes serialized item into a redis list/queue"""

    def __init__(self, server):
        self.server = server
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server = connection.from_settings(settings)
        return cls(server)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        return deferToThread(self._process_item, item, spider)

    def _process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.rpush(key, data)
        return item

    def item_key(self, item, spider):
        """Returns redis key based on given spider"""
        return "%s:items" % spider.name
Author: leveryd, Project: python-security, Lines: 28, Source: pipelines.py

Example 14: RabbitMQPipeline

class RabbitMQPipeline(object):
    """Pushes serialized item into a RabbitMQ list/queue"""

    def __init__(self, server, exchange_name):
        self.server = server
        self.exchange_name = exchange_name
        self.encoder = ScrapyJSONEncoder()

    @classmethod
    def from_settings(cls, settings):
        server, redis_server = connection.from_settings(settings)
        exchange_name = settings.get('RABBITMQ_EXCHANGE_NAME', EXCHANGE_NAME)
        return cls(server, exchange_name)

    @classmethod
    def from_crawler(cls, crawler):
        return cls.from_settings(crawler.settings)

    def process_item(self, item, spider):
        key = self.item_key(item, spider)
        data = self.encoder.encode(item)
        self.server.basic_publish(exchange=self.exchange_name,
                                  routing_key=key,
                                  body=data)
        return item

    def item_key(self, item, spider):
        """Returns RabbitMQ key based on given spider"""
        return "%s:items" % spider.name
Author: rdcprojects, Project: scrapy-mq-redis, Lines: 29, Source: pipelines.py

Example 15: __init__

    def __init__(self, recipients, mail, compressor, crawler):
        self.recipients = recipients
        self.mail = mail
        self.encoder = ScrapyJSONEncoder(crawler=crawler)
        self.files = defaultdict(compressor)

        self.num_items = 0
        self.num_errors = 0
Author: JayveeHe, Project: spider_senz, Lines: 8, Source: statusmailer.py


Note: The scrapy.utils.serialize.ScrapyJSONEncoder class examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub/MSDocs. The code snippets were selected from open-source projects contributed by various developers, and copyright of the source code remains with the original authors. Please refer to each project's License for distribution and use; do not reproduce without permission.