

Python KafkaUtils.createDirectStream Method Code Examples

This article collects typical usage examples of the Python method pyspark.streaming.kafka.KafkaUtils.createDirectStream. If you are wondering what KafkaUtils.createDirectStream does in Python, how to call it, or where to find worked examples, the curated code samples below should help. You can also explore further usage examples of pyspark.streaming.kafka.KafkaUtils, the class this method belongs to.


The following presents 7 code examples of KafkaUtils.createDirectStream, ordered by popularity.
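For orientation before the examples, here is a minimal end-to-end sketch of the API. The topic name, broker address, and batch interval are illustrative assumptions, not values taken from the examples below; createDirectStream returns a DStream of (key, value) pairs, decoded as UTF-8 strings by default.

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

sc = SparkContext(appName="DirectStreamSketch")
ssc = StreamingContext(sc, 2)  # 2-second batch interval (illustrative)

# One input DStream that reads the topic directly from the brokers,
# without a receiver; Spark tracks the consumed offsets itself.
stream = KafkaUtils.createDirectStream(
    ssc, ["my-topic"],                           # hypothetical topic
    {"metadata.broker.list": "localhost:9092"})  # hypothetical broker

stream.map(lambda kv: kv[1]).count().pprint()    # print per-batch message counts

ssc.start()
ssc.awaitTermination()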

Example 1: test_kafka_direct_stream_foreach_get_offsetRanges

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_foreach_get_offsetRanges(self):
    """Test the Python direct Kafka stream foreachRDD get offsetRanges."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

    offsetRanges = []

    def getOffsetRanges(_, rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)

    stream.foreachRDD(getOffsetRanges)
    self.ssc.start()
    self.wait_for(offsetRanges, 1)

    self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 25, Source: tests.py
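A common follow-on to this pattern (not part of the test itself) is to persist each batch's final offsets to external storage, so a restarted job can resume from them via the fromOffsets argument shown in Example 3. A minimal sketch, where store_offsets and load_offsets are hypothetical persistence helpers and ssc, topic, and kafkaParams are as in the orientation sketch above:

from pyspark.streaming.kafka import KafkaUtils, TopicAndPartition

stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)

def track_offsets(rdd):
    # Each OffsetRange carries topic, partition, fromOffset, and untilOffset.
    for o in rdd.offsetRanges():
        store_offsets(o.topic, o.partition, o.untilOffset)  # hypothetical helper

stream.foreachRDD(track_offsets)

# On a later restart, rebuild the starting positions from storage and pass
# them as fromOffsets (see Example 3):
# fromOffsets = {TopicAndPartition(t, p): off for (t, p, off) in load_offsets()}
# stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams, fromOffsets)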

Example 2: test_kafka_direct_stream

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream(self):
    """Test the Python direct Kafka stream API."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
    self._validateStreamResult(sendData, stream)
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 14, Source: tests.py

Example 3: test_kafka_direct_stream_from_offset

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_from_offset(self):
    """Test the Python direct Kafka stream API with start offset specified."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    fromOffsets = {TopicAndPartition(topic, 0): long(0)}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams, fromOffsets)
    self._validateStreamResult(sendData, stream)
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 14, Source: tests.py

Example 4: test_kafka_direct_stream_transform_get_offsetRanges

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_get_offsetRanges(self):
    """Test the Python direct Kafka stream transform get offsetRanges."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

    offsetRanges = []

    def transformWithOffsetRanges(rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)
        return rdd

    # Test whether it is OK to mix KafkaTransformedDStream and TransformedDStream
    # together; only the TransformedDStreams can be folded together.
    stream.transform(transformWithOffsetRanges).map(lambda kv: kv[1]).count().pprint()
    self.ssc.start()
    self.wait_for(offsetRanges, 1)

    self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 28, Source: tests.py

Example 5: main

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def main():
    """Run Spark Streaming"""
    conf = SparkConf()
    sc = SparkContext(appName='Ozymandias', conf=conf)
    sc.setLogLevel('WARN')
    
    with open(ROOT + 'channels.json', 'r') as f:
        channels = json.load(f)
    topics = [t['topic'] for t in channels['channels']]
    
    n_secs = 0.5
    ssc = StreamingContext(sc, n_secs)
    stream = KafkaUtils.createDirectStream(ssc, topics, {
                        'bootstrap.servers':'localhost:9092', 
                        'group.id':'ozy-group', 
                        'fetch.message.max.bytes':'15728640',
                        'auto.offset.reset':'largest'})
    
    stream.map(
            deserializer
        ).map(
            image_detector
        ).foreachRDD(
            message_sender)
    
    ssc.start()
    ssc.awaitTermination() 
Author: pambot, Project: ozymandias, Lines: 29, Source: ozy_streaming.py

Example 6: create_context

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def create_context():
    spark = get_session(SPARK_CONF)
    ssc = StreamingContext(spark.sparkContext, BATCH_DURATION)
    ssc.checkpoint(CHECKPOINT)
    # Start offsets from the beginning of each topic
    # (has no effect when recovering from an existing checkpoint)
    offsets = {TopicAndPartition(topic, 0): 0 for topic in TOPICS}
    stream = KafkaUtils.createDirectStream(ssc, TOPICS, KAFKA_PARAMS, offsets)
    main(stream)
    return ssc 
Author: ksindi, Project: kafka-compose, Lines: 12, Source: process.py
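As a usage note (an assumption based on the checkpointing pattern, mirrored by Example 7 below): a factory like create_context is normally passed to StreamingContext.getOrCreate, so the hard-coded starting offsets apply only on a cold start:

# On the first run this calls create_context(); on restart it recovers
# the streaming graph and Kafka offsets from CHECKPOINT instead.
ssc = StreamingContext.getOrCreate(CHECKPOINT, create_context)
ssc.start()
ssc.awaitTermination()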

Example 7: test_kafka_direct_stream_transform_with_checkpoint

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_with_checkpoint(self):
    """Test the Python direct Kafka stream transform with checkpoint correctly recovered."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    offsetRanges = []

    def transformWithOffsetRanges(rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)
        return rdd

    self.ssc.stop(False)
    self.ssc = None
    tmpdir = "checkpoint-test-%d" % random.randint(0, 10000)

    def setup():
        ssc = StreamingContext(self.sc, 0.5)
        ssc.checkpoint(tmpdir)
        stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)
        stream.transform(transformWithOffsetRanges).count().pprint()
        return ssc

    try:
        ssc1 = StreamingContext.getOrCreate(tmpdir, setup)
        ssc1.start()
        self.wait_for(offsetRanges, 1)
        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])

        # To make sure some checkpoint is written
        time.sleep(3)
        ssc1.stop(False)
        ssc1 = None

        # Restart again to make sure the checkpoint is recovered correctly
        ssc2 = StreamingContext.getOrCreate(tmpdir, setup)
        ssc2.start()
        ssc2.awaitTermination(3)
        ssc2.stop(stopSparkContext=False, stopGraceFully=True)
        ssc2 = None
    finally:
        shutil.rmtree(tmpdir)
Author: runawayhorse001, Project: LearningApacheSpark, Lines: 49, Source: tests.py


Note: The pyspark.streaming.kafka.KafkaUtils.createDirectStream examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets are drawn from open-source projects contributed by their authors; copyright remains with the original authors, and any use or redistribution should follow each project's license. Please do not republish without permission.