

Python KafkaUtils.createDirectStream Method Code Examples

This article collects typical usage examples of the Python method pyspark.streaming.kafka.KafkaUtils.createDirectStream. If you are wondering what exactly KafkaUtils.createDirectStream does, how to call it, or what working examples of it look like, the curated code examples below may help. You can also explore further usage examples of pyspark.streaming.kafka.KafkaUtils, the class this method belongs to.


A total of 7 code examples of the KafkaUtils.createDirectStream method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
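Before the examples, here is a minimal, self-contained sketch of the typical call pattern. The broker address localhost:9092 and the topic name "test" are illustrative assumptions, not taken from the examples below:

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

sc = SparkContext(appName="DirectStreamSketch")
ssc = StreamingContext(sc, 2)  # 2-second batch interval

# The direct (receiver-less) approach connects straight to the Kafka brokers,
# so kafkaParams carries the broker list rather than a ZooKeeper quorum.
stream = KafkaUtils.createDirectStream(
    ssc, ["test"], {"metadata.broker.list": "localhost:9092"})

# Each record arrives as a (key, value) pair; keep only the message values.
stream.map(lambda kv: kv[1]).pprint()

ssc.start()
ssc.awaitTermination()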

Example 1: test_kafka_direct_stream_foreach_get_offsetRanges

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_foreach_get_offsetRanges(self):
        """Test the Python direct Kafka stream foreachRDD get offsetRanges."""
        topic = self._randomTopic()
        sendData = {"a": 1, "b": 2, "c": 3}
        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                       "auto.offset.reset": "smallest"}

        self._kafkaTestUtils.createTopic(topic)
        self._kafkaTestUtils.sendMessages(topic, sendData)

        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

        offsetRanges = []

        def getOffsetRanges(_, rdd):
            for o in rdd.offsetRanges():
                offsetRanges.append(o)

        stream.foreachRDD(getOffsetRanges)
        self.ssc.start()
        self.wait_for(offsetRanges, 1)

        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))]) 
Developer ID: runawayhorse001, Project: LearningApacheSpark, Lines: 25, Source: tests.py

Example 2: test_kafka_direct_stream

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream(self):
        """Test the Python direct Kafka stream API."""
        topic = self._randomTopic()
        sendData = {"a": 1, "b": 2, "c": 3}
        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                       "auto.offset.reset": "smallest"}

        self._kafkaTestUtils.createTopic(topic)
        self._kafkaTestUtils.sendMessages(topic, sendData)

        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
        self._validateStreamResult(sendData, stream) 
Developer ID: runawayhorse001, Project: LearningApacheSpark, Lines: 14, Source: tests.py

Example 3: test_kafka_direct_stream_from_offset

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_from_offset(self):
        """Test the Python direct Kafka stream API with start offset specified."""
        topic = self._randomTopic()
        sendData = {"a": 1, "b": 2, "c": 3}
        fromOffsets = {TopicAndPartition(topic, 0): long(0)}
        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}

        self._kafkaTestUtils.createTopic(topic)
        self._kafkaTestUtils.sendMessages(topic, sendData)

        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams, fromOffsets)
        self._validateStreamResult(sendData, stream) 
Developer ID: runawayhorse001, Project: LearningApacheSpark, Lines: 14, Source: tests.py

Example 4: test_kafka_direct_stream_transform_get_offsetRanges

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_get_offsetRanges(self):
        """Test the Python direct Kafka stream transform get offsetRanges."""
        topic = self._randomTopic()
        sendData = {"a": 1, "b": 2, "c": 3}
        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                       "auto.offset.reset": "smallest"}

        self._kafkaTestUtils.createTopic(topic)
        self._kafkaTestUtils.sendMessages(topic, sendData)

        stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

        offsetRanges = []

        def transformWithOffsetRanges(rdd):
            for o in rdd.offsetRanges():
                offsetRanges.append(o)
            return rdd

        # Test whether it is OK to mix KafkaTransformedDStream and TransformedDStream together;
        # only the TransformedDStreams can be folded together.
        stream.transform(transformWithOffsetRanges).map(lambda kv: kv[1]).count().pprint()
        self.ssc.start()
        self.wait_for(offsetRanges, 1)

        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))]) 
Developer ID: runawayhorse001, Project: LearningApacheSpark, Lines: 28, Source: tests.py

Example 5: main

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def main():
    """Run Spark Streaming"""
    conf = SparkConf()
    sc = SparkContext(appName='Ozymandias', conf=conf)
    sc.setLogLevel('WARN')
    
    with open(ROOT + 'channels.json', 'r') as f:
        channels = json.load(f)
    topics = [t['topic'] for t in channels['channels']]
    
    n_secs = 0.5
    ssc = StreamingContext(sc, n_secs)
    stream = KafkaUtils.createDirectStream(ssc, topics, {
                        'bootstrap.servers':'localhost:9092', 
                        'group.id':'ozy-group', 
                        'fetch.message.max.bytes':'15728640',
                        'auto.offset.reset':'largest'})
    
    stream.map(
            deserializer
        ).map(
            image_detector
        ).foreachRDD(
            message_sender)
    
    ssc.start()
    ssc.awaitTermination() 
Developer ID: pambot, Project: ozymandias, Lines: 29, Source: ozy_streaming.py

Example 6: create_context

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def create_context():
    spark = get_session(SPARK_CONF)
    ssc = StreamingContext(spark.sparkContext, BATCH_DURATION)
    ssc.checkpoint(CHECKPOINT)
    # start offsets from beginning
    # won't work if we have a checkpoint
    offsets = {TopicAndPartition(topic, 0): 0 for topic in TOPICS}
    stream = KafkaUtils.createDirectStream(ssc, TOPICS, KAFKA_PARAMS, offsets)
    main(stream)
    return ssc 
Developer ID: ksindi, Project: kafka-compose, Lines: 12, Source: process.py

Example 7: test_kafka_direct_stream_transform_with_checkpoint

# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_with_checkpoint(self):
        """Test the Python direct Kafka stream transform with checkpoint correctly recovered."""
        topic = self._randomTopic()
        sendData = {"a": 1, "b": 2, "c": 3}
        kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                       "auto.offset.reset": "smallest"}

        self._kafkaTestUtils.createTopic(topic)
        self._kafkaTestUtils.sendMessages(topic, sendData)

        offsetRanges = []

        def transformWithOffsetRanges(rdd):
            for o in rdd.offsetRanges():
                offsetRanges.append(o)
            return rdd

        self.ssc.stop(False)
        self.ssc = None
        tmpdir = "checkpoint-test-%d" % random.randint(0, 10000)

        def setup():
            ssc = StreamingContext(self.sc, 0.5)
            ssc.checkpoint(tmpdir)
            stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)
            stream.transform(transformWithOffsetRanges).count().pprint()
            return ssc

        try:
            ssc1 = StreamingContext.getOrCreate(tmpdir, setup)
            ssc1.start()
            self.wait_for(offsetRanges, 1)
            self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])

            # To make sure some checkpoint is written
            time.sleep(3)
            ssc1.stop(False)
            ssc1 = None

            # Restart again to make sure the checkpoint is recovered correctly
            ssc2 = StreamingContext.getOrCreate(tmpdir, setup)
            ssc2.start()
            ssc2.awaitTermination(3)
            ssc2.stop(stopSparkContext=False, stopGraceFully=True)
            ssc2 = None
        finally:
            shutil.rmtree(tmpdir) 
Developer ID: runawayhorse001, Project: LearningApacheSpark, Lines: 49, Source: tests.py


Note: The pyspark.streaming.kafka.KafkaUtils.createDirectStream method examples in this article were compiled by 純淨天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. For distribution and use, please refer to the license of the corresponding project; do not reproduce without permission.