This article collects typical usage examples of the Python method pyspark.streaming.kafka.KafkaUtils.createDirectStream. If you are wondering what exactly KafkaUtils.createDirectStream does or how to use it, the curated examples below should help. You can also read further about the class it belongs to, pyspark.streaming.kafka.KafkaUtils.
The following presents 7 code examples of KafkaUtils.createDirectStream, sorted by popularity by default.
Example 1: test_kafka_direct_stream_foreach_get_offsetRanges
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_foreach_get_offsetRanges(self):
    """Test the Python direct Kafka stream foreachRDD get offsetRanges."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

    offsetRanges = []

    def getOffsetRanges(_, rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)

    stream.foreachRDD(getOffsetRanges)
    self.ssc.start()
    self.wait_for(offsetRanges, 1)
    self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
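Outside a test harness, the same foreachRDD pattern is how an application would typically record how far it has consumed. The sketch below illustrates the idea; save_offsets() is a hypothetical stand-in for whatever external store you commit progress to, and stream is assumed to be a DStream from KafkaUtils.createDirectStream as above.

# Hypothetical offset sink: replace the print with a write to ZooKeeper,
# a database, or wherever consumer progress is tracked.
def save_offsets(topic, partition, until_offset):
    print("processed %s[%d] up to offset %d" % (topic, partition, until_offset))

def process_and_commit(_, rdd):
    # offsetRanges() is only defined on the KafkaRDD itself, so read it
    # here before applying further transformations.
    for o in rdd.offsetRanges():
        save_offsets(o.topic, o.partition, o.untilOffset)

stream.foreachRDD(process_and_commit)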
Example 2: test_kafka_direct_stream
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream(self):
    """Test the Python direct Kafka stream API."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)
    self._validateStreamResult(sendData, stream)
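For reference, this is roughly what the same API looks like in a standalone word-count job rather than a test; the broker address and topic name below are placeholders, not values taken from the examples here.

from pyspark import SparkContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils

sc = SparkContext(appName="DirectStreamSketch")
ssc = StreamingContext(sc, 2)  # 2-second batches

# Each record arrives as a (key, value) pair; count words in the values.
stream = KafkaUtils.createDirectStream(
    ssc, ["my-topic"], {"metadata.broker.list": "localhost:9092"})
counts = stream.map(lambda kv: kv[1]) \
    .flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b)
counts.pprint()

ssc.start()
ssc.awaitTermination()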
Example 3: test_kafka_direct_stream_from_offset
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_from_offset(self):
    """Test the Python direct Kafka stream API with start offset specified."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    fromOffsets = {TopicAndPartition(topic, 0): long(0)}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress()}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams, fromOffsets)
    self._validateStreamResult(sendData, stream)
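The fromOffsets argument maps each TopicAndPartition to the offset at which to start consuming. In a real job those numbers would normally be loaded from wherever they were last saved; the sketch below simply assumes a known topic name and partition count.

from pyspark.streaming.kafka import KafkaUtils, TopicAndPartition

# Hypothetical: start every partition of "my-topic" at offset 0.
# (long() is Python 2; on Python 3 a plain int is fine.)
numPartitions = 3
fromOffsets = {TopicAndPartition("my-topic", p): 0 for p in range(numPartitions)}

stream = KafkaUtils.createDirectStream(
    ssc, ["my-topic"],
    {"metadata.broker.list": "localhost:9092"},
    fromOffsets=fromOffsets)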
Example 4: test_kafka_direct_stream_transform_get_offsetRanges
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_get_offsetRanges(self):
    """Test the Python direct Kafka stream transform get offsetRanges."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    stream = KafkaUtils.createDirectStream(self.ssc, [topic], kafkaParams)

    offsetRanges = []

    def transformWithOffsetRanges(rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)
        return rdd

    # Test whether it is OK to mix KafkaTransformedDStream and TransformedDStream;
    # only the TransformedDStreams can be folded together.
    stream.transform(transformWithOffsetRanges).map(lambda kv: kv[1]).count().pprint()
    self.ssc.start()
    self.wait_for(offsetRanges, 1)
    self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])
Example 5: main
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def main():
    """Run Spark Streaming"""
    conf = SparkConf()
    sc = SparkContext(appName='Ozymandias', conf=conf)
    sc.setLogLevel('WARN')

    with open(ROOT + 'channels.json', 'r') as f:
        channels = json.load(f)
    topics = [t['topic'] for t in channels['channels']]

    n_secs = 0.5
    ssc = StreamingContext(sc, n_secs)

    stream = KafkaUtils.createDirectStream(ssc, topics, {
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'ozy-group',
        'fetch.message.max.bytes': '15728640',
        'auto.offset.reset': 'largest'})

    stream.map(deserializer) \
          .map(image_detector) \
          .foreachRDD(message_sender)

    ssc.start()
    ssc.awaitTermination()
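deserializer, image_detector, and message_sender are helpers defined elsewhere in that project and are not shown here. Purely as an illustrative assumption about the payload format, a JSON deserializer for such a pipeline could look like the sketch below; the project's real implementation may differ.

import json

# Illustrative only: assumes the Kafka message value is UTF-8 JSON text.
def deserializer(kv):
    _, value = kv  # direct-stream records are (key, value) pairs
    return json.loads(value)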
Example 6: create_context
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def create_context():
    spark = get_session(SPARK_CONF)
    ssc = StreamingContext(spark.sparkContext, BATCH_DURATION)
    ssc.checkpoint(CHECKPOINT)

    # Start offsets from the beginning; note this has no effect
    # when the context is recovered from an existing checkpoint.
    offsets = {TopicAndPartition(topic, 0): 0 for topic in TOPICS}

    stream = KafkaUtils.createDirectStream(ssc, TOPICS, KAFKA_PARAMS, offsets)
    main(stream)
    return ssc
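A factory like create_context is meant to be passed to StreamingContext.getOrCreate, which only calls it when no checkpoint exists yet; on restart the checkpoint takes precedence, which is why the hard-coded start offsets above are ignored after recovery. A minimal driver sketch, reusing CHECKPOINT and create_context from this example:

from pyspark.streaming import StreamingContext

ssc = StreamingContext.getOrCreate(CHECKPOINT, create_context)
ssc.start()
ssc.awaitTermination()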
Example 7: test_kafka_direct_stream_transform_with_checkpoint
# Required import: from pyspark.streaming.kafka import KafkaUtils [as alias]
# Or: from pyspark.streaming.kafka.KafkaUtils import createDirectStream [as alias]
def test_kafka_direct_stream_transform_with_checkpoint(self):
    """Test the Python direct Kafka stream transform with checkpoint correctly recovered."""
    topic = self._randomTopic()
    sendData = {"a": 1, "b": 2, "c": 3}
    kafkaParams = {"metadata.broker.list": self._kafkaTestUtils.brokerAddress(),
                   "auto.offset.reset": "smallest"}

    self._kafkaTestUtils.createTopic(topic)
    self._kafkaTestUtils.sendMessages(topic, sendData)

    offsetRanges = []

    def transformWithOffsetRanges(rdd):
        for o in rdd.offsetRanges():
            offsetRanges.append(o)
        return rdd

    self.ssc.stop(False)
    self.ssc = None
    tmpdir = "checkpoint-test-%d" % random.randint(0, 10000)

    def setup():
        ssc = StreamingContext(self.sc, 0.5)
        ssc.checkpoint(tmpdir)
        stream = KafkaUtils.createDirectStream(ssc, [topic], kafkaParams)
        stream.transform(transformWithOffsetRanges).count().pprint()
        return ssc

    try:
        ssc1 = StreamingContext.getOrCreate(tmpdir, setup)
        ssc1.start()
        self.wait_for(offsetRanges, 1)
        self.assertEqual(offsetRanges, [OffsetRange(topic, 0, long(0), long(6))])

        # To make sure some checkpoint is written
        time.sleep(3)
        ssc1.stop(False)
        ssc1 = None

        # Restart again to make sure the checkpoint is recovered correctly
        ssc2 = StreamingContext.getOrCreate(tmpdir, setup)
        ssc2.start()
        ssc2.awaitTermination(3)
        ssc2.stop(stopSparkContext=False, stopGraceFully=True)
        ssc2 = None
    finally:
        shutil.rmtree(tmpdir)