

Python StreamingContext.awaitTermination Method Code Examples

This article collects typical usage examples of the Python method pyspark.streaming.StreamingContext.awaitTermination. If you have been wondering what StreamingContext.awaitTermination does, how to call it, or what real-world usage looks like, the curated code examples below should help. You can also explore further usage examples of the containing class, pyspark.streaming.StreamingContext.


The following shows 15 code examples of StreamingContext.awaitTermination, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
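Before the examples, here is a minimal sketch of the pattern they all share: build a StreamingContext, attach a DStream, start the computation, and block on awaitTermination (optionally with a timeout in seconds). The app name, host, port, and batch interval below are illustrative placeholders, not taken from any particular example.

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="AwaitTerminationSketch")  # placeholder app name
ssc = StreamingContext(sc, 1)  # 1-second batch interval

# Any DStream source works; a socket text stream is used here as a placeholder.
lines = ssc.socketTextStream("localhost", 9999)
lines.pprint()

ssc.start()              # start the streaming computation
ssc.awaitTermination()   # block until ssc.stop() is called or the job fails
# awaitTermination(60) would instead return after about 60 seconds if the
# context has not terminated by then.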

Example 1: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    sym_dict = {}
    conf = SparkConf().setAppName("symbol stream")
    sc = SparkContext(conf=conf)
    ssc = StreamingContext(sc, .1)

    lines = ssc.socketTextStream("localhost", 1337)

    def print_now():
        print(sym_dict)

    def predict(prices):
        print(prices)

    def add_to_dict(line):
        # each line is "symbol,price,volume"
        symbol, price, volume = line.split(',')
        if symbol in sym_dict:
            print('made it here')
            sym_dict[symbol][0].append(price)
            sym_dict[symbol][1].append(volume)
            # keep only the 10 most recent prices/volumes for this symbol
            if len(sym_dict[symbol][0]) > 10:
                sym_dict[symbol][0].pop(0)
                sym_dict[symbol][1].pop(0)
                predict(sym_dict[symbol][0])
        else:
            sym_dict[symbol] = [[price], [volume]]

    #test = lines.map(lambda line: json.dumps(line))
    test = lines.map(lambda line: line)
    test.pprint()
    ssc.start()
    ssc.awaitTermination()
Developer ID: redame, Project: quote_streaming, Lines of code: 35, Source file: spark_stream.py

Example 2: start

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def start():
    sconf = SparkConf()
    sconf.set('spark.cores.max', 2)
    sc = SparkContext(appName='KafkaDirectWordCount', conf=sconf)
    ssc = StreamingContext(sc, 2)

    brokers = "192.192.0.27:9092"
    topics = ['topic7']

    kafkaStreams_lines = KafkaUtils.createDirectStream(ssc, topics, kafkaParams={"metadata.broker.list": brokers})

    lines1 = kafkaStreams_lines.map(lambda x: x[1])  # note: the second element of the tuple is the received Kafka message value

    words = lines1.flatMap(lambda line: line.split(" "))

    pairs = words.map(lambda word: (word, 1))

    wordcounts = pairs.reduceByKey(lambda x, y: x + y)

    wordcounts.saveAsTextFiles("/var/lib/hadoop-hdfs/spark-libin/kafka")

    wordcounts.pprint()
    # tally the distribution of the generated random numbers
    ssc.start()  # Start the computation
    ssc.awaitTermination()  # Wait for the computation to terminate
Developer ID: blair1, Project: hadoop-spark, Lines of code: 27, Source file: kafka_streaming_direct.py

Example 3: start_spark

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def start_spark(timeout=None, max_items_per_rdd_sent=None):
    sc = SparkContext("local[4]", "twitter.trending")
    ssc = StreamingContext(sc, 5)

    ssc.checkpoint('hdfs://localhost:9000/user/spark/checkpoint/')

    kafka_params = {
        'zookeeper.connect': config.get('zookeeper', 'host'),
        'group.id': config.get('kafka', 'group_id'),
        'metadata.broker.list': config.get('kafka', 'hosts')
    }

    ksc = KafkaUtils.createDirectStream(ssc,
                                        [config.get('kafka', 'topic')],
                                        kafka_params)

    hashtag_counts = get_word_counts(ksc)
    filtered_tweet_count = filter_tweets(hashtag_counts)
    send_dstream_data(filtered_tweet_count, max_items_per_rdd_sent)
    ssc.start()
    if timeout:
        ssc.awaitTermination(timeout)
        ssc.stop(stopSparkContext=True, stopGraceFully=True)
    else:
        ssc.awaitTermination()
Developer ID: joychugh, Project: learning-kafka, Lines of code: 27, Source file: spark_example.py
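Example 3 uses the timeout form: awaitTermination(timeout) returns once the given number of seconds has elapsed, after which the caller stops the context explicitly. A closely related alternative, sketched below under the assumption of a PySpark version that provides StreamingContext.awaitTerminationOrTimeout, reports whether the context actually terminated; the 60-second value is only a placeholder.

ssc.start()
# Block for at most 60 seconds; returns True if the context terminated,
# False if the waiting time elapsed first.
terminated = ssc.awaitTerminationOrTimeout(60)
if not terminated:
    # Timed out: shut down gracefully, letting in-flight batches finish.
    ssc.stop(stopSparkContext=True, stopGraceFully=True)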

Example 4: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    parser = argparse.ArgumentParser(
        description='process some log messages, storing them and signaling '
                    'a rest server')
    parser.add_argument('--mongo', help='the mongodb url',
                        required=True)
    parser.add_argument('--rest', help='the rest endpoint to signal',
                        required=True)
    parser.add_argument('--port', help='the port to receive from '
                        '(default: 1984)',
                        default=1984, type=int)
    parser.add_argument('--appname', help='the name of the spark application '
                        '(default: SparkharaLogCounter)',
                        default='SparkharaLogCounter')
    parser.add_argument('--master',
                        help='the master url for the spark cluster')
    parser.add_argument('--socket',
                        help='the socket to attach for streaming text data '
                        '(default: caravan-pathfinder)',
                        default='caravan-pathfinder')
    args = parser.parse_args()
    mongo_url = args.mongo
    rest_url = args.rest

    sconf = SparkConf().setAppName(args.appname)
    if args.master:
        sconf.setMaster(args.master)
    sc = SparkContext(conf=sconf)
    ssc = StreamingContext(sc, 1)

    lines = ssc.socketTextStream(args.socket, args.port)
    lines.foreachRDD(lambda rdd: process_generic(rdd, mongo_url, rest_url))

    ssc.start()
    ssc.awaitTermination()
Developer ID: mattf, Project: sparkhara-sources, Lines of code: 37, Source file: caravan_master.py

Example 5: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    if len(sys.argv) != 4:
        print("Usage: kafka_wordcount.py <zk> <topic> <timeout>",
              file=sys.stderr)
        exit(-1)

    sc = SparkContext(appName="PythonStreamingKafkaWordCount")
    ssc = StreamingContext(sc, 1)
    timeout = None
    if len(sys.argv) == 4:
        zk, topic, timeout = sys.argv[1:]
        timeout = int(timeout)
    else:
        zk, topic = sys.argv[1:]
    kvs = KafkaUtils.createStream(
        ssc, zk, "spark-streaming-consumer", {topic: 1})
    lines = kvs.map(lambda x: x[1])
    counts = lines.flatMap(lambda line: line.split(" ")) \
        .map(lambda word: (word, 1)) \
        .reduceByKey(lambda a, b: a + b)
    counts.pprint()
    kwargs = {}
    if timeout:
        kwargs['timeout'] = timeout
    ssc.start()
    ssc.awaitTermination(**kwargs)
Developer ID: butterfy76, Project: sahara, Lines of code: 28, Source file: spark-kafka-example.py

Example 6: start

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def start():
    sconf = SparkConf()
    sconf.set('spark.cores.max', 2)
    sc = SparkContext(appName='KafkaDirectWordCount', conf=sconf)
    ssc = StreamingContext(sc, 2)

    brokers = "localhost:9092"
    topics = ['test']

    kafkaStreams_lines = KafkaUtils.createDirectStream(ssc, topics, kafkaParams={"metadata.broker.list": brokers})

    lines1 = kafkaStreams_lines.map(lambda x: x[1])  # note: the second element of the tuple is the received Kafka message value

    words = lines1.flatMap(lambda line: line.split(" "))

    pairs = words.map(lambda word: (word, 1))

    wordcounts = pairs.reduceByKey(lambda x, y: x + y)

    print(wordcounts)

    kafkaStreams_lines.transform(storeOffsetRanges).foreachRDD(printOffsetRanges)

    wordcounts.pprint()
    # tally the distribution of the generated random numbers
    ssc.start()  # Start the computation
    ssc.awaitTermination()  # Wait for the computation to terminate
Developer ID: blair1, Project: hadoop-spark, Lines of code: 29, Source file: kafka-direct.py

Example 7: bro_parse

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def bro_parse(zk,topic,db,db_table,num_of_workers):
    
    app_name = "ONI-INGEST-{0}".format(topic)
    wrks = int(num_of_workers)

    # create spark context
    sc = SparkContext(appName=app_name)
    ssc = StreamingContext(sc,1)
    sqc = HiveContext(sc)

    # create DStream for each topic partition.
    topic_dstreams = [ KafkaUtils.createStream(ssc, zk, app_name, {topic: 1}, keyDecoder=oni_decoder, valueDecoder=oni_decoder) for _ in range (wrks)  ] 
    tp_stream = ssc.union(*topic_dstreams)

    # Parallelism in Data Processing
    #processingDStream = tp_stream(wrks)

    # parse the RDD content.
    proxy_logs = tp_stream.map(lambda x: proxy_parser(x[1]))

    # save RDD into hive .
    proxy_logs.foreachRDD(lambda x: save_to_hive(x,sqc,db,db_table,topic))

    ssc.start()
    ssc.awaitTermination()
Developer ID: Open-Network-Insight, Project: oni-ingest, Lines of code: 27, Source file: bro_parser.py

Example 8: ss_direct_kafka_bucket_counter

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def ss_direct_kafka_bucket_counter(brokers, topic, bucket_interval, output_msg, message_parse, valueDecoder=None):
    """Starts a Spark Streaming job from a Kafka input and parses message time

	WARNING!! This function only works for spark 1.4.0+ 

	Args:
		brokers: the kafka broker that we look at for the topic
		topic: the kafka topic for input
		timeinterval: the time interval in seconds (int) that the job will 
			bucket

	Returns:
		None
		
	"""
    sc = SparkContext(appName="PythonKafkaBucketCounter")
    ssc = StreamingContext(sc, timeinterval + 5)

    if valueDecoder:
        kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers}, valueDecoder=valueDecoder)
    else:
        kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})

    lines = kvs.map(lambda x: x[1])
    interval_counts = lines.map(lambda line: (message_parse(line), 1)).reduceByKey(lambda a, b: a + b)

    output_msg_func = output_msg(sc, ssc)

    interval_counts.foreachRDD(output_msg_func)

    ssc.start()
    ssc.awaitTermination()
Developer ID: kelvinfann, Project: spark-streaming-kafka-bucket-counter, Lines of code: 34, Source file: spark-streaming-kafka-bucket-counter.py

Example 9: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    sc = SparkContext(appName="IntrusionDetector")
    ssc = StreamingContext(sc, batch_durations)

    kvs = KafkaUtils.createDirectStream(ssc, [input_topic], {"metadata.broker.list": broker})
    kvs.foreachRDD(processRDD)
    ssc.start()
    ssc.awaitTermination()
Developer ID: dfeldman, Project: intrusion-detector, Lines of code: 10, Source file: processor.py

Example 10: kafka_spark_streaming_sql_main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def kafka_spark_streaming_sql_main(app_name, brokers, topic, interval_seconds, sql_function):
    sc = SparkContext(appName=app_name)
    sqlContext = SQLContext(sc)
    # ssc = StreamingContext(sc, interval_seconds)
    ssc = StreamingContext(sc, 10)
    kvs = KafkaUtils.createDirectStream(ssc, [topic], {"metadata.broker.list": brokers})
    kvs.foreachRDD(sql_function)
    ssc.start()
    ssc.awaitTermination()
Developer ID: clearclouds-spark, Project: spark-sql-py, Lines of code: 11, Source file: http_util.py

Example 11: read_tweets

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def read_tweets():

    sc = SparkContext(appName="sentimentProducer")
    ssc = StreamingContext(sc, 600)  # test 60 seconds
    brokers = "localhost:9092"
    kvs = KafkaUtils.createDirectStream(ssc, ["test"], {"metadata.broker.list": brokers})
    kvs.foreachRDD(create_format)
    producer.flush()
    ssc.start()
    ssc.awaitTermination()
Developer ID: lrsolorzano, Project: BigDataProject3, Lines of code: 12, Source file: sentimentProducer.py

Example 12: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    conf = SparkConf().setAppName("kafka_source_mongo_sink_pymongo_filtered")
    sc = SparkContext(conf=conf)
    ssc = StreamingContext(sc, 1)
    try:
        kafka_streams = KafkaUtils.createStream(ssc, "localhost:2181", "spark-streaming-consumer", {"splash_json": 2})
        kafka_streams.foreachRDD(process_rdd)
    except Exception as e:
        print(e)
    ssc.start()
    ssc.awaitTermination()
Developer ID: petergdoyle, Project: StreamWorks, Lines of code: 13, Source file: kafka_source_mongo_sink_pymongo_filtered.py

Example 13: invoke

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def invoke():
    # object to keep track of offsets
    ConfigInitializer.basic_config()

    # app name
    application_name = "mon_metrics_kafka"

    my_spark_conf = SparkConf().setAppName(application_name)

    spark_context = SparkContext(conf=my_spark_conf)

    # read at the configured interval
    spark_streaming_context = \
        StreamingContext(spark_context, cfg.CONF.service.stream_interval)

    kafka_stream = MonMetricsKafkaProcessor.get_kafka_stream(
        cfg.CONF.messaging.topic,
        spark_streaming_context)

    # transform to recordstore
    MonMetricsKafkaProcessor.transform_to_recordstore(kafka_stream)

    # catch interrupt, stop streaming context gracefully
    # signal.signal(signal.SIGINT, signal_handler)

    # start processing
    spark_streaming_context.start()

    # FIXME: stop spark context to relinquish resources

    # FIXME: specify cores, so as not to use all the resources on the cluster.

    # FIXME: HA deploy multiple masters, may be one on each control node

    try:
        # Wait for the Spark driver to "finish"
        spark_streaming_context.awaitTermination()
    except Exception as e:
        MonMetricsKafkaProcessor.log_debug(
            "Exception raised during Spark execution : " + str(e))
        # One exception that can occur here is the result of the saved
        # kafka offsets being obsolete/out of range.  Delete the saved
        # offsets to improve the chance of success on the next execution.

        # TODO(someone) prevent deleting all offsets for an application,
        # but just the latest revision
        MonMetricsKafkaProcessor.log_debug(
            "Deleting saved offsets for chance of success on next execution")

        MonMetricsKafkaProcessor.reset_kafka_offsets(application_name)

        # delete pre hourly processor offsets
        if cfg.CONF.stage_processors.pre_hourly_processor_enabled:
            PreHourlyProcessor.reset_kafka_offsets()
Developer ID: openstack, Project: monasca-transform, Lines of code: 56, Source file: mon_metrics_kafka.py

Example 14: sparkTask

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def sparkTask():
    from textblob import TextBlob
    import re    
    from pyspark import SparkContext
    from pyspark.streaming import StreamingContext
    sc = SparkContext()
    ssc = StreamingContext(sc, 1)
    quotes = ssc.socketTextStream("localhost", 9999)
    dataSentencesPolarity = quotes.map(lambda x: TextBlob(re.sub('[^A-Za-z0-9 \.\']+', '',x))).map(lambda y: (str(y.upper())[:60], y.sentiment.polarity))
    dataSentencesPolarity.pprint()
    ssc.start()             # Start the computation
    ssc.awaitTermination(20)  # Wait for the computation to terminate    
Developer ID: sh19871122, Project: TM_2014-2015S2, Lines of code: 14, Source file: twitterTestStreaming.py

Example 15: main

# Required import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTermination [as alias]
def main():
    # Create a SparkContext attached to the standalone cluster master
    sc = SparkContext("spark://ip-172-31-29-29:7077", "MyKafkaStream")

    # stream interval of 5 seconds
    ssc = StreamingContext(sc, 5)
    kafkaStream = KafkaUtils.createStream(ssc, "52.3.61.194:2181", "GroupNameDoesntMatter", {"parking_sensor_data": 2})
    messages = kafkaStream.flatMap(lambda s: create_tuple(s[1])).reduceByKey(lambda a,b: (int(a)+int(b))/2)
    messages1 = messages.filter(lambda s: s[1] > 0)
    messages1.pprint()

    ssc.start()             # Start the computation
    ssc.awaitTermination()  # Wait for the computation to terminate
Developer ID: neostoic, Project: ParkMate, Lines of code: 15, Source file: process_stream.py


Note: The pyspark.streaming.StreamingContext.awaitTermination method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by many developers; copyright of the source code belongs to the original authors, and distribution and use are subject to each project's License. Do not republish without permission.