

Python StreamingContext.awaitTerminationOrTimeout Method Code Examples

This article collects typical usage examples of the pyspark.streaming.StreamingContext.awaitTerminationOrTimeout method in Python. If you are wondering what StreamingContext.awaitTerminationOrTimeout does, how to call it, or what real-world uses of it look like, the curated code examples below should help. You can also explore further usage examples of its containing class, pyspark.streaming.StreamingContext.


Seven code examples of the StreamingContext.awaitTerminationOrTimeout method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
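Before turning to the individual examples, here is a minimal sketch of the typical usage pattern, assuming a hypothetical app name, batch interval, socket source, and run deadline that are not taken from any of the projects below: start the context, poll awaitTerminationOrTimeout in a loop, and stop gracefully once a stop condition is met. awaitTerminationOrTimeout(timeout) blocks for at most timeout seconds and returns True if the context has already terminated, otherwise False.

import time
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext(appName="AwaitTimeoutDemo")    # hypothetical app name
ssc = StreamingContext(sc, 1)                    # 1-second batch interval (assumed)

lines = ssc.socketTextStream("localhost", 9999)  # assumed local socket source
lines.count().pprint()                           # register at least one output operation

deadline = time.time() + 300                     # run for at most 5 minutes (assumed)
ssc.start()
while True:
    # True means the context has stopped; False means the 10-second timeout elapsed.
    if ssc.awaitTerminationOrTimeout(10):
        break
    if time.time() > deadline:                   # example stop condition
        ssc.stop(stopSparkContext=True, stopGraceFully=True)
        break

Examples 2, 4, and 7 below use exactly this polling-loop pattern, while examples 1 and 3 simply pass a single long timeout.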

Example 1:

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]
    #datamap = tx_fee_rdd.map(lambda x: ("tx_fee", x))
    # (row key, [row key, column family, column name, value])
    datamap = tx_fee_rdd.map(lambda x: (str(x[0]),
                                        [str(x[0]), "tx_fee_col", "tx_fee", str(x[1])]))

    datamap.saveAsNewAPIHadoopDataset(conf=conf,
                                      keyConverter=keyConv,
                                      valueConverter=valueConv)


lines = ssc.socketTextStream("localhost", 8888)
dump_rdd = lines.map(lambda x: json.dumps(x))
load_rdd = dump_rdd.map(lambda x: json.loads(x)).map(lambda x : x.decode('unicode_escape').encode('ascii','ignore'))
#load_rdd.pprint(2)

split_blk_rdd = load_rdd.map(lambda x: x.split(":"))
#split_blk_rdd.pprint()

tx_fee_rdd = split_blk_rdd.map(lambda x : (x[14][1:7],x[15][1:-15])) #this gets transaction fee
#tx_fee_rdd.pprint(200)		#works
tx_fee_rdd.foreachRDD(SaveRecord)		#function call




ssc.start()             # Start the computation
#ssc.awaitTermination()  # Wait for the computation to terminate
ssc.awaitTerminationOrTimeout(15000)  # time out after 15000 seconds (about 4.2 hours)
#ssc.stop()  # Stop the computation
Developer: tariq786, Project: datafying_bitcoin, Lines of code: 32, Source file: sp_stream_api.py

Example 2: SparkConf

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]
dataFilePathOnHdfs = "hdfs://{}/btsdata/aviation/ontime/".format(master)

conf = SparkConf().setAppName(APP_NAME).setMaster('spark://{}:7077'.format(master))
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, STREAMING_INTERVAL)
ssc.checkpoint('/tmp/ccc')

lines = ssc.textFileStream(dataFilePathOnHdfs)



# 2nd argument originAirport: 'SRQ', 'CMH', 'JFK', 'SEA', or 'BOS'
res2_2 = (lines.map(lambda line: line.split(","))
               .filter(lambda line: line[6] == originAirport)
               .map(lambda line: (line[7], float(line[12])))          # (Carrier, Departure Delay)
               .combineByKey(lambda v: (v, 1),                        # create (sum, count)
                             lambda acc, v: (acc[0] + v, acc[1] + 1),
                             lambda a, b: (a[0] + b[0], a[1] + b[1]))
               .map(lambda kv: (kv[0], kv[1][0] / kv[1][1]))          # (Carrier, average delay)
               .transform(lambda rdd: rdd.sortByKey(ascending=True)))


res2_2.pprint(10)  # DStreams have no take(); print up to 10 results per batch instead

ssc.start()
while True:
    if ssc.awaitTerminationOrTimeout(10):
        break
print("Gracefully stopping Spark Streaming Application")
ssc.stop(stopSparkContext=True, stopGraceFully=True)
print("Application stopped")
Developer: xzw0005, Project: pySparkAirlines, Lines of code: 33, Source file: group2_2.py

Example 3:

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]
	#print gen_tx_json
	return gen_tx_json


#get lines RDD
lines = ssc.socketTextStream("localhost", 9999)
dump_rdd = lines.map(lambda x: json.dumps(x))
#print dump_rdd.take(2)
load_rdd = dump_rdd.map(lambda x: json.loads(x)).map(lambda x : x.decode('unicode_escape').encode('ascii','ignore'))
#print load_rdd.take(2)

#load_rdd.pprint(100)
#tx = load_rdd.flatMap(lambda x: x.split(":")) #this works
split_blk_rdd = load_rdd.map(lambda x: x.split(":"))
#split_blk_rdd.pprint()

gen_tx_rdd = split_blk_rdd.map(lambda x : (x[8][1:7],x[6][4:68]) ) #this gets generation transactions
#gen_tx_rdd.pprint()		#works

tx_json_rdd = gen_tx_rdd.map(lambda x: (x[0], get_tx_fee(x[1])))  # function call
tx_fee_rdd = tx_json_rdd.map(lambda x: (x[0], x[1].items()[3][1][0]["value"] - 25))  # .filter(lambda x: "value" in x)

tx_fee_rdd.foreachRDD(SaveRecord)		#function call


ssc.start()             # Start the computation
#ssc.awaitTermination()  # Wait for the computation to terminate
ssc.awaitTerminationOrTimeout(12000) #time out 3.33 hours
#ssc.stop()  # Stop the computation
Developer: tariq786, Project: datafying_bitcoin, Lines of code: 32, Source file: sp_stream.py

Example 4: get_cass

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]
  dstream = dstream.flatMap(extract_carr_arr_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_average)
elif args.task == 'q13':
  dstream = dstream.flatMap(extract_weekday_arr_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_average)
elif args.task == 'q21':
  dstream = dstream.flatMap(extract_origin_carrier_dep_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_complex_average)
elif args.task == 'q22':
  dstream = dstream.flatMap(extract_origin_destination_dep_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_complex_average)
elif args.task == 'q23':
  dstream = dstream.flatMap(extract_route_carrier_arr_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_complex_average)
elif args.task == 'q24':
  dstream = dstream.flatMap(extract_route_arr_delay).reduceByKey(lambda a, b: (a[0] + b[0], a[1] + b[1])).foreachRDD(top_average)
elif args.task == 'q32':
  get_cass().execute('truncate %s' % schema['table'])
  dstream = dstream.flatMap(extract_trip_info).foreachRDD(save_trip)
else:
  print("Unknown task")

# runner
ts_last_data = time.time()
ssc.start()
while True:
  res = ssc.awaitTerminationOrTimeout(args.run_interval)
  if res:
    # stopped elsewhere
    break
  else:
    # still running
    if time.time() - ts_last_data > args.idle_time:
      dump("No data received for %d seconds, stopping..." % args.idle_time)
      ssc.stop(stopSparkContext=True, stopGraceFully=False)
Developer: whisk, Project: ccc, Lines of code: 32, Source file: task.py

Example 5: BasicOperationTests

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]

#......... part of the code is omitted here .........
                s = []
            s.extend(vs)
            return s

        input = [[('k', i)] for i in range(5)]

        def func(dstream):
            return dstream.updateStateByKey(updater)

        expected = [[0], [0, 1], [0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]]
        expected = [[('k', v)] for v in expected]
        self._test_func(input, func, expected)

    def test_update_state_by_key_initial_rdd(self):

        def updater(vs, s):
            if not s:
                s = []
            s.extend(vs)
            return s

        initial = [('k', [0, 1])]
        initial = self.sc.parallelize(initial, 1)

        input = [[('k', i)] for i in range(2, 5)]

        def func(dstream):
            return dstream.updateStateByKey(updater, initialRDD=initial)

        expected = [[0, 1, 2], [0, 1, 2, 3], [0, 1, 2, 3, 4]]
        expected = [[('k', v)] for v in expected]
        self._test_func(input, func, expected)

    def test_failed_func(self):
        # Test failure in
        # TransformFunction.apply(rdd: Option[RDD[_]], time: Time)
        input = [self.sc.parallelize([d], 1) for d in range(4)]
        input_stream = self.ssc.queueStream(input)

        def failed_func(i):
            raise ValueError("This is a special error")

        input_stream.map(failed_func).pprint()
        self.ssc.start()
        try:
            self.ssc.awaitTerminationOrTimeout(10)
        except:
            import traceback
            failure = traceback.format_exc()
            self.assertTrue("This is a special error" in failure)
            return

        self.fail("a failed func should throw an error")

    def test_failed_func2(self):
        # Test failure in
        # TransformFunction.apply(rdd: Option[RDD[_]], rdd2: Option[RDD[_]], time: Time)
        input = [self.sc.parallelize([d], 1) for d in range(4)]
        input_stream1 = self.ssc.queueStream(input)
        input_stream2 = self.ssc.queueStream(input)

        def failed_func(rdd1, rdd2):
            raise ValueError("This is a special error")

        input_stream1.transformWith(failed_func, input_stream2, True).pprint()
        self.ssc.start()
        try:
            self.ssc.awaitTerminationOrTimeout(10)
        except:
            import traceback
            failure = traceback.format_exc()
            self.assertTrue("This is a special error" in failure)
            return

        self.fail("a failed func should throw an error")

    def test_failed_func_with_reseting_failure(self):
        input = [self.sc.parallelize([d], 1) for d in range(4)]
        input_stream = self.ssc.queueStream(input)

        def failed_func(i):
            if i == 1:
                # Make it fail in the second batch
                raise ValueError("This is a special error")
            else:
                return i

        # We should be able to see the results of the 3rd and 4th batches even if the second batch
        # fails
        expected = [[0], [2], [3]]
        self.assertEqual(expected, self._collect(input_stream.map(failed_func), 3))
        try:
            self.ssc.awaitTerminationOrTimeout(10)
        except:
            import traceback
            failure = traceback.format_exc()
            self.assertTrue("This is a special error" in failure)
            return

        self.fail("a failed func should throw an error")
Developer: JingchengDu, Project: spark, Lines of code: 104, Source file: test_dstream.py

Example 6: StreamingContextTests

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]

#......... part of the code is omitted here .........
        for name in ('a', 'b'):
            time.sleep(1)
            with open(os.path.join(d, name), "wb") as f:
                f.write(bytearray(range(10)))
        self.wait_for(result, 2)
        self.assertEqual([list(range(10)), list(range(10))], [list(v[0]) for v in result])

    def test_union(self):
        input = [list(range(i + 1)) for i in range(3)]
        dstream = self.ssc.queueStream(input)
        dstream2 = self.ssc.queueStream(input)
        dstream3 = self.ssc.union(dstream, dstream2)
        result = self._collect(dstream3, 3)
        expected = [i * 2 for i in input]
        self.assertEqual(expected, result)

    def test_transform(self):
        dstream1 = self.ssc.queueStream([[1]])
        dstream2 = self.ssc.queueStream([[2]])
        dstream3 = self.ssc.queueStream([[3]])

        def func(rdds):
            rdd1, rdd2, rdd3 = rdds
            return rdd2.union(rdd3).union(rdd1)

        dstream = self.ssc.transform([dstream1, dstream2, dstream3], func)

        self.assertEqual([2, 3, 1], self._take(dstream, 3))

    def test_transform_pairrdd(self):
        # This regression test case is for SPARK-17756.
        dstream = self.ssc.queueStream(
            [[1], [2], [3]]).transform(lambda rdd: rdd.cartesian(rdd))
        self.assertEqual([(1, 1), (2, 2), (3, 3)], self._take(dstream, 3))

    def test_get_active(self):
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that getActive() returns the active context
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)

        # Verify that getActive() returns None
        self.ssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that if the Java context is stopped, then getActive() returns None
        self.ssc = StreamingContext(self.sc, self.duration)
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)
        self.ssc._jssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

    def test_get_active_or_create(self):
        # Test StreamingContext.getActiveOrCreate() without checkpoint data
        # See CheckpointTests for tests with checkpoint data
        self.ssc = None
        self.assertEqual(StreamingContext.getActive(), None)

        def setupFunc():
            ssc = StreamingContext(self.sc, self.duration)
            ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
            self.setupCalled = True
            return ssc

        # Verify that getActiveOrCreate() (w/o checkpoint) calls setupFunc when no context is active
        self.setupCalled = False
        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
        self.assertTrue(self.setupCalled)

        # Verify that getActiveOrCreate() returns active context and does not call the setupFunc
        self.ssc.start()
        self.setupCalled = False
        self.assertEqual(StreamingContext.getActiveOrCreate(None, setupFunc), self.ssc)
        self.assertFalse(self.setupCalled)

        # Verify that getActiveOrCreate() calls setupFunc after active context is stopped
        self.ssc.stop(False)
        self.setupCalled = False
        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
        self.assertTrue(self.setupCalled)

        # Verify that if the Java context is stopped, then getActive() returns None
        self.ssc = StreamingContext(self.sc, self.duration)
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)
        self.ssc._jssc.stop(False)
        self.setupCalled = False
        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
        self.assertTrue(self.setupCalled)

    def test_await_termination_or_timeout(self):
        self._add_input_stream()
        self.ssc.start()
        self.assertFalse(self.ssc.awaitTerminationOrTimeout(0.001))
        self.ssc.stop(False)
        self.assertTrue(self.ssc.awaitTerminationOrTimeout(0.001))
Developer: Brett-A, Project: spark, Lines of code: 104, Source file: test_context.py

Example 7: print

# Module to import: from pyspark.streaming import StreamingContext [as alias]
# Or: from pyspark.streaming.StreamingContext import awaitTerminationOrTimeout [as alias]
                  
    airportAirports.checkpoint(60)
    airportAirports.foreachRDD(outputQ2N2)

    carriersA2A.checkpoint(60)
    carriersA2A.foreachRDD(outputQ2N3)

    topHopFlights.checkpoint(60)
    topHopFlights.foreachRDD(outputQ3N2)
    
    print("STARTED!")
    ssc.start()
    runStatus = 1
    
    while True:
        res = ssc.awaitTerminationOrTimeout(10) # 10 seconds timeout
        if dataSaved1 and dataSaved2 and dataSaved3 and dataSaved4 and dataSaved5 and dataSaved6:
            runStatus = 0
        if res:
            # stopped elsewhere
            break
        else:
            # still running
            timerCount += 1
            print("still running...%d" % timerCount)
                        
            if runStatus == 0:
                print("Finish saving data. Stopping streaming...")
                ssc.stop(stopSparkContext=True, stopGraceFully=True)
                break
            
Developer: sunbaoshi1975, Project: MyStudy, Lines of code: 32, Source file: dk_airportquery.py


Note: The pyspark.streaming.StreamingContext.awaitTerminationOrTimeout method examples in this article were compiled by 纯净天空 from GitHub/MSDocs and other open-source code and documentation platforms. The code snippets were selected from open-source projects contributed by many developers, and copyright of the source code remains with the original authors. Please consult the corresponding project's license before redistributing or using the code; do not reproduce without permission.