This article collects typical usage examples of the Python method pyspark.streaming.context.StreamingContext.stop. If you are wondering how to call Python's StreamingContext.stop, how it behaves, or what real example code looks like, the curated examples below may help. You can also explore further usage examples of the containing class, pyspark.streaming.context.StreamingContext.
The following presents 5 code examples of the StreamingContext.stop method, sorted by popularity by default.
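Before diving into the examples, here is a minimal, self-contained sketch of the call they all revolve around. It is an illustration only: it assumes a local Spark installation and a hypothetical text source listening on localhost:9999. The point to notice is that stop() takes the flags stopSparkContext and stopGraceFully, so you can shut down the streaming layer while keeping the underlying SparkContext alive.

# Minimal sketch (assumes local Spark and a text source on localhost:9999).
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

sc = SparkContext("local[2]", "StopSketch")
ssc = StreamingContext(sc, batchDuration=1)  # 1-second batches

lines = ssc.socketTextStream("localhost", 9999)
lines.count().pprint()  # at least one output operation must be registered

ssc.start()
ssc.awaitTerminationOrTimeout(10)  # run for up to 10 seconds
# Stop streaming only; keep the SparkContext for further batch work.
ssc.stop(stopSparkContext=False, stopGraceFully=True)

sc.parallelize(range(5)).count()  # the SparkContext is still usable
sc.stop()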
Example 1: _writeAndVerify
# Required module import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
def _writeAndVerify(self, ports):
    # Set up the streaming context and input streams
    ssc = StreamingContext(self.sc, self.duration)
    try:
        addresses = [("localhost", port) for port in ports]
        dstream = FlumeUtils.createPollingStream(
            ssc,
            addresses,
            maxBatchSize=self._utils.eventsPerBatch(),
            parallelism=5)
        outputBuffer = []

        def get_output(_, rdd):
            for e in rdd.collect():
                outputBuffer.append(e)

        dstream.foreachRDD(get_output)
        ssc.start()
        self._utils.sendDatAndEnsureAllDataHasBeenReceived()
        self.wait_for(outputBuffer, self._utils.getTotalEvents())
        outputHeaders = [event[0] for event in outputBuffer]
        outputBodies = [event[1] for event in outputBuffer]
        self._utils.assertOutput(outputHeaders, outputBodies)
    finally:
        ssc.stop(False)
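The shape of Example 1 (create a context, start it, verify output, and always call stop(False) in a finally block so the shared SparkContext survives) is not Flume-specific. As a hedged illustration only, the same pattern with the built-in queueStream source might look like the helper below, placed in a test class that, like Example 2, provides self.sc, self.duration and wait_for. The helper name _run_and_verify is an assumption, not part of the Spark API.

def _run_and_verify(self, batches, expected_count):
    # Hypothetical helper: same create/start/stop(False) pattern, queueStream source.
    ssc = StreamingContext(self.sc, self.duration)
    try:
        collected = []
        stream = ssc.queueStream(batches)  # each list in `batches` becomes one RDD per batch
        stream.foreachRDD(lambda _, rdd: collected.extend(rdd.collect()))
        ssc.start()
        self.wait_for(collected, expected_count)
        assert len(collected) >= expected_count
    finally:
        ssc.stop(False)  # keep the shared SparkContext alive for other tests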
Example 2: PySparkStreamingTestCase
# Required module import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class PySparkStreamingTestCase(unittest.TestCase):

    timeout = 10  # seconds
    duration = .5

    @classmethod
    def setUpClass(cls):
        class_name = cls.__name__
        conf = SparkConf().set("spark.default.parallelism", 1)
        cls.sc = SparkContext(appName=class_name, conf=conf)
        cls.sc.setCheckpointDir("/tmp")

    @classmethod
    def tearDownClass(cls):
        cls.sc.stop()
        # Clean up in the JVM just in case there have been some issues in the Python API
        jSparkContextOption = SparkContext._jvm.SparkContext.get()
        if jSparkContextOption.nonEmpty():
            jSparkContextOption.get().stop()

    def setUp(self):
        self.ssc = StreamingContext(self.sc, self.duration)

    def tearDown(self):
        if self.ssc is not None:
            self.ssc.stop(False)
        # Clean up in the JVM just in case there have been some issues in the Python API
        jStreamingContextOption = StreamingContext._jvm.SparkContext.getActive()
        if jStreamingContextOption.nonEmpty():
            jStreamingContextOption.get().stop(False)

    def wait_for(self, result, n):
        start_time = time.time()
        while len(result) < n and time.time() - start_time < self.timeout:
            time.sleep(0.01)
        if len(result) < n:
            print("timeout after", self.timeout)

    def _take(self, dstream, n):
        """
        Return the first `n` elements in the stream (will start and stop).
        """
        results = []

        def take(_, rdd):
            if rdd and len(results) < n:
                results.extend(rdd.take(n - len(results)))

        dstream.foreachRDD(take)
        self.ssc.start()
        self.wait_for(results, n)
        return results

    def _collect(self, dstream, n, block=True):
        """
        Collect each RDD into the returned list.
        :return: list, which will have the collected items.
        """
        result = []

        def get_output(_, rdd):
            if rdd and len(result) < n:
                r = rdd.collect()
                if r:
                    result.append(r)

        dstream.foreachRDD(get_output)
        if not block:
            return result
        self.ssc.start()
        self.wait_for(result, n)
        return result

    def _test_func(self, input, func, expected, sort=False, input2=None):
        """
        @param input: dataset for the test. This should be a list of lists.
        @param func: wrapped function. This function should return a PythonDStream object.
        @param expected: expected output for this test case.
        """
        if not isinstance(input[0], RDD):
            input = [self.sc.parallelize(d, 1) for d in input]
        input_stream = self.ssc.queueStream(input)
        if input2 and not isinstance(input2[0], RDD):
            input2 = [self.sc.parallelize(d, 1) for d in input2]
        input_stream2 = self.ssc.queueStream(input2) if input2 is not None else None
        # Apply test function to stream.
        if input2:
            stream = func(input_stream, input_stream2)
        else:
            stream = func(input_stream)
        result = self._collect(stream, len(expected))
        if sort:
            self._sort_result_based_on_key(result)
            self._sort_result_based_on_key(expected)
# ......... the rest of this example is omitted .........
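As a usage illustration (not part of the omitted code), a subclass of PySparkStreamingTestCase would typically drive _test_func as shown below; the test class name, test name, and data are assumptions made for the sketch.

class MapTests(PySparkStreamingTestCase):
    # Hypothetical subclass showing how _test_func is normally used.
    def test_map(self):
        input = [range(1, 5)]

        def func(dstream):
            return dstream.map(str)

        expected = [list(map(str, x)) for x in input]
        self._test_func(input, func, expected)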
Example 3: StreamingTestCase
# Required module import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class StreamingTestCase(SparkTestingBaseReuse):
    """Basic common test case for Spark Streaming tests. Provides a
    Spark Streaming context as well as some helper methods for creating
    streaming input and collecting streaming output.
    Modeled after PySparkStreamingTestCase."""

    timeout = 15  # seconds
    duration = .5

    @classmethod
    def setUpClass(cls):
        super(StreamingTestCase, cls).setUpClass()
        cls.sc.setCheckpointDir("/tmp")

    @classmethod
    def tearDownClass(cls):
        super(StreamingTestCase, cls).tearDownClass()

    @classmethod
    def _sort_result_based_on_key(cls, result):
        return map(lambda x: sorted(x), result)

    def setUp(self):
        self.ssc = StreamingContext(self.sc, self.duration)

    def tearDown(self):
        self.ssc.stop(False)

    def wait_for(self, result, n):
        start_time = time.time()
        while len(result) < n and time.time() - start_time < self.timeout:
            time.sleep(0.01)
        if len(result) < n:
            print("timeout after", self.timeout)

    def _take(self, dstream, n):
        """
        Return the first `n` elements in the stream (will start and stop).
        """
        results = []

        def take(_, rdd):
            if rdd and len(results) < n:
                results.extend(rdd.take(n - len(results)))

        dstream.foreachRDD(take)
        self.ssc.start()
        self.wait_for(results, n)
        return results

    def _collect(self, dstream, n, block=True):
        """
        Collect each RDD into the returned list.
        :return: list, which will have the collected items.
        """
        result = []

        def get_output(_, rdd):
            if rdd and len(result) < n:
                r = rdd.collect()
                if r:
                    result.append(r)

        dstream.foreachRDD(get_output)
        if not block:
            return result
        self.ssc.start()
        self.wait_for(result, n)
        return result

    def run_func(self, input, func, expected, sort=False, input2=None):
        """
        @param input: dataset for the test. This should be a list of lists
            or a list of RDDs.
        @param input2: optional second dataset for the test. If provided, your
            func must take two PythonDStreams as input.
        @param func: wrapped function. This function should return a
            PythonDStream.
        @param expected: expected output for this test case.
        Warning: If the output is longer than expected, this will silently
        discard the additional output. TODO: fail when this happens.
        """
        if not isinstance(input[0], RDD):
            input = [self.sc.parallelize(d, 1) for d in input]
        input_stream = self.ssc.queueStream(input)
        if input2 and not isinstance(input2[0], RDD):
            input2 = [self.sc.parallelize(d, 1) for d in input2]
        # Apply test function to stream.
        if input2:
            input_stream2 = self.ssc.queueStream(input2)
            stream = func(input_stream, input_stream2)
        else:
            stream = func(input_stream)
# ......... the rest of this example is omitted .........
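A hedged sketch of the two-stream form of run_func, which is what sets this helper apart from Example 2 (the class and test below are illustrative assumptions, not part of spark-testing-base):

class UnionTest(StreamingTestCase):
    # Hypothetical test exercising run_func with a second input stream.
    def test_union_of_two_streams(self):
        input = [[1, 2], [3, 4]]
        input2 = [[5, 6], [7, 8]]
        expected = [[1, 2, 5, 6], [3, 4, 7, 8]]
        self.run_func(input, lambda d1, d2: d1.union(d2), expected, input2=input2)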
Example 4: StreamingContextTests
# Required module import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class StreamingContextTests(PySparkStreamingTestCase):

    duration = 0.1
    setupCalled = False

    def _add_input_stream(self):
        inputs = [range(1, x) for x in range(101)]
        stream = self.ssc.queueStream(inputs)
        self._collect(stream, 1, block=False)

    def test_stop_only_streaming_context(self):
        self._add_input_stream()
        self.ssc.start()
        self.ssc.stop(False)
        self.assertEqual(len(self.sc.parallelize(range(5), 5).glom().collect()), 5)

    def test_stop_multiple_times(self):
        self._add_input_stream()
        self.ssc.start()
        self.ssc.stop(False)
        self.ssc.stop(False)

    def test_queue_stream(self):
        input = [list(range(i + 1)) for i in range(3)]
        dstream = self.ssc.queueStream(input)
        result = self._collect(dstream, 3)
        self.assertEqual(input, result)

    def test_text_file_stream(self):
        d = tempfile.mkdtemp()
        self.ssc = StreamingContext(self.sc, self.duration)
        dstream2 = self.ssc.textFileStream(d).map(int)
        result = self._collect(dstream2, 2, block=False)
        self.ssc.start()
        for name in ('a', 'b'):
            time.sleep(1)
            with open(os.path.join(d, name), "w") as f:
                f.writelines(["%d\n" % i for i in range(10)])
        self.wait_for(result, 2)
        self.assertEqual([list(range(10)), list(range(10))], result)

    def test_binary_records_stream(self):
        d = tempfile.mkdtemp()
        self.ssc = StreamingContext(self.sc, self.duration)
        dstream = self.ssc.binaryRecordsStream(d, 10).map(
            lambda v: struct.unpack("10b", bytes(v)))
        result = self._collect(dstream, 2, block=False)
        self.ssc.start()
        for name in ('a', 'b'):
            time.sleep(1)
            with open(os.path.join(d, name), "wb") as f:
                f.write(bytearray(range(10)))
        self.wait_for(result, 2)
        self.assertEqual([list(range(10)), list(range(10))], [list(v[0]) for v in result])

    def test_union(self):
        input = [list(range(i + 1)) for i in range(3)]
        dstream = self.ssc.queueStream(input)
        dstream2 = self.ssc.queueStream(input)
        dstream3 = self.ssc.union(dstream, dstream2)
        result = self._collect(dstream3, 3)
        expected = [i * 2 for i in input]
        self.assertEqual(expected, result)

    def test_transform(self):
        dstream1 = self.ssc.queueStream([[1]])
        dstream2 = self.ssc.queueStream([[2]])
        dstream3 = self.ssc.queueStream([[3]])

        def func(rdds):
            rdd1, rdd2, rdd3 = rdds
            return rdd2.union(rdd3).union(rdd1)

        dstream = self.ssc.transform([dstream1, dstream2, dstream3], func)
        self.assertEqual([2, 3, 1], self._take(dstream, 3))

    def test_get_active(self):
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that getActive() returns the active context
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)

        # Verify that getActive() returns None
        self.ssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that if the Java context is stopped, then getActive() returns None
        self.ssc = StreamingContext(self.sc, self.duration)
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)
        self.ssc._jssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

    def test_get_active_or_create(self):
        # Test StreamingContext.getActiveOrCreate() without checkpoint data
        # See CheckpointTests for tests with checkpoint data
# ......... the rest of this example is omitted .........
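The truncated test above exercises StreamingContext.getActiveOrCreate(). A condensed sketch of the behaviour it checks, written as a method of the class above and relying on the self.ssc, self.sc, self.duration and setupCalled attributes defined earlier (this is an illustration, not the omitted code):

    def test_get_active_or_create_sketch(self):
        # Hypothetical condensed version of the omitted assertions.
        self.ssc = None
        self.setupCalled = False

        def setupFunc():
            self.setupCalled = True
            return StreamingContext(self.sc, self.duration)

        # With no active context, setupFunc is called to build one.
        self.ssc = StreamingContext.getActiveOrCreate(None, setupFunc)
        self.assertTrue(self.setupCalled)

        # Once started, the already-active context is returned and setupFunc is skipped.
        self.setupCalled = False
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActiveOrCreate(None, setupFunc), self.ssc)
        self.assertFalse(self.setupCalled)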
Example 5: TestStreamingContextSuite
# Required module import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class TestStreamingContextSuite(unittest.TestCase):
    """
    Should we have a conf property in SparkContext?
    @property
    def conf(self):
        return self._conf
    """

    def setUp(self):
        self.master = "local[2]"
        self.appName = self.__class__.__name__
        self.batachDuration = Milliseconds(500)
        self.sparkHome = "SomeDir"
        self.envPair = {"key": "value"}
        self.ssc = None
        self.sc = None

    def tearDown(self):
        # Do not call pyspark.streaming.context.StreamingContext.stop directly, because
        # we do not wait for the py4j client to shut down.
        # We should change this to simply call streamingContext.stop().
        # self.ssc._jssc.stop()
        if self.ssc is not None:
            self.ssc.stop()
        if self.sc is not None:
            self.sc.stop()
        # Why does it take a long time to terminate StreamingContext and SparkContext?
        # Should we change the sleep time if this depends on machine spec?
        time.sleep(1)

    @classmethod
    def tearDownClass(cls):
        # Make sure to shut down the callback server
        SparkContext._gateway._shutdown_callback_server()

    def test_from_no_conf_constructor(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                                    duration=self.batachDuration)
        # Alternative way to get the master: ssc.sparkContext.master
        # I try to keep the code close to Scala.
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.master"), self.master)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.app.name"), self.appName)

    def test_from_no_conf_plus_spark_home(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                                    sparkHome=self.sparkHome, duration=self.batachDuration)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.home"), self.sparkHome)

    def test_from_no_conf_plus_spark_home_plus_env(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                                    sparkHome=self.sparkHome, environment=self.envPair,
                                    duration=self.batachDuration)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.executorEnv.key"), self.envPair["key"])

    def test_from_existing_spark_context(self):
        self.sc = SparkContext(master=self.master, appName=self.appName)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)

    def test_existing_spark_context_with_settings(self):
        conf = SparkConf()
        conf.set("spark.cleaner.ttl", "10")
        self.sc = SparkContext(master=self.master, appName=self.appName, conf=conf)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)
        self.assertEqual(int(self.ssc.sparkContext._conf.get("spark.cleaner.ttl")), 10)

    def test_from_conf_with_settings(self):
        conf = SparkConf()
        conf.set("spark.cleaner.ttl", "10")
        conf.setMaster(self.master)
        conf.setAppName(self.appName)
        self.ssc = StreamingContext(conf=conf, duration=self.batachDuration)
        self.assertEqual(int(self.ssc.sparkContext._conf.get("spark.cleaner.ttl")), 10)

    def test_stop_only_streaming_context(self):
        self.sc = SparkContext(master=self.master, appName=self.appName)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)
        self._addInputStream(self.ssc)
        self.ssc.start()
        self.ssc.stop(False)
        self.assertEqual(len(self.sc.parallelize(range(5), 5).glom().collect()), 5)

    def test_stop_multiple_times(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                                    duration=self.batachDuration)
        self._addInputStream(self.ssc)
        self.ssc.start()
        self.ssc.stop()
        self.ssc.stop()

    def _addInputStream(self, s):
        # Make sure the length of each input is over 3, and
        # numSlice is 2 due to a deserializer problem in pyspark.streaming
        test_inputs = map(lambda x: range(1, x), range(5, 101))
        test_stream = s._testInputStream(test_inputs, 2)
        # Register a fake output operation
        result = list()
        test_stream._test_output(result)