

Python StreamingContext.stop Method Code Examples

This article collects typical usage examples of the Python method pyspark.streaming.context.StreamingContext.stop. If you are wondering how to use StreamingContext.stop, or are looking for concrete examples of it in practice, the curated code examples below should help. You can also explore further usage examples of the containing class, pyspark.streaming.context.StreamingContext.


Five code examples of the StreamingContext.stop method are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
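Before the collected examples, here is a minimal standalone sketch of the typical calling pattern. The local master, app name, queue-based input, and two-second run time are illustrative assumptions, not taken from the examples that follow; the point to note is the stopSparkContext flag, which controls whether the underlying SparkContext is shut down together with the streaming computation.

from pyspark import SparkContext
from pyspark.streaming import StreamingContext

# Illustrative setup (assumed for this sketch): local master, 1-second batch interval.
sc = SparkContext("local[2]", "StopUsageSketch")
ssc = StreamingContext(sc, 1)

# A simple queue-based input stream so the context has something to process.
rdd_queue = [sc.parallelize(range(10))]
ssc.queueStream(rdd_queue).foreachRDD(lambda rdd: print(rdd.collect()))

ssc.start()
ssc.awaitTerminationOrTimeout(2)  # let a couple of batches run

# Stop only the streaming computation and keep the SparkContext alive for reuse;
# stop() defaults to stopSparkContext=True, stopGraceFully=False.
ssc.stop(stopSparkContext=False, stopGraceFully=True)
sc.stop()

Most of the test cases below call ssc.stop(False) for the same reason: the SparkContext is shared across tests and must stay alive after the StreamingContext is torn down.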

Example 1: _writeAndVerify

# Required import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
    def _writeAndVerify(self, ports):
        # Set up the streaming context and input streams
        ssc = StreamingContext(self.sc, self.duration)
        try:
            addresses = [("localhost", port) for port in ports]
            dstream = FlumeUtils.createPollingStream(
                ssc,
                addresses,
                maxBatchSize=self._utils.eventsPerBatch(),
                parallelism=5)
            outputBuffer = []

            def get_output(_, rdd):
                for e in rdd.collect():
                    outputBuffer.append(e)

            dstream.foreachRDD(get_output)
            ssc.start()
            self._utils.sendDatAndEnsureAllDataHasBeenReceived()

            self.wait_for(outputBuffer, self._utils.getTotalEvents())
            outputHeaders = [event[0] for event in outputBuffer]
            outputBodies = [event[1] for event in outputBuffer]
            self._utils.assertOutput(outputHeaders, outputBodies)
        finally:
            ssc.stop(False)
Author: anitatailor, Project: spark, Lines of code: 28, Source file: tests.py

Example 2: PySparkStreamingTestCase

# Required import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class PySparkStreamingTestCase(unittest.TestCase):

    timeout = 10  # seconds
    duration = .5

    @classmethod
    def setUpClass(cls):
        class_name = cls.__name__
        conf = SparkConf().set("spark.default.parallelism", 1)
        cls.sc = SparkContext(appName=class_name, conf=conf)
        cls.sc.setCheckpointDir("/tmp")

    @classmethod
    def tearDownClass(cls):
        cls.sc.stop()
        # Clean up in the JVM just in case there have been issues in the Python API
        jSparkContextOption = SparkContext._jvm.SparkContext.get()
        if jSparkContextOption.nonEmpty():
            jSparkContextOption.get().stop()

    def setUp(self):
        self.ssc = StreamingContext(self.sc, self.duration)

    def tearDown(self):
        if self.ssc is not None:
            self.ssc.stop(False)
        # Clean up in the JVM just in case there have been issues in the Python API
        jStreamingContextOption = StreamingContext._jvm.SparkContext.getActive()
        if jStreamingContextOption.nonEmpty():
            jStreamingContextOption.get().stop(False)

    def wait_for(self, result, n):
        start_time = time.time()
        while len(result) < n and time.time() - start_time < self.timeout:
            time.sleep(0.01)
        if len(result) < n:
            print("timeout after", self.timeout)

    def _take(self, dstream, n):
        """
        Return the first `n` elements in the stream (will start and stop).
        """
        results = []

        def take(_, rdd):
            if rdd and len(results) < n:
                results.extend(rdd.take(n - len(results)))

        dstream.foreachRDD(take)

        self.ssc.start()
        self.wait_for(results, n)
        return results

    def _collect(self, dstream, n, block=True):
        """
        Collect each RDD into the returned list.

        :return: list, which will have the collected items.
        """
        result = []

        def get_output(_, rdd):
            if rdd and len(result) < n:
                r = rdd.collect()
                if r:
                    result.append(r)

        dstream.foreachRDD(get_output)

        if not block:
            return result

        self.ssc.start()
        self.wait_for(result, n)
        return result

    def _test_func(self, input, func, expected, sort=False, input2=None):
        """
        @param input: dataset for the test. This should be a list of lists.
        @param func: wrapped function. This function should return a PythonDStream object.
        @param expected: expected output for this test case.
        """
        if not isinstance(input[0], RDD):
            input = [self.sc.parallelize(d, 1) for d in input]
        input_stream = self.ssc.queueStream(input)
        if input2 and not isinstance(input2[0], RDD):
            input2 = [self.sc.parallelize(d, 1) for d in input2]
        input_stream2 = self.ssc.queueStream(input2) if input2 is not None else None

        # Apply test function to stream.
        if input2:
            stream = func(input_stream, input_stream2)
        else:
            stream = func(input_stream)

        result = self._collect(stream, len(expected))
        if sort:
            self._sort_result_based_on_key(result)
            self._sort_result_based_on_key(expected)
#......... part of the code omitted .........
Author: anitatailor, Project: spark, Lines of code: 103, Source file: tests.py

Example 3: StreamingTestCase

# Required import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class StreamingTestCase(SparkTestingBaseReuse):

    """Basic common test case for Spark Streaming tests. Provides a
    Spark Streaming context as well as some helper methods for creating
    streaming input and collecting streaming output.
    Modeled after PySparkStreamingTestCase."""

    timeout = 15  # seconds
    duration = .5

    @classmethod
    def setUpClass(cls):
        super(StreamingTestCase, cls).setUpClass()
        cls.sc.setCheckpointDir("/tmp")

    @classmethod
    def tearDownClass(cls):
        super(StreamingTestCase, cls).tearDownClass()

    @classmethod
    def _sort_result_based_on_key(cls, result):
        return map(lambda x: sorted(x), result)

    def setUp(self):
        self.ssc = StreamingContext(self.sc, self.duration)

    def tearDown(self):
        self.ssc.stop(False)

    def wait_for(self, result, n):
        start_time = time.time()
        while len(result) < n and time.time() - start_time < self.timeout:
            time.sleep(0.01)
        if len(result) < n:
            print("timeout after", self.timeout)

    def _take(self, dstream, n):
        """
        Return the first `n` elements in the stream (will start and stop).
        """
        results = []

        def take(_, rdd):
            if rdd and len(results) < n:
                results.extend(rdd.take(n - len(results)))

        dstream.foreachRDD(take)

        self.ssc.start()
        self.wait_for(results, n)
        return results

    def _collect(self, dstream, n, block=True):
        """
        Collect each RDD into the returned list.

        :return: list, which will have the collected items.
        """
        result = []

        def get_output(_, rdd):
            if rdd and len(result) < n:
                r = rdd.collect()
                if r:
                    result.append(r)

        dstream.foreachRDD(get_output)

        if not block:
            return result

        self.ssc.start()
        self.wait_for(result, n)
        return result

    def run_func(self, input, func, expected, sort=False, input2=None):
        """
        @param input: dataset for the test. This should be a list of lists
        or a list of RDDs.
        @param input2: optional second dataset for the test. If provided, your
        func must take two PythonDStreams as input.
        @param func: wrapped function. This function should return a
        PythonDStream.
        @param expected: expected output for this test case.
        Warning: if the output is longer than expected, the additional output
        is silently discarded. TODO: fail when this happens.
        """
        if not isinstance(input[0], RDD):
            input = [self.sc.parallelize(d, 1) for d in input]
        input_stream = self.ssc.queueStream(input)
        if input2 and not isinstance(input2[0], RDD):
            input2 = [self.sc.parallelize(d, 1) for d in input2]

        # Apply test function to stream.
        if input2:
            input_stream2 = self.ssc.queueStream(input2)
            stream = func(input_stream, input_stream2)
        else:
            stream = func(input_stream)

#......... part of the code omitted .........
Author: daha, Project: spark-testing-base, Lines of code: 103, Source file: streamingtestcase.py

Example 4: StreamingContextTests

# Required import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class StreamingContextTests(PySparkStreamingTestCase):

    duration = 0.1
    setupCalled = False

    def _add_input_stream(self):
        inputs = [range(1, x) for x in range(101)]
        stream = self.ssc.queueStream(inputs)
        self._collect(stream, 1, block=False)

    def test_stop_only_streaming_context(self):
        self._add_input_stream()
        self.ssc.start()
        self.ssc.stop(False)
        self.assertEqual(len(self.sc.parallelize(range(5), 5).glom().collect()), 5)

    def test_stop_multiple_times(self):
        self._add_input_stream()
        self.ssc.start()
        self.ssc.stop(False)
        self.ssc.stop(False)

    def test_queue_stream(self):
        input = [list(range(i + 1)) for i in range(3)]
        dstream = self.ssc.queueStream(input)
        result = self._collect(dstream, 3)
        self.assertEqual(input, result)

    def test_text_file_stream(self):
        d = tempfile.mkdtemp()
        self.ssc = StreamingContext(self.sc, self.duration)
        dstream2 = self.ssc.textFileStream(d).map(int)
        result = self._collect(dstream2, 2, block=False)
        self.ssc.start()
        for name in ('a', 'b'):
            time.sleep(1)
            with open(os.path.join(d, name), "w") as f:
                f.writelines(["%d\n" % i for i in range(10)])
        self.wait_for(result, 2)
        self.assertEqual([list(range(10)), list(range(10))], result)

    def test_binary_records_stream(self):
        d = tempfile.mkdtemp()
        self.ssc = StreamingContext(self.sc, self.duration)
        dstream = self.ssc.binaryRecordsStream(d, 10).map(
            lambda v: struct.unpack("10b", bytes(v)))
        result = self._collect(dstream, 2, block=False)
        self.ssc.start()
        for name in ('a', 'b'):
            time.sleep(1)
            with open(os.path.join(d, name), "wb") as f:
                f.write(bytearray(range(10)))
        self.wait_for(result, 2)
        self.assertEqual([list(range(10)), list(range(10))], [list(v[0]) for v in result])

    def test_union(self):
        input = [list(range(i + 1)) for i in range(3)]
        dstream = self.ssc.queueStream(input)
        dstream2 = self.ssc.queueStream(input)
        dstream3 = self.ssc.union(dstream, dstream2)
        result = self._collect(dstream3, 3)
        expected = [i * 2 for i in input]
        self.assertEqual(expected, result)

    def test_transform(self):
        dstream1 = self.ssc.queueStream([[1]])
        dstream2 = self.ssc.queueStream([[2]])
        dstream3 = self.ssc.queueStream([[3]])

        def func(rdds):
            rdd1, rdd2, rdd3 = rdds
            return rdd2.union(rdd3).union(rdd1)

        dstream = self.ssc.transform([dstream1, dstream2, dstream3], func)

        self.assertEqual([2, 3, 1], self._take(dstream, 3))

    def test_get_active(self):
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that getActive() returns the active context
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)

        # Verify that getActive() returns None
        self.ssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

        # Verify that if the Java context is stopped, then getActive() returns None
        self.ssc = StreamingContext(self.sc, self.duration)
        self.ssc.queueStream([[1]]).foreachRDD(lambda rdd: rdd.count())
        self.ssc.start()
        self.assertEqual(StreamingContext.getActive(), self.ssc)
        self.ssc._jssc.stop(False)
        self.assertEqual(StreamingContext.getActive(), None)

    def test_get_active_or_create(self):
        # Test StreamingContext.getActiveOrCreate() without checkpoint data
        # See CheckpointTests for tests with checkpoint data
#......... part of the code omitted .........
Author: anitatailor, Project: spark, Lines of code: 103, Source file: tests.py

Example 5: TestStreamingContextSuite

# Required import: from pyspark.streaming.context import StreamingContext [as alias]
# Or: from pyspark.streaming.context.StreamingContext import stop [as alias]
class TestStreamingContextSuite(unittest.TestCase):
    """
    Should we have a conf property in SparkContext?
    @property
    def conf(self):
        return self._conf

    """
    def setUp(self):
        self.master = "local[2]"
        self.appName = self.__class__.__name__
        self.batachDuration = Milliseconds(500)
        self.sparkHome = "SomeDir"
        self.envPair = {"key": "value"}
        self.ssc = None
        self.sc = None

    def tearDown(self):
        # Do not call pyspark.streaming.context.StreamingContext.stop directly, because
        # we do not wait for the py4j client to shut down.
        # TODO: change this to simply call streamingContext.stop().
        # self.ssc._jssc.stop()
        if self.ssc is not None:
            self.ssc.stop()
        if self.sc is not None:
            self.sc.stop()
        # Why does it take so long to terminate the StreamingContext and SparkContext?
        # Should the sleep time be adjusted if this depends on the machine spec?
        time.sleep(1)

    @classmethod
    def tearDownClass(cls):
        # Make sure to shut down the callback server
        SparkContext._gateway._shutdown_callback_server()

    def test_from_no_conf_constructor(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                               duration=self.batachDuration)
        # Alternatively, the master could be read via ssc.sparkContext.master;
        # I try to keep the code close to the Scala version.
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.master"), self.master)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.app.name"), self.appName)

    def test_from_no_conf_plus_spark_home(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName, 
                               sparkHome=self.sparkHome, duration=self.batachDuration)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.home"), self.sparkHome)

    def test_from_no_conf_plus_spark_home_plus_env(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName, 
                               sparkHome=self.sparkHome, environment=self.envPair,
                               duration=self.batachDuration)
        self.assertEqual(self.ssc.sparkContext._conf.get("spark.executorEnv.key"), self.envPair["key"])

    def test_from_existing_spark_context(self):
        self.sc = SparkContext(master=self.master, appName=self.appName)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)

    def test_existing_spark_context_with_settings(self):
        conf = SparkConf()
        conf.set("spark.cleaner.ttl", "10")
        self.sc = SparkContext(master=self.master, appName=self.appName, conf=conf)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)
        self.assertEqual(int(self.ssc.sparkContext._conf.get("spark.cleaner.ttl")), 10)

    def test_from_conf_with_settings(self):
        conf = SparkConf()
        conf.set("spark.cleaner.ttl", "10")
        conf.setMaster(self.master)
        conf.setAppName(self.appName)
        self.ssc = StreamingContext(conf=conf, duration=self.batachDuration)
        self.assertEqual(int(self.ssc.sparkContext._conf.get("spark.cleaner.ttl")), 10)

    def test_stop_only_streaming_context(self):
        self.sc = SparkContext(master=self.master, appName=self.appName)
        self.ssc = StreamingContext(sparkContext=self.sc, duration=self.batachDuration)
        self._addInputStream(self.ssc)
        self.ssc.start()
        self.ssc.stop(False)
        self.assertEqual(len(self.sc.parallelize(range(5), 5).glom().collect()), 5)

    def test_stop_multiple_times(self):
        self.ssc = StreamingContext(master=self.master, appName=self.appName,
                               duration=self.batachDuration)
        self._addInputStream(self.ssc)
        self.ssc.start()
        self.ssc.stop()
        self.ssc.stop()

    def _addInputStream(self, s):
        # Make sure the length of each input is over 3, and that
        # numSlices is 2, due to a deserializer problem in pyspark.streaming
        test_inputs = map(lambda x: range(1, x), range(5, 101))
        test_stream = s._testInputStream(test_inputs, 2)
        # Register fake output operation
        result = list()
        test_stream._test_output(result)
Author: giworld, Project: spark, Lines of code: 99, Source file: tests.py


Note: The pyspark.streaming.context.StreamingContext.stop method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; the source code copyright belongs to the original authors. Please refer to the corresponding project's license before distributing or using the code. Do not reproduce without permission.