当前位置: 首页>>代码示例>>Python>>正文


Python StreamingContext.getOrCreate方法代码示例

本文整理汇总了Python中pyspark.streaming.context.StreamingContext.getOrCreate方法的典型用法代码示例。如果您正苦于以下问题：Python StreamingContext.getOrCreate方法的具体用法？Python StreamingContext.getOrCreate怎么用？Python StreamingContext.getOrCreate使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyspark.streaming.context.StreamingContext的用法示例。


在下文中一共展示了StreamingContext.getOrCreate方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_get_or_create

# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者直接调用类方法: StreamingContext.getOrCreate(checkpointPath, setupFunc)
    def test_get_or_create(self):
        """Exercise StreamingContext.getOrCreate before and after a restart.

        A running per-line count over text files dropped into a temporary
        directory is started through ``getOrCreate``, fed two files and
        stopped.  A context is then obtained again through ``getOrCreate``
        with the same checkpoint directory and fed a third file.  Every input
        file holds the lines "0".."9", so after ``n`` files each key is
        expected to carry the count ``n``.
        """
        inputd = tempfile.mkdtemp()
        outputd = tempfile.mkdtemp() + "/"

        def updater(vs, s):
            # Fold the new values into the previous state; ``s or 0`` starts
            # from 0 when no previous state is supplied.
            return sum(vs, s or 0)

        def setup():
            # Factory passed to getOrCreate: builds the context and defines
            # the whole streaming graph.
            conf = SparkConf().set("spark.default.parallelism", 1)
            sc = SparkContext(conf=conf)
            ssc = StreamingContext(sc, 0.5)
            dstream = ssc.textFileStream(inputd).map(lambda x: (x, 1))
            wc = dstream.updateStateByKey(updater)
            # Each batch is saved as "<key>,<count>" lines under outputd.
            wc.map(lambda x: "%s,%d" % x).saveAsTextFiles(outputd + "test")
            wc.checkpoint(0.5)
            return ssc

        def check_output(n):
            # Wait until at least one batch has produced an output entry.
            while not os.listdir(outputd):
                time.sleep(0.01)
            time.sleep(1)  # make sure mtime is larger than the previous one
            with open(os.path.join(inputd, str(n)), "w") as f:
                f.writelines(["%d\n" % i for i in range(10)])

            while True:
                # Inspect the output entry with the greatest name.
                p = os.path.join(outputd, max(os.listdir(outputd)))
                if "_SUCCESS" not in os.listdir(p):
                    # not finished
                    time.sleep(0.01)
                    continue
                ordd = ssc.sparkContext.textFile(p).map(lambda line: line.split(","))
                d = ordd.values().map(int).collect()
                if not d:
                    time.sleep(0.01)
                    continue
                self.assertEqual(10, len(d))
                s = set(d)
                self.assertEqual(1, len(s))
                m = s.pop()
                if n > m:
                    # This batch still shows an older count; keep polling.
                    continue
                self.assertEqual(n, m)
                break

        cpd = tempfile.mkdtemp("test_streaming_cps")
        ssc = StreamingContext.getOrCreate(cpd, setup)
        try:
            ssc.start()
            check_output(1)
            check_output(2)
        finally:
            # Stop the streaming context and its SparkContext even when a
            # check fails, so the running context is not leaked.
            ssc.stop(True, True)

        time.sleep(1)
        ssc = StreamingContext.getOrCreate(cpd, setup)
        try:
            ssc.start()
            check_output(3)
        finally:
            ssc.stop(True, True)
开发者ID:LakeCarrot,项目名称:EC2_Initializing,代码行数:59,代码来源:tests.py

示例2: test_transform_function_serializer_failure

# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者直接调用类方法: StreamingContext.getOrCreate(checkpointPath, setupFunc)
    def test_transform_function_serializer_failure(self):
        """Expect start() to fail when a foreachRDD function captures the SparkContext.

        The ``process`` callback closes over ``sc``.  The test passes only if
        ``start()`` raises and the formatted traceback contains the
        "attempting to reference SparkContext" message; if ``start()``
        succeeds the test fails explicitly.
        """
        inputd = tempfile.mkdtemp()
        self.cpd = tempfile.mkdtemp("test_transform_function_serializer_failure")

        def setup():
            conf = SparkConf().set("spark.default.parallelism", 1)
            sc = SparkContext(conf=conf)
            ssc = StreamingContext(sc, 0.5)

            # A function that cannot be serialized
            def process(time, rdd):
                sc.parallelize(range(1, 10))

            ssc.textFileStream(inputd).foreachRDD(process)
            return ssc

        self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
        try:
            self.ssc.start()
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt/SystemExit are not swallowed by the test.
            import traceback
            failure = traceback.format_exc()
            # assertIn reports the actual traceback text when the check fails.
            self.assertIn(
                "It appears that you are attempting to reference SparkContext", failure)
            return

        self.fail("using SparkContext in process should fail because it's not Serializable")
开发者ID:ahnqirage,项目名称:spark,代码行数:29,代码来源:tests.py

示例3: test_get_or_create_and_get_active_or_create

# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者直接调用类方法: StreamingContext.getOrCreate(checkpointPath, setupFunc)
    def test_get_or_create_and_get_active_or_create(self):
        """Exercise getOrCreate() and getActiveOrCreate() across restarts.

        A running per-line count over text files dropped into a temporary
        directory is started, stopped and re-obtained several times.  The
        ``setupCalled`` flag, set only inside ``setup``, records whether a
        call built a new context through ``setup``.  Every input file holds
        the lines "0".."9", so after ``n`` files each key is expected to
        carry the count ``n``.
        """
        inputd = tempfile.mkdtemp()
        outputd = tempfile.mkdtemp() + "/"

        def updater(vs, s):
            # Fold the new values into the previous state; ``s or 0`` starts
            # from 0 when no previous state is supplied.
            return sum(vs, s or 0)

        def setup():
            # Factory passed to getOrCreate/getActiveOrCreate: builds the
            # context, defines the streaming graph and records that it ran.
            conf = SparkConf().set("spark.default.parallelism", 1)
            sc = SparkContext(conf=conf)
            ssc = StreamingContext(sc, 0.5)
            dstream = ssc.textFileStream(inputd).map(lambda x: (x, 1))
            wc = dstream.updateStateByKey(updater)
            # Each batch is saved as "<key>,<count>" lines under outputd.
            wc.map(lambda x: "%s,%d" % x).saveAsTextFiles(outputd + "test")
            wc.checkpoint(.5)
            self.setupCalled = True
            return ssc

        cpd = tempfile.mkdtemp("test_streaming_cps")
        self.ssc = StreamingContext.getOrCreate(cpd, setup)
        self.ssc.start()

        def check_output(n):
            # Wait until at least one batch has produced an output entry.
            while not os.listdir(outputd):
                time.sleep(0.01)
            time.sleep(1)  # make sure mtime is larger than the previous one
            with open(os.path.join(inputd, str(n)), 'w') as f:
                f.writelines(["%d\n" % i for i in range(10)])

            while True:
                # Inspect the output entry with the greatest name.
                p = os.path.join(outputd, max(os.listdir(outputd)))
                if '_SUCCESS' not in os.listdir(p):
                    # not finished
                    time.sleep(0.01)
                    continue
                ordd = self.ssc.sparkContext.textFile(p).map(lambda line: line.split(","))
                d = ordd.values().map(int).collect()
                if not d:
                    time.sleep(0.01)
                    continue
                self.assertEqual(10, len(d))
                s = set(d)
                self.assertEqual(1, len(s))
                m = s.pop()
                if n > m:
                    # This batch still shows an older count; keep polling.
                    continue
                self.assertEqual(n, m)
                break

        check_output(1)
        check_output(2)

        # Verify the getOrCreate() recovers from checkpoint files
        self.ssc.stop(True, True)
        time.sleep(1)
        self.setupCalled = False
        self.ssc = StreamingContext.getOrCreate(cpd, setup)
        self.assertFalse(self.setupCalled)
        self.ssc.start()
        check_output(3)

        # Verify the getActiveOrCreate() recovers from checkpoint files
        self.ssc.stop(True, True)
        time.sleep(1)
        self.setupCalled = False
        self.ssc = StreamingContext.getActiveOrCreate(cpd, setup)
        self.assertFalse(self.setupCalled)
        self.ssc.start()
        check_output(4)

        # Verify that getActiveOrCreate() returns active context
        self.setupCalled = False
        # assertEqual, not the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(StreamingContext.getActiveOrCreate(cpd, setup), self.ssc)
        self.assertFalse(self.setupCalled)

        # Verify that getActiveOrCreate() calls setup() in absence of checkpoint files
        self.ssc.stop(True, True)
        shutil.rmtree(cpd)  # delete checkpoint directory
        self.setupCalled = False
        self.ssc = StreamingContext.getActiveOrCreate(cpd, setup)
        self.assertTrue(self.setupCalled)
        self.ssc.stop(True, True)
开发者ID:anitatailor,项目名称:spark,代码行数:84,代码来源:tests.py

示例4: updateFunc

# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者直接调用类方法: StreamingContext.getOrCreate(checkpointPath, setupFunc)
from pyspark.streaming.context import StreamingContext


def updateFunc(newValues, currentValue):
    """Return the running total: the prior state plus all new values.

    Args:
        newValues: Iterable of values to add in.
        currentValue: Previous total, or None when there is none yet
            (treated as 0).
    """
    starting_total = 0 if currentValue is None else currentValue
    return sum(newValues, starting_total)


def createSSC():
    """Build the StreamingContext for the checkpointed word-count sample.

    Creates a SparkContext (master ``local[*]``, app name
    ``CheckpointSample``) and a StreamingContext with a batch duration of 3,
    reads text from a socket at 127.0.0.1:9000, splits each line on spaces,
    keeps a per-token running count with ``updateStateByKey`` and prints it,
    then sets the checkpoint directory.

    Returns:
        The configured StreamingContext; it is not started here.
    """
    # The module-level imports only bring in StreamingContext, so import the
    # remaining pyspark names here to avoid a NameError when this runs.
    from pyspark import SparkConf, SparkContext

    # Create the SparkContext and the StreamingContext
    conf = SparkConf()
    sc = SparkContext(master="local[*]", appName="CheckpointSample", conf=conf)
    ssc = StreamingContext(sc, 3)

    # Create the DStreams: raw lines, then (token, 1) pairs
    ids1 = ssc.socketTextStream("127.0.0.1", 9000)
    ids2 = ids1.flatMap(lambda v: v.split(" ")).map(lambda v: (v, 1))

    # Maintain the running count per token and print each batch
    ids2.updateStateByKey(updateFunc).pprint()

    # Set the checkpoint directory
    ssc.checkpoint("./checkPoints/checkPointSample/Python")

    return ssc

# Obtain the context for this checkpoint directory, passing createSSC as the
# setup function, then start the job.
ssc = StreamingContext.getOrCreate("./checkPoints/checkPointSample/Python", createSSC)
ssc.start()
# awaitTerminationOrTimeout() requires a timeout argument and raises TypeError
# when called without one; awaitTermination() waits with no timeout.
ssc.awaitTermination()
开发者ID:oopchoi,项目名称:spark,代码行数:33,代码来源:checkpoint_sample.py


注:本文中的pyspark.streaming.context.StreamingContext.getOrCreate方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。