本文整理汇总了Python中pyspark.streaming.context.StreamingContext.getOrCreate方法的典型用法代码示例。如果您正苦于以下问题:Python StreamingContext.getOrCreate方法的具体用法?Python StreamingContext.getOrCreate怎么用?Python StreamingContext.getOrCreate使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyspark.streaming.context.StreamingContext
的用法示例。
在下文中一共展示了StreamingContext.getOrCreate方法的4个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_get_or_create
# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者: from pyspark.streaming.context.StreamingContext import getOrCreate [as 别名]
def test_get_or_create(self):
inputd = tempfile.mkdtemp()
outputd = tempfile.mkdtemp() + "/"
def updater(vs, s):
return sum(vs, s or 0)
def setup():
conf = SparkConf().set("spark.default.parallelism", 1)
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, 0.5)
dstream = ssc.textFileStream(inputd).map(lambda x: (x, 1))
wc = dstream.updateStateByKey(updater)
wc.map(lambda x: "%s,%d" % x).saveAsTextFiles(outputd + "test")
wc.checkpoint(0.5)
return ssc
cpd = tempfile.mkdtemp("test_streaming_cps")
ssc = StreamingContext.getOrCreate(cpd, setup)
ssc.start()
def check_output(n):
while not os.listdir(outputd):
time.sleep(0.01)
time.sleep(1) # make sure mtime is larger than the previous one
with open(os.path.join(inputd, str(n)), "w") as f:
f.writelines(["%d\n" % i for i in range(10)])
while True:
p = os.path.join(outputd, max(os.listdir(outputd)))
if "_SUCCESS" not in os.listdir(p):
# not finished
time.sleep(0.01)
continue
ordd = ssc.sparkContext.textFile(p).map(lambda line: line.split(","))
d = ordd.values().map(int).collect()
if not d:
time.sleep(0.01)
continue
self.assertEqual(10, len(d))
s = set(d)
self.assertEqual(1, len(s))
m = s.pop()
if n > m:
continue
self.assertEqual(n, m)
break
check_output(1)
check_output(2)
ssc.stop(True, True)
time.sleep(1)
ssc = StreamingContext.getOrCreate(cpd, setup)
ssc.start()
check_output(3)
ssc.stop(True, True)
示例2: test_transform_function_serializer_failure
# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者: from pyspark.streaming.context.StreamingContext import getOrCreate [as 别名]
def test_transform_function_serializer_failure(self):
inputd = tempfile.mkdtemp()
self.cpd = tempfile.mkdtemp("test_transform_function_serializer_failure")
def setup():
conf = SparkConf().set("spark.default.parallelism", 1)
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, 0.5)
# A function that cannot be serialized
def process(time, rdd):
sc.parallelize(range(1, 10))
ssc.textFileStream(inputd).foreachRDD(process)
return ssc
self.ssc = StreamingContext.getOrCreate(self.cpd, setup)
try:
self.ssc.start()
except:
import traceback
failure = traceback.format_exc()
self.assertTrue(
"It appears that you are attempting to reference SparkContext" in failure)
return
self.fail("using SparkContext in process should fail because it's not Serializable")
示例3: test_get_or_create_and_get_active_or_create
# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者: from pyspark.streaming.context.StreamingContext import getOrCreate [as 别名]
def test_get_or_create_and_get_active_or_create(self):
inputd = tempfile.mkdtemp()
outputd = tempfile.mkdtemp() + "/"
def updater(vs, s):
return sum(vs, s or 0)
def setup():
conf = SparkConf().set("spark.default.parallelism", 1)
sc = SparkContext(conf=conf)
ssc = StreamingContext(sc, 0.5)
dstream = ssc.textFileStream(inputd).map(lambda x: (x, 1))
wc = dstream.updateStateByKey(updater)
wc.map(lambda x: "%s,%d" % x).saveAsTextFiles(outputd + "test")
wc.checkpoint(.5)
self.setupCalled = True
return ssc
cpd = tempfile.mkdtemp("test_streaming_cps")
self.ssc = StreamingContext.getOrCreate(cpd, setup)
self.ssc.start()
def check_output(n):
while not os.listdir(outputd):
time.sleep(0.01)
time.sleep(1) # make sure mtime is larger than the previous one
with open(os.path.join(inputd, str(n)), 'w') as f:
f.writelines(["%d\n" % i for i in range(10)])
while True:
p = os.path.join(outputd, max(os.listdir(outputd)))
if '_SUCCESS' not in os.listdir(p):
# not finished
time.sleep(0.01)
continue
ordd = self.ssc.sparkContext.textFile(p).map(lambda line: line.split(","))
d = ordd.values().map(int).collect()
if not d:
time.sleep(0.01)
continue
self.assertEqual(10, len(d))
s = set(d)
self.assertEqual(1, len(s))
m = s.pop()
if n > m:
continue
self.assertEqual(n, m)
break
check_output(1)
check_output(2)
# Verify the getOrCreate() recovers from checkpoint files
self.ssc.stop(True, True)
time.sleep(1)
self.setupCalled = False
self.ssc = StreamingContext.getOrCreate(cpd, setup)
self.assertFalse(self.setupCalled)
self.ssc.start()
check_output(3)
# Verify the getActiveOrCreate() recovers from checkpoint files
self.ssc.stop(True, True)
time.sleep(1)
self.setupCalled = False
self.ssc = StreamingContext.getActiveOrCreate(cpd, setup)
self.assertFalse(self.setupCalled)
self.ssc.start()
check_output(4)
# Verify that getActiveOrCreate() returns active context
self.setupCalled = False
self.assertEquals(StreamingContext.getActiveOrCreate(cpd, setup), self.ssc)
self.assertFalse(self.setupCalled)
# Verify that getActiveOrCreate() calls setup() in absence of checkpoint files
self.ssc.stop(True, True)
shutil.rmtree(cpd) # delete checkpoint directory
self.setupCalled = False
self.ssc = StreamingContext.getActiveOrCreate(cpd, setup)
self.assertTrue(self.setupCalled)
self.ssc.stop(True, True)
示例4: updateFunc
# 需要导入模块: from pyspark.streaming.context import StreamingContext [as 别名]
# 或者: from pyspark.streaming.context.StreamingContext import getOrCreate [as 别名]
from pyspark.streaming.context import StreamingContext
def updateFunc(newValues, currentValue):
if currentValue is None:
currentValue = 0
return sum(newValues, currentValue)
def createSSC():
# ssc 생성
conf = SparkConf()
sc = SparkContext(master="local[*]", appName="CheckpointSample", conf=conf)
ssc = StreamingContext(sc, 3)
# DStream 생성
ids1 = ssc.socketTextStream("127.0.0.1", 9000)
ids2 = ids1.flatMap(lambda v: v.split(" ")).map(lambda v: (v, 1))
# updateStateByKey
ids2.updateStateByKey(updateFunc).pprint()
# checkpoint
ssc.checkpoint("./checkPoints/checkPointSample/Python")
# return
return ssc
ssc = StreamingContext.getOrCreate("./checkPoints/checkPointSample/Python", createSSC)
ssc.start()
ssc.awaitTerminationOrTimeout()