本文整理汇总了Python中pyspark.SparkContext.getConf方法的典型用法代码示例。如果您正苦于以下问题：Python SparkContext.getConf方法的具体用法？Python SparkContext.getConf怎么用？Python SparkContext.getConf使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyspark.SparkContext的用法示例。
在下文中一共展示了SparkContext.getConf方法的2个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_create_spark_context_first_then_spark_session
# 需要导入模块: from pyspark import SparkConf, SparkContext [as 别名]
# 以及: from pyspark.sql import SparkSession(getConf 是 SparkContext 的实例方法,需通过实例调用 sc.getConf(),不能单独导入)
def test_create_spark_context_first_then_spark_session(self):
    """Build a SparkContext first, then a SparkSession, and compare their configs.

    The context is created with ``key1`` set; the session is then obtained
    through the builder with ``key2`` set.  The assertions check that:

    * the session exposes both ``key1`` and ``key2``;
    * the session's ``sparkContext`` equals the context created here;
    * the context's own conf still has ``key1`` but does not contain ``key2``.

    Both the session and the context are stopped in ``finally`` so a failed
    assertion does not leave them running for later tests.
    """
    context = None
    spark = None
    try:
        base_conf = SparkConf().set("key1", "value1")
        context = SparkContext('local[4]', "SessionBuilderTests", conf=base_conf)
        spark = SparkSession.builder.config("key2", "value2").getOrCreate()

        # What the session sees.
        self.assertEqual(spark.conf.get("key1"), "value1")
        self.assertEqual(spark.conf.get("key2"), "value2")
        self.assertEqual(spark.sparkContext, context)

        # What the pre-existing context sees.
        self.assertFalse(context.getConf().contains("key2"))
        self.assertEqual(context.getConf().get("key1"), "value1")
    finally:
        # Same teardown order as creation in reverse: session, then context.
        if spark is not None:
            spark.stop()
        if context is not None:
            context.stop()
示例2: main
# 需要导入模块: from pyspark import SparkConf, SparkContext [as 别名]
# 注意: getConf 是 SparkContext 的实例方法,需通过实例调用 sc.getConf(),不能单独导入
def main():
    """Count the lines of a text file with Spark and time the read and the write.

    Command-line options and positional arguments come from ``parseOptions()``.
    Exactly two positional arguments are required: the input file name and
    the output path.  The file is loaded as an RDD, counted, and written back
    out with ``saveAsTextFile``; the Spark configuration, the partition
    settings and both timings are printed.

    If ``options.numPartitions`` is not given, the context is started with the
    ``local[*]`` master and the partition count is taken from the running
    context's ``defaultParallelism``.  Otherwise the given value is used both
    for the ``local[N]`` master and as ``minPartitions`` for the read.

    Raises:
        Exception: if the number of positional arguments is not exactly two.
    """
    # parse command line options
    (options, args) = parseOptions()
    if len(args) != 2:
        raise Exception("need an input file and an output path")
    inputFileName, outputFileName = args

    # The default partition count depends on sc.defaultParallelism, which is
    # only available once the context exists.  So in the default case start
    # the context with "local[*]" first and derive numPartitions afterwards,
    # instead of reading ``sc`` before it has been created.
    useDefaultPartitions = options.numPartitions is None
    if useDefaultPartitions:
        master = "local[*]"
    else:
        master = "local[" + str(options.numPartitions) + "]"

    conf = SparkConf().setAppName("wordCount").setMaster(master)
    sc = SparkContext(conf=conf)
    try:
        # set number of file partitions/parallelism
        if useDefaultPartitions:
            # how many times the default parallelism; default parallelism is
            # related to the number of cores on the machine.
            partFactor = 1
            numPartitions = sc.defaultParallelism * partFactor
        else:
            # NOTE(review): passed to textFile(minPartitions=...) unchanged;
            # assumes parseOptions yields an int here - confirm.
            numPartitions = options.numPartitions

        conf = sc.getConf()
        print("conf="+str(conf.getAll()))
        print("defaultMinPartitions="+str(sc.defaultMinPartitions))
        print("defaultParallelism="+str(sc.defaultParallelism))

        # read + count
        timeStart = time.time()
        linesRdd = sc.textFile(inputFileName, minPartitions=numPartitions)
        counts = linesRdd.count()
        timeEnd = time.time()
        dtRead = timeEnd - timeStart  # time in seconds

        # write out to a file
        timeStart = time.time()
        linesRdd.saveAsTextFile(outputFileName)
        timeEnd = time.time()
        dtWrite = timeEnd - timeStart  # time in seconds

        print("read+count time="+str(dtRead)+" s")
        print("write time="+str(dtWrite)+" s")
        print("number of lines="+str(counts))
        print("num Partitions="+str(linesRdd.getNumPartitions()))
    finally:
        # Release the context even if the job fails part-way.
        sc.stop()