This article collects typical usage examples of the Python method pyspark.SparkConf.getAll. If you have been wondering what SparkConf.getAll does, how to call it, or what real-world uses look like, the curated examples below may help. You can also read further about its containing class, pyspark.SparkConf.
The following shows 6 code examples of SparkConf.getAll, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
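Before diving into the examples, a quick orientation: SparkConf.getAll() returns the configuration as a list of (key, value) string tuples. A minimal, self-contained sketch (the master and app name below are arbitrary illustration values, not taken from the examples):

from pyspark import SparkConf

conf = SparkConf().setMaster("local[2]").setAppName("getall-demo")
conf.set("spark.executor.memory", "1g")

# getAll() returns a list of (key, value) tuples covering every setting
# explicitly placed on this SparkConf.
for key, value in conf.getAll():
    print(key, "=", value)
# Prints, among others:
#   spark.master = local[2]
#   spark.app.name = getall-demo
#   spark.executor.memory = 1g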
Example 1: OWSparkContext
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
from collections import OrderedDict

from pyspark import SparkConf, SparkContext
from pyspark.sql import HiveContext

# SharedSparkContext, GuiParam, widget and gui come from the surrounding
# Orange add-on project and are assumed to be importable there.

class OWSparkContext(SharedSparkContext, widget.OWWidget):
    priority = 0
    name = "Context"
    description = "Create shared Spark (sc) and Hive (hc) contexts"
    icon = "../icons/spark.png"

    want_main_area = False
    resizing_enabled = True
    conf = None

    def __init__(self):
        super().__init__()

        # The main label of the control's GUI.
        # gui.label(self.controlArea, self, "Spark Context")
        self.conf = SparkConf()
        all_predefined = dict(self.conf.getAll())

        # Create the parameters box.
        box = gui.widgetBox(self.controlArea, "Spark Application", addSpace=True)

        self.gui_parameters = OrderedDict()

        main_parameters = OrderedDict()
        main_parameters['spark.app.name'] = 'OrangeSpark'
        main_parameters['spark.master'] = 'yarn-client'
        main_parameters['spark.executor.instances'] = '8'
        main_parameters['spark.executor.cores'] = '4'
        main_parameters['spark.executor.memory'] = '8g'
        main_parameters['spark.driver.cores'] = '4'
        main_parameters['spark.driver.memory'] = '2g'
        main_parameters['spark.logConf'] = 'false'
        main_parameters['spark.app.id'] = 'dummy'

        # Seed the GUI with the main parameters, preferring any value already
        # present in the SparkConf over the hard-coded default.
        for k, v in main_parameters.items():
            default_value = all_predefined.setdefault(k, v)
            self.gui_parameters[k] = GuiParam(parent_widget=box, label=k,
                                              default_value=default_value)
            all_predefined.pop(k)

        # Expose every remaining predefined property as well.
        for k, v in all_predefined.items():
            self.gui_parameters[k] = GuiParam(parent_widget=box, label=k,
                                              default_value=str(v))

        action_box = gui.widgetBox(box)
        # Action button
        self.create_sc_btn = gui.button(action_box, self, label='Submit',
                                        callback=self.create_context)

    def onDeleteWidget(self):
        if self.sc:
            self.sc.stop()

    def create_context(self):
        for key, parameter in self.gui_parameters.items():
            self.conf.set(key, parameter.get_value())

        self.sc = SparkContext(conf=self.conf)
        self.hc = HiveContext(self.sc)
Example 2: create_sc
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
from pyspark import SparkConf, SparkContext

def create_sc():
    sc_conf = SparkConf()
    sc_conf.setAppName("finance-similarity-app")
    sc_conf.setMaster('spark://10.21.208.21:7077')
    sc_conf.set('spark.executor.memory', '2g')
    sc_conf.set('spark.executor.cores', '4')
    sc_conf.set('spark.cores.max', '40')
    sc_conf.set('spark.logConf', True)
    print(sc_conf.getAll())

    # Note: only one SparkContext may be active per JVM; any previously
    # running context must be stopped before a new one can be created.
    sc = SparkContext(conf=sc_conf)
    return sc
Example 3: main
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
import time

from pyspark import SparkConf, SparkContext

def main():
    # Parse command-line options; parseOptions is defined elsewhere in the script.
    (options, args) = parseOptions()

    if len(args) != 2:
        raise Exception("need an input file and an output path")

    # "local[*]" runs Spark locally with one worker thread per core.
    conf = SparkConf().setAppName("wordCount").setMaster("local[*]")
    sc = SparkContext(conf=conf)

    # Set the number of file partitions / degree of parallelism.
    if options.numPartitions is None:
        # Pick the number of partitions based on the default parallelism,
        # which is related to the number of cores on the machine.
        partFactor = 1  # multiple of the default parallelism to use
        numPartitions = sc.defaultParallelism * partFactor
    else:
        numPartitions = options.numPartitions

    conf = sc.getConf()
    print("conf=" + str(conf.getAll()))
    print("defaultMinPartitions=" + str(sc.defaultMinPartitions))
    print("defaultParallelism=" + str(sc.defaultParallelism))

    inputFileName = args[0]
    outputFileName = args[1]

    # Read the file and count its lines.
    timeStart = time.time()
    lines = sc.textFile(inputFileName, minPartitions=numPartitions)
    counts = lines.count()
    timeEnd = time.time()
    dtRead = timeEnd - timeStart  # time in seconds

    # Write out to a file.
    timeStart = time.time()
    lines.saveAsTextFile(outputFileName)
    timeEnd = time.time()
    dtWrite = timeEnd - timeStart  # time in seconds

    print("read+count time=" + str(dtRead) + " s")
    print("write time=" + str(dtWrite) + " s")
    print("number of lines=" + str(counts))
    print("num Partitions=" + str(lines.getNumPartitions()))
Example 4: SparkConf
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
#!/usr/bin/env python
from pyspark import SparkConf, SparkContext

# Spark options:
# https://spark.apache.org/docs/1.6.1/api/java/org/apache/spark/SparkConf.html
conf = SparkConf().setMaster("local").setAppName("MyApp")
sc = SparkContext(conf=conf)

print(conf.getAll())
Example 5: SparkConf
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
"""
Run from the command line:

spark-submit --master yarn-client --conf key=value --conf someotherkey=someothervalue you_code.py
"""
from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, Row

conf = SparkConf().setAppName("hello-world").setMaster('yarn-client')
conf.set("spark.files.overwrite", "true")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

# Logging through the JVM's log4j.
log4jLogger = sc._jvm.org.apache.log4j
LOG = log4jLogger.LogManager.getLogger("hello.world.spark")
LOG.info("Args = " + str(conf.getAll()))

# Custom properties such as spark.input and spark.output are supplied
# on the command line with --conf.
inputFile = conf.get("spark.input")
outputFile = conf.get("spark.output")

# Strip punctuation, split into words, count each word, then sort by count
# in descending order (swap to (count, word), sort, swap back).
wordcount = sc.textFile(inputFile) \
    .map(lambda line: line.replace("\"", " ").replace("{", " ").replace("}", " ")
                          .replace(".", " ").replace(":", " ")) \
    .flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b) \
    .map(lambda kv: (kv[1], kv[0])) \
    .sortByKey(ascending=False) \
    .map(lambda kv: (kv[1], kv[0]))

df = wordcount.toDF(['word', 'count'])
df.save(path=outputFile, source='json', mode='overwrite')
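Note that df.save(...) is the Spark 1.x DataFrame writer API. On Spark 2.x and later the equivalent call (a sketch assuming the same df and outputFile as above) is:

df.write.mode('overwrite').json(outputFile)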
Example 6: CommonSparkContext
# Required import: from pyspark import SparkConf [as alias]
# or: from pyspark.SparkConf import getAll [as alias]
import atexit
import os

# Singleton, Environment, create_spark_config, get_xframes_home and XRdd
# are defined elsewhere in the xframes package.

class CommonSparkContext(object, metaclass=Singleton):

    def __init__(self):
        """
        Create a spark context.

        The spark configuration is taken from xframes/config.ini and from
        the values set in SparkInitContext.set() if this has been called.
        """
        # Placed here because a module-level import causes an error when
        # used on a spark slave.
        from pyspark import SparkConf, SparkContext, SQLContext, HiveContext

        # This reads from default.ini and then xframes/config.ini, if they exist.
        self._env = Environment.create()
        context = create_spark_config(self._env)
        verbose = self._env.get_config('xframes', 'verbose', 'false').lower() == 'true'
        hdfs_user_name = self._env.get_config('webhdfs', 'user', 'hdfs')
        os.environ['HADOOP_USER_NAME'] = hdfs_user_name
        config_pairs = [(k, v) for k, v in context.items()]
        self._config = SparkConf().setAll(config_pairs)
        if verbose:
            print('Spark Config: {}'.format(config_pairs))

        self._sc = SparkContext(conf=self._config)
        self._sqlc = SQLContext(self._sc)
        self._hivec = HiveContext(self._sc)
        self.zip_path = []
        version = [int(n) for n in self._sc.version.split('.')]
        self.status_tracker = self._sc.statusTracker()
        # applicationId is only available from Spark 1.4.1 on.
        if version >= [1, 4, 1]:
            self.application_id = self._sc.applicationId
        else:
            self.application_id = None

        if verbose:
            print('Spark Version: {}'.format(self._sc.version))
            if self.application_id:
                print('Application Id: {}'.format(self.application_id))

        # On a cluster, ship the xframes library itself to the slaves.
        if not context['spark.master'].startswith('local'):
            zip_path = self.build_zip(get_xframes_home())
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

        trace_flag = self._env.get_config('xframes', 'rdd-trace', 'false').lower() == 'true'
        XRdd.set_trace(trace_flag)
        atexit.register(self.close_context)

    def spark_add_files(self, dirs):
        """
        Adds python files in the given directory or directories.

        Parameters
        ----------
        dirs : str or list(str)
            If a str, the pathname to a directory containing a python module.
            If a list, then it is a list of such directories.

        The python files in each directory are compiled, packed into a zip,
        distributed to each spark slave, and placed in PYTHONPATH.
        This is only done if spark is deployed on a cluster.
        """
        props = self.config()
        if props.get('spark.master', 'local').startswith('local'):
            return
        if isinstance(dirs, str):
            dirs = [dirs]
        for path in dirs:
            zip_path = self.build_zip(path)
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

    def close_context(self):
        if self._sc:
            self._sc.stop()
            self._sc = None
            for zip_path in self.zip_path:
                os.remove(zip_path)

    def config(self):
        """
        Gets the configuration parameters used to initialize the spark context.

        Returns
        -------
        out : dict
            A dict of the properties used to initialize the spark context.
        """
        props = self._config.getAll()
        return {prop[0]: prop[1] for prop in props}

    def env(self):
        """
        Gets the config environment.
#.........the rest of this code has been omitted.........
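A short usage sketch for this class (the import path is an assumption about the xframes package layout, not confirmed by the example above):

from xframes.spark_context import CommonSparkContext  # assumed module path

ctx = CommonSparkContext()   # singleton; creates the SparkContext on first use
print(ctx.config())          # dict built from SparkConf.getAll()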