

Python SparkConf.getAll Method Code Examples

This article collects typical usage examples of the Python method pyspark.SparkConf.getAll. If you have been wondering what SparkConf.getAll does, how to call it, or what real code that uses it looks like, the hand-picked examples below should help. You can also browse further usage examples for pyspark.SparkConf, the class this method belongs to.


Six code examples of SparkConf.getAll are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the system recommend better Python code examples.
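
Before diving into the examples, here is a minimal, self-contained sketch of what SparkConf.getAll returns: a list of (key, value) string tuples, which is often converted to a dict, as Example 1 and Example 6 below do. The app name and settings here are arbitrary placeholders, not taken from any of the listed projects.

from pyspark import SparkConf

# Build a configuration and read it back as (key, value) pairs.
conf = SparkConf().setAppName("getall-demo").setMaster("local[2]")
conf.set("spark.executor.memory", "1g")

pairs = conf.getAll()      # list of (key, value) tuples, all values as strings
settings = dict(pairs)     # convenient dict view of the same data

for key, value in sorted(settings.items()):
    print("{} = {}".format(key, value))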

Example 1: OWSparkContext

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
class OWSparkContext(SharedSparkContext, widget.OWWidget):
    priority = 0
    name = "Context"
    description = "Create a shared Spark (sc) and Hive (hc) Contexts"
    icon = "../icons/spark.png"

    want_main_area = False
    resizing_enabled = True

    conf = None

    def __init__(self):
        super().__init__()

        # The main label of the Control's GUI.
        # gui.label(self.controlArea, self, "Spark Context")

        self.conf = SparkConf()
        all_predefined = dict(self.conf.getAll())  # settings already present in the environment
        # Create parameters Box.
        box = gui.widgetBox(self.controlArea, "Spark Application", addSpace = True)

        self.gui_parameters = OrderedDict()

        main_parameters = OrderedDict()
        main_parameters['spark.app.name'] = 'OrangeSpark'
        main_parameters['spark.master'] = 'yarn-client'
        main_parameters["spark.executor.instances"] = "8"
        main_parameters["spark.executor.cores"] = "4"
        main_parameters["spark.executor.memory"] = "8g"
        main_parameters["spark.driver.cores"] = "4"
        main_parameters["spark.driver.memory"] = "2g"
        main_parameters["spark.logConf"] = "false"
        main_parameters["spark.app.id"] = "dummy"

        for k, v in main_parameters.items():
            # Prefer a value already set in the environment over the hard-coded default.
            default_value = all_predefined.setdefault(k, v)
            self.gui_parameters[k] = GuiParam(parent_widget=box, label=k, default_value=default_value)
            all_predefined.pop(k)

        for k, v in all_predefined.items():
            self.gui_parameters[k] = GuiParam(parent_widget=box, label=k, default_value=str(v))

        action_box = gui.widgetBox(box)
        # Action Button
        self.create_sc_btn = gui.button(action_box, self, label = 'Submit', callback = self.create_context)

    def onDeleteWidget(self):
        if self.sc:
            self.sc.stop()

    def create_context(self):

        for key, parameter in self.gui_parameters.items():
            self.conf.set(key, parameter.get_value())

        self.sc = SparkContext(conf = self.conf)
        self.hc = HiveContext(self.sc)
Developer: kernc, Project: Orange3-Spark, Lines: 60, Source: spark_context.py

Example 2: create_sc

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
def create_sc():
    sc_conf = SparkConf()
    sc_conf.setAppName("finance-similarity-app")
    sc_conf.setMaster('spark://10.21.208.21:7077')
    sc_conf.set('spark.executor.memory', '2g')
    sc_conf.set('spark.executor.cores', '4')
    sc_conf.set('spark.cores.max', '40')
    sc_conf.set('spark.logConf', True)
    print(sc_conf.getAll())

    sc = None
    try:
        # sc is still None at this point, so stop() raises and the context
        # is created in the except branch below.
        sc.stop()
        sc = SparkContext(conf=sc_conf)
    except Exception:
        sc = SparkContext(conf=sc_conf)

    return sc
Developer: litaotao, Project: Spark-in-Finance-Quantitative-Investing, Lines: 20, Source: finance_similarity.py

Example 3: main

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
def main():
  
  #parse command line options
  (options,args)=parseOptions()
  
  if len(args) != 2:
    raise Exception("need an input file and an output path")

  #set number of file partitions/parallelism
  #sc.defaultParallelism can only be read once the SparkContext exists, so the
  #context is created first (using all local cores) and the partition count is
  #derived from it afterwards
  numThreads="*" if options.numPartitions is None else str(options.numPartitions)
  conf=SparkConf().setAppName("wordCount").setMaster("local["+numThreads+"]")
  sc = SparkContext(conf=conf)
  if options.numPartitions is None:
    partFactor=1 #how many times the default parallelism; default parallelism is
      #related to the number of cores on the machine
    numPartitions=sc.defaultParallelism*partFactor
  else:
    numPartitions=options.numPartitions
  conf=sc.getConf()
  print("conf="+str(conf.getAll()))
  print("defaultMinPartitions="+str(sc.defaultMinPartitions))
  print("defaultParallelism="+str(sc.defaultParallelism))
  
  inputFileName = args[0]
  outputFileName= args[1]
  
  timeStart=time.time()
  file = sc.textFile(inputFileName,minPartitions=numPartitions)
  counts = file.count()
  timeEnd=time.time()
  dtRead=timeEnd-timeStart#time in seconds
  
  #write out to a file
  timeStart=time.time()
  file.saveAsTextFile(outputFileName)
  timeEnd=time.time()
  dtWrite=timeEnd-timeStart#time in seconds
  
  print("read+count time="+str(dtRead)+" s")
  print("write time="+str(dtWrite)+" s")
  print("number of lines="+str(counts))
  print("num Partitions="+str(file.getNumPartitions()))
Developer: cgeroux, Project: big_data_benchmark, Lines: 45, Source: linecount.py

Example 4: SparkConf

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
#!/usr/bin/env python
from pyspark import SparkConf, SparkContext

# Spark Options:
# https://spark.apache.org/docs/1.6.1/api/java/org/apache/spark/SparkConf.html
conf = SparkConf().setMaster("local").setAppName("MyApp")
sc = SparkContext(conf=conf)

print(conf.getAll())
Developer: colinbjohnson, Project: snippets, Lines: 11, Source: spark_examine_context.py

Example 5: SparkConf

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
"""
run from the command line:
spark-submit --master yarn-client --conf key=value --conf someotherkey=someothervalue your_code.py
"""

from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext, Row

conf = SparkConf().setAppName("hello-world").setMaster('yarn-client')
conf.set("spark.files.overwrite","true")
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

#log
log4jLogger = sc._jvm.org.apache.log4j
LOG = log4jLogger.LogManager.getLogger("hello.world.spark")
LOG.info("Args = " + conf.getAll().__str__())

inputFile = conf.get("spark.input")
outputFile = conf.get("spark.output")

wordcount = sc.textFile(inputFile).map(lambda line: line.replace("\"", " ").replace("{", " ").replace("}", " ").replace(".", " ").replace(":", " ")) \
    .flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b) \
    .map(lambda kv: (kv[1], kv[0])) \
    .sortByKey(ascending=False) \
    .map(lambda kv: (kv[1], kv[0]))

df = wordcount.toDF(['word', 'count'])
df.save(path=outputFile, source='json', mode='overwrite')
Developer: jeremieSimon, Project: hello-spark, Lines: 32, Source: hello_world.py
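
As a usage note for Example 5: the docstring at the top of the snippet passes application parameters through --conf, and the body reads them back with conf.get("spark.input") and conf.get("spark.output"). A plausible invocation could therefore look like the sketch below; the HDFS paths are placeholders and are not taken from the original project.

spark-submit --master yarn-client \
    --conf spark.input=hdfs:///tmp/hello-spark/input.json \
    --conf spark.output=hdfs:///tmp/hello-spark/wordcount_out \
    hello_world.py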

Example 6: CommonSparkContext

# Required imports: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import getAll [as alias]
class CommonSparkContext(object):
    # Python 2 metaclass hook; under Python 3 this would instead be written as
    # class CommonSparkContext(metaclass=Singleton).
    __metaclass__ = Singleton

    def __init__(self):
        """
        Create a spark context.

        The spark configuration is taken from xframes/config.ini and from
        the values set in SparkInitContext.set() if this has been called.
        """

        # This is placed here because otherwise it causes an error when used in a spark slave.
        from pyspark import SparkConf, SparkContext, SQLContext, HiveContext

        # This reads from default.ini and then xframes/config.ini
        # if they exist.
        self._env = Environment.create()
        context = create_spark_config(self._env)
        verbose = self._env.get_config("xframes", "verbose", "false").lower() == "true"
        hdfs_user_name = self._env.get_config("webhdfs", "user", "hdfs")
        os.environ["HADOOP_USER_NAME"] = hdfs_user_name
        config_pairs = [(k, v) for k, v in context.items()]
        self._config = SparkConf().setAll(config_pairs)
        if verbose:
            print "Spark Config: {}".format(config_pairs)

        self._sc = SparkContext(conf=self._config)
        self._sqlc = SQLContext(self._sc)
        self._hivec = HiveContext(self._sc)
        self.zip_path = []
        version = [int(n) for n in self._sc.version.split(".")]
        self.status_tracker = self._sc.statusTracker()
        if version >= [1, 4, 1]:
            self.application_id = self._sc.applicationId
        else:
            self.application_id = None

        if verbose:
            print "Spark Version: {}".format(self._sc.version)
            if self.application_id:
                print "Application Id: {}".format(self.application_id)

        if not context["spark.master"].startswith("local"):
            zip_path = self.build_zip(get_xframes_home())
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

        trace_flag = self._env.get_config("xframes", "rdd-trace", "false").lower() == "true"
        XRdd.set_trace(trace_flag)
        atexit.register(self.close_context)

    def spark_add_files(self, dirs):
        """
        Adds python files in the given directory or directories.

        Parameters
        ----------
        dirs: str or list(str)
            If a str, the pathname to a directory containing a python module.
            If a list, then it is a list of such directories.

            The python files in each directory are compiled, packed into a zip, distributed to each
            spark slave, and placed in PYTHONPATH.

            This is only done if spark is deployed on a cluster.
        """
        props = self.config()
        if props.get("spark.master", "local").startswith("local"):
            return
        if isinstance(dirs, str):
            dirs = [dirs]
        for path in dirs:
            zip_path = self.build_zip(path)
            if zip_path:
                self._sc.addPyFile(zip_path)
                self.zip_path.append(zip_path)

    def close_context(self):
        if self._sc:
            self._sc.stop()
            self._sc = None
            for zip_path in self.zip_path:
                os.remove(zip_path)

    def config(self):
        """
        Gets the configuration parameters used to initialize the spark context.

        Returns
        -------
        out : dict
            A dict of the properties used to initialize the spark context.
        """
        props = self._config.getAll()
        return {prop[0]: prop[1] for prop in props}

    def env(self):
        """
        Gets the config environment.
#......... remainder of this code omitted .........
Developer: Atigeo, Project: xpatterns-xframe, Lines: 103, Source: spark_context.py


Note: The pyspark.SparkConf.getAll method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by many developers; copyright of the source code remains with the original authors, and redistribution or use should follow the corresponding project's license. Please do not reproduce without permission.