

Python SparkConf.setAppName Method Code Examples

This article collects typical usage examples of the pyspark.SparkConf.setAppName method in Python. If you are wondering what exactly SparkConf.setAppName does, how to call it, or how it is used in practice, the selected code examples below may help. You can also explore further usage examples of pyspark.SparkConf, the class this method belongs to.


The following presents 15 code examples of the SparkConf.setAppName method, sorted by popularity by default. You can upvote the examples you like or find useful; your ratings help the system recommend better Python code examples.
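Before the collected examples, here is a minimal sketch of the common pattern they all share: create a SparkConf, set the application name (and usually the master URL), then pass the configuration to a SparkContext. The app name "my-app" and the local master URL below are placeholders, not taken from any of the projects that follow.

from pyspark import SparkConf, SparkContext

conf = SparkConf()
conf.setAppName("my-app")     # name shown in the Spark web UI and logs
conf.setMaster("local[*]")    # placeholder master URL: run locally on all available cores
sc = SparkContext(conf=conf)

# ... run jobs against sc ...

sc.stop()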

Example 1: spark_config

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
    def spark_config(self):
        if self._spark_config is None:
            os.environ['SPARK_SUBMIT_CLASSPATH'] = ','.join(self.spex_conf.spark_config.jars)

            conf = SparkConf()
            conf.setAppName(self.spex_conf.spark_config.name)
            conf.setMaster(self.spex_conf.spark_config.master)

            conf.set('spark.rdd.compress', 'true')
            conf.set('spark.io.compression.codec', 'lz4')
            conf.set('spark.mesos.coarse',
                     'true' if self.spex_conf.spark_config.coarse_mode else 'false')

            # TODO - Setup all the other cruft as needed
            #conf.set('spark.executor.memory', '4g')
            #conf.set('spark.cores.max', '16')
            #conf.set('spark.task.cpus', '6')

            # TODO - bind port for spark web ui

            self._spark_config = conf

        config = self._spark_config

        # These are always set; if someone changes them, we simply set them back
        config.set('spark.executor.uri', self.artifact_resolver(self.spex_conf.spark_distro))
        config.setExecutorEnv(key='PYSPARK_PYTHON', value='./%s daemon' % self.spex_conf.spex_name)
        return config
Author: GregBowyer, Project: spex, Lines: 30, Source: context.py

Example 2: main

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def main():
    """
    Main entry point of the application
    """

    # Create spark configuration and spark context
    include_path = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), 'preprocessing.py'))
    conf = SparkConf()
    conf.set('spark.executor.memory', '1500m')
    conf.setAppName("Generating predictions")
    sc = SparkContext(conf=conf, pyFiles=[include_path])

    # Set S3 configuration
    sc._jsc.hadoopConfiguration().set("fs.s3n.awsAccessKeyId", os.environ['AWS_ACCESS_KEY'])
    sc._jsc.hadoopConfiguration().set("fs.s3n.awsSecretAccessKey", os.environ['AWS_SECRET_KEY'])

    # Single-pass predictions
    fast_predict(sc, file_input="s3n://twitter-stream-data/twitter-*",
                 file_output="s3n://twitter-stream-predictions/final",
                 sports_model="PyTwitterNews/models/sports.model",
                 politics_model="PyTwitterNews/models/politics.model",
                 technology_model="PyTwitterNews/models/technology.model")

    # Stop application
    sc.stop()
Author: alialavia, Project: TwitterNews, Lines: 27, Source: predict.py

Example 3: __init__

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
class SparkContextFactory:
  def __init__(self):
    # On Windows the environment variables were not picked up, so they were set manually:
    ##os.environ["SPARK_HOME"] = "C:\Spark"
    ##os.environ["HADOOP_CONF_DIR"] = "C:\hdp\bin"
    ##sys.path.append("C:\Spark\python")
    ##sys.path.append("C:\Spark\bin")

    # specify spark home
    os.environ["SPARK_HOME"] = "/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/spark"
    # specify pyspark path so its libraries can be accessed by this application
    sys.path.append("/opt/cloudera/parcels/CDH-5.4.4-1.cdh5.4.4.p0.4/lib/spark/python")
    from pyspark import SparkContext, SparkConf
    from pyspark.sql import SQLContext

    self.conf = SparkConf().setMaster("yarn-client")
    self.conf.setAppName("MrT")
    self.conf.set("spark.executor.memory", "5g")
    self.conf.set("spark.driver.memory", "10g")

    self.sc = SparkContext(conf = self.conf, pyFiles =
    ["ComputeCovHistory.py", "go.py", "risk_DSconvert.py", "ewstats.py", "ewstatsRDD.py", "ewstatswrap.py"])

    """
    toDF method is a monkey patch executed inside SQLContext constructor
    so to be able to use it you have to create a SQLContext first
    """
    self.sqlContextInstance = SQLContext(self.sc)


  def disconnect(self):
    self.sc.stop()
Author: howardx, Project: pyspark, Lines: 35, Source: risk_SparkContextFactory.py
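As the docstring in Example 3 notes, RDD.toDF only becomes available after a SQLContext has been constructed, because the constructor monkey-patches the method onto RDDs. A minimal sketch of that order of operations, using a placeholder app name and made-up sample data:

from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

conf = SparkConf().setAppName("toDF-demo").setMaster("local[*]")  # placeholder values
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)  # constructing the SQLContext patches toDF onto RDDs

# Only after the SQLContext exists can an RDD be converted to a DataFrame.
df = sc.parallelize([(1, "alice"), (2, "bob")]).toDF(["id", "name"])
df.show()

sc.stop()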

Example 4: main

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def main():
    # Setting the cluster configuration parameters
    conf = SparkConf()
    conf.setMaster("spark://localhost:7077")
    conf.setAppName("Tweet App")
    conf.set("spark.executor.memory", "3g")
    conf.set("spark.driver.memory", "4g")

    # Creating a Spark Context with conf file
    sc = SparkContext(conf=conf)

    # Creating an SQL context to perform SQL queries
    sqlContext = SQLContext(sc)

    # Define the data path
    curr_path = os.path.dirname(os.path.abspath(__file__))
    json_name = "out.json"

    json_file_path = os.path.join(curr_path +
                                  "/../Spark_Jobs/data/",
                                  json_name)

    parquet_file_path = createSQLContext(json_file_path, sqlContext)
    print(parquet_file_path)

    # Read from parquet file
    parquetFile = sqlContext.read.parquet(parquet_file_path)
    parquetFile.registerTempTable("tweets")
    counter = sqlContext.sql("SELECT count(*) as cnt FROM tweets")
    print("============= Count =================")
    print("Count:: " + str(counter.collect()[0].cnt))
Author: alt-code, Project: AutoSpark, Lines: 33, Source: tweet_scanner.py

Example 5: configureSpark

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def configureSpark():
	conf = SparkConf()
	conf.setMaster("local")
	conf.setAppName("Apache Spark Alarm Parser")
	conf.set("spark.executor.memory", "1g")
	sc = SparkContext(conf = conf)
	return sc
Author: ChinmaySKulkarni, Project: Alarm_Tracker, Lines: 9, Source: spark_parser.py

Example 6: __connected_yarn_spark_cluster

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
    def __connected_yarn_spark_cluster(self, pilotcompute_description):

        number_cores = 1
        if "number_cores" in pilotcompute_description:
            number_cores = int(pilotcompute_description["number_cores"])

        number_of_processes = 1
        if "number_of_processes" in pilotcompute_description:
            number_of_processes = int(pilotcompute_description["number_of_processes"])

        executor_memory = "1g"
        if "physical_memory_per_process" in pilotcompute_description:
            executor_memory = pilotcompute_description["physical_memory_per_process"]

        conf = SparkConf()
        conf.set("spark.num.executors", str(number_of_processes))
        conf.set("spark.executor.instances", str(number_of_processes))
        conf.set("spark.executor.memory", executor_memory)
        conf.set("spark.executor.cores", number_cores)
        if pilotcompute_description is not None:
            for key in pilotcompute_description.keys():
                if key.startswith("spark"):
                    conf.set(key, pilotcompute_description[key])
        conf.setAppName("Pilot-Spark")
        conf.setMaster("yarn-client")
        sc = SparkContext(conf=conf)
        sqlCtx = SQLContext(sc)
        pilot = PilotCompute(spark_context=sc, spark_sql_context=sqlCtx)
        return pilot
Author: drelu, Project: SAGA-Hadoop, Lines: 31, Source: __init__.py

Example 7: sparkconfig

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def sparkconfig():
    # spark configuration options

    # conf = SparkConf()
    # conf.setMaster("spark://3.168.100.58:7077") # uncomment for standalone cluster
    # conf.setMaster("local")   # uncomment for local execution
    # conf.setAppName("demo_chain")
    # conf.set("spark.executor.memory", "2g")
    # conf.set("spark.default.parallelism", 56)  # 48)
    # conf.set("spark.sql.inMemoryColumnarStorage.compressed","true")
    # conf.set("sql.inMemoryColumnarStorage.batchSize",2000)

    # AMAZON AWS EMR
    conf = SparkConf()
    conf.setMaster("yarn-client")	#client gets output to terminals
    #conf.setMaster("yarn-cluster")	# this seems to runf aster but can't confirm
    conf.set("spark.default.parallelism",648)
    conf.setAppName("spark_markov_chain")
    conf.set("spark.executor.memory", "22g")
    conf.set("spark.executor.instances",9)
    conf.set("spark.executor.cores",9)
    conf.set("spark.yarn.executor.memoryOverhead",800)
    conf.set("spark.rdd.compress","True")
    conf.set("spark.shuffle.consolidateFiles","True")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    return conf
Author: namebrandon, Project: Sparkov, Lines: 29, Source: Sparkov_AWS.py

Example 8: stackexchange_xml_spark_job

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def stackexchange_xml_spark_job():
    server = bluebook_conf.HDFS_FQDN
    conf = SparkConf()

    xml_file_address = "hdfs://" + server + "/" +\
                       bluebook_conf.STACKEXCHANGE_XML_FOLDER_NAME +\
                       bluebook_conf.STACKEXCHANGE_XML_FILE_NAME
                         
    json_ques_folder_address = "hdfs://" + server + "/" +\
                               bluebook_conf.STACKEXCHANGE_JSON_QUES_FOLDER_NAME
    json_ans_folder_address = "hdfs://" + server + "/" +\
                              bluebook_conf.STACKEXCHANGE_JSON_ANS_FOLDER_NAME
        
    conf.setAppName('stackexchange_xml_spark_job')
    spark_context = SparkContext(conf=conf)
        
    file = spark_context.textFile(xml_file_address)

    # Question and answer records are stored separately depending on their 'posttypeid'
    # Ques -> posttypeid == 1
    # Ans -> posttypeid == 2
    ques = file.map(stackexchange_xml_mapper)\
               .filter(lambda dic: 'posttypeid' in dic.keys())\
               .filter(lambda dic: dic['posttypeid'] == '1')\
               .map(lambda d: jsoner(d))
    ans = file.map(stackexchange_xml_mapper)\
               .filter(lambda dic: 'posttypeid' in dic.keys())\
               .filter(lambda dic: dic['posttypeid'] == '2')\
               .map(lambda d: jsoner(d))
    ques.saveAsTextFile(json_ques_folder_address)
    ans.saveAsTextFile(json_ans_folder_address)
Author: nave91, Project: rebot, Lines: 33, Source: parser_app.py

Example 9: main

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def main(args):

    if len(args) < 2:
        sys.exit(1)

    # Setting the cluster configuration parameters
    spark_master = args[0]
    spark_data_file_name = args[1]
    file_path = CURR_DIR + "/" + spark_data_file_name

    conf = SparkConf()
    conf.setMaster(spark_master)
    conf.setAppName("Log Scanner")

    # Creating a Spark Context with conf file
    sc = SparkContext(conf=conf)

    txt_logs = sc.textFile(file_path).filter(lambda line: check(line))
    access_logs = txt_logs.map(lambda line: AccessLog(line))

    #  Getting response_codes from log objects and caching it
    response_codes = access_logs.map(lambda log: log.get_status()).cache()
    log_count = response_codes.count()
    print("Total Resonse Codes: " + str(log_count))
    cnt = response_codes.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
    response200 = cnt.filter(lambda x: x[0] == "200").map(lambda x: x[1]).collect()
    print("###########################")
    print("##  Success Rate : " + str(int(response200[0])*100/log_count) + " %  ##")
    print("###########################")
Author: alt-code, Project: AutoSpark, Lines: 31, Source: log_scanner.py

Example 10: read_conf

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def read_conf():
    """
    Setting up spark contexts
    """
    conf = SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("Testing")
    return conf
Author: Ather23, Project: machine_learning, Lines: 10, Source: testing_spark.py

Example 11: getSparkConf

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
 def getSparkConf(self):
     conf = SparkConf()
     conf.setAppName(self.PROJECT_NAME)
     conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
     conf.set("spark.cleaner.ttl", self.TTL)
     # Elasticsearch connector settings
     conf.set("es.index.auto.create", "true")
     conf.set("es.nodes", self.ES_NODES)
     return conf
Author: sekaiamber, Project: KSE-Sample, Lines: 11, Source: submit.py

Example 12: init_spark_context

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def init_spark_context():
    # load spark context
    conf = SparkConf().setAppName("event-contour-server")
    conf.setMaster("local[4]")
    conf.setAppName("reduce")
    conf.set("spark.executor.memory", "4g")
    # IMPORTANT: pass additional Python modules to each worker
    sc = SparkContext(conf=conf, pyFiles=['app.py', 'contourGenerator.py','EventParallelize.py'])
 
    return sc
Author: debjyoti385, Project: quakeanalysis, Lines: 12, Source: server.py

Example 13: get_sc

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
def get_sc():
    """ Defines and returns a SparkContext from some configurations via SparkConf. """
    conf = SparkConf()
    conf.setAppName("Jon's PySpark")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    conf.set("spark.kryroserializer.buffer.mb", "256")
    conf.set("spark.akka.frameSize", "500")
    conf.set("spark.akka.askTimeout", "30")
    
    return SparkContext(conf=conf)
Author: lowcloudnine, Project: singularity-spark, Lines: 12, Source: character_count.py

Example 14: init

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
 def init(self):
     os.environ["SPARK_HOME"] = "/Users/abhinavrungta/Desktop/setups/spark-1.5.2"
     # os.environ['AWS_ACCESS_KEY_ID'] = <YOURKEY>
     # os.environ['AWS_SECRET_ACCESS_KEY'] = <YOURKEY>
     conf = SparkConf()
     conf.setMaster("local[10]")
     conf.setAppName("PySparkShell")
     conf.set("spark.executor.memory", "2g")
     conf.set("spark.driver.memory", "1g")
     self.sc = SparkContext(conf=conf)
     self.sqlContext = SQLContext(self.sc)        
Author: abhinavrungta, Project: SNC-WEB, Lines: 13, Source: smalldata.py

Example 15: __init__

# Required module: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setAppName [as alias]
    def __init__(self, master, name):
        self.name = name
        self.master = master

        print "init spark ..."
        os.environ["HADOOP_HOME"] = r"D:\code\wqr\hadoop-common-2.2.0-bin"
        conf = SparkConf()
        conf.setMaster(self.master)
        conf.setAppName(self.name)

        self.sc = SparkContext(conf=conf)
Author: wuzhongdehua, Project: my_pyspark, Lines: 13, Source: Demo.py


Note: The pyspark.SparkConf.setAppName method examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The code snippets were selected from open-source projects contributed by various developers; copyright of the source code belongs to the original authors. Please refer to each project's license before distributing or reusing the code, and do not republish without permission.