

Python SparkConf.setMaster Method Code Examples

This article collects typical usage examples of the pyspark.SparkConf.setMaster method in Python. If you are wondering what SparkConf.setMaster does, how to call it, or what real-world uses look like, the hand-picked examples below should help. You can also explore further usage examples of the enclosing class, pyspark.SparkConf.


The following presents 15 code examples of SparkConf.setMaster, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
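
Before looking at the individual examples, here is a minimal sketch of the typical pattern, not taken from any of the projects below: build a SparkConf, set the application name and the master URL, then hand the configuration to a SparkContext. The application name and the "local[*]" master URL are placeholders; substitute your own cluster URL such as "spark://host:7077" or "yarn".

from pyspark import SparkConf, SparkContext

# Build a configuration, name the application, and point it at a master.
conf = SparkConf()
conf.setAppName("MyApp")        # placeholder application name
conf.setMaster("local[*]")      # placeholder master URL; use your cluster's URL in practice

# Hand the finished configuration to a SparkContext.
sc = SparkContext(conf=conf)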

Example 1: main

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def main():
    parser = argparse.ArgumentParser(
        description='process some log messages, storing them and signaling '
                    'a rest server')
    parser.add_argument('--mongo', help='the mongodb url',
                        required=True)
    parser.add_argument('--rest', help='the rest endpoint to signal',
                        required=True)
    parser.add_argument('--port', help='the port to receive from '
                        '(default: 1984)',
                        default=1984, type=int)
    parser.add_argument('--appname', help='the name of the spark application '
                        '(default: SparkharaLogCounter)',
                        default='SparkharaLogCounter')
    parser.add_argument('--master',
                        help='the master url for the spark cluster')
    parser.add_argument('--socket',
                        help='the socket to attach for streaming text data '
                        '(default: caravan-pathfinder)',
                        default='caravan-pathfinder')
    args = parser.parse_args()
    mongo_url = args.mongo
    rest_url = args.rest

    sconf = SparkConf().setAppName(args.appname)
    if args.master:
        sconf.setMaster(args.master)
    sc = SparkContext(conf=sconf)
    ssc = StreamingContext(sc, 1)

    lines = ssc.socketTextStream(args.socket, args.port)
    lines.foreachRDD(lambda rdd: process_generic(rdd, mongo_url, rest_url))

    ssc.start()
    ssc.awaitTermination()
Developer: mattf, Project: sparkhara-sources, Lines: 37, Source: caravan_master.py

Example 2: main

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def main(args):

    if len(args) < 2:
        sys.exit(1)

    # Setting the cluster configuration parameters
    spark_master = args[0]
    spark_data_file_name = args[1]
    file_path = CURR_DIR + "/" + spark_data_file_name

    conf = SparkConf()
    conf.setMaster(spark_master)
    conf.setAppName("Log Scanner")

    # Creating a Spark Context with conf file
    sc = SparkContext(conf=conf)

    txt_logs = sc.textFile(file_path).filter(lambda line: check(line))
    access_logs = txt_logs.map(lambda line: AccessLog(line))

    #  Getting response_codes from log objects and caching it
    response_codes = access_logs.map(lambda log: log.get_status()).cache()
    log_count = response_codes.count()
    print("Total Resonse Codes: " + str(log_count))
    cnt = response_codes.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
    response200 = cnt.filter(lambda x: x[0] == "200").map(lambda kv: kv[1]).collect()
    print("###########################")
    print("##  Success Rate : " + str(int(response200[0])*100/log_count) + " %  ##")
    print("###########################")
Developer: alt-code, Project: AutoSpark, Lines: 31, Source: log_scanner.py

Example 3: spark_config

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
    def spark_config(self):
        if self._spark_config is None:
            os.environ['SPARK_SUBMIT_CLASSPATH'] = ','.join(self.spex_conf.spark_config.jars)

            conf = SparkConf()
            conf.setAppName(self.spex_conf.spark_config.name)
            conf.setMaster(self.spex_conf.spark_config.master)

            conf.set('spark.rdd.compress', 'true')
            conf.set('spark.io.compression.codec', 'lz4')
            conf.set('spark.mesos.coarse',
                     'true' if self.spex_conf.spark_config.coarse_mode else 'false')

            # TODO - Setup all the other cruft as needed
            #conf.set('spark.executor.memory', '4g')
            #conf.set('spark.cores.max', '16')
            #conf.set('spark.task.cpus', '6')

            # TODO - bind port for spark web ui

            self._spark_config = conf

        config = self._spark_config

        # These are always set, if someone changes them we simply set them back
        config.set('spark.executor.uri', self.artifact_resolver(self.spex_conf.spark_distro))
        config.setExecutorEnv(key='PYSPARK_PYTHON', value='./%s daemon' % self.spex_conf.spex_name)
        return config
Developer: GregBowyer, Project: spex, Lines: 30, Source: context.py

Example 4: __connected_yarn_spark_cluster

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
    def __connected_yarn_spark_cluster(self, pilotcompute_description):

        number_cores = 1
        if "number_cores" in pilotcompute_description:
            number_cores = int(pilotcompute_description["number_cores"])

        number_of_processes = 1
        if "number_of_processes" in pilotcompute_description:
            number_of_processes = int(pilotcompute_description["number_of_processes"])

        executor_memory = "1g"
        if "physical_memory_per_process" in pilotcompute_description:
            executor_memory = pilotcompute_description["physical_memory_per_process"]

        conf = SparkConf()
        conf.set("spark.num.executors", str(number_of_processes))
        conf.set("spark.executor.instances", str(number_of_processes))
        conf.set("spark.executor.memory", executor_memory)
        conf.set("spark.executor.cores", number_cores)
        if pilotcompute_description!=None:
            for i in pilotcompute_description.keys():
                if i.startswith("spark"):
                    conf.set(i, pilotcompute_description[i])
        conf.setAppName("Pilot-Spark")
        conf.setMaster("yarn-client")
        sc = SparkContext(conf=conf)
        sqlCtx = SQLContext(sc)
        pilot = PilotCompute(spark_context=sc, spark_sql_context=sqlCtx)
        return pilot
Developer: drelu, Project: SAGA-Hadoop, Lines: 31, Source: __init__.py
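
A side note on the "yarn-client" master URL used in the example above: it comes from the Spark 1.x era. If you target Spark 2.0 or later (an assumption about your environment), the master URL is simply "yarn" and the deploy mode is configured separately, for example:

from pyspark import SparkConf

conf = SparkConf()
conf.setAppName("Pilot-Spark")
conf.setMaster("yarn")                          # Spark 2.x+: replaces "yarn-client"/"yarn-cluster"
conf.set("spark.submit.deployMode", "client")   # deploy mode is now its own setting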

Example 5: sparkconfig

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def sparkconfig():
    # spark configuration options

    # conf = SparkConf()
    # conf.setMaster("spark://3.168.100.58:7077") # uncomment for standalone cluster
    # conf.setMaster("local")   # uncomment for local execution
    # conf.setAppName("demo_chain")
    # conf.set("spark.executor.memory", "2g")
    # conf.set("spark.default.parallelism", 56)  # 48)
    # conf.set("spark.sql.inMemoryColumnarStorage.compressed","true")
    # conf.set("sql.inMemoryColumnarStorage.batchSize",2000)

    # AMAZON AWS EMR
    conf = SparkConf()
    conf.setMaster("yarn-client")	#client gets output to terminals
    #conf.setMaster("yarn-cluster")	# this seems to runf aster but can't confirm
    conf.set("spark.default.parallelism",648)
    conf.setAppName("spark_markov_chain")
    conf.set("spark.executor.memory", "22g")
    conf.set("spark.executor.instances",9)
    conf.set("spark.executor.cores",9)
    conf.set("spark.yarn.executor.memoryOverhead",800)
    conf.set("spark.rdd.compress","True")
    conf.set("spark.shuffle.consolidateFiles","True")
    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")

    return conf
Developer: namebrandon, Project: Sparkov, Lines: 29, Source: Sparkov_AWS.py

Example 6: configureSpark

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def configureSpark():
	conf = SparkConf()
	conf.setMaster("local")
	conf.setAppName("Apache Spark Alarm Parser")
	conf.set("spark.executor.memory", "1g")
	sc = SparkContext(conf = conf)
	return sc
Developer: ChinmaySKulkarni, Project: Alarm_Tracker, Lines: 9, Source: spark_parser.py

Example 7: main

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def main():
    # Setting the cluster configuration parameters
    conf = SparkConf()
    conf.setMaster("spark://localhost:7077")
    conf.setAppName("Tweet App")
    conf.set("spark.executor.memory", "3g")
    conf.set("spark.driver.memory", "4g")

    # Creating a Spark Context with conf file
    sc = SparkContext(conf=conf)

    # Creating and SQL context to perform SQL queries
    sqlContext = SQLContext(sc)

    # Define the data path
    curr_path = os.path.dirname(os.path.abspath(__file__))
    json_name = "out.json"

    json_file_path = os.path.join(curr_path +
                                  "/../Spark_Jobs/data/",
                                  json_name)

    parquet_file_path = createSQLContext(json_file_path, sqlContext)
    print(parquet_file_path)

    # Read from parquet file
    parquetFile = sqlContext.read.parquet(parquet_file_path)
    parquetFile.registerTempTable("tweets")
    counter = sqlContext.sql("SELECT count(*) as cnt FROM tweets")
    print("============= Count =================")
    print("Count:: " + str(counter.collect()[0].cnt))
Developer: alt-code, Project: AutoSpark, Lines: 33, Source: tweet_scanner.py

Example 8: read_conf

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def read_conf():
    """
    Setting up spark contexts
    """
    conf = SparkConf()
    conf.setMaster("local[*]")
    conf.setAppName("Testing")
    return conf
Developer: Ather23, Project: machine_learning, Lines: 10, Source: testing_spark.py

Example 9: _test_broadcast_on_driver

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
 def _test_broadcast_on_driver(self, *extra_confs):
     conf = SparkConf()
     for key, value in extra_confs:
         conf.set(key, value)
     conf.setMaster("local-cluster[2,1,1024]")
     self.sc = SparkContext(conf=conf)
     bs = self.sc.broadcast(value=5)
     self.assertEqual(5, bs.value)
Developer: Brett-A, Project: spark, Lines: 10, Source: test_broadcast.py

Example 10: init_spark_context

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def init_spark_context():
    # load spark context
    conf = SparkConf().setAppName("event-contour-server")
    conf.setMaster("local[4]")
    conf.setAppName("reduce")
    conf.set("spark.executor.memory", "4g")
    # IMPORTANT: pass aditional Python modules to each worker
    sc = SparkContext(conf=conf, pyFiles=['app.py', 'contourGenerator.py','EventParallelize.py'])
 
    return sc
Developer: debjyoti385, Project: quakeanalysis, Lines: 12, Source: server.py

Example 11: __init__

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
    def __init__(self, master, name):
        self.name = name
        self.master = master

        print("init spark ...")
        os.environ["HADOOP_HOME"] = r"D:\code\wqr\hadoop-common-2.2.0-bin"
        conf = SparkConf()
        conf.setMaster(self.master)
        conf.setAppName(self.name)

        self.sc = SparkContext(conf=conf)
Developer: wuzhongdehua, Project: my_pyspark, Lines: 13, Source: Demo.py

Example 12: init

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
 def init(self):
     os.environ["SPARK_HOME"] = "/Users/abhinavrungta/Desktop/setups/spark-1.5.2"
     # os.environ['AWS_ACCESS_KEY_ID'] = <YOURKEY>
     # os.environ['AWS_SECRET_ACCESS_KEY'] = <YOURKEY>
     conf = SparkConf()
     conf.setMaster("local[10]")
     conf.setAppName("PySparkShell")
     conf.set("spark.executor.memory", "2g")
     conf.set("spark.driver.memory", "1g")
     self.sc = SparkContext(conf=conf)
     self.sqlContext = SQLContext(self.sc)        
Developer: abhinavrungta, Project: SNC-WEB, Lines: 13, Source: smalldata.py

Example 13: _test_multiple_broadcasts

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
 def _test_multiple_broadcasts(self, *extra_confs):
     """
     Test broadcast variables make it OK to the executors.  Tests multiple broadcast variables,
     and also multiple jobs.
     """
     conf = SparkConf()
     for key, value in extra_confs:
         conf.set(key, value)
     conf.setMaster("local-cluster[2,1,1024]")
     self.sc = SparkContext(conf=conf)
     self._test_encryption_helper([5])
     self._test_encryption_helper([5, 10, 20])
Developer: JkSelf, Project: spark, Lines: 14, Source: test_broadcast.py

Example 14: __connected_spark_cluster

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
 def __connected_spark_cluster(self, resource_url, pilot_description=None):
     conf = SparkConf()
     conf.setAppName("Pilot-Spark")
     if pilot_description!=None:
         for i in pilot_description.keys():
             if i.startswith("spark"):
                 conf.set(i, pilot_description[i])
     conf.setMaster(resource_url)
     print(conf.toDebugString())
     sc = SparkContext(conf=conf)
     sqlCtx = SQLContext(sc)
     pilot = PilotCompute(spark_context=sc, spark_sql_context=sqlCtx)
     return pilot
Developer: drelu, Project: SAGA-Hadoop, Lines: 15, Source: __init__.py

Example 15: main

# Required import: from pyspark import SparkConf [as alias]
# Or: from pyspark.SparkConf import setMaster [as alias]
def main():
    count = 0
    # Initializing Spark Configuration for the Master Node
    config = SparkConf().setAppName('DiskDetection_App')
    config.setMaster('local[6]')    # indicates the number of threads on the master node
    sc = SparkContext(conf=config)  # Initializing the Spark Context
    for i in os.listdir(os.environ["MODEL_CSV_FILEPATH"]):
        # Loop to restrict training to 20 models (only for better analysis purpose)
        if count < 20:
            modelName = os.path.splitext(i)[0]
            print(modelName)
            predictMain(modelName, sc)
            count += 1
Developer: abhishekakumar, Project: GatorSquad, Lines: 15, Source: SparkElephasModel.py


Note: The pyspark.SparkConf.setMaster examples in this article were compiled by 纯净天空 from open-source code and documentation platforms such as GitHub and MSDocs. The snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors, and distribution or use should follow the corresponding project's license. Do not reproduce without permission.