本文整理汇总了 Python 中 pyspark.SparkConf.setMaster 方法的典型用法代码示例。如果您正苦于以下问题:Python SparkConf.setMaster 方法的具体用法?Python SparkConf.setMaster 怎么用?Python SparkConf.setMaster 使用的例子?那么恭喜您,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pyspark.SparkConf 的用法示例。
在下文中一共展示了SparkConf.setMaster方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def main():
    """Run the streaming log counter configured from the command line.

    Parses the command-line options, creates a Spark streaming context with
    a batch duration of 1 (seconds, per the StreamingContext API), attaches
    it to a text socket and passes every received RDD to ``process_generic``
    together with the MongoDB URL and the REST endpoint. Blocks until the
    streaming context terminates.
    """
    description = ('process some log messages, storing them and signaling '
                   'a rest server')
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument('--mongo', required=True,
                            help='the mongodb url')
    arg_parser.add_argument('--rest', required=True,
                            help='the rest endpoint to signal')
    arg_parser.add_argument('--port', type=int, default=1984,
                            help='the port to receive from (default: 1984)')
    arg_parser.add_argument('--appname', default='SparkharaLogCounter',
                            help='the name of the spark application '
                                 '(default: SparkharaLogCounter)')
    arg_parser.add_argument('--master',
                            help='the master url for the spark cluster')
    arg_parser.add_argument('--socket', default='caravan-pathfinder',
                            help='the socket to attach for streaming text data '
                                 '(default: caravan-pathfinder)')
    options = arg_parser.parse_args()

    mongo_url = options.mongo
    rest_url = options.rest

    def handle_batch(rdd):
        # Forward each micro-batch along with the two target URLs.
        return process_generic(rdd, mongo_url, rest_url)

    sconf = SparkConf().setAppName(options.appname)
    # The master is optional; when omitted no master is set on the conf here.
    if options.master:
        sconf.setMaster(options.master)

    ssc = StreamingContext(SparkContext(conf=sconf), 1)
    ssc.socketTextStream(options.socket, options.port).foreachRDD(handle_batch)

    ssc.start()
    ssc.awaitTermination()
示例2: main
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def main(args):
    """Scan an access log with Spark and print the share of "200" responses.

    Args:
        args: Sequence whose first item is the Spark master URL and whose
            second item is the name of the log file, which is looked up
            under ``CURR_DIR``.

    Exits with status 1 when fewer than two arguments are supplied.
    """
    if len(args) < 2:
        sys.exit(1)

    # Setting the cluster configuration parameters
    spark_master = args[0]
    spark_data_file_name = args[1]
    file_path = CURR_DIR + "/" + spark_data_file_name

    conf = SparkConf()
    conf.setMaster(spark_master)
    conf.setAppName("Log Scanner")

    # Creating a Spark Context with conf file
    sc = SparkContext(conf=conf)

    txt_logs = sc.textFile(file_path).filter(lambda line: check(line))
    access_logs = txt_logs.map(lambda line: AccessLog(line))

    # Getting response_codes from log objects and caching it, because the
    # RDD is consumed twice below (count and reduceByKey).
    response_codes = access_logs.map(lambda log: log.get_status()).cache()
    log_count = response_codes.count()
    print("Total Resonse Codes: " + str(log_count))

    cnt = response_codes.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
    # Index into the (status, count) pair instead of using a tuple parameter:
    # ``lambda (x, y): y`` is a syntax error on Python 3.
    response200 = (cnt.filter(lambda pair: pair[0] == "200")
                   .map(lambda pair: pair[1])
                   .collect())

    # No "200" entries means zero successes; an empty log has no rate to
    # compute, so report 0 instead of dividing by zero.
    success_count = int(response200[0]) if response200 else 0
    success_rate = success_count * 100 / log_count if log_count else 0

    print("###########################")
    print("## Success Rate : " + str(success_rate) + " % ##")
    print("###########################")
示例3: spark_config
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def spark_config(self):
    """Return the Spark configuration, creating and caching it on first use.

    The first call builds a ``SparkConf`` from ``self.spex_conf.spark_config``
    and stores it in ``self._spark_config``. Every call then re-applies the
    executor URI and the ``PYSPARK_PYTHON`` executor environment variable.
    """
    spex_conf = self.spex_conf

    if self._spark_config is None:
        settings = spex_conf.spark_config
        os.environ['SPARK_SUBMIT_CLASSPATH'] = ','.join(settings.jars)

        if settings.coarse_mode:
            coarse_flag = 'true'
        else:
            coarse_flag = 'false'

        fresh_conf = SparkConf()
        fresh_conf.setAppName(settings.name)
        fresh_conf.setMaster(settings.master)
        fresh_conf.set('spark.rdd.compress', 'true')
        fresh_conf.set('spark.io.compression.codec', 'lz4')
        fresh_conf.set('spark.mesos.coarse', coarse_flag)
        # TODO - Setup all the other cruft as needed, e.g.
        #   spark.executor.memory = 4g, spark.cores.max = 16, spark.task.cpus = 6
        # TODO - bind port for spark web ui
        self._spark_config = fresh_conf

    cached_conf = self._spark_config
    # These are always set, if someone changes them we simply set them back
    executor_uri = self.artifact_resolver(spex_conf.spark_distro)
    cached_conf.set('spark.executor.uri', executor_uri)
    python_command = './%s daemon' % spex_conf.spex_name
    cached_conf.setExecutorEnv(key='PYSPARK_PYTHON', value=python_command)
    return cached_conf
示例4: __connected_yarn_spark_cluster
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def __connected_yarn_spark_cluster(self, pilotcompute_description):
    """Create a "yarn-client" Spark context and wrap it in a PilotCompute.

    Args:
        pilotcompute_description: Mapping of pilot options, or ``None`` for
            all defaults. Recognised keys are ``number_cores`` (default 1),
            ``number_of_processes`` (default 1) and
            ``physical_memory_per_process`` (default ``"1g"``). Every entry
            whose key starts with ``"spark"`` is copied onto the Spark
            configuration as-is.

    Returns:
        A ``PilotCompute`` built from the new ``SparkContext`` and a
        ``SQLContext`` created on top of it.
    """
    # Treat a missing description as empty up front, so the lookups below
    # cannot fail on None.
    description = pilotcompute_description if pilotcompute_description is not None else {}

    # ``dict.has_key`` no longer exists on Python 3; ``get`` works on both.
    number_cores = int(description.get("number_cores", 1))
    number_of_processes = int(description.get("number_of_processes", 1))
    # Look up the memory setting under its own key; it used to be guarded by
    # "number_of_processes", which raised KeyError or ignored the value.
    executor_memory = description.get("physical_memory_per_process", "1g")

    conf = SparkConf()
    conf.set("spark.num.executors", str(number_of_processes))
    conf.set("spark.executor.instances", str(number_of_processes))
    conf.set("spark.executor.memory", executor_memory)
    conf.set("spark.executor.cores", str(number_cores))

    # Explicit "spark*" entries are applied last, so they override the
    # values derived above.
    for key, value in description.items():
        if key.startswith("spark"):
            conf.set(key, value)

    conf.setAppName("Pilot-Spark")
    conf.setMaster("yarn-client")

    sc = SparkContext(conf=conf)
    sqlCtx = SQLContext(sc)
    pilot = PilotCompute(spark_context=sc, spark_sql_context=sqlCtx)
    return pilot
示例5: sparkconfig
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def sparkconfig():
    """Build the SparkConf for the ``spark_markov_chain`` application.

    The active settings are the ones labelled "AMAZON AWS EMR" by the
    original author. An earlier, disabled setup used a standalone master
    (``spark://3.168.100.58:7077``) or ``local``, the app name
    ``demo_chain``, 2g executors, a default parallelism of 56 and the
    in-memory columnar storage options (compressed, batch size 2000).

    Returns:
        The populated ``SparkConf`` instance.
    """
    conf = SparkConf()
    # "yarn-client" gets output to the terminal; "yarn-cluster" seemed to run
    # faster to the original author, but that was never confirmed.
    conf.setMaster("yarn-client")
    conf.set("spark.default.parallelism", 648)
    conf.setAppName("spark_markov_chain")

    remaining_settings = (
        ("spark.executor.memory", "22g"),
        ("spark.executor.instances", 9),
        ("spark.executor.cores", 9),
        ("spark.yarn.executor.memoryOverhead", 800),
        ("spark.rdd.compress", "True"),
        ("spark.shuffle.consolidateFiles", "True"),
        ("spark.serializer", "org.apache.spark.serializer.KryoSerializer"),
    )
    for option, setting in remaining_settings:
        conf.set(option, setting)
    return conf
示例6: configureSpark
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def configureSpark():
    """Create and return a SparkContext for the alarm parser.

    The context uses the "local" master, the application name
    "Apache Spark Alarm Parser" and 1g of executor memory.
    """
    alarm_conf = (
        SparkConf()
        .setMaster("local")
        .setAppName("Apache Spark Alarm Parser")
        .set("spark.executor.memory", "1g")
    )
    return SparkContext(conf=alarm_conf)
示例7: main
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def main():
    """Count the rows of the tweets data set through Spark SQL.

    Connects to the master at ``spark://localhost:7077``, passes the path of
    ``out.json`` to ``createSQLContext``, reads the parquet file at the path
    that call returns, registers it as the temporary table ``tweets`` and
    prints the row count.
    """
    # Cluster configuration parameters
    conf = (
        SparkConf()
        .setMaster("spark://localhost:7077")
        .setAppName("Tweet App")
        .set("spark.executor.memory", "3g")
        .set("spark.driver.memory", "4g")
    )

    # Spark context plus the SQL context used for the queries below
    sc = SparkContext(conf=conf)
    sqlContext = SQLContext(sc)

    # The data directory is addressed relative to this script's directory
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = script_dir + "/../Spark_Jobs/data/"
    json_file_path = os.path.join(data_dir, "out.json")

    parquet_file_path = createSQLContext(json_file_path, sqlContext)
    print(parquet_file_path)

    # Read from parquet file and expose it to SQL as "tweets"
    tweets = sqlContext.read.parquet(parquet_file_path)
    tweets.registerTempTable("tweets")

    count_rows = sqlContext.sql("SELECT count(*) as cnt FROM tweets").collect()
    print("============= Count =================")
    print("Count:: " + str(count_rows[0].cnt))
示例8: read_conf
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def read_conf():
    """Return a SparkConf with master "local[*]" and app name "Testing"."""
    return SparkConf().setMaster("local[*]").setAppName("Testing")
示例9: _test_broadcast_on_driver
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def _test_broadcast_on_driver(self, *extra_confs):
    """Check that a broadcast value can be read back on the driver.

    Args:
        *extra_confs: ``(key, value)`` pairs applied to the Spark
            configuration before the master is set.
    """
    spark_conf = SparkConf()
    for option, setting in extra_confs:
        spark_conf.set(option, setting)
    # The master is set after the extra options.
    spark_conf.setMaster("local-cluster[2,1,1024]")

    self.sc = SparkContext(conf=spark_conf)
    broadcast_var = self.sc.broadcast(value=5)
    self.assertEqual(5, broadcast_var.value)
示例10: init_spark_context
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def init_spark_context():
    """Create the SparkContext for the server and return it.

    Uses master "local[4]" and 4g of executor memory, and passes the listed
    Python modules to the context through ``pyFiles``.
    """
    spark_conf = SparkConf()
    spark_conf.setAppName("event-contour-server")
    # NOTE(review): the app name set above is overwritten right here, so the
    # effective name is "reduce" - confirm which one is intended.
    spark_conf.setMaster("local[4]").setAppName("reduce")
    spark_conf.set("spark.executor.memory", "4g")

    # IMPORTANT: pass additional Python modules to each worker
    worker_modules = ['app.py', 'contourGenerator.py', 'EventParallelize.py']
    return SparkContext(conf=spark_conf, pyFiles=worker_modules)
示例11: __init__
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def __init__(self, master, name):
    """Store the connection settings and start a SparkContext.

    Args:
        master: Spark master URL passed to ``SparkConf.setMaster``.
        name: Application name passed to ``SparkConf.setAppName``.
    """
    self.name = name
    self.master = master
    # Call form of print works on both Python 2 and Python 3.
    print("init spark ...")
    # Raw string: the backslashes are path separators, not escape sequences.
    # The resulting value is the same as before.
    os.environ["HADOOP_HOME"] = r"D:\code\wqr\hadoop-common-2.2.0-bin"

    conf = SparkConf()
    conf.setMaster(self.master)
    conf.setAppName(self.name)
    self.sc = SparkContext(conf=conf)
示例12: init
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def init(self):
    """Point SPARK_HOME at the local install and create the Spark contexts.

    Stores the new ``SparkContext`` in ``self.sc`` and a ``SQLContext``
    built on it in ``self.sqlContext``.
    """
    os.environ["SPARK_HOME"] = "/Users/abhinavrungta/Desktop/setups/spark-1.5.2"
    # AWS credentials are intentionally not set here; export
    # AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY yourself if they are needed.

    shell_conf = (
        SparkConf()
        .setMaster("local[10]")
        .setAppName("PySparkShell")
        .set("spark.executor.memory", "2g")
        .set("spark.driver.memory", "1g")
    )

    self.sc = SparkContext(conf=shell_conf)
    self.sqlContext = SQLContext(self.sc)
示例13: _test_multiple_broadcasts
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def _test_multiple_broadcasts(self, *extra_confs):
    """Check that broadcast variables reach the executors.

    Runs the encryption helper twice, first with one value and then with
    three, which covers multiple broadcast variables and multiple jobs.

    Args:
        *extra_confs: ``(key, value)`` pairs applied to the Spark
            configuration before the master is set.
    """
    spark_conf = SparkConf()
    for option, setting in extra_confs:
        spark_conf.set(option, setting)
    spark_conf.setMaster("local-cluster[2,1,1024]")

    self.sc = SparkContext(conf=spark_conf)
    for values in ([5], [5, 10, 20]):
        self._test_encryption_helper(values)
示例14: __connected_spark_cluster
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def __connected_spark_cluster(self, resource_url, pilot_description=None):
    """Connect to an existing Spark master and wrap it in a PilotCompute.

    Args:
        resource_url: Master URL passed to ``SparkConf.setMaster``.
        pilot_description: Optional mapping of options. Only entries whose
            key starts with ``"spark"`` are copied onto the configuration.

    Returns:
        A ``PilotCompute`` built from the new ``SparkContext`` and a
        ``SQLContext`` created on top of it.
    """
    conf = SparkConf()
    conf.setAppName("Pilot-Spark")

    # Identity check is the idiomatic None test; iterating the items avoids
    # a second lookup per key.
    if pilot_description is not None:
        for key, value in pilot_description.items():
            if key.startswith("spark"):
                conf.set(key, value)

    conf.setMaster(resource_url)
    # Print the effective configuration before the context is created.
    print(conf.toDebugString())

    sc = SparkContext(conf=conf)
    sqlCtx = SQLContext(sc)
    pilot = PilotCompute(spark_context=sc, spark_sql_context=sqlCtx)
    return pilot
示例15: main
# 需要导入模块: from pyspark import SparkConf [as 别名]
# 或者: from pyspark.SparkConf import setMaster [as 别名]
def main():
    """Run ``predictMain`` for up to 20 entries of the model CSV directory.

    The directory is read from the ``MODEL_CSV_FILEPATH`` environment
    variable. The model name is each entry's file name without its
    extension.

    Raises:
        KeyError: If ``MODEL_CSV_FILEPATH`` is not set.
    """
    # Only the first 20 listed models are processed (for better analysis
    # purpose).
    max_models = 20

    # Initializing Spark configuration; 'local[6]' was annotated by the
    # original author as the number of threads on the master node.
    config = SparkConf().setAppName('DiskDetection_App')
    config.setMaster('local[6]')
    sc = SparkContext(conf=config)  # Initializing the Spark Context

    # Slicing replaces the manual counter and processes the same entries.
    model_files = os.listdir(os.environ["MODEL_CSV_FILEPATH"])
    for file_name in model_files[:max_models]:
        model_name = os.path.splitext(file_name)[0]
        # Call form of print works on both Python 2 and Python 3.
        print(model_name)
        predictMain(model_name, sc)