This article collects typical usage examples of the Python method pyspark.SparkFiles.getRootDirectory. If you are wondering how exactly to use SparkFiles.getRootDirectory, or are looking for real examples of it in practice, the hand-picked code samples below may help. You can also explore further usage examples of its containing class, pyspark.SparkFiles.
The following presents 4 code examples of the SparkFiles.getRootDirectory method, sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code examples.
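Before the examples, here is a minimal, hedged sketch of the typical pattern around SparkFiles.getRootDirectory: the driver ships a file with SparkContext.addFile, and every task resolves the worker-local directory that holds it. The file path and name below are illustrative assumptions, not taken from the examples that follow.

import os
from pyspark import SparkContext, SparkFiles

sc = SparkContext.getOrCreate()
sc.addFile("/tmp/lookup.csv")  # hypothetical local file, shipped to every executor

def first_line(_):
    # every task sees the same worker-local root for files added via addFile()
    root = SparkFiles.getRootDirectory()
    with open(os.path.join(root, "lookup.csv")) as f:
        return [f.readline().strip()]

print(sc.parallelize(range(2), 2).mapPartitions(first_line).collect())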
Example 1: train_partition
# Required import: from pyspark import SparkFiles [as alias]
# Alternatively: from pyspark.SparkFiles import getRootDirectory [as alias]
def train_partition(idx, iterator):
    port = 50000 + idx % 256
    main = SparkFiles.get("main.py")
    architecture = SparkFiles.get("train_val.prototxt")
    model = SparkFiles.get("deepq16.caffemodel")
    solver = SparkFiles.get("solver.prototxt")
    root = SparkFiles.getRootDirectory()
    dset = os.path.join(root, "dset-%02d.hdf5" % idx)

    flag_file = "flags/__BARISTA_READY__.%d" % port
    if os.path.isfile(flag_file):
        os.remove(flag_file)

    #  out = open(os.path.join(root, "barista.log"), 'w')
    subprocess.Popen(["python", main, architecture, model,
                      "--dataset", dset,
                      "--solver", solver,
                      "--dset-size", "30000",
                      "--initial-replay", "20000",
                      "--debug",
                      "--overwrite",
                      "--port", str(port)])

    while not os.path.isfile(flag_file):
        pass

    for step in iterator:
        dc = DummyClient("127.0.0.1", port)
        dc.send(barista.GRAD_UPDATE)
        response = dc.recv()
        yield response
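For orientation, here is a hedged driver-side sketch of how a partition-indexed worker function like train_partition could be wired up. The SparkContext setup, the local file paths, the step range, and the partition count are assumptions for illustration, not part of the original example; the files later resolved with SparkFiles.get must be distributed first, e.g. via SparkContext.addFile.

from pyspark import SparkContext

sc = SparkContext.getOrCreate()

# Ship the artifacts that train_partition resolves via SparkFiles.get /
# getRootDirectory. The local paths here are illustrative assumptions.
for f in ["main.py", "train_val.prototxt", "deepq16.caffemodel", "solver.prototxt"]:
    sc.addFile(f)

# mapPartitionsWithIndex passes (partition_index, iterator), which matches
# train_partition's (idx, iterator) signature; 1000 steps over 8 partitions
# are arbitrary values for the sketch.
steps = sc.parallelize(range(1000), 8)
responses = steps.mapPartitionsWithIndex(train_partition).collect()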
Example 2: spawn_barista
# Required import: from pyspark import SparkFiles [as alias]
# Alternatively: from pyspark.SparkFiles import getRootDirectory [as alias]
def spawn_barista(partition):
    main = SparkFiles.get("main.py")
    architecture = SparkFiles.get("train_val.prototxt")
    model = SparkFiles.get("deepq16.caffemodel")
    solver = SparkFiles.get("solver.prototxt")
    root = SparkFiles.getRootDirectory()
    dset = os.path.join(root, "dset.hdf5")

    flag_file = "flags/__BARISTA_READY__"
    if os.path.isfile(flag_file):
        os.remove("flags/__BARISTA_READY__")

    out = open(os.path.join(root, "barista.log"), 'w')
    subprocess.Popen(["python", main, architecture, model,
                      "--dataset", dset,
                      "--solver", solver],
                     stdout=out,
                     stderr=subprocess.STDOUT)

    while not os.path.isfile("flags/__BARISTA_READY__"):
        pass
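As with the previous example, a hedged sketch of how spawn_barista might be launched once per partition. The trigger RDD and parallelism below are assumptions, and the prototxt/caffemodel files are assumed to have been shipped beforehand (e.g. with sc.addFile or spark-submit --files) so that SparkFiles.get can resolve them.

from pyspark import SparkContext

sc = SparkContext.getOrCreate()

# One element per partition so each partition spawns exactly one barista process.
trigger = sc.parallelize(range(sc.defaultParallelism), sc.defaultParallelism)
trigger.foreachPartition(spawn_barista)  # spawn_barista receives the partition iterator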
Example 3: start_spark
# Required import: from pyspark import SparkFiles [as alias]
# Alternatively: from pyspark.SparkFiles import getRootDirectory [as alias]
def start_spark(app_name='my_spark_app', master='local[*]', jar_packages=[],
                files=[], spark_config={}):
    """Start Spark session, get the Spark logger and load config files.

    Start a Spark session on the worker node and register the Spark
    application with the cluster. NOTE - only the app_name argument
    will apply when this is called from a script sent to spark-submit
    (i.e. when __name__ = '__main__'). All other arguments exist solely
    for testing the script from within an interactive Python console.

    This function also looks for a file ending in 'config.json' that
    can be sent with the Spark job. If it is found, it is opened and
    the contents parsed (assuming it contains valid JSON for the ETL job
    configuration) into a dict of ETL job configuration parameters,
    which are returned as the last element in the tuple returned by
    this function. If the file cannot be found then the return tuple
    only contains the Spark session and Spark logger objects.

    :param app_name: Name of Spark app.
    :param master: Cluster connection details (defaults to local[*]).
    :param jar_packages: List of Spark JAR package names.
    :param files: List of files to send to Spark cluster (master and
        workers).
    :param spark_config: Dictionary of config key-value pairs.
    :return: A tuple of references to the Spark session, logger and
        config dict (only if available).
    """
    if __name__ == '__main__':
        # get Spark session factory
        spark_builder = (
            SparkSession
            .builder
            .appName(app_name))
    else:
        # get Spark session factory
        spark_builder = (
            SparkSession
            .builder
            .master(master)
            .appName(app_name))

        # create Spark JAR packages string
        spark_jars_packages = ','.join(list(jar_packages))
        spark_builder.config('spark.jars.packages', spark_jars_packages)

        spark_files = ','.join(list(files))
        spark_builder.config('spark.files', spark_files)

        # add other config params
        for key, val in spark_config.items():
            spark_builder.config(key, val)

    # create session and retrieve Spark logger object
    spark_sess = spark_builder.getOrCreate()
    spark_logger = logging.Log4j(spark_sess)

    # get config file if sent to cluster with --files
    spark_files_dir = SparkFiles.getRootDirectory()
    config_files = [filename
                    for filename in listdir(spark_files_dir)
                    if filename.endswith('config.json')]

    if len(config_files) != 0:
        path_to_config_file = path.join(spark_files_dir, config_files[0])
        with open(path_to_config_file, 'r') as config_file:
            config_json = config_file.read().replace('\n', '')
        config_dict = loads(config_json)
        spark_logger.warn('loaded config from ' + config_files[0])
    else:
        config_dict = None

    # build return tuple conditional on presence of config
    if config_dict is not None:
        return_tup = spark_sess, spark_logger, config_dict
    else:
        return_tup = spark_sess, spark_logger

    return return_tup
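A hedged usage sketch for start_spark from an interactive session: the app name and the 'configs/etl_config.json' path are illustrative assumptions, and the three-element unpacking only works when a file ending in 'config.json' is actually found in the SparkFiles root directory.

# Interactive usage sketch; the file path and app name are hypothetical.
spark, log, config = start_spark(
    app_name='my_etl_job',
    files=['configs/etl_config.json'])

log.warn('job started with config: ' + str(config))
spark.stop()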
Example 4: __ls
# Required import: from pyspark import SparkFiles [as alias]
# Alternatively: from pyspark.SparkFiles import getRootDirectory [as alias]
def __ls(broadcast_vars, iterator):
    """
    Get the list of files in the worker-local directory
    """
    return [__get_hostname(), os.listdir(SparkFiles.getRootDirectory())]
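A hedged sketch of invoking __ls across partitions to inspect each worker's SparkFiles root. The broadcast variable, the partition count, and the assumption that __get_hostname is defined alongside __ls in the same module are all illustrative, not taken from the original source.

from functools import partial
from pyspark import SparkContext

sc = SparkContext.getOrCreate()
bv = sc.broadcast({})  # hypothetical broadcast variable; its contents are unused here

listings = (sc.parallelize(range(4), 4)
              .mapPartitions(partial(__ls, bv))
              .collect())
print(listings)  # alternating hostnames and file listings, two entries per partition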