本文整理汇总了Python中pyspark.SparkContext.statusTracker方法的典型用法代码示例。如果您正苦于以下问题:Python SparkContext.statusTracker方法的具体用法?Python SparkContext.statusTracker怎么用?Python SparkContext.statusTracker使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pyspark.SparkContext
的用法示例。
在下文中一共展示了SparkContext.statusTracker方法的2个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: main
# 需要导入模块: from pyspark import SparkContext [as 别名]
# 或者: from pyspark.SparkContext import statusTracker [as 别名]
def main():
conf = SparkConf().set("spark.ui.showConsoleProgress", "false")
sc = SparkContext(appName="PythonStatusAPIDemo", conf=conf)
def run():
rdd = sc.parallelize(range(10), 10).map(delayed(2))
reduced = rdd.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x + y)
return reduced.map(delayed(2)).collect()
result = call_in_background(run)
status = sc.statusTracker()
while result.empty():
ids = status.getJobIdsForGroup()
for id in ids:
job = status.getJobInfo(id)
print "Job", id, "status: ", job.status
for sid in job.stageIds:
info = status.getStageInfo(sid)
if info:
print "Stage %d: %d tasks total (%d active, %d complete)" % \
(sid, info.numTasks, info.numActiveTasks, info.numCompletedTasks)
time.sleep(1)
print "Job results are:", result.get()
sc.stop()
示例2: CommonSparkContext
# 需要导入模块: from pyspark import SparkContext [as 别名]
# 或者: from pyspark.SparkContext import statusTracker [as 别名]
class CommonSparkContext(object):
__metaclass__ = Singleton
def __init__(self):
    """
    Create a spark context.

    The spark configuration is taken from xframes/config.ini and from
    the values set in SparkInitContext.set() if this has been called.
    """
    # This is placed here because otherwise it causes an error when used in a spark slave.
    from pyspark import SparkConf, SparkContext, SQLContext, HiveContext
    # This reads from default.ini and then xframes/config.ini
    # if they exist.
    self._env = Environment.create()
    context = create_spark_config(self._env)
    verbose = self._env.get_config("xframes", "verbose", "false").lower() == "true"
    # WebHDFS operations are performed under this user name.
    hdfs_user_name = self._env.get_config("webhdfs", "user", "hdfs")
    os.environ["HADOOP_USER_NAME"] = hdfs_user_name
    # Copy all configured key/value pairs into the SparkConf.
    config_pairs = [(k, v) for k, v in context.iteritems()]
    self._config = SparkConf().setAll(config_pairs)
    if verbose:
        print "Spark Config: {}".format(config_pairs)
    self._sc = SparkContext(conf=self._config)
    self._sqlc = SQLContext(self._sc)
    self._hivec = HiveContext(self._sc)
    # Paths of zip archives shipped to the cluster; removed on close.
    self.zip_path = []
    version = [int(n) for n in self._sc.version.split(".")]
    self.status_tracker = self._sc.statusTracker()
    # SparkContext.applicationId is only available from Spark 1.4.1 on.
    if cmp(version, [1, 4, 1]) >= 0:
        self.application_id = self._sc.applicationId
    else:
        self.application_id = None
    if verbose:
        print "Spark Version: {}".format(self._sc.version)
        if self.application_id:
            print "Application Id: {}".format(self.application_id)
    # On a cluster (non-local master), ship the xframes package itself
    # to the slaves as a zip on PYTHONPATH.
    if not context["spark.master"].startswith("local"):
        zip_path = self.build_zip(get_xframes_home())
        if zip_path:
            self._sc.addPyFile(zip_path)
            self.zip_path.append(zip_path)
    trace_flag = self._env.get_config("xframes", "rdd-trace", "false").lower() == "true"
    XRdd.set_trace(trace_flag)
    # Make sure the context is shut down when the process exits.
    atexit.register(self.close_context)
def spark_add_files(self, dirs):
    """
    Adds python files in the given directory or directories.

    Parameters
    ----------
    dirs: str or list(str)
        If a str, the pathname to a directory containing a python module.
        If a list, then it is a list of such directories.

    The python files in each directory are compiled, packed into a zip, distributed to each
    spark slave, and placed in PYTHONPATH.

    This is only done if spark is deployed on a cluster.
    """
    # Nothing to distribute when running against a local master.
    master = self.config().get("spark.master", "local")
    if master.startswith("local"):
        return
    directories = [dirs] if isinstance(dirs, basestring) else dirs
    for directory in directories:
        archive = self.build_zip(directory)
        if not archive:
            continue
        self._sc.addPyFile(archive)
        self.zip_path.append(archive)
def close_context(self):
    """Stop the owned Spark context and delete any zip archives built for it."""
    if self._sc:
        self._sc.stop()
        self._sc = None
    for archive in self.zip_path:
        os.remove(archive)
def config(self):
    """
    Gets the configuration parameters used to initialize the spark context.

    Returns
    -------
    out : dict
        A dict of the properties used to initialize the spark context.
    """
    # SparkConf.getAll returns (key, value) pairs; dict() maps them directly.
    return dict(self._config.getAll())
def env(self):
"""
Gets the config environment.
#.........这里部分代码省略.........