This article collects typical usage examples of the Python method pyspark.context.SparkContext.addPyFile. If you are unsure what SparkContext.addPyFile does or how to call it, the curated code examples below should help; you can also explore the containing class, pyspark.context.SparkContext, for more context.
Two code examples of SparkContext.addPyFile are shown below.
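Before the examples, here is a minimal sketch of the method itself. The path is a placeholder, and helpers.transform is a hypothetical function inside an assumed local module helpers.py:

from pyspark import SparkContext

sc = SparkContext(appName='addPyFile-demo')

# Ship a dependency (.py, .zip or .egg) to every executor; tasks can then
# import it as a normal module.
sc.addPyFile('/path/to/helpers.py')  # placeholder path

def apply_helper(x):
    import helpers               # resolvable on executors after addPyFile
    return helpers.transform(x)  # hypothetical function in helpers.py

print(sc.parallelize([1, 2, 3]).map(apply_helper).collect())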
Example 1: validate_arguments
# Required import: from pyspark.context import SparkContext [as alias]
# Or alternatively: from pyspark.context.SparkContext import addPyFile [as alias]
# Load an existing model if one was given on the command line.
# NOTE: the snippet opens mid-branch; the leading "if" line and the
# "model = None" default are reconstructed here.
model = None
if args.existing_model_pkl:
    model = pkl.load(open(args.existing_model_pkl))
elif args.existing_model_proto:
    model = LOPQModel.load_proto(args.existing_model_proto)

args = validate_arguments(args, model)

# Build a descriptive app name from the requested training steps
get_step_name = lambda x: {STEP_COARSE: 'coarse', STEP_ROTATION: 'rotations', STEP_SUBQUANT: 'subquantizers'}.get(x, None)
steps_str = ', '.join(filter(lambda x: x is not None, map(get_step_name, sorted(args.steps))))
APP_NAME = 'LOPQ{V=%d,M=%d}; training %s' % (args.V, args.M, steps_str)
sc = SparkContext(appName=APP_NAME)

# Load the UDF module if provided, then load the training data RDD
if args.data_udf:
    # Ship the UDF modules to the executors (and onto the driver's sys.path)
    sc.addPyFile('hdfs://memex/user/skaraman/build-lopq-index/lopq/spark/memex_udf.py')
    sc.addPyFile('hdfs://memex/user/skaraman/build-lopq-index/lopq/spark/deepsentibanktf_udf.py')
    udf_module = __import__(args.data_udf, fromlist=['udf'])
    load_udf = udf_module.udf
    # NB: load_data splits vectors into 2 parts, after applying PCA if a model is provided
    data = load_data(sc, args, data_load_fn=load_udf)
else:
    # NB: load_data splits vectors into 2 parts, after applying PCA if a model is provided
    data = load_data(sc, args)

# Initialize parameters
Cs = Rs = mus = subs = None

# Get coarse quantizers
if STEP_COARSE in args.steps:
    Cs = train_coarse(sc, data, args.V, seed=args.seed)
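Note the ordering in this example: sc.addPyFile runs before __import__. This matters because addPyFile both ships each file to the executors and inserts the fetched copy on the driver's sys.path, so a module that only exists on HDFS becomes importable on both sides; a plain __import__ without the preceding addPyFile calls would fail.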
Example 2: SparkContext
# Required import: from pyspark.context import SparkContext [as alias]
# Or alternatively: from pyspark.context.SparkContext import addPyFile [as alias]
# NOTE: the snippet opens mid-branch: the original first line is the tail of a
# wrapped call, and the guarding "if/elif" on the requested context class is
# truncated. The guard name and the wrapped assignment below are a
# hypothetical reconstruction.
if context_class == "SparkSessionContext":
    # Wrap the JVM-side SparkContext so PySpark can drive the existing context
    jsc = gateway.jvm.org.apache.spark.api.java.JavaSparkContext(
        jcontext.sparkContext())
    sc = SparkContext(gateway=gateway, jsc=jsc, conf=spark_conf)
    context = SparkSession(sc, jcontext.spark())
else:
    # Fall back to a custom context built by the job itself
    customContext = job.build_context(gateway, jcontext, spark_conf)
    if customContext is not None:
        context = customContext
    else:
        exit_with_failure(
            "Expected JavaSparkContext, SQLContext "
            "or HiveContext but received %s" % repr(context_class), 2)

# If a Python egg was supplied, ship it to the executors
egg_path = os.environ.get("EGGPATH", None)
if egg_path and sc:
    try:
        sc.addPyFile(egg_path)
    except Exception as error:
        exit_with_failure(
            "Error while adding Python Egg to Spark Context: %s\n%s" %
            (repr(error), traceback.format_exc()), 5)

# Give the job a chance to validate its configuration before running
try:
    job_data = job.validate(context, None, job_config)
except Exception as error:
    exit_with_failure(
        "Error while calling 'validate': %s\n%s" %
        (repr(error), traceback.format_exc()), 3)

if isinstance(job_data, list) and \
        isinstance(job_data[0], ValidationProblem):
    entry_point.setValidationProblems([p.problem for p in job_data])
    exit_with_failure("Validation problems in job, exiting")
else:
    ...  # snippet truncated here
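The egg-shipping guard in this example is a pattern worth reusing on its own: failing fast when addPyFile cannot distribute a dependency beats a later, harder-to-trace ImportError on the executors. A minimal standalone sketch of the same guard, with a placeholder path:

from pyspark import SparkContext
import traceback

sc = SparkContext(appName='egg-shipping-demo')
try:
    # .py, .zip and .egg paths are all accepted; the archive ends up
    # on sys.path of every executor (and of the driver)
    sc.addPyFile('hdfs:///apps/jobs/my_job-0.1.egg')  # placeholder path
except Exception as error:
    # Fail fast with context rather than let tasks die later with ImportError
    print("Could not ship egg: %s\n%s" % (repr(error), traceback.format_exc()))
    sc.stop()
    raise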