本文整理汇总了 Java 中 org.apache.spark.api.java.JavaRDD.repartition 方法的典型用法代码示例。如果您正苦于以下问题：Java JavaRDD.repartition 方法的具体用法？Java JavaRDD.repartition 怎么用？Java JavaRDD.repartition 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 org.apache.spark.api.java.JavaRDD 的用法示例。
在下文中一共展示了 JavaRDD.repartition 方法的 2 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Java 代码示例。
示例1: sparkTrain
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Runs the training function over every partition of the input and reports
 * whether all partitions succeeded.
 *
 * <p>The input is first repartitioned into {@code slaveNum} partitions, then
 * {@code trainFunc} is applied through {@code mapPartitionsWithIndex}, and the
 * per-partition Boolean results are collected back to the caller.
 *
 * @param rdd the input records to train on
 * @return {@code true} only if every collected result is {@code true};
 *         {@code false} as soon as one {@code false} result is encountered
 */
public boolean sparkTrain(JavaRDD<String> rdd) {
    // repartition -> per-partition training -> gather the outcomes locally
    List<Boolean> outcomes = rdd.repartition(slaveNum)
            .mapPartitionsWithIndex(trainFunc, true)
            .collect();

    for (Boolean partitionOk : outcomes) {
        if (partitionOk) {
            continue;
        }
        // a single failed partition fails the whole run
        return false;
    }
    return true;
}
示例2: parseQuads
import org.apache.spark.api.java.JavaRDD; //导入方法依赖的package包/类
/**
 * Reads RDF quads from {@code path} into an RDD, optionally repartitions it,
 * and optionally drops literals whose language tag is not accepted.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Build a Hadoop {@code Configuration} carrying the configured batch size
 *       (as {@code NLineInputFormat.LINES_PER_MAP}) and the bad-tuple policy.</li>
 *   <li>Decide whether the input is line based: use the configured value, or
 *       fall back to {@code guessIsLineBasedFormat(path)} when it is not set.</li>
 *   <li>Load the file with {@code NQuadsInputFormat} (line based) or
 *       {@code TriplesOrQuadsInputFormat} (otherwise).</li>
 *   <li>Repartition when {@code config.getRepartition()} is non-null.</li>
 *   <li>When the accepted-language list is non-empty, keep only quads whose
 *       object is not a literal, has no language tag, or has an accepted one.</li>
 * </ol>
 *
 * @param path location of the RDF input passed to {@code newAPIHadoopFile}
 * @return the parsed (and possibly repartitioned / filtered) quads
 */
@Override
public JavaRDD<Quad> parseQuads(String path) {
    Configuration conf = new Configuration();
    Integer batchSize = config.getBatchSize();
    conf.set(NLineInputFormat.LINES_PER_MAP, batchSize.toString());

    // bad tuples are ignored unless the error handling policy is "Throw"
    boolean throwOnParseError = config.getErrorHandling() == ParseErrorHandling.Throw;
    conf.set(RdfIOConstants.INPUT_IGNORE_BAD_TUPLES, throwOnParseError ? "false" : "true");

    // explicit setting wins; otherwise guess from the path
    Boolean configuredLineBased = config.getLineBasedFormat();
    Boolean lineBased = configuredLineBased != null
            ? configuredLineBased
            : guessIsLineBasedFormat(path);

    Integer partitions = config.getRepartition();
    JavaRDD<Quad> quads;
    if (!lineBased) {
        // let Jena guess the format, load whole files
        log.info("Input format is not line based, parsing RDF by Master node only.");
        quads = sc.newAPIHadoopFile(
                        path,
                        TriplesOrQuadsInputFormat.class,
                        LongWritable.class, // position
                        QuadWritable.class, // value
                        conf)
                .values()
                .map(QuadWritable::get);
        if (partitions == null) {
            log.warn("Reading non-line based formats by master node only, consider setting --parsing.repartition to redistribute work to other nodes.");
        }
    } else {
        log.info("Parsing RDF in parallel with batch size: {}", batchSize);
        quads = sc.newAPIHadoopFile(
                        path,
                        NQuadsInputFormat.class,
                        LongWritable.class, // position
                        QuadWritable.class, // value
                        conf)
                .values()
                .map(QuadWritable::get);
    }

    if (partitions != null) {
        log.info("Distributing workload, repartitioning into {} partitions", partitions);
        quads = quads.repartition(partitions);
    }

    final List<String> acceptedLanguages = config.getAcceptedLanguages();
    if (acceptedLanguages.isEmpty()) {
        // no language restriction configured: keep everything
        return quads;
    }

    // only some languages are accepted: filter out literals of unsupported languages
    return quads.filter(quad -> {
        if (!quad.getObject().isLiteral()) {
            return true;
        }
        if (quad.getObject().getLiteralLanguage() == null) {
            return true;
        }
        if (quad.getObject().getLiteralLanguage().isEmpty()) {
            return true;
        }
        return acceptedLanguages.contains(quad.getObject().getLiteralLanguage());
    });
}