This article collects typical usage examples of the Java class org.apache.spark.Accumulable. If you are wondering what the Accumulable class does and how to use it, the curated code examples below should help.
The Accumulable class belongs to the org.apache.spark package. Six code examples of the class are shown below, ordered by popularity.
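All of the snippets on this page follow the same basic pattern: the driver creates an Accumulable from an initial value plus a custom AccumulableParam, tasks call add(...) on it inside an action, and the driver reads the merged result with value(). (Accumulable and AccumulableParam are deprecated since Spark 2.0 in favor of AccumulatorV2, but they are what these examples use.) Below is a minimal, self-contained sketch of that pattern; the StringLengthParam class and the local[*] master are invented for illustration and are not part of the examples that follow.

import org.apache.spark.Accumulable;
import org.apache.spark.AccumulableParam;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical AccumulableParam that sums the lengths of the strings added to it.
class StringLengthParam implements AccumulableParam<Long, String> {
    @Override
    public Long addAccumulator(Long total, String s) { return total + s.length(); } // add one element
    @Override
    public Long addInPlace(Long a, Long b) { return a + b; }                        // merge partial results
    @Override
    public Long zero(Long initialValue) { return 0L; }                              // identity value
}

public class AccumulableSketch {
    public static void main(String[] args) {
        JavaSparkContext jsc = new JavaSparkContext("local[*]", "AccumulableSketch");
        Accumulable<Long, String> totalLength = jsc.accumulable(0L, new StringLengthParam());
        jsc.parallelize(java.util.Arrays.asList("spark", "accumulable"))
           .foreach(s -> totalLength.add(s));        // tasks add elements on the executors
        System.out.println(totalLength.value());     // the driver reads the merged value
        jsc.stop();
    }
}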
Example 1: main
import org.apache.spark.Accumulable; // import the required package/class
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
public static void main(String[] args) {
    // Require master, input path and output folder.
    if (args.length < 3) {
        System.out.println("UniqueSeqGenerator {master} {inputPath} {outputFolder}");
        return;
    }
    String master = args[0];
    String inputPath = args[1];
    String outputPath = args[2];

    JavaSparkContext jsc = new JavaSparkContext(master, "UniqueSeqGenerator", null, "SeqGenerator.jar");

    // Count records per partition into a custom accumulable.
    SeqMapCounter seqMapCounter = new SeqMapCounter();
    Accumulable<SeqMapCounter, String> accumulable = jsc.accumulable(seqMapCounter, new SeqMapCounterAccumulableParam());
    JavaRDD<String> javaRdd = jsc.textFile(inputPath);
    javaRdd.foreach(new ForEachMapperPartitionCounter(accumulable));

    // Read the merged counts on the driver, then broadcast them back to the executors.
    seqMapCounter = accumulable.value();
    System.out.println("--------");
    System.out.println(seqMapCounter.getSummery());
    System.out.println("--------");
    Broadcast<SeqMapCounter> broadcast = jsc.broadcast(seqMapCounter);
    JavaRDD<String> seqRdd = javaRdd.map(new MapAssignSequence(broadcast));
    seqRdd.saveAsTextFile(outputPath);
}
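Example 1 relies on SeqMapCounter and SeqMapCounterAccumulableParam, which are defined elsewhere in the project and not shown on this page. As a rough sketch of how such a map-style accumulable can be wired up, the hypothetical MapCounterParam below merges per-key record counts into a single HashMap; the class name and the counting logic are illustrative assumptions, not the project's actual implementation.

import java.util.HashMap;
import java.util.Map;
import org.apache.spark.AccumulableParam;

// Hypothetical param that accumulates per-key record counts into one HashMap.
class MapCounterParam implements AccumulableParam<HashMap<String, Long>, String> {
    @Override
    public HashMap<String, Long> addAccumulator(HashMap<String, Long> counts, String key) {
        counts.merge(key, 1L, Long::sum);                     // count one record for this key
        return counts;
    }
    @Override
    public HashMap<String, Long> addInPlace(HashMap<String, Long> a, HashMap<String, Long> b) {
        for (Map.Entry<String, Long> e : b.entrySet()) {
            a.merge(e.getKey(), e.getValue(), Long::sum);     // merge per-partition maps
        }
        return a;
    }
    @Override
    public HashMap<String, Long> zero(HashMap<String, Long> initialValue) {
        return new HashMap<String, Long>();                   // empty map as the identity
    }
}

With a param like this, jsc.accumulable(new HashMap<String, Long>(), new MapCounterParam()) would behave analogously to the SeqMapCounter accumulable above: each task adds the keys it sees, Spark merges the per-partition maps with addInPlace, and the driver reads the combined map with value().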
Example 2: main
import org.apache.spark.Accumulable; // import the required package/class
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import java.util.Map.Entry;
import java.util.TreeMap;
import scala.Tuple2;
public static void main(String[] args) {
    // Require master and input path.
    if (args.length < 2) {
        System.out.println("ValidateSeqGeneration {master} {inputPath}");
        return;
    }
    String master = args[0];
    String inputPath = args[1];

    JavaSparkContext jsc = new JavaSparkContext(master, "ValidateSeqGeneration", null, "SeqGenerator.jar");

    // Accumulate the min/max sequence values seen in each partition.
    MinMaxAccumulator minMaxAccumulator = new MinMaxAccumulator();
    Accumulable<MinMaxAccumulator, Long> accumulable = jsc.accumulable(minMaxAccumulator, new MinMaxAccumulatorParam());
    JavaRDD<String> javaRdd = jsc.textFile(inputPath);
    javaRdd.foreach(new ForEachMapperPartitionCounter(accumulable));
    minMaxAccumulator = accumulable.value();

    // Sort the collected ranges and print them as "min,max" pairs.
    TreeMap<Long, Long> treeMap = new TreeMap<Long, Long>();
    for (Tuple2<Counter, Counter> minMax : minMaxAccumulator.minMaxRanges) {
        treeMap.put(minMax._1.val, minMax._2.val);
    }
    System.out.println("------");
    for (Entry<Long, Long> entry : treeMap.entrySet()) {
        System.out.println(entry.getKey() + "," + entry.getValue());
    }
    System.out.println("------");
}
Example 3: updateDistributionMatrix
import org.apache.spark.Accumulable; // import the required package/class
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.broadcast.Broadcast;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
protected void updateDistributionMatrix(JavaSparkContext sc, JavaRDD<MultilabelPoint> docs, double[][] localDM, WeakHypothesis localWH) {
    Broadcast<WeakHypothesis> distWH = sc.broadcast(localWH);
    Broadcast<double[][]> distDM = sc.broadcast(localDM);

    // Accumulables gathering per-document updates and per-label normalization factors.
    Accumulable<ArrayList<SingleDMUpdate>, DMPartialResult> partialResults =
            sc.accumulable(new ArrayList<SingleDMUpdate>(), new DMPartialResultAccumulableParam());
    Double[] normArray = new Double[localDM.length];
    for (int i = 0; i < normArray.length; i++)
        normArray[i] = 0.0;
    Accumulable<ArrayList<Double>, DMPartialResult> normalizations =
            sc.accumulable(new ArrayList<Double>(Arrays.asList(normArray)), new DMNormalizationAccumulableParam());

    docs.map(doc -> {
        int[] validFeatures = doc.getFeatures().indices();
        HashMap<Integer, Integer> dictFeatures = new HashMap<>();
        for (int featID : validFeatures)
            dictFeatures.put(featID, featID);
        HashMap<Integer, Integer> dictLabels = new HashMap<>();
        for (int idx = 0; idx < doc.getLabels().length; idx++)
            dictLabels.put(doc.getLabels()[idx], doc.getLabels()[idx]);

        double[][] dm = distDM.getValue();
        WeakHypothesis wh = distWH.getValue();
        double[] labelsRes = new double[dm.length];
        for (int labelID = 0; labelID < dm.length; labelID++) {
            float catValue = 1;
            if (dictLabels.containsKey(labelID)) {
                catValue = -1;
            }

            // Compute the weak hypothesis value.
            double value = 0;
            WeakHypothesis.WeakHypothesisData v = wh.getLabelData(labelID);
            int pivot = v.getFeatureID();
            if (dictFeatures.containsKey(pivot))
                value = v.getC1();
            else
                value = v.getC0();

            double partialRes = dm[labelID][doc.getPointID()] * Math.exp(catValue * value);
            labelsRes[labelID] = partialRes;
        }
        return new DMPartialResult(doc.getPointID(), labelsRes);
    }).foreach(r -> {
        partialResults.add(r);
        normalizations.add(r);
    });

    // Update distribution matrix.
    ArrayList<SingleDMUpdate> updates = partialResults.value();
    ArrayList<Double> normalizationValues = normalizations.value();
    for (int i = 0; i < updates.size(); i++) {
        SingleDMUpdate update = updates.get(i);
        localDM[update.getLabelID()][update.getDocID()] = update.getResult() / normalizationValues.get(update.getLabelID());
    }
}
Example 4: ForEachMapperPartitionCounter
import org.apache.spark.Accumulable; // import the required package/class
public ForEachMapperPartitionCounter(Accumulable<SeqMapCounter, String> accumulable) {
    this.accumulable = accumulable;
}
Example 5: ForEachMapperPartitionCounter
import org.apache.spark.Accumulable; // import the required package/class
public ForEachMapperPartitionCounter(Accumulable<MinMaxAccumulator, Long> accumulable) {
    this.accumulable = accumulable;
}
Example 6: readExternal
import org.apache.spark.Accumulable; // import the required package/class
import org.apache.spark.SerializableWritable;
import org.apache.spark.broadcast.Broadcast;
import org.apache.spark.util.LongAccumulator;
import org.apache.hadoop.security.Credentials;
import java.io.IOException;
import java.io.ObjectInput;
@Override
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
    Credentials credentials = null;
    if (in.readBoolean()) {
        // we've got credentials to apply
        Broadcast<SerializableWritable<Credentials>> bcast = (Broadcast<SerializableWritable<Credentials>>) in.readObject();
        credentials = bcast.getValue().value();
    }
    badRecordsSeen = in.readLong();
    badRecordThreshold = in.readLong();
    permissive = in.readBoolean();
    SpliceSpark.setupSpliceStaticComponents(credentials);
    boolean isOp = in.readBoolean();
    if (isOp) {
        broadcastedActivation = (BroadcastedActivation) in.readObject();
        op = (Op) broadcastedActivation.getActivationHolder().getOperationsMap().get(in.readInt());
        activation = broadcastedActivation.getActivationHolder().getActivation();
    }
    rowsRead = (LongAccumulator) in.readObject();
    rowsFiltered = (LongAccumulator) in.readObject();
    rowsWritten = (LongAccumulator) in.readObject();
    retryAttempts = (LongAccumulator) in.readObject();
    regionTooBusyExceptions = (LongAccumulator) in.readObject();
    rowsJoinedLeft = (LongAccumulator) in.readObject();
    rowsJoinedRight = (LongAccumulator) in.readObject();
    rowsProduced = (LongAccumulator) in.readObject();
    badRecordsAccumulator = (Accumulable<BadRecordsRecorder, String>) in.readObject();
    thrownErrorsRows = (LongAccumulator) in.readObject();
    retriedRows = (LongAccumulator) in.readObject();
    partialRows = (LongAccumulator) in.readObject();
    partialThrownErrorRows = (LongAccumulator) in.readObject();
    partialRetriedRows = (LongAccumulator) in.readObject();
    partialIgnoredRows = (LongAccumulator) in.readObject();
    partialWrite = (LongAccumulator) in.readObject();
    ignoredRows = (LongAccumulator) in.readObject();
    catchThrownRows = (LongAccumulator) in.readObject();
    catchRetriedRows = (LongAccumulator) in.readObject();
    pipelineRowsWritten = (LongAccumulator) in.readObject();
}