本文整理汇总了Java中org.apache.spark.api.java.JavaPairRDD.foreach方法的典型用法代码示例。如果您正苦于以下问题:Java JavaPairRDD.foreach方法的具体用法?Java JavaPairRDD.foreach怎么用?Java JavaPairRDD.foreach使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类org.apache.spark.api.java.JavaPairRDD
的用法示例。
在下文中一共展示了JavaPairRDD.foreach方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: interleaveSplitFastq
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Interleaves two paired FASTQ files split-by-split: computes line-based input splits
 * for each file, zips the split lists pairwise with Spark, and writes one interleaved
 * .fq file per split pair under splitDir.
 *
 * @param fst      file status of the first (forward) FASTQ file
 * @param fst2     file status of the second (reverse) FASTQ file
 * @param splitDir output directory receiving the interleaved split files
 * @param splitlen number of lines per split (NLineInputFormat granularity)
 * @param sc       Spark context used to parallelize the split work
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    // Line-based splits for both mates; the two lists are assumed to align index-by-index.
    List<FileSplit> firstSplits = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> secondSplits = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
    // Pair the i-th split of file one with the i-th split of file two.
    JavaPairRDD<FileSplit, FileSplit> pairedSplits =
            sc.parallelize(firstSplits).zip(sc.parallelize(secondSplits));
    pairedSplits.foreach( pair -> {
        Path firstPath = pair._1.getPath();
        // NOTE(review): the readers are not closed here — presumably writeInterleavedSplits
        // consumes and releases them; confirm FastqRecordReader's lifecycle.
        FastqRecordReader readerOne = new FastqRecordReader(new Configuration(), pair._1);
        FastqRecordReader readerTwo = new FastqRecordReader(new Configuration(), pair._2);
        // Output name: <parent dir of input>_<byte offset of split>.fq
        writeInterleavedSplits(readerOne, readerTwo, new Configuration(), splitDir+"/"+firstPath.getParent().getName()+"_"+pair._1.getStart()+".fq");
    });
}
示例2: interleaveSplitFastq
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Interleaves a pair of FASTQ files: each file is carved into line-count-based splits,
 * the splits are zipped pairwise into an RDD, and every pair is written out as one
 * interleaved .fq file in splitDir.
 *
 * @param fst      first FASTQ input
 * @param fst2     second FASTQ input (mate of the first)
 * @param splitDir directory for the interleaved output files
 * @param splitlen lines per split handed to NLineInputFormat
 * @param sc       Spark context driving the distributed write
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    Configuration hadoopConf = sc.hadoopConfiguration();
    JavaRDD<FileSplit> leftRDD = sc.parallelize(NLineInputFormat.getSplitsForFile(fst, hadoopConf, splitlen));
    JavaRDD<FileSplit> rightRDD = sc.parallelize(NLineInputFormat.getSplitsForFile(fst2, hadoopConf, splitlen));
    // Zip assumes both RDDs have identical split counts (same partitioning of both mates).
    JavaPairRDD<FileSplit, FileSplit> zipped = leftRDD.zip(rightRDD);
    zipped.foreach( splitPair -> {
        Path leftPath = splitPair._1.getPath();
        FastqRecordReader leftReader = new FastqRecordReader(new Configuration(), splitPair._1);
        FastqRecordReader rightReader = new FastqRecordReader(new Configuration(), splitPair._2);
        // NOTE(review): readers are handed off to writeInterleavedSplits without an
        // explicit close — verify that helper closes them.
        writeInterleavedSplits(leftReader, rightReader, new Configuration(), splitDir+"/"+leftPath.getParent().getName()+"_"+splitPair._1.getStart()+".fq");
    });
}
示例3: interleaveSplitFastq
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Interleaves two paired FASTQ files split-by-split: computes line-based input splits
 * for both files, zips them pairwise, and writes one interleaved .fq file per pair
 * into splitDir (file name derived from the input's parent directory and split offset).
 *
 * @param fst      file status of the first FASTQ input
 * @param fst2     file status of the second FASTQ input (mate of the first)
 * @param splitDir output directory receiving the interleaved split files
 * @param splitlen number of lines per split (NLineInputFormat granularity)
 * @param sc       Spark context used to distribute the per-split work
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
//TODO: Handle also compressed files
// (removed an unused local that split the file name on '.' — it was never read)
List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);
JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
// zip pairs the i-th split of each file; both lists must have equal length.
JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);
zips.foreach( splits -> {
Path path = splits._1.getPath();
// NOTE(review): the readers are passed to writeInterleavedSplits without an explicit
// close here — confirm that helper releases them.
FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, path.getParent().getName()+"_"+splits._1.getStart()+".fq");
});
}
示例4: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained Gaussian-mixture model against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this builds a dense feature vector from the
 * configured target features, optionally applies per-feature weights, absolute value
 * and normalization, predicts a mixture component, and folds the result plus the
 * per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — the enclosing class (and the captured summary/config objects)
 * must be Serializable for this to run on a Spark cluster; confirm.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param gaussianMixtureDetectionModel wrapper holding the trained GaussianMixtureModel
 * @param gaussianMixtureValidationSummary accumulator updated with per-record predictions and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
GaussianMixtureDetectionModel gaussianMixtureDetectionModel,
GaussianMixtureValidationSummary gaussianMixtureValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
GaussianMixtureModel gaussianMixtureModel = (GaussianMixtureModel) gaussianMixtureDetectionModel.getDetectionModel();
int numberOfTargetValue = listOfTargetFeatures.size();
Normalizer normalizer = new Normalizer();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
Vector normedForVal;
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
// Predicted mixture-component index for this record.
int detectIdx = gaussianMixtureModel.predict(normedForVal);
gaussianMixtureValidationSummary.updateSummary(detectIdx, idx, feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
gaussianMixtureValidationSummary.addTotalNanoSeconds(result2);
}
});
gaussianMixtureValidationSummary.calculateDetectionRate();
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
gaussianMixtureValidationSummary.getAverageNanoSeconds();
gaussianMixtureValidationSummary.setGaussianMixtureDetectionAlgorithm(
(GaussianMixtureDetectionAlgorithm)gaussianMixtureDetectionModel.getDetectionAlgorithm());
}
示例5: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained k-means model against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this builds a dense feature vector from the
 * configured target features, optionally applies per-feature weights, absolute value
 * and normalization, predicts the nearest cluster, and folds the result plus the
 * per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — the enclosing class (and the captured summary/config objects)
 * must be Serializable for this to run on a Spark cluster; confirm.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param kMeansDetectionModel         wrapper holding the trained KMeansModel
 * @param kmeansValidationSummary      accumulator updated with per-record predictions and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
KMeansDetectionModel kMeansDetectionModel,
KmeansValidationSummary kmeansValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
KMeansModel cluster = (KMeansModel) kMeansDetectionModel.getDetectionModel();
int numberOfTargetValue = listOfTargetFeatures.size();
Normalizer normalizer = new Normalizer();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
Vector normedForVal;
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()) {
values[j] = Math.abs(values[j]);
}
}
}
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
// Index of the nearest cluster for this record.
int detectIdx = cluster.predict(normedForVal);
kmeansValidationSummary.updateSummary(detectIdx, idx, feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
kmeansValidationSummary.addTotalNanoSeconds(result2);
}
});
kmeansValidationSummary.calculateDetectionRate();
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
kmeansValidationSummary.getAverageNanoSeconds();
kmeansValidationSummary.setkMeansDetectionAlgorithm((KMeansDetectionAlgorithm) kMeansDetectionModel.getDetectionAlgorithm());
}
示例6: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained gradient-boosted-trees classifier against feature records read
 * from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), predicts a label, and folds
 * the result plus the per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                             (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration         selects target features, weights, abs/normalization flags
 * @param gradientBoostedTreesDetectionModel   wrapper holding marking rules and the trained model
 * @param gradientBoostedTreesValidationSummary accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
GradientBoostedTreesDetectionModel gradientBoostedTreesDetectionModel,
GradientBoostedTreesValidationSummary gradientBoostedTreesValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = gradientBoostedTreesDetectionModel.getMarking();
GradientBoostedTreesModel model = (GradientBoostedTreesModel) gradientBoostedTreesDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
// Predicted class label (model returns a double; truncated to int here).
int validatedLabel = (int) model.predict(p.features());
gradientBoostedTreesValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
gradientBoostedTreesValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
gradientBoostedTreesValidationSummary.getAverageNanoSeconds();
gradientBoostedTreesValidationSummary.setGradientBoostedTreesDetectionAlgorithm((GradientBoostedTreesDetectionAlgorithm) gradientBoostedTreesDetectionModel.getDetectionAlgorithm());
}
示例7: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained random-forest classifier against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), predicts a label, and folds
 * the result plus the per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param randomForestDetectionModel   wrapper holding marking rules and the trained model
 * @param randomForestValidationSummary accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
RandomForestDetectionModel randomForestDetectionModel,
RandomForestValidationSummary randomForestValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = randomForestDetectionModel.getMarking();
RandomForestModel model = (RandomForestModel) randomForestDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
// Predicted class label (model returns a double; truncated to int here).
int validatedLabel = (int) model.predict(p.features());
randomForestValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
randomForestValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
randomForestValidationSummary.getAverageNanoSeconds();
randomForestValidationSummary.setRandomForestDetectionAlgorithm((RandomForestDetectionAlgorithm) randomForestDetectionModel.getDetectionAlgorithm());
}
示例8: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained SVM classifier against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), thresholds the SVM score
 * into a binary label, and folds the result plus the per-record latency (nanoseconds)
 * into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param SVMDetectionModel            wrapper holding marking rules and the trained SVMModel
 * @param SVMValidationSummary         accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
SVMDetectionModel SVMDetectionModel,
SVMValidationSummary SVMValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = SVMDetectionModel.getMarking();
SVMModel model = (SVMModel) SVMDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
//Only SVM!!
// Unlike the other validators, the SVM returns a raw margin score, so the label is
// obtained by thresholding the score at 0 rather than casting the prediction.
int validatedLabel;// = (int) model.predict(p.features());
double score = model.predict(p.features());
if (score > 0){
// Positive margin => detection (label 1).
validatedLabel = 1;
} else {
validatedLabel = 0;
}
SVMValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
SVMValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
SVMValidationSummary.getAverageNanoSeconds();
SVMValidationSummary.setSvmDetectionAlgorithm((SVMDetectionAlgorithm) SVMDetectionModel.getDetectionAlgorithm());
}
示例9: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained logistic-regression classifier against feature records read
 * from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), predicts a label, and folds
 * the result plus the per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                          (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration      selects target features, weights, abs/normalization flags
 * @param logisticRegressionDetectionModel  wrapper holding marking rules and the trained model
 * @param logisticRegressionValidationSummary accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
LogisticRegressionDetectionModel logisticRegressionDetectionModel,
LogisticRegressionValidationSummary logisticRegressionValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = logisticRegressionDetectionModel.getMarking();
LogisticRegressionModel model = (LogisticRegressionModel) logisticRegressionDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
// Predicted class label (model returns a double; truncated to int here).
int validatedLabel = (int) model.predict(p.features());
logisticRegressionValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
logisticRegressionValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
logisticRegressionValidationSummary.getAverageNanoSeconds();
logisticRegressionValidationSummary.setLogisticRegressionDetectionAlgorithm((LogisticRegressionDetectionAlgorithm) logisticRegressionDetectionModel.getDetectionAlgorithm());
}
示例10: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained decision-tree classifier against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), predicts a label, and folds
 * the result plus the per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param decisionTreeDetectionModel   wrapper holding marking rules and the trained model
 * @param decisionTreeValidationSummary accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
DecisionTreeDetectionModel decisionTreeDetectionModel,
DecisionTreeValidationSummary decisionTreeValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = decisionTreeDetectionModel.getMarking();
DecisionTreeModel model = (DecisionTreeModel) decisionTreeDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
// Predicted class label (model returns a double; truncated to int here).
int validatedLabel = (int) model.predict(p.features());
decisionTreeValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
decisionTreeValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
decisionTreeValidationSummary.getAverageNanoSeconds();
decisionTreeValidationSummary.setDecisionTreeDetectionAlgorithm((DecisionTreeDetectionAlgorithm) decisionTreeDetectionModel.getDetectionAlgorithm());
}
示例11: validate
import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Validates a trained naive-Bayes classifier against feature records read from MongoDB.
 *
 * For every (id, BSON document) pair this derives the ground-truth label from the
 * marking rules, builds a dense feature vector from the configured target features
 * (with optional weights, absolute value, normalization), predicts a label, and folds
 * the result plus the per-record latency (nanoseconds) into the validation summary.
 *
 * NOTE(review): originLabel is wrapped into the LabeledPoint but only p.features() is
 * used, and updateSummary receives only the predicted label — presumably the summary
 * re-derives the ground truth from idx/feature; confirm.
 * NOTE(review): the foreach closure is an anonymous inner class, which captures the
 * enclosing instance — it and the captured objects must be Serializable on a cluster.
 *
 * @param mongoRDD                     (id, BSON document) records to validate against
 * @param athenaMLFeatureConfiguration selects target features, weights, abs/normalization flags
 * @param naiveBayesDetectionModel     wrapper holding marking rules and the trained model
 * @param naiveBayesValidationSummary  accumulator updated with per-record labels and timings
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
NaiveBayesDetectionModel naiveBayesDetectionModel,
NaiveBayesValidationSummary naiveBayesValidationSummary) {
List<AthenaFeatureField> listOfTargetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
Map<AthenaFeatureField, Integer> weight = athenaMLFeatureConfiguration.getWeight();
Marking marking = naiveBayesDetectionModel.getMarking();
NaiveBayesModel model = (NaiveBayesModel) naiveBayesDetectionModel.getDetectionModel();
Normalizer normalizer = new Normalizer();
int numberOfTargetValue = listOfTargetFeatures.size();
mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
public void call(Tuple2<Object, BSONObject> t) throws UnknownHostException {
long start2 = System.nanoTime(); // start of per-record latency measurement
// FEATURE sub-document holds the raw values; the whole document doubles as the index.
BSONObject feature = (BSONObject) t._2().get(AthenaFeatureField.FEATURE);
BSONObject idx = (BSONObject) t._2();
// Ground-truth label derived from the marking rules.
int originLabel = marking.checkClassificationMarkingElements(idx,feature);
double[] values = new double[numberOfTargetValue];
for (int j = 0; j < numberOfTargetValue; j++) {
values[j] = 0; // features missing from the document stay 0
if (feature.containsField(listOfTargetFeatures.get(j).getValue())) {
Object obj = feature.get(listOfTargetFeatures.get(j).getValue());
if (obj instanceof Long) {
values[j] = (Long) obj;
} else if (obj instanceof Double) {
values[j] = (Double) obj;
} else if (obj instanceof Boolean) {
values[j] = (Boolean) obj ? 1 : 0;
} else {
// Unsupported value type: abandon this record entirely (updateSummary is skipped).
return;
}
// Scale the feature if a weight is configured for it.
if (weight.containsKey(listOfTargetFeatures.get(j))) {
values[j] *= weight.get(listOfTargetFeatures.get(j));
}
// Optionally force the feature to its absolute value.
if (athenaMLFeatureConfiguration.isAbsolute()){
values[j] = Math.abs(values[j]);
}
}
}
Vector normedForVal;
// Optionally normalize the assembled vector before prediction.
if (athenaMLFeatureConfiguration.isNormalization()) {
normedForVal = normalizer.transform(Vectors.dense(values));
} else {
normedForVal = Vectors.dense(values);
}
LabeledPoint p = new LabeledPoint(originLabel,normedForVal);
// Predicted class label (model returns a double; truncated to int here).
int validatedLabel = (int) model.predict(p.features());
naiveBayesValidationSummary.updateSummary(validatedLabel,idx,feature);
long end2 = System.nanoTime();
long result2 = end2 - start2;
naiveBayesValidationSummary.addTotalNanoSeconds(result2);
}
});
// NOTE(review): return value discarded — presumably this computes/caches the average
// as a side effect; confirm.
naiveBayesValidationSummary.getAverageNanoSeconds();
naiveBayesValidationSummary.setNaiveBayesDetectionAlgorithm((NaiveBayesDetectionAlgorithm) naiveBayesDetectionModel.getDetectionAlgorithm());
}