当前位置: 首页>>代码示例>>Java>>正文


Java JavaPairRDD.foreach方法代码示例

本文整理汇总了Java中org.apache.spark.api.java.JavaPairRDD.foreach方法的典型用法代码示例。如果您正苦于以下问题:Java JavaPairRDD.foreach方法的具体用法?Java JavaPairRDD.foreach怎么用?Java JavaPairRDD.foreach使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在org.apache.spark.api.java.JavaPairRDD的用法示例。


在下文中一共展示了JavaPairRDD.foreach方法的11个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: interleaveSplitFastq

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Splits two FASTQ files with {@code NLineInputFormat}, pairs the splits positionally and
 * writes one interleaved output file per split pair.
 *
 * <p>The two split lists are parallelized and zipped, so the i-th split of {@code fst} is
 * handled together with the i-th split of {@code fst2}. Each output file is named
 * {@code splitDir/<parent directory name of the first split>_<start offset of the first split>.fq}.
 *
 * <p>NOTE(review): Spark's {@code zip} expects both RDDs to have the same number of
 * partitions and elements per partition; presumably both inputs yield equally many
 * splits (e.g. the two mate files of a paired-end run) - confirm against callers.
 *
 * @param fst      file whose splits form the first element of every pair and determine the output name
 * @param fst2     file whose splits form the second element of every pair
 * @param splitDir directory prefix under which the interleaved files are written
 * @param splitlen value handed to {@code NLineInputFormat.getSplitsForFile} as the split length
 * @param sc       Spark context used for its Hadoop configuration and to parallelize the splits
 * @throws IOException declared by the split computation
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    List<FileSplit> firstSplits = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> secondSplits = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaPairRDD<FileSplit, FileSplit> pairedSplits =
            sc.parallelize(firstSplits).zip(sc.parallelize(secondSplits));

    pairedSplits.foreach(pair -> {
        FileSplit first = pair._1();
        FileSplit second = pair._2();
        Path firstPath = first.getPath();

        // Every reader, and the writer, gets its own fresh Configuration, as before.
        FastqRecordReader firstReader = new FastqRecordReader(new Configuration(), first);
        FastqRecordReader secondReader = new FastqRecordReader(new Configuration(), second);
        Configuration writeConf = new Configuration();

        String target = splitDir + "/" + firstPath.getParent().getName() + "_" + first.getStart() + ".fq";
        writeInterleavedSplits(firstReader, secondReader, writeConf, target);
    });
}
 
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:17,代码来源:InterleaveMulti.java

示例2: interleaveSplitFastq

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Writes interleaved FASTQ chunks for two input files, one chunk per pair of input splits.
 *
 * <p>Both files are split via {@code NLineInputFormat.getSplitsForFile} with the same
 * {@code splitlen}. The split lists are turned into RDDs and zipped, and every resulting
 * pair is passed to {@code writeInterleavedSplits} through two {@code FastqRecordReader}s.
 * The target of each pair is
 * {@code splitDir + "/" + <parent directory name> + "_" + <start offset> + ".fq"},
 * both taken from the first split of the pair.
 *
 * @param fst      first input file; its splits name the output files
 * @param fst2     second input file
 * @param splitDir output directory prefix
 * @param splitlen split length argument for {@code NLineInputFormat.getSplitsForFile}
 * @param sc       Spark context supplying the Hadoop configuration and the RDDs
 * @throws IOException declared by the split computation
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {
    List<FileSplit> leftSplits = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> rightSplits = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> leftRdd = sc.parallelize(leftSplits);
    JavaRDD<FileSplit> rightRdd = sc.parallelize(rightSplits);

    leftRdd.zip(rightRdd).foreach(splitPair -> {
        FileSplit left = splitPair._1();
        Path leftPath = left.getPath();

        FastqRecordReader leftReader = new FastqRecordReader(new Configuration(), left);
        FastqRecordReader rightReader = new FastqRecordReader(new Configuration(), splitPair._2());
        Configuration outputConf = new Configuration();

        // Output name: <splitDir>/<parent dir of the left split>_<left split start>.fq
        String outputFile = splitDir + "/" + leftPath.getParent().getName() + "_" + left.getStart() + ".fq";
        writeInterleavedSplits(leftReader, rightReader, outputConf, outputFile);
    });
}
 
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:18,代码来源:Decompress.java

示例3: interleaveSplitFastq

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Splits two FASTQ files with {@code NLineInputFormat}, zips the splits pairwise and writes
 * one interleaved file per pair into {@code splitDir}.
 *
 * <p>Each output file is named
 * {@code <parent directory name of the first split>_<start offset of the first split>.fq}.
 * Directory and file name are passed to {@code writeInterleavedSplits} as separate arguments.
 *
 * <p>NOTE(review): Spark's {@code zip} expects both RDDs to have the same number of
 * partitions and elements per partition; presumably both inputs produce equally many
 * splits - confirm against callers.
 *
 * @param fst      first input file; its splits determine the output file names
 * @param fst2     second input file
 * @param splitDir output directory handed to {@code writeInterleavedSplits}
 * @param splitlen split length argument for {@code NLineInputFormat.getSplitsForFile}
 * @param sc       Spark context supplying the Hadoop configuration and the RDDs
 * @throws IOException declared by the split computation
 */
public static void interleaveSplitFastq(FileStatus fst, FileStatus fst2, String splitDir, int splitlen, JavaSparkContext sc) throws IOException {

    //TODO: Handle also compressed files
    List<FileSplit> nlif = NLineInputFormat.getSplitsForFile(fst, sc.hadoopConfiguration(), splitlen);
    List<FileSplit> nlif2 = NLineInputFormat.getSplitsForFile(fst2, sc.hadoopConfiguration(), splitlen);

    JavaRDD<FileSplit> splitRDD = sc.parallelize(nlif);
    JavaRDD<FileSplit> splitRDD2 = sc.parallelize(nlif2);
    JavaPairRDD<FileSplit, FileSplit> zips = splitRDD.zip(splitRDD2);

    zips.foreach( splits ->  {
        Path path = splits._1.getPath();
        FastqRecordReader fqreader = new FastqRecordReader(new Configuration(), splits._1);
        FastqRecordReader fqreader2 = new FastqRecordReader(new Configuration(), splits._2);
        // The file name is derived from the first split only: <parent dir name>_<start offset>.fq
        String fileName = path.getParent().getName() + "_" + splits._1.getStart() + ".fq";
        writeInterleavedSplits(fqreader, fqreader2, new Configuration(), splitDir, fileName);
    });
}
 
开发者ID:NGSeq,项目名称:ViraPipe,代码行数:19,代码来源:DecompressInterleave.java

示例4: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Runs every document of {@code mongoRDD} through the Gaussian mixture model and reports
 * the result to {@code gaussianMixtureValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Fields that are
 * absent stay {@code 0}; present fields must be {@code Long}, {@code Double} or
 * {@code Boolean} (mapped to 1/0). Any other value type makes the whole record be skipped,
 * in which case neither the summary nor the timing total is updated for it. Values are
 * then multiplied by their configured weight (if any), made absolute and normalized
 * according to the feature configuration.
 *
 * @param mongoRDD                        records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration    target features, weights and absolute/normalization switches
 * @param gaussianMixtureDetectionModel   wrapper providing the {@code GaussianMixtureModel} and its algorithm
 * @param gaussianMixtureValidationSummary summary updated per record and finalized after the pass
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     GaussianMixtureDetectionModel gaussianMixtureDetectionModel,
                     GaussianMixtureValidationSummary gaussianMixtureValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    GaussianMixtureModel mixtureModel = (GaussianMixtureModel) gaussianMixtureDetectionModel.getDetectionModel();

    int featureCount = targetFeatures.size();
    Normalizer normalizer = new Normalizer();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            int clusterIndex = mixtureModel.predict(featureVector);
            gaussianMixtureValidationSummary.updateSummary(clusterIndex, document, feature);

            // Only fully processed records contribute to the timing total.
            gaussianMixtureValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    gaussianMixtureValidationSummary.calculateDetectionRate();
    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    gaussianMixtureValidationSummary.getAverageNanoSeconds();
    gaussianMixtureValidationSummary.setGaussianMixtureDetectionAlgorithm(
            (GaussianMixtureDetectionAlgorithm) gaussianMixtureDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:66,代码来源:GaussianMixtureDistJob.java

示例5: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Assigns every document of {@code mongoRDD} to a k-means cluster and reports the result to
 * {@code kmeansValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                     records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration target features, weights and absolute/normalization switches
 * @param kMeansDetectionModel         wrapper providing the {@code KMeansModel} and its algorithm
 * @param kmeansValidationSummary      summary updated per record and finalized after the pass
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     KMeansDetectionModel kMeansDetectionModel,
                     KmeansValidationSummary kmeansValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    KMeansModel kMeansModel = (KMeansModel) kMeansDetectionModel.getDetectionModel();
    int featureCount = targetFeatures.size();
    Normalizer normalizer = new Normalizer();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            int clusterIndex = kMeansModel.predict(featureVector);
            kmeansValidationSummary.updateSummary(clusterIndex, document, feature);

            // Only fully processed records contribute to the timing total.
            kmeansValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    kmeansValidationSummary.calculateDetectionRate();
    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    kmeansValidationSummary.getAverageNanoSeconds();
    kmeansValidationSummary.setkMeansDetectionAlgorithm(
            (KMeansDetectionAlgorithm) kMeansDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:66,代码来源:KMeansDistJob.java

示例6: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the gradient-boosted-trees model and
 * reports the predicted label to {@code gradientBoostedTreesValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                              records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration          target features, weights and absolute/normalization switches
 * @param gradientBoostedTreesDetectionModel    wrapper providing the model, its marking and its algorithm
 * @param gradientBoostedTreesValidationSummary summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     GradientBoostedTreesDetectionModel gradientBoostedTreesDetectionModel,
                     GradientBoostedTreesValidationSummary gradientBoostedTreesValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = gradientBoostedTreesDetectionModel.getMarking();
    GradientBoostedTreesModel treesModel =
            (GradientBoostedTreesModel) gradientBoostedTreesDetectionModel.getDetectionModel();
    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);
            int predictedLabel = (int) treesModel.predict(labeled.features());

            gradientBoostedTreesValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            gradientBoostedTreesValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    gradientBoostedTreesValidationSummary.getAverageNanoSeconds();
    gradientBoostedTreesValidationSummary.setGradientBoostedTreesDetectionAlgorithm(
            (GradientBoostedTreesDetectionAlgorithm) gradientBoostedTreesDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:69,代码来源:GradientBoostedTreesDistJob.java

示例7: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the random-forest model and reports
 * the predicted label to {@code randomForestValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                      records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration  target features, weights and absolute/normalization switches
 * @param randomForestDetectionModel    wrapper providing the model, its marking and its algorithm
 * @param randomForestValidationSummary summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     RandomForestDetectionModel randomForestDetectionModel,
                     RandomForestValidationSummary randomForestValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = randomForestDetectionModel.getMarking();
    RandomForestModel forestModel = (RandomForestModel) randomForestDetectionModel.getDetectionModel();
    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);
            int predictedLabel = (int) forestModel.predict(labeled.features());

            randomForestValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            randomForestValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    randomForestValidationSummary.getAverageNanoSeconds();
    randomForestValidationSummary.setRandomForestDetectionAlgorithm(
            (RandomForestDetectionAlgorithm) randomForestDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:69,代码来源:RandomForestDistJob.java

示例8: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the SVM model and reports the
 * resulting label to {@code SVMValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * <p>Unlike a plain cast of the prediction, the SVM output is thresholded here: a score
 * strictly greater than 0 becomes label 1, everything else label 0.
 *
 * @param mongoRDD                     records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration target features, weights and absolute/normalization switches
 * @param SVMDetectionModel            wrapper providing the model, its marking and its algorithm
 * @param SVMValidationSummary         summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     SVMDetectionModel SVMDetectionModel,
                     SVMValidationSummary SVMValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = SVMDetectionModel.getMarking();
    SVMModel svmModel = (SVMModel) SVMDetectionModel.getDetectionModel();

    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);

            // SVM only: threshold the score instead of casting it; > 0 counts as a detection.
            double score = svmModel.predict(labeled.features());
            int predictedLabel = score > 0 ? 1 : 0;
            SVMValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            SVMValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    SVMValidationSummary.getAverageNanoSeconds();
    SVMValidationSummary.setSvmDetectionAlgorithm(
            (SVMDetectionAlgorithm) SVMDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:76,代码来源:SVMDistJob.java

示例9: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the logistic-regression model and
 * reports the predicted label to {@code logisticRegressionValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                            records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration        target features, weights and absolute/normalization switches
 * @param logisticRegressionDetectionModel    wrapper providing the model, its marking and its algorithm
 * @param logisticRegressionValidationSummary summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     LogisticRegressionDetectionModel logisticRegressionDetectionModel,
                     LogisticRegressionValidationSummary logisticRegressionValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = logisticRegressionDetectionModel.getMarking();
    LogisticRegressionModel regressionModel =
            (LogisticRegressionModel) logisticRegressionDetectionModel.getDetectionModel();
    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);
            int predictedLabel = (int) regressionModel.predict(labeled.features());

            logisticRegressionValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            logisticRegressionValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    logisticRegressionValidationSummary.getAverageNanoSeconds();
    logisticRegressionValidationSummary.setLogisticRegressionDetectionAlgorithm(
            (LogisticRegressionDetectionAlgorithm) logisticRegressionDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:69,代码来源:LogisticRegressionDistJob.java

示例10: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the decision-tree model and reports
 * the predicted label to {@code decisionTreeValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                      records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration  target features, weights and absolute/normalization switches
 * @param decisionTreeDetectionModel    wrapper providing the model, its marking and its algorithm
 * @param decisionTreeValidationSummary summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     DecisionTreeDetectionModel decisionTreeDetectionModel,
                     DecisionTreeValidationSummary decisionTreeValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = decisionTreeDetectionModel.getMarking();
    DecisionTreeModel treeModel = (DecisionTreeModel) decisionTreeDetectionModel.getDetectionModel();
    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);
            int predictedLabel = (int) treeModel.predict(labeled.features());

            decisionTreeValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            decisionTreeValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    decisionTreeValidationSummary.getAverageNanoSeconds();
    decisionTreeValidationSummary.setDecisionTreeDetectionAlgorithm(
            (DecisionTreeDetectionAlgorithm) decisionTreeDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:69,代码来源:DecisionTreeDistJob.java

示例11: validate

import org.apache.spark.api.java.JavaPairRDD; //导入方法依赖的package包/类
/**
 * Classifies every document of {@code mongoRDD} with the naive-Bayes model and reports the
 * predicted label to {@code naiveBayesValidationSummary}.
 *
 * <p>Per record, the configured target features are read from the document's
 * {@code AthenaFeatureField.FEATURE} sub-object into a dense vector. Absent fields stay
 * {@code 0}; present fields must be {@code Long}, {@code Double} or {@code Boolean}
 * (mapped to 1/0). A record containing any other value type is skipped entirely and is
 * counted in neither the summary nor the timing total. Weighting, absolute value and
 * normalization are applied as the feature configuration dictates.
 *
 * @param mongoRDD                     records to validate; the value of each pair is the document
 * @param athenaMLFeatureConfiguration target features, weights and absolute/normalization switches
 * @param naiveBayesDetectionModel     wrapper providing the model, its marking and its algorithm
 * @param naiveBayesValidationSummary  summary updated per record
 */
public void validate(JavaPairRDD<Object, BSONObject> mongoRDD,
                     AthenaMLFeatureConfiguration athenaMLFeatureConfiguration,
                     NaiveBayesDetectionModel naiveBayesDetectionModel,
                     NaiveBayesValidationSummary naiveBayesValidationSummary) {
    List<AthenaFeatureField> targetFeatures = athenaMLFeatureConfiguration.getListOfTargetFeatures();
    Map<AthenaFeatureField, Integer> weights = athenaMLFeatureConfiguration.getWeight();
    Marking labelMarking = naiveBayesDetectionModel.getMarking();
    NaiveBayesModel bayesModel = (NaiveBayesModel) naiveBayesDetectionModel.getDetectionModel();
    Normalizer normalizer = new Normalizer();

    int featureCount = targetFeatures.size();

    mongoRDD.foreach(new VoidFunction<Tuple2<Object, BSONObject>>() {
        @Override
        public void call(Tuple2<Object, BSONObject> entry) throws UnknownHostException {
            long startedAt = System.nanoTime();
            BSONObject feature = (BSONObject) entry._2().get(AthenaFeatureField.FEATURE);
            BSONObject document = entry._2();
            int originLabel = labelMarking.checkClassificationMarkingElements(document, feature);

            // A new double[] is zero-filled, so features missing from the document stay 0.
            double[] values = new double[featureCount];
            for (int i = 0; i < featureCount; i++) {
                AthenaFeatureField field = targetFeatures.get(i);
                if (!feature.containsField(field.getValue())) {
                    continue;
                }

                Object raw = feature.get(field.getValue());
                double value;
                if (raw instanceof Long) {
                    value = (Long) raw;
                } else if (raw instanceof Double) {
                    value = (Double) raw;
                } else if (raw instanceof Boolean) {
                    value = (Boolean) raw ? 1 : 0;
                } else {
                    // Unsupported value type: the whole record is dropped.
                    return;
                }

                if (weights.containsKey(field)) {
                    value *= weights.get(field);
                }
                if (athenaMLFeatureConfiguration.isAbsolute()) {
                    value = Math.abs(value);
                }
                values[i] = value;
            }

            Vector featureVector = athenaMLFeatureConfiguration.isNormalization()
                    ? normalizer.transform(Vectors.dense(values))
                    : Vectors.dense(values);

            // Only the features of the labeled point are handed to the model.
            LabeledPoint labeled = new LabeledPoint(originLabel, featureVector);
            int predictedLabel = (int) bayesModel.predict(labeled.features());

            naiveBayesValidationSummary.updateSummary(predictedLabel, document, feature);

            // Only fully processed records contribute to the timing total.
            naiveBayesValidationSummary.addTotalNanoSeconds(System.nanoTime() - startedAt);
        }
    });

    // NOTE(review): the returned average is discarded; presumably called for a side effect - confirm.
    naiveBayesValidationSummary.getAverageNanoSeconds();
    naiveBayesValidationSummary.setNaiveBayesDetectionAlgorithm(
            (NaiveBayesDetectionAlgorithm) naiveBayesDetectionModel.getDetectionAlgorithm());
}
 
开发者ID:shlee89,项目名称:athena,代码行数:69,代码来源:NaiveBayesDistJob.java


注:本文中的org.apache.spark.api.java.JavaPairRDD.foreach方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。