当前位置: 首页>>代码示例>>Java>>正文


Java Frame.vec方法代码示例

本文整理汇总了Java中water.fvec.Frame.vec方法的典型用法代码示例。如果您正苦于以下问题:Java Frame.vec方法的具体用法?Java Frame.vec怎么用?Java Frame.vec使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在water.fvec.Frame的用法示例。


在下文中一共展示了Frame.vec方法的14个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: column

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Returns a single column of a frame, packaged as a one-column frame schema.
 *
 * @param version REST API version; not referenced by this handler
 * @param s request schema supplying {@code frame_id} and {@code column}; it is filled in and returned
 * @return the same schema instance {@code s}, with {@code frames} holding exactly one entry
 * @throws H2OColumnNotFoundArgumentException if the frame has no column named {@code s.column}
 */
@SuppressWarnings("unused") // called through reflection by RequestServer
public FramesV3 column(int version, FramesV3 s) { // TODO: should return a Vec schema
  final Frame source = getFromDKV("key", s.frame_id.key());
  final Vec selected = source.vec(s.column);
  if (selected == null) {
    throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column);
  }

  // Wrap the lone column in a one-column frame so the frame schema can describe it.
  final Frame singleColumn = new Frame(new String[]{s.column}, new Vec[]{selected});
  s.frames = new FrameV3[1];
  s.frames[0] = new FrameV3().fillFromImpl(singleColumn);
  ((FrameV3) s.frames[0]).clearBinsField();
  return s;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:18,代码来源:FramesHandler.java

示例2: columnSummary

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Returns the summary of a single column of a frame.
 *
 * @param version REST API version; not referenced by this handler
 * @param s request schema supplying {@code frame_id}, {@code column} and the row/column window
 *          ({@code row_offset}, {@code row_count}, {@code column_offset}, {@code column_count})
 * @return the same schema instance {@code s}, with {@code frames} holding exactly one entry
 * @throws H2OColumnNotFoundArgumentException if the frame has no column named {@code s.column}
 */
@SuppressWarnings("unused") // called through reflection by RequestServer
public FramesV3 columnSummary(int version, FramesV3 s) {
  final Frame source = getFromDKV("key", s.frame_id.key()); // safe
  final Vec selected = source.vec(s.column);
  if (selected == null) {
    throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column);
  }

  // Second pass of rollups: the histograms. String columns are skipped.
  final boolean wantsHistogram = !selected.isString();
  if (wantsHistogram) {
    selected.bins();
  }

  // Build the result from a one-column frame holding just the requested column.
  final Frame singleColumn = new Frame(new String[]{s.column}, new Vec[]{selected});
  s.frames = new FrameV3[1];
  s.frames[0] = new FrameV3().fillFromImpl(singleColumn, s.row_offset, s.row_count, s.column_offset, s.column_count, true);
  return s;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:18,代码来源:FramesHandler.java

示例3: predict

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Scores {@code inputs} with {@code dlModel} and returns the first prediction column as an array.
 *
 * <p>The frame produced by scoring is only needed while its values are copied out, so it is
 * deleted before this method returns (also when copying fails) instead of being left behind.
 *
 * @param inputs frame to score; its row count determines the length of the result
 * @return one prediction value per input row, read from column 0 of the scored frame
 */
public double[] predict(Frame inputs) {
    Frame predsFrame = dlModel.score(inputs);
    try {
        int numRows = (int) inputs.numRows();
        Vec predsVector = predsFrame.vec(0);
        double[] predVals = new double[numRows];
        for (int i = 0; i < numRows; i++) {
            predVals[i] = predsVector.at(i);
        }
        return predVals;
    } finally {
        // The scored frame belongs to this call; release it once the values are copied.
        predsFrame.delete();
    }
}
 
开发者ID:wso2-attic,项目名称:carbon-ml,代码行数:11,代码来源:MLDeeplearningModel.java

示例4: columnDomain

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Returns the domain of a single column of a frame.
 *
 * @param version REST API version; not referenced by this handler
 * @param s request schema supplying {@code frame_id} and {@code column}
 * @return the same schema instance {@code s}, with {@code domain} set to a one-row array whose
 *         only row is the value of {@code Vec.domain()} for the requested column
 * @throws H2OColumnNotFoundArgumentException if the frame has no column named {@code s.column}
 */
@SuppressWarnings("unused") // called through reflection by RequestServer
public FramesV3 columnDomain(int version, FramesV3 s) {
  final Frame source = getFromDKV("key", s.frame_id.key());
  final Vec selected = source.vec(s.column);
  if (null == selected) {
    throw new H2OColumnNotFoundArgumentException("column", s.frame_id.toString(), s.column);
  }
  s.domain = new String[][]{ selected.domain() };
  return s;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:11,代码来源:FramesHandler.java

示例5: apply

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Evaluates the frame argument {@code asts[1]} and builds a new frame with the same column
 * names in which every column has been converted: categorical columns via {@code toIntVec()},
 * string columns via {@code toNumeric()}, and all other columns via {@code makeCopy()}.
 */
@Override Val apply( Env env, Env.StackHelp stk, AST asts[] ) {
  final Frame input = stk.track(asts[1].exec(env)).getFrame();
  final int ncols = input.numCols();
  final Vec[] converted = new Vec[ncols];
  for( int col = 0; col < ncols; col++ ) {
    final Vec src = input.vec(col);
    // The categorical test comes first, exactly as the string test comes second.
    converted[col] = src.isCategorical() ? src.toIntVec()
                   : src.isString()      ? src.toNumeric()
                   :                       src.makeCopy();
  }
  return new ValFrame(new Frame(input._names, converted));
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:13,代码来源:ASTStrList.java

示例6: apply

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Tabulates one or two columns. The first frame comes from {@code asts[1]}; an optional second
 * frame comes from {@code asts[2]}. {@code fast_table} is tried first and its result is returned
 * when non-null; otherwise the arguments are validated and {@code slow_table} is used.
 *
 * @throws IllegalArgumentException if the argument count is not 2 or 3, or the frames together
 *         hold more than two columns (only checked when {@code fast_table} returned null)
 */
@Override Val apply( Env env, Env.StackHelp stk, AST asts[] ) {
  final int nargs = asts.length;
  final Frame first = stk.track(asts[1].exec(env)).getFrame();
  Frame second = null;
  if( nargs == 3 ) second = stk.track(asts[2].exec(env)).getFrame();

  int ncols = first.numCols();
  if( second != null ) ncols += second.numCols();
  final Vec col1 = first.vec(0);

  final Val fast = fast_table(col1, ncols, first._names[0]);
  if( fast != null ) return fast;

  final boolean badArgCount = nargs != 2 && nargs != 3;
  if( badArgCount || ncols > 2 )
    throw new IllegalArgumentException("table expects one or two columns");

  // The second column comes from the first frame if it has two columns, else from the second frame.
  final Vec col2;
  if( first.numCols() == 2 ) col2 = first.vec(1);
  else if( second != null )  col2 = second.vec(0);
  else                       col2 = null;
  return slow_table(col1, col2, first._names[0]);
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:16,代码来源:ASTTable.java

示例7: score

import water.fvec.Frame; //导入方法依赖的package包/类
/** Bulk score the frame {@code fr}, producing a Frame result; the 1st
   *  Vec is the predicted class, the remaining Vecs are the probability
   *  distributions.  For Regression (single-class) models, the 1st and only
   *  Vec is the prediction value.  The result is in the DKV; caller is
   *  responsible for deleting.
   *
   *  <p>For classifiers, when metrics are computed, the confusion matrix and
   *  hit-ratio table are logged, and the prediction column is re-wrapped in
   *  the scored frame's response domain if that domain differs from the
   *  prediction's domain.
   *
   * @param fr frame which should be scored
   * @param destination_key passed through unchanged to {@code predictScoreImpl};
   *         presumably the key for the result frame (confirm against
   *         {@code predictScoreImpl})
   * @return A new frame containing the predicted values. For classification it
   *         contains a column with the prediction and the distribution for all
   *         response classes. For regression it contains only one column with
   *         predicted values.
   * @throws IllegalArgumentException declared by this method; nothing in this
   *         body throws it directly, so it would come from the helpers called
   */
  public Frame score(Frame fr, String destination_key) throws IllegalArgumentException {
    // Work on a separate Frame object so adaptation is applied to adaptFr rather than to fr itself.
    Frame adaptFr = new Frame(fr);
    // Metrics are computed for unsupervised models, or when the response column is present in the frame.
    boolean computeMetrics = (!isSupervised() || adaptFr.find(_output.responseName()) != -1);
    adaptTestForTrain(adaptFr,true, computeMetrics);   // Adapt
    Frame output = predictScoreImpl(fr, adaptFr, destination_key); // Predict & Score
    // Log modest confusion matrices
    Vec predicted = output.vecs()[0]; // Modeled/predicted response
    String mdomain[] = predicted.domain(); // Domain of predictions (union of test and train)

    // Output is in the model's domain, but needs to be mapped to the scored
    // dataset's domain.
    if(_output.isClassifier() && computeMetrics) {
//      assert(mdomain != null); // label must be categorical
      // NOTE(review): mm is dereferenced without a null check - confirm getFromDKV cannot return null here.
      ModelMetrics mm = ModelMetrics.getFromDKV(this,fr);
      ConfusionMatrix cm = mm.cm();
      if (cm != null && cm._domain != null) //don't print table for regression
        if( cm._cm.length < _parms._max_confusion_matrix_size/*Print size limitation*/ ) {
          Log.info(cm.table().toString(1));
        }
      if (mm.hr() != null) {
        Log.info(getHitRatioTable(mm.hr()));
      }
      Vec actual = fr.vec(_output.responseName());
      if( actual != null ) {  // Predict does not have an actual, scoring does
        String sdomain[] = actual.domain(); // Scored/test domain; can be null
        // Only re-wrap when the two domains are different arrays with different contents.
        if (sdomain != null && mdomain != sdomain && !Arrays.equals(mdomain, sdomain))
          output.replace(0, new CategoricalWrappedVec(actual.group().addVec(), actual._rowLayout, sdomain, predicted._key));
      }
    }

    // Release whatever adaptation added to adaptFr relative to fr (see cleanup_adapt).
    cleanup_adapt(adaptFr, fr);
    return output;
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:47,代码来源:Model.java

示例8: map

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Redistributes the values of the input chunks into new chunks of the target frame {@code _tgt}.
 *
 * <p>For every target chunk index {@code i} (one per interval in the target's {@code espc}
 * array), one {@code NewChunk} is opened per row of {@code chks[0]}, on target column
 * {@code j + colStart}. Input chunks {@code espc[i] .. espc[i+1]-1} are then inflated and each
 * stored value is appended to the target chunk selected by the value's row id, so input columns
 * become target rows and input rows become target columns; this looks like a transpose
 * (NOTE(review): confirm against the enclosing task).
 *
 * <p>The commented-out lines are the remains of a fork/join version of the same loops and are
 * kept as they were.
 *
 * @param chks input chunks; {@code chks[0]} supplies the starting row and the row count
 */
public void map(final Chunk[] chks) {
      final Frame tgt = _tgt;
      // Chunk boundaries of the target: interval i covers target rows [espc[i], espc[i+1]).
      final long [] espc = tgt.anyVec().espc();
      // First row of the input chunks; used as the offset into the target's columns.
      final int colStart = (int)chks[0].start();
//      addToPendingCount(espc.length - 2);
      for (int i = 0; i < espc.length - 1; ++i) {
        final int fi = i;
//        new CountedCompleter(this) {
//          @Override
//          public void compute() {
        // One target chunk per input row.
        final NewChunk[] tgtChunks = new NewChunk[chks[0]._len];
        for (int j = 0; j < tgtChunks.length; ++j)
          tgtChunks[j] = new NewChunk(tgt.vec(j + colStart), fi);
        // Input chunk c contributes target row (c - espc[fi]) within this target chunk.
        for (int c = ((int) espc[fi]); c < (int) espc[fi + 1]; ++c) {
          NewChunk nc = chks[c].inflate();
          Iterator<Value> it = nc.values();
          while (it.hasNext()) {
            Value v = it.next();
            NewChunk t = tgtChunks[v.rowId0()];
            // Pad with zeros up to this value's position before appending it.
            t.addZeros(c - (int) espc[fi] - t.len());
            v.add2Chunk(t);
          }
        }
//            addToPendingCount(tgtChunks.length - 1);
        for (int j = 0; j < tgtChunks.length; ++j) { // finalize the target chunks and close them
          final int fj = j;
//              new CountedCompleter(this) {
//                @Override
//                public void compute() {
          // Pad the tail with zeros so the chunk has exactly espc[fi+1] - espc[fi] rows.
          tgtChunks[fj].addZeros((int) (espc[fi + 1] - espc[fi]) - tgtChunks[fj]._len);
          tgtChunks[fj].close(_fs);
          tgtChunks[fj] = null;
//                  tryComplete();
        }
//              }.fork();
//            }
//          }
//        }.fork();
      }
    }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:41,代码来源:DMatrix.java

示例9: doAUC

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Builds a two-column frame ("probs", "actls") from the given arrays, computes an {@code AUC2}
 * over it, prints the per-bin {tps/p, fps/n} pairs and the per-bin min_per_class_accuracy
 * values to stdout, and returns the AUC.
 *
 * @param probs predicted probabilities, one per row
 * @param actls actual labels, indexed in parallel with {@code probs}
 * @return the {@code _auc} field of the computed {@code AUC2}
 */
private static double doAUC(double probs[], double actls[]) {
  final int nrows = probs.length;
  final double[][] pairs = new double[nrows][];
  for( int r = 0; r < nrows; r++ ) {
    pairs[r] = new double[]{probs[r], actls[r]};
  }

  final Frame fr = ArrayUtils.frame(new String[]{"probs", "actls"}, pairs);
  final AUC2 auc = new AUC2(fr.vec("probs"), fr.vec("actls"));
  fr.remove(); // the frame is no longer needed once AUC2 has been built

  // One {tps/p, fps/n} pair per bin.
  for( int bin = 0; bin < auc._nBins; bin++ ) {
    final double tpRate = (double) auc._tps[bin] / auc._p;
    final double fpRate = (double) auc._fps[bin] / auc._n;
    System.out.print("{" + tpRate + "," + fpRate + "} ");
  }
  System.out.println();

  // Value of the min_per_class_accuracy criterion at each bin.
  for( int bin = 0; bin < auc._nBins; bin++ ) {
    System.out.print(AUC2.ThresholdCriterion.min_per_class_accuracy.exec(auc, bin) + " ");
  }
  System.out.println();

  return auc._auc;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:14,代码来源:AUCTest.java

示例10: validate

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Validates these GLM parameters against the given builder. Problems are reported through
 * {@code glm.error}, parameters that do not apply are hidden through {@code glm.hide}, and an
 * explicitly chosen link that is incompatible with the family is rejected with an exception.
 *
 * <p>Checks performed, in order: weights/offset columns differ; lambda-search settings;
 * Tweedie-only parameters; beta-constraints frame layout; binomial response range (or the
 * class count for other families); lambda-search-only parameters; link/family compatibility.
 *
 * @param glm builder that receives the validation messages and supplies
 *            {@code nFoldCV()} / {@code nclasses()}
 * @throws IllegalArgumentException if {@code _link} is not {@code family_default} and is not
 *         allowed for {@code _family}
 */
public void validate(GLM glm) {
  if(_weights_column != null && _offset_column != null && _weights_column.equals(_offset_column))
    glm.error("_offset_column", "Offset must be different from weights");

  // The lambda-count defaulting belongs to lambda search only. Previously the missing braces
  // made it run unconditionally, contrary to the indentation and to the "_nlambdas only
  // applies if lambda search is on" hint emitted further down.
  if(_lambda_search) {
    if (glm.nFoldCV())
      glm.error("_lambda_search", "Lambda search is not currently supported in conjunction with N-fold cross-validation");
    if(_nlambdas == -1)
      _nlambdas = 100;
    else
      _exactLambdas = false;
  }

  if(_family != Family.tweedie) {
    glm.hide("_tweedie_variance_power","Only applicable with Tweedie family");
    glm.hide("_tweedie_link_power","Only applicable with Tweedie family");
  }

  if(_beta_constraints != null) {
    Frame f = _beta_constraints.get();
    if(f == null) {
      // Report and stop here: the column checks below would dereference the missing frame.
      glm.error("beta_constraints","Missing frame for beta constraints");
    } else {
      if(f.vec("names") == null)
        glm.error("beta_constraints","Beta constraints parameter must have names column with valid coefficient names");
      // todo: check the coefficient names
      // Optional columns: each one must be numeric when it is present.
      for(String column : new String[]{"upper_bounds", "lower_bounds", "beta_given", "beta_start"}) {
        Vec v = f.vec(column);
        if(v != null && !v.isNumeric())
          glm.error("beta_constraints", column + " must be numeric if present");
      }
    }
  }

  if(_family == Family.binomial) {
    Frame frame = DKV.getGet(_train);
    if (frame != null) {
      Vec response = frame.vec(_response_column);
      if (response != null) {
        // A binomial response must span exactly 0..1.
        if (response.min() != 0 || response.max() != 1) {
          glm.error("_response_column", "Illegal response for family binomial, must be binary, got min = " + response.min() + ", max = " + response.max() + ")");
        }
      }
    }
  } else if (glm.nclasses() > 2 ) {
    glm.error("_response_column", "Illegal response for " + _family + " family, cannot be categorical with more than 2 levels");
  }

  if(!_lambda_search) {
    glm.hide("_lambda_min_ratio", "only applies if lambda search is on.");
    glm.hide("_nlambdas", "only applies if lambda search is on.");
  }

  if(_link != Link.family_default) { // check we have compatible link
    switch (_family) {
      case gaussian:
        if (_link != Link.identity && _link != Link.log && _link != Link.inverse)
          throw new IllegalArgumentException("Incompatible link function for selected family. Only identity, log and inverse links are allowed for family=gaussian.");
        break;
      case binomial:
        if (_link != Link.logit) // fixme: R also allows log, but it's not clear when can be applied and what should we do in case the predictions are outside of 0/1.
          throw new IllegalArgumentException("Incompatible link function for selected family. Only logit is allowed for family=binomial. Got " + _link);
        break;
      case poisson:
        if (_link != Link.log && _link != Link.identity)
          throw new IllegalArgumentException("Incompatible link function for selected family. Only log and identity links are allowed for family=poisson.");
        break;
      case gamma:
        if (_link != Link.inverse && _link != Link.log && _link != Link.identity)
          throw new IllegalArgumentException("Incompatible link function for selected family. Only inverse, log and identity links are allowed for family=gamma.");
        break;
      case tweedie:
        if (_link != Link.tweedie)
          throw new IllegalArgumentException("Incompatible link function for selected family. Only tweedie link allowed for family=tweedie.");
        break;
      default:
        H2O.fail();
    }
  }
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:80,代码来源:GLMModel.java

示例11: directSVD

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Computes the SVD through the small matrix B = Q'A, storing the singular values in
 * {@code model._output._d} and the right singular vectors in {@code model._output._v}, each
 * limited to the first {@code _parms._nv} components.
 *
 * @param dinfo data info describing the training data A
 * @param qfrm frame holding Q, the matrix from randomized subspace iteration
 * @param model model whose output fields ({@code _d}, {@code _v}, and {@code _u_key} when U is
 *              kept) are filled in
 * @param u_name name used to make the key of the U frame when {@code _parms._keep_u} is set
 * @return the frame holding U = QV when {@code _parms._keep_u} is set; otherwise {@code null}.
 *         Also {@code null} if the job was cancelled before U was produced.
 */
private Frame directSVD(DataInfo dinfo, Frame qfrm, SVDModel model, String u_name) {
  DataInfo qinfo = null;
  Frame u = null;
  final int ncolA = dinfo._adaptedFrame.numCols();

  try {
    // 0) Make input frame [A,Q], where A = read-only training data, Q = matrix from randomized subspace iteration
    Vec[] vecs = new Vec[ncolA + _parms._nv];
    for (int i = 0; i < ncolA; i++) vecs[i] = dinfo._adaptedFrame.vec(i);
    for (int i = 0; i < _parms._nv; i++) vecs[ncolA + i] = qfrm.vec(i);
    Frame aqfrm = new Frame(vecs);

    // 1) Form the matrix B' = A'Q = (Q'A)'
    update(1, "Forming small matrix B = Q'A for direct SVD");
    SMulTask stsk = new SMulTask(dinfo, _parms._nv);
    stsk.doAll(aqfrm);

    // 2) Compute SVD of small matrix: If B' = WDV', then B = VDW'
    update(1, "Calculating SVD of small matrix locally");
    Matrix atqJ = new Matrix(stsk._atq);
    SingularValueDecomposition svdJ = atqJ.svd();

    // 3) Form orthonormal matrix U = QV
    update(1, "Forming distributed orthonormal matrix U");
    if (_parms._keep_u) {
      model._output._u_key = Key.make(u_name);
      // The decomposition is of B', so its getV() factor plays the role of B's left vectors.
      double[][] svdJ_u = svdJ.getV().getMatrix(0,atqJ.getColumnDimension()-1,0,_parms._nv-1).getArray();

      qinfo = new DataInfo(Key.make(), qfrm, null, true, DataInfo.TransformType.NONE, false, false, false);
      DKV.put(qinfo._key, qinfo);
      BMulTask btsk = new BMulTask(self(), qinfo, ArrayUtils.transpose(svdJ_u));
      btsk.doAll_numericResult(_parms._nv, qinfo._adaptedFrame);
      u = btsk.outputFrame(model._output._u_key, null, null);
    }

    model._output._d = Arrays.copyOfRange(svdJ.getSingularValues(),0,_parms._nv);
    // Likewise, getU() of the B' decomposition supplies the right singular vectors stored in _v.
    model._output._v = svdJ.getU().getMatrix(0,atqJ.getRowDimension()-1,0,_parms._nv-1).getArray();
  } catch( Throwable t ) {
    Job thisJob = DKV.getGet(_key);
    // The job may no longer be present in the DKV; without the null check an NPE raised here
    // would replace (and hide) the original failure t.
    if (thisJob != null && thisJob._state == JobState.CANCELLED) {
      Log.info("Job cancelled by user.");
    } else {
      t.printStackTrace();
      failed(t);
      throw t;
    }
  } finally {
    // qinfo was put into the DKV above; always take it out again.
    if( qinfo != null ) qinfo.remove();
  }
  return u;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:52,代码来源:SVD.java

示例12: testCheckPointReconstruction

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Checks that a GBM continued from a checkpoint ends up with the same trees as a GBM trained
 * from scratch with the same total tree count, while not sharing tree keys with it.
 *
 * <p>Three models are trained on {@code dataset}: an initial one with
 * {@code ntreesInPriorModel} trees, one continued from that checkpoint up to
 * {@code ntreesInPriorModel + ntreesInNewModel} trees, and a from-scratch one with that same
 * total. The frame and all models are deleted afterwards.
 *
 * @param dataset test file to parse
 * @param responseIdx index of the response column
 * @param classification if true, the response column is converted to categorical first
 * @param ntreesInPriorModel number of trees in the initial model
 * @param ntreesInNewModel number of trees added on top of the checkpoint
 * @param sampleRateInPriorModel not referenced by this GBM variant of the test
 * @param sampleRateInNewModel not referenced by this GBM variant of the test
 */
private void testCheckPointReconstruction(String dataset,
                                          int responseIdx,
                                          boolean classification,
                                          int ntreesInPriorModel, int ntreesInNewModel,
                                          float sampleRateInPriorModel, float sampleRateInNewModel) {
  Frame f = parse_test_file(dataset);
  if (classification) {
    // Swap the response for its categorical form, drop the replaced column, republish the frame.
    f.replace(responseIdx, f.vec(responseIdx).toCategoricalVec()).remove();
    DKV.put(f._key, f);
  }

  GBMModel model = null;
  GBMModel modelFromCheckpoint = null;
  GBMModel modelFinal = null;
  try {
    final int totalTrees = ntreesInPriorModel + ntreesInNewModel;

    // Initial model: the checkpoint the second model continues from.
    GBMModel.GBMParameters initial = new GBMModel.GBMParameters();
    initial._model_id = Key.make("Initial model");
    initial._train = f._key;
    initial._response_column = f.name(responseIdx);
    initial._ntrees = ntreesInPriorModel;
    initial._seed = 42;
    initial._max_depth = 10;
    initial._score_each_iteration = true;
    model = new GBM(initial).trainModel().get();

    // Model continued from the checkpoint up to the total tree count.
    GBMModel.GBMParameters continued = new GBMModel.GBMParameters();
    continued._model_id = Key.make("Model from checkpoint");
    continued._train = f._key;
    continued._response_column = f.name(responseIdx);
    continued._ntrees = totalTrees;
    continued._seed = 42;
    continued._checkpoint = model._key;
    continued._score_each_iteration = true;
    continued._max_depth = 10;
    modelFromCheckpoint = new GBM(continued).trainModel().get();

    // Compute a separate model containing the same number of trees as the model built from checkpoint
    GBMModel.GBMParameters scratch = new GBMModel.GBMParameters();
    scratch._model_id = Key.make("Validation model");
    scratch._train = f._key;
    scratch._response_column = f.name(responseIdx);
    scratch._ntrees = totalTrees;
    scratch._seed = 42;
    scratch._score_each_iteration = true;
    scratch._max_depth = 10;
    modelFinal = new GBM(scratch).trainModel().get();

    CompressedTree[][] treesFromCheckpoint = getTrees(modelFromCheckpoint);
    CompressedTree[][] treesFromFinalModel = getTrees(modelFinal);
    assertTreeEquals("The model created from checkpoint and corresponding model created from scratch should have the same trees!",
            treesFromCheckpoint, treesFromFinalModel, true);

    // Make sure we are not re-using trees: equal trees must still live under different keys.
    for (int t = 0; t < treesFromCheckpoint.length; t++) {
      for (int k = 0; k < treesFromCheckpoint[t].length; k++) {
        final CompressedTree fromCheckpoint = treesFromCheckpoint[t][k];
        if (fromCheckpoint == null) continue; // equality of the models was already verified above
        final CompressedTree fromScratch = treesFromFinalModel[t][k];
        Assert.assertNotEquals(fromCheckpoint._key, fromScratch._key);
      }
    }
  } finally {
    if (f != null) f.delete();
    if (model != null) model.delete();
    if (modelFromCheckpoint != null) modelFromCheckpoint.delete();
    if (modelFinal != null) modelFinal.delete();
  }
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:71,代码来源:GBMCheckpointTest.java

示例13: testCheckPointReconstruction

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Checks that a DRF continued from a checkpoint ends up with the same trees as a DRF trained
 * from scratch with the same total tree count, while not sharing tree keys with it.
 *
 * <p>Three models are trained on {@code dataset}: an initial one with
 * {@code ntreesInPriorModel} trees and sample rate {@code sampleRateInPriorModel}, one
 * continued from that checkpoint up to {@code ntreesInPriorModel + ntreesInNewModel} trees with
 * sample rate {@code sampleRateInNewModel}, and a from-scratch one with that same total and no
 * explicit sample rate. The frame and all models are deleted afterwards.
 *
 * @param dataset test file to parse
 * @param responseIdx index of the response column
 * @param classification if true, the response column is converted to categorical first
 * @param ntreesInPriorModel number of trees in the initial model
 * @param ntreesInNewModel number of trees added on top of the checkpoint
 * @param sampleRateInPriorModel sample rate of the initial model
 * @param sampleRateInNewModel sample rate of the model continued from the checkpoint
 */
private void testCheckPointReconstruction(String dataset,
                                          int responseIdx,
                                          boolean classification,
                                          int ntreesInPriorModel, int ntreesInNewModel,
                                          float sampleRateInPriorModel, float sampleRateInNewModel) {
  Frame f = parse_test_file(dataset);
  if (classification) {
    // Swap the response for its categorical form, drop the replaced column, republish the frame.
    f.replace(responseIdx, f.vec(responseIdx).toCategoricalVec()).remove();
    DKV.put(f._key, f);
  }

  DRFModel model = null;
  DRFModel modelFromCheckpoint = null;
  DRFModel modelFinal = null;
  try {
    final int totalTrees = ntreesInPriorModel + ntreesInNewModel;

    // Initial model: the checkpoint the second model continues from.
    DRFModel.DRFParameters initial = new DRFModel.DRFParameters();
    initial._model_id = Key.make("Initial model");
    initial._train = f._key;
    initial._response_column = f.name(responseIdx);
    initial._ntrees = ntreesInPriorModel;
    initial._seed = 42;
    initial._max_depth = 10;
    initial._score_each_iteration = true;
    initial._sample_rate = sampleRateInPriorModel;
    model = new DRF(initial).trainModel().get();

    // Model continued from the checkpoint up to the total tree count.
    DRFModel.DRFParameters continued = new DRFModel.DRFParameters();
    continued._model_id = Key.make("Model from checkpoint");
    continued._train = f._key;
    continued._response_column = f.name(responseIdx);
    continued._ntrees = totalTrees;
    continued._seed = 42;
    continued._checkpoint = model._key;
    continued._score_each_iteration = true;
    continued._max_depth = 10;
    continued._sample_rate = sampleRateInNewModel;
    modelFromCheckpoint = new DRF(continued).trainModel().get();

    // Compute a separate model containing the same number of trees as the model built from checkpoint
    DRFModel.DRFParameters scratch = new DRFModel.DRFParameters();
    scratch._model_id = Key.make("Validation model");
    scratch._train = f._key;
    scratch._response_column = f.name(responseIdx);
    scratch._ntrees = totalTrees;
    scratch._seed = 42;
    scratch._score_each_iteration = true;
    scratch._max_depth = 10;
    modelFinal = new DRF(scratch).trainModel().get();

    CompressedTree[][] treesFromCheckpoint = getTrees(modelFromCheckpoint);
    CompressedTree[][] treesFromFinalModel = getTrees(modelFinal);
    assertTreeEquals("The model created from checkpoint and corresponding model created from scratch should have the same trees!",
            treesFromCheckpoint, treesFromFinalModel, true);

    // Make sure we are not re-using trees: equal trees must still live under different keys.
    for (int t = 0; t < treesFromCheckpoint.length; t++) {
      for (int k = 0; k < treesFromCheckpoint[t].length; k++) {
        final CompressedTree fromCheckpoint = treesFromCheckpoint[t][k];
        if (fromCheckpoint == null) continue; // equality of the models was already verified above
        final CompressedTree fromScratch = treesFromFinalModel[t][k];
        Assert.assertNotEquals(fromCheckpoint._key, fromScratch._key);
      }
    }
  } finally {
    if (f != null) f.delete();
    if (model != null) model.delete();
    if (modelFromCheckpoint != null) modelFromCheckpoint.delete();
    if (modelFinal != null) modelFinal.delete();
  }
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:73,代码来源:DRFCheckpointTest.java

示例14: testAUC0

import water.fvec.Frame; //导入方法依赖的package包/类
/**
 * Checks {@code AUC2} against hand-computed and ROCR reference values: small tied/untied
 * examples, a 17-row example with many ties (also after shuffling rows), and a larger test
 * file for which max precision, max absolute MCC and max F1 are checked as well.
 */
@Test public void testAUC0() {
  double auc0 = AUC2.perfectAUC(new double[]{0,0.5,0.5,1}, new double[]{0,0,1,1});
  Assert.assertEquals(0.875,auc0,1e-7);
  // Flip the tied actuals
  double auc1 = AUC2.perfectAUC(new double[]{0,0.5,0.5,1}, new double[]{0,1,0,1});
  Assert.assertEquals(0.875,auc1,1e-7);

  // Area is 10/12 (TPS=4, FPS=3, so area is 4x3 or 12 units; 10 are under).
  double auc2 = AUC2.perfectAUC(new double[]{0.1,0.2,0.3,0.4,0.5,0.6,0.7}, new double[]{0,0,1,1,0,1,1});
  Assert.assertEquals(0.8333333,auc2,1e-7);


  // Sorted probabilities.  At threshold 1e-6 flips from false to true, on
  // average.  However, there are a lot of random choices at 1e-3.
  double probs[] = new double[]{1e-8,1e-7,1e-6,1e-5,1e-4,1e-3,1e-3,1e-3,1e-3,1e-3,1e-3,1e-3,1e-3,1e-3,1e-3,1e-2,1e-1};
  double actls[] = new double[]{   0,   0,    1,   1,   1,  1,   0,   1,   0,   1,   0,   1,   0,   1,   1,   1,   1};

  // Positives & Negatives
  int P = 0;
  for( double a : actls ) P += (int)a;
  int N = actls.length - P;
  System.out.println("P="+P+", N="+N);

  // Compute TP & FP for all thresholds
  double thresh[] = new double[]{1e-1,1e-2,1e-3+1e-9,1e-3,1e-3-1e-9,1e-4,1e-5,1e-6,1e-7,1e-8,0};
  int tp[] = new int[thresh.length], fp[] = new int[thresh.length];
  int tn[] = new int[thresh.length], fn[] = new int[thresh.length];
  for( int i=0; i<probs.length; i++ ) {
    for( int t=0; t<thresh.length; t++ ) {
      if( probs[i] >= thresh[t] ) {  // At or above the threshold: predicted positive
        if( actls[i]==0.0 ) fp[t]++; // False positive
        else tp[t]++;                // True  positive
      } else {                       // Below the threshold: predicted negative
        if( actls[i]==0.0 ) tn[t]++; // True  negative
        else fn[t]++;                // False negative
      }
    }
  }
  System.out.println(Arrays.toString(tp));
  System.out.println(Arrays.toString(fp));
  System.out.println(Arrays.toString(fn));
  System.out.println(Arrays.toString(tn));
  for( int i=0; i<tp.length; i++ ) System.out.print("{"+((double)tp[i]/P)+","+((double)fp[i]/N)+"} ");
  System.out.println();
  // The AUC for this dataset, according to R's ROCR package, is 0.6363636363
  // (expected value first, so a failure message labels the two numbers correctly)
  Assert.assertEquals(0.636363636363,doAUC(probs,actls),1e-5);
  Assert.assertEquals(0.636363636363,AUC2.perfectAUC(probs,actls),1e-7);

  // Shuffle, check again
  swap(0, 5, probs, actls);
  swap(1, 6, probs, actls);
  swap(7, 15, probs, actls);
  Assert.assertEquals(0.636363636363,doAUC(probs,actls),1e-5);
  Assert.assertEquals(0.636363636363,AUC2.perfectAUC(probs,actls),1e-7);

  // Now from a large test file
  double ROCR_auc = 0.7244389;
  Frame fr = parse_test_file("smalldata/junit/auc.csv.gz");
  try {
    // Slow; used to confirm the accuracy as we increase bin counts
    //for( int i=10; i<1000; i+=10 ) {
    //  AUC2 auc = new AUC2(i,fr.vec("V1"),fr.vec("V2"));
    //  System.out.println("bins="+i+", aucERR="+Math.abs(auc._auc-ROCR_auc)/ROCR_auc);
    //  Assert.assertEquals(fr.numRows(), auc._p+auc._n);
    //}

    double aucp = AUC2.perfectAUC(fr.vec("V1"), fr.vec("V2"));
    Assert.assertEquals(ROCR_auc, aucp, 1e-4);
    AUC2 auc = new AUC2(fr.vec("V1"), fr.vec("V2"));
    Assert.assertEquals(ROCR_auc, auc._auc, 1e-4);

    Assert.assertEquals(1.0, AUC2.ThresholdCriterion.precision.max_criterion(auc), 1e-4);

    double ROCR_max_abs_mcc = 0.4553512;
    Assert.assertEquals(ROCR_max_abs_mcc, AUC2.ThresholdCriterion.absolute_MCC.max_criterion(auc), 1e-3);

    double ROCR_f1 = 0.9920445; // same as ROCR "f" with alpha=0, or alternative beta=1
    Assert.assertEquals(ROCR_f1, AUC2.ThresholdCriterion.f1.max_criterion(auc), 1e-4);
  } finally {
    // Remove the parsed frame even when an assertion above fails.
    fr.remove();
  }
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:80,代码来源:AUCTest.java


注:本文中的water.fvec.Frame.vec方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。