当前位置: 首页>>代码示例>>Java>>正文


Java RebalanceDataSet.join方法代码示例

本文整理汇总了Java中water.fvec.RebalanceDataSet.join方法的典型用法代码示例。如果您正苦于以下问题:Java RebalanceDataSet.join方法的具体用法?Java RebalanceDataSet.join怎么用?Java RebalanceDataSet.join使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类water.fvec.RebalanceDataSet的用法示例。


在下文中一共展示了RebalanceDataSet.join方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。

示例1: reBalance

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Redistribute a frame over a different number of chunks for load balancing.
 * @param fr Input frame
 * @param local whether to only create enough chunks to max out all cores on one node only
 * @param name prefix of the key under which the rebalanced frame is stored
 * @return the input frame itself when it already has more chunks than the target
 *         (and reproducibility is off), otherwise the newly rebalanced frame
 */
private Frame reBalance(final Frame fr, boolean local, final String name) {
  // Target: 4 chunks per core, on one node or on the whole cloud, capped by the row count.
  final int nodes = local ? 1 : H2O.CLOUD.size();
  int chunks = (int) Math.min(4 * H2O.NUMCPUS * nodes, fr.numRows());
  final int currentChunks = fr.anyVec().nChunks();

  if (_parms._reproducible) {
    // Reproducible mode always collapses the data into a single chunk.
    Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
    chunks = 1;
  } else if (currentChunks > chunks) {
    Log.info("Dataset already contains " + currentChunks + " chunks. No need to rebalance.");
    return fr;
  }

  if (!_parms._quiet_mode) {
    Log.info("ReBalancing dataset into (at least) " + chunks + " chunks.");
  }

  final Key destKey = Key.make(name + ".chunks" + chunks);
  final RebalanceDataSet task = new RebalanceDataSet(fr, destKey, chunks);
  H2O.submitTask(task);
  task.join();

  final Frame balanced = DKV.get(destKey).get();
  // Registered in _delete_me; presumably cleaned up later by the owner - confirm.
  _delete_me.add(balanced);
  return balanced;
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:25,代码来源:DeepLearning.java

示例2: reBalance

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
   * Redistribute a frame over a different number of chunks for load balancing.
   * @param fr Input frame
   * @param local whether to only create enough chunks to max out all cores on one node only
   * @return the input frame itself when it already has more chunks than the target
   *         (and reproducibility is off), otherwise the newly rebalanced frame
   */
  private Frame reBalance(final Frame fr, boolean local) {
    // Target: 4 chunks per core, on one node or on the whole cloud, capped by the row count.
    final int nodes = local ? 1 : H2O.CLOUD.size();
    int chunks = (int) Math.min(4 * H2O.NUMCPUS * nodes, fr.numRows());
    final int currentChunks = fr.anyVec().nChunks();

    if (reproducible) {
      // Reproducible mode always collapses the data into a single chunk.
      Log.warn("Reproducibility enforced - using only 1 thread - can be slow.");
      chunks = 1;
    } else if (currentChunks > chunks) {
      Log.info("Dataset already contains " + currentChunks + " chunks. No need to rebalance.");
      return fr;
    }

    if (!quiet_mode) {
      Log.info("ReBalancing dataset into (at least) " + chunks + " chunks.");
    }

    // Derive the destination name from the source key when there is one, else use a random name.
    final String destName;
    if (fr._key != null) {
      destName = fr._key.toString() + ".balanced";
    } else {
      destName = Key.rand();
    }
    final Key destKey = Key.makeSystem(destName);

    final RebalanceDataSet task = new RebalanceDataSet(fr, destKey, chunks);
    H2O.submitTask(task);
    task.join();
    return UKV.get(destKey);
  }
 
开发者ID:h2oai,项目名称:h2o-2,代码行数:25,代码来源:DeepLearning.java

示例3: testPubDev928

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Load simple dataset, rebalance to a number of chunks > number of rows, and run deep learning.
 * The rebalance and the zero-length-chunk assertion run inside the try block so that the
 * parsed and rebalanced frames are deleted even when one of those steps fails.
 */
@Test public void testPubDev928() {
  Key rebalancedKey = Key.make("rebalanced");
  NFSFileVec nfs = NFSFileVec.make(find_test_file("smalldata/logreg/prostate.csv"));
  Frame fr = ParseDataset.parse(Key.make(), nfs._key);
  Frame rebalanced = null;
  DeepLearningModel dlModel = null;
  try {
    // Create rebalanced dataset with one more chunk than there are rows
    RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int)(fr.numRows()+1));
    H2O.submitTask(rb);
    rb.join();
    rebalanced = DKV.get(rebalancedKey).get();

    // Assert that there is at least one 0-len chunk
    assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

    // Launch Deep Learning
    DeepLearningParameters dlParams = new DeepLearningParameters();
    dlParams._train = rebalancedKey;
    dlParams._epochs = 5;
    dlParams._response_column = "CAPSULE";

    dlModel = new DeepLearning(dlParams).trainModel().get();
  } finally {
    fr.delete();
    if (rebalanced != null) rebalanced.delete();
    if (dlModel != null) dlModel.delete();
  }
}
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:30,代码来源:DeepLearningScoreTest.java

示例4: testChunks

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains a baseline Aggregator on covtype.20k, then for each chunk count in
 * {1,2,5,10,50,100} rebalances the frame, trains another Aggregator with the same
 * parameters and asserts that it yields the same number of exemplars as the baseline.
 */
@Test public void testChunks() {
  Frame frame = parse_test_file("smalldata/covtype/covtype.20k.data");

  AggregatorModel.AggregatorParameters parms = new AggregatorModel.AggregatorParameters();
  parms._train = frame._key;
  parms._target_num_exemplars = 137;
  parms._rel_tol_num_exemplars = 0.05;
  long start = System.currentTimeMillis();
  AggregatorModel agg = new Aggregator(parms).trainModel().get();  // 0.418
  System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start)/1000. + " seconds");
  agg.checkConsistency();
  Frame output = agg._output._output_frame.get();
  checkNumExemplars(agg);
  output.remove();
  agg.remove();

  for (int i : new int[]{1,2,5,10,50,100}) {
    Key key = Key.make();
    RebalanceDataSet rb = new RebalanceDataSet(frame, key, i);
    H2O.submitTask(rb);
    rb.join();
    Frame rebalanced = DKV.get(key).get();

    parms = new AggregatorModel.AggregatorParameters();
    // NOTE(review): this trains on the original frame, not on `rebalanced`, so the
    // chunk layout created above is never seen by the Aggregator. Confirm whether
    // `rebalanced._key` was intended before changing it (results may then differ).
    parms._train = frame._key;
    parms._target_num_exemplars = 137;
    parms._rel_tol_num_exemplars = 0.05;
    start = System.currentTimeMillis();
    AggregatorModel agg2 = new Aggregator(parms).trainModel().get();  // 0.373 0.504 0.357 0.454 0.368 0.355
    System.out.println("AggregatorModel finished in: " + (System.currentTimeMillis() - start)/1000. + " seconds");
    agg2.checkConsistency();
    Log.info("Number of exemplars for " + i + " chunks: " + agg2._exemplars.length);
    rebalanced.delete();
    // assertEquals reports both counts on failure, unlike assertTrue(Math.abs(..) == 0)
    Assert.assertEquals(agg._exemplars.length, agg2._exemplars.length);
    output = agg2._output._output_frame.get();
    output.remove();
    // Check the model built in this iteration (the baseline was already checked above)
    checkNumExemplars(agg2);
    agg2.remove();
  }
  frame.delete();
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:40,代码来源:AggregatorTest.java

示例5: testPubDev928

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Load simple dataset, rebalance to a number of chunks > number of rows, and run deep learning.
 * The rebalance and the zero-length-chunk assertion run inside the try block so that the
 * parsed and rebalanced frames are deleted even when one of those steps fails.
 */
@Test public void testPubDev928() {
  Key rebalancedKey = Key.make("rebalanced");
  NFSFileVec nfs = TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv");
  Frame fr = ParseDataset.parse(Key.make(), nfs._key);
  Frame rebalanced = null;
  DeepLearningModel dlModel = null;
  try {
    // Create rebalanced dataset with one more chunk than there are rows
    RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, (int)(fr.numRows()+1));
    H2O.submitTask(rb);
    rb.join();
    rebalanced = DKV.get(rebalancedKey).get();

    // Assert that there is at least one 0-len chunk
    assertZeroLengthChunk("Rebalanced dataset should contain at least one 0-len chunk!", rebalanced.anyVec());

    // Launch Deep Learning
    DeepLearningParameters dlParams = new DeepLearningParameters();
    dlParams._train = rebalancedKey;
    dlParams._epochs = 5;
    dlParams._response_column = "CAPSULE";

    dlModel = new DeepLearning(dlParams).trainModel().get();
  } finally {
    fr.delete();
    if (rebalanced != null) rebalanced.delete();
    if (dlModel != null) dlModel.delete();
  }
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:30,代码来源:DeepLearningScoreTest.java

示例6: serve

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Rebalances {@code source} into {@code chunks} chunks, storing the result under
 * {@code after} (defaulting to the source key name plus ".balanced").
 * @return a "done" response on success, or an error response wrapping whatever was
 *         thrown while validating the chunk count or running the rebalance
 * @throws IllegalArgumentException if no source frame was given
 */
@Override public RequestBuilders.Response serve() {
  if (source == null) {
    throw new IllegalArgumentException("Missing frame to rebalance!");
  }
  try {
    if (chunks > source.numRows()) {
      throw new IllegalArgumentException("Cannot create more than " + source.numRows() + " chunks.");
    }
    if (after == null) {
      final String destName = source._key.toString() + ".balanced";
      after = Key.make(destName);
    }
    final RebalanceDataSet task = new RebalanceDataSet(source, after, chunks);
    H2O.submitTask(task);
    task.join();
    return RequestBuilders.Response.done(this);
  } catch (Throwable failure) {
    // Any failure, including the chunk-count check above, is reported as an error response.
    return RequestBuilders.Response.error(failure);
  }
}
 
开发者ID:h2oai,项目名称:h2o-2,代码行数:14,代码来源:ReBalance.java

示例7: testReprodubility

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains the same single-tree GBM N times on a copy of covtype.20k rebalanced to 256
 * chunks and asserts that every run reports the same MSE in its last _scored_train entry.
 */
@Test public void testReprodubility() {
    Frame tfr=null;
    final int N = 5;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("smalldata/covtype/covtype.20k.data");

      // rebalance to 256 chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();

      for (int i=0; i<N; ++i) {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "C55";
        parms._nbins = 1000;
        parms._ntrees = 1;
        parms._max_depth = 8;
        parms._learn_rate = 0.1f;
        parms._min_rows = 10;
        parms._distribution = Distribution.Family.gaussian;

        // Build a first model; all remaining models should be equal
        GBM job = new GBM(parms);
        GBMModel gbm = job.trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        job.remove();
        gbm.delete();
      }
    } finally{
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    for( double mse : mses ) assertEquals(mses[0], mse, 1e-15);
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:48,代码来源:GBMTest.java

示例8: testReprodubilityAirline

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains a bernoulli GBM on the airlines data (rebalanced to 256 chunks, with the
 * listed columns dropped) and asserts the MSE of the last _scored_train entry against
 * a fixed reference value.
 */
@Test public void testReprodubilityAirline() {
    Frame tfr=null;
    final int N = 1;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

      // rebalance to fixed number of chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      // Drop these columns from the frame and delete their data
      for (String s : new String[]{
              "DepTime", "ArrTime", "ActualElapsedTime",
              "AirTime", "ArrDelay", "DepDelay", "Cancelled",
              "CancellationCode", "CarrierDelay", "WeatherDelay",
              "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
      }) {
        tfr.remove(s).remove();
      }
      DKV.put(tfr);
      for (int i=0; i<N; ++i) {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "IsDepDelayed";
        parms._nbins = 10;
        parms._nbins_cats = 500;
        parms._ntrees = 7;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._distribution = Distribution.Family.bernoulli;
        parms._balance_classes = true;
        parms._seed = 0;

        // Build a first model; all remaining models should be equal
        GBM job = new GBM(parms);
        GBMModel gbm = job.trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        job.remove();
        gbm.delete();
      }
    } finally {
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    for( double mse : mses )
      assertEquals(0.21979375165014595, mse, 1e-8); //check for the same result on 1 nodes and 5 nodes (will only work with enough chunks)
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:58,代码来源:GBMTest.java

示例9: testReprodubilityAirlineSingleNode

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Same as the airlines GBM reproducibility check, but with _build_tree_one_node enabled:
 * trains on the airlines data (rebalanced to 256 chunks, with the listed columns dropped)
 * and asserts the MSE of the last _scored_train entry against a fixed reference value.
 */
@Test public void testReprodubilityAirlineSingleNode() {
    Frame tfr=null;
    final int N = 1;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

      // rebalance to fixed number of chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      // Drop these columns from the frame and delete their data
      for (String s : new String[]{
              "DepTime", "ArrTime", "ActualElapsedTime",
              "AirTime", "ArrDelay", "DepDelay", "Cancelled",
              "CancellationCode", "CarrierDelay", "WeatherDelay",
              "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
      }) {
        tfr.remove(s).remove();
      }
      DKV.put(tfr);
      for (int i=0; i<N; ++i) {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "IsDepDelayed";
        parms._nbins = 10;
        parms._nbins_cats = 500;
        parms._ntrees = 7;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._distribution = Distribution.Family.bernoulli;
        parms._balance_classes = true;
        parms._seed = 0;
        parms._build_tree_one_node = true;

        // Build a first model; all remaining models should be equal
        GBM job = new GBM(parms);
        GBMModel gbm = job.trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        job.remove();
        gbm.delete();
      }
    } finally {
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    for( double mse : mses )
      assertEquals(0.21979375165014595, mse, 1e-8); //check for the same result on 1 nodes and 5 nodes (will only work with enough chunks)
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:59,代码来源:GBMTest.java

示例10: testReproducibility

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains the same single-tree DRF (fixed seed) N times on a copy of covtype.20k
 * rebalanced to 256 chunks and asserts that every run reports the same MSE in its
 * last _scored_train entry.
 */
@Test public void testReproducibility() {
    Frame tfr=null;
    final int N = 5;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("smalldata/covtype/covtype.20k.data");

      // rebalance to 256 chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();

      for (int i=0; i<N; ++i) {
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        parms._response_column = "C55";
        parms._nbins = 1000;
        parms._ntrees = 1;
        parms._max_depth = 8;
        parms._mtries = -1;
        parms._min_rows = 10;
        parms._seed = 1234;

        // Build a first model; all remaining models should be equal
        DRF job = new DRF(parms);
        DRFModel drf = job.trainModel().get();
        assertEquals(parms._ntrees, drf._output._ntrees);

        mses[i] = drf._output._scored_train[drf._output._scored_train.length-1]._mse;
        job.remove();
        drf.delete();
      }
    } finally{
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    for (int i=0; i<mses.length; ++i) {
      Log.info("trial: " + i + " -> MSE: " + mses[i]);
    }
    for(double mse : mses)
      assertEquals(mses[0], mse, 1e-15);
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:51,代码来源:DRFTest.java

示例11: testReproducibilityAirline

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains a DRF on the airlines data (rebalanced to 256 chunks, with the listed columns
 * dropped) and asserts the training-metrics MSE against a fixed reference value.
 */
@Test public void testReproducibilityAirline() {
    Frame tfr=null;
    final int N = 1;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

      // rebalance to fixed number of chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      // Drop these columns from the frame and delete their data
      for (String s : new String[]{
              "DepTime", "ArrTime", "ActualElapsedTime",
              "AirTime", "ArrDelay", "DepDelay", "Cancelled",
              "CancellationCode", "CarrierDelay", "WeatherDelay",
              "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
      }) {
        tfr.remove(s).remove();
      }
      DKV.put(tfr);
      for (int i=0; i<N; ++i) {
        DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
        parms._train = tfr._key;
        parms._response_column = "IsDepDelayed";
        parms._nbins = 10;
        parms._nbins_cats = 1024;
        parms._ntrees = 7;
        parms._max_depth = 10;
        parms._binomial_double_trees = false;
        parms._mtries = -1;
        parms._min_rows = 1;
        parms._sample_rate = 0.632f;   // Simulated sampling with replacement
        parms._balance_classes = true;
        parms._seed = (1L<<32)|2;

        // Build a first model; all remaining models should be equal
        DRF job = new DRF(parms);
        DRFModel drf = job.trainModel().get();
        assertEquals(parms._ntrees, drf._output._ntrees);

        mses[i] = drf._output._training_metrics.mse();
        job.remove();
        drf.delete();
      }
    } finally{
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    for (int i=0; i<mses.length; ++i) {
      Log.info("trial: " + i + " -> MSE: " + mses[i]);
    }
    for (int i=0; i<mses.length; ++i) {
      assertEquals(0.2148575516521361, mses[i], 1e-4); //check for the same result on 1 nodes and 5 nodes
    }
  }
 
开发者ID:kyoren,项目名称:https-github.com-h2oai-h2o-3,代码行数:64,代码来源:DRFTest.java

示例12: testReprodubilityAirline

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Trains the same bernoulli GBM N times on the airlines data (rebalanced to 256 chunks,
 * with the listed columns dropped), prints the MSE of the last _scored_train entry of
 * each run and asserts each one against a fixed reference value.
 */
@Test public void testReprodubilityAirline() {
    Frame tfr=null;
    final int N = 5;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

      // rebalance to fixed number of chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      // Drop these columns from the frame and delete their data
      for (String s : new String[]{
              "DepTime", "ArrTime", "ActualElapsedTime",
              "AirTime", "ArrDelay", "DepDelay", "Cancelled",
              "CancellationCode", "CarrierDelay", "WeatherDelay",
              "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
      }) {
        tfr.remove(s).remove();
      }
      DKV.put(tfr);
      for (int i=0; i<N; ++i) {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "IsDepDelayed";
        parms._nbins = 10;
        parms._nbins_cats = 500;
        parms._ntrees = 7;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._distribution = DistributionFamily.bernoulli;
        parms._balance_classes = true;
        parms._seed = 0;

        // Build a first model; all remaining models should be equal
        GBMModel gbm = new GBM(parms).trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        gbm.delete();
      }
    } finally {
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    System.out.println("MSEs start");
    for(double d:mses)
      System.out.println(d);
    System.out.println("MSEs End");
    System.out.flush();
    for( double mse : mses )
      assertEquals(0.21694215729861027, mse, 1e-8); //check for the same result on 1 nodes and 5 nodes (will only work with enough chunks)
  }
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:61,代码来源:GBMTest.java

示例13: testReprodubilityAirlineSingleNode

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * Same as the airlines GBM reproducibility check, but with _build_tree_one_node enabled:
 * trains N times on the airlines data (rebalanced to 256 chunks, with the listed columns
 * dropped), prints the MSE of the last _scored_train entry of each run and asserts each
 * one against a fixed reference value.
 */
@Test public void testReprodubilityAirlineSingleNode() {
    Frame tfr=null;
    final int N = 10;
    double[] mses = new double[N];

    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("./smalldata/airlines/allyears2k_headers.zip");

      // rebalance to fixed number of chunks
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, 256);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      // Drop these columns from the frame and delete their data
      for (String s : new String[]{
              "DepTime", "ArrTime", "ActualElapsedTime",
              "AirTime", "ArrDelay", "DepDelay", "Cancelled",
              "CancellationCode", "CarrierDelay", "WeatherDelay",
              "NASDelay", "SecurityDelay", "LateAircraftDelay", "IsArrDelayed"
      }) {
        tfr.remove(s).remove();
      }
      DKV.put(tfr);
      for (int i=0; i<N; ++i) {
        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "IsDepDelayed";
        parms._nbins = 10;
        parms._nbins_cats = 500;
        parms._ntrees = 7;
        parms._max_depth = 5;
        parms._min_rows = 10;
        parms._distribution = DistributionFamily.bernoulli;
        parms._balance_classes = true;
        parms._seed = 0;
        parms._build_tree_one_node = true;

        // Build a first model; all remaining models should be equal
        GBMModel gbm = new GBM(parms).trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        gbm.delete();
      }
    } finally {
      if (tfr != null) tfr.remove();
      // Exit the scope on failure as well, so a failing run does not leave it open
      Scope.exit();
    }
    System.out.println("MSE");
    for(double d:mses)
      System.out.println(d);
    for( double mse : mses )
      assertEquals(0.21694215729861027, mse, 1e-8); //check for the same result on 1 nodes and 5 nodes (will only work with enough chunks)
  }
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:60,代码来源:GBMTest.java

示例14: testChunks

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * For each chunk count in {1,2,2,39,39,500}: rebalances covtype.20k to that many chunks,
 * trains a seeded GBM with auto-rebalancing disabled, and records the MSE of the last
 * _scored_train entry. Finally asserts all recorded MSEs agree with the first one.
 */
@Test public void testChunks() {
    int[] chunks = new int[]{1,2,2,39,39,500};
    final int N = chunks.length;
    double[] mses = new double[N];
    for (int i=0; i<N; ++i) {
      Frame tfr = null;
      Scope.enter();
      try {
        // Load data, hack frames
        tfr = parse_test_file("smalldata/covtype/covtype.20k.data");

        // rebalance to a given number of chunks
        Key dest = Key.make("df.rebalanced.hex");
        RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, chunks[i]);
        H2O.submitTask(rb);
        rb.join();
        tfr.delete();
        tfr = DKV.get(dest).get();
        assertEquals(chunks[i], tfr.vec(0).nChunks());
        DKV.put(tfr);

        GBMModel.GBMParameters parms = new GBMModel.GBMParameters();
        parms._train = tfr._key;
        parms._response_column = "C55";
        parms._seed = 1234;
        parms._auto_rebalance = false;
        parms._col_sample_rate_per_tree = 0.5f;
        parms._col_sample_rate = 0.3f;
        parms._ntrees = 5;
        parms._max_depth = 5;

        // Build a first model; all remaining models should be equal
        GBM job = new GBM(parms);
        GBMModel gbm = job.trainModel().get();
        assertEquals(parms._ntrees, gbm._output._ntrees);

        mses[i] = gbm._output._scored_train[gbm._output._scored_train.length-1]._mse;
        gbm.delete();
      } finally {
        // Clean up even when this iteration fails, so the frame and the scope are not left behind
        if (tfr != null) tfr.remove();
        Scope.exit();
      }
    }
    for (int i=0; i<mses.length; ++i) {
      Log.info("trial: " + i + " -> MSE: " + mses[i]);
    }
    for(double mse : mses)
      assertEquals(mses[0], mse, 1e-10);
  }
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:48,代码来源:GBMTest.java

示例15: testChunks

import water.fvec.RebalanceDataSet; //导入方法依赖的package包/类
/**
 * For each of the first N entries of {1,13,19,39,500}: rebalances covtype.20k to that many
 * chunks, converts column 54 to categorical, trains a seeded DRF with auto-rebalancing
 * disabled, and records the MSE of the last _scored_train entry. Finally asserts all
 * recorded MSEs agree with the first one.
 */
@Test public void testChunks() {
  // NOTE(review): N is 4 but `chunks` has 5 entries, so the 500-chunk case never runs.
  // Confirm whether that is deliberate or N should be chunks.length.
  final int N = 4;
  double[] mses = new double[N];
  int[] chunks = new int[]{1,13,19,39,500};

  for (int i=0; i<N; ++i) {
    Frame tfr = null;
    Scope.enter();
    try {
      // Load data, hack frames
      tfr = parse_test_file("smalldata/covtype/covtype.20k.data");

      // rebalance to the chunk count under test
      Key dest = Key.make("df.rebalanced.hex");
      RebalanceDataSet rb = new RebalanceDataSet(tfr, dest, chunks[i]);
      H2O.submitTask(rb);
      rb.join();
      tfr.delete();
      tfr = DKV.get(dest).get();
      Scope.track(tfr.replace(54, tfr.vecs()[54].toCategoricalVec()));
      DKV.put(tfr);

      DRFModel.DRFParameters parms = new DRFModel.DRFParameters();
      parms._train = tfr._key;
      parms._response_column = "C55";
      parms._ntrees = 10;
      parms._seed = 1234;
      parms._auto_rebalance = false;

      // Build a first model; all remaining models should be equal
      DRF job = new DRF(parms);
      DRFModel drf = job.trainModel().get();
      assertEquals(parms._ntrees, drf._output._ntrees);

      mses[i] = drf._output._scored_train[drf._output._scored_train.length-1]._mse;
      drf.delete();
    } finally {
      // Clean up even when this iteration fails, so the frame and the scope are not left behind
      if (tfr != null) tfr.remove();
      Scope.exit();
    }
  }
  for (int i=0; i<mses.length; ++i) {
    Log.info("trial: " + i + " -> MSE: " + mses[i]);
  }
  for(double mse : mses)
    assertEquals(mses[0], mse, 1e-10);
}
 
开发者ID:h2oai,项目名称:h2o-3,代码行数:45,代码来源:DRFTest.java


注:本文中的water.fvec.RebalanceDataSet.join方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。