本文整理汇总了Java中water.util.FrameUtils类的典型用法代码示例。如果您正苦于以下问题:Java FrameUtils类的具体用法?Java FrameUtils怎么用?Java FrameUtils使用的例子?那么, 这里精选的类代码示例或许可以为您提供帮助。
FrameUtils类属于water.util包,在下文中一共展示了FrameUtils类的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Java代码示例。
示例1: scoreExemplarMembers
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Builds a frame holding the rows of the frame returned by {@code _parms.train()} whose
 * exemplar assignment equals the gid of the chosen exemplar.
 *
 * @param destination_key key under which the resulting frame is created
 * @param exemplarIdx     index into {@code _exemplars} (and {@code _counts}) of the exemplar
 * @return the selected rows, carrying the names and domains of the source frame; the frame
 *         is also put into the DKV
 */
@Override
public Frame scoreExemplarMembers(Key<Frame> destination_key, final int exemplarIdx) {
  // Indicator column: 1 where the assignment value matches the exemplar's gid, 0 elsewhere.
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c, NewChunk nc) {
      for (int i = 0; i < c._len; ++i)
        nc.addNum(c.at8(i) == _exemplars[exemplarIdx].gid ? 1 : 0, 0);
    }
  }.doAll(Vec.T_NUM, new Frame(new Vec[]{_exemplar_assignment_vec_key.get()})).outputFrame().anyVec();
  Frame orig = _parms.train();
  // Source columns plus the indicator as the trailing "predicate" column for DeepSelect.
  Frame ff = new Frame(orig.names(), orig.vecs());
  ff.add("predicate", booleanCol);
  Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
  FrameUtils.shrinkDomainsToObservedSubset(res);
  DKV.put(res);
  // Drop the temporary indicator before the sanity check so a failed assert cannot leak it.
  booleanCol.remove();
  assert(res.numRows()==_counts[exemplarIdx]);
  return res;
}
示例2: testNullModelRegression
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Trains a null model on the iris data with "sepal_len" as the response, scores it with the
 * given custom metric function and checks that exactly one model-metrics object exists and
 * that its custom metric equals the mean of the response column.
 *
 * @param func reference to the custom metric function; its key is removed from the DKV at the end
 * @throws Exception propagated from any of the called helpers
 */
static void testNullModelRegression(final CFuncRef func) throws Exception {
  final Frame irisFrame = Datasets.iris();
  Model model = null;
  Frame predictions = null;
  try {
    NullModelParameters nullParms = new NullModelParameters() {{
      _train = irisFrame._key;
      _response_column = "sepal_len";
      _custom_metric_func = func.toRef();
    }};
    model = new NullModelBuilder(nullParms).trainModel().get();
    predictions = model.score(irisFrame, null, null, true, func);

    Assert.assertEquals(
        "Null model generates only a single model metrics",
        1,
        model._output.getModelMetrics().length);

    ModelMetrics metrics = model._output.getModelMetrics()[0].get();
    Assert.assertEquals(
        "Custom model metrics should compute mean of response column",
        irisFrame.vec("sepal_len").mean(),
        metrics._custom_metric.value,
        1e-8);
  } finally {
    // Clean up everything created above, whether or not the assertions passed.
    FrameUtils.delete(irisFrame, predictions, model);
    DKV.remove(func.getKey());
  }
}
示例3: checkTopBottomN
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Checks a grabbed top/bottom-N result against a reference frame: the reference frame is
 * split by row fraction, the grabbed values are sorted, and the matching split is compared
 * with the sorted values up to a relative tolerance.
 *
 * @param answerF   reference frame that gets split
 * @param grabF     frame whose first column holds the grabbed values
 * @param tolerance relative tolerance passed to {@code isIdenticalUpToRelTolerance}
 * @param grabTopN  negative: compare against the first split; otherwise against the second
 */
public void checkTopBottomN(Frame answerF, Frame grabF, double tolerance, int grabTopN) {
  Scope.enter();
  try {
    double nfrac = (grabTopN < 0) ? 1.0 * grabF.numRows() / answerF.numRows() : (1 - 1.0 * grabF.numRows() / answerF.numRows()); // translate percentage to actual fraction
    SplitFrame sf = new SplitFrame(answerF, new double[]{nfrac, 1 - nfrac}, new Key[]{Key.make("topN.hex"), Key.make("bottomN.hex")});
    // Invoke the job
    sf.exec().get();
    Key[] ksplits = sf._destination_frames;
    Frame topN = (Frame) ((grabTopN < 0) ? DKV.get(ksplits[0]).get() : DKV.get(ksplits[1]).get());
    // Register the split frames right away so that a failing comparison below (or an
    // exception while sorting/transposing) still lets Scope.exit() clean them up.
    Scope.track(topN);
    Scope.track_generic(ksplits[0].get());
    Scope.track_generic(ksplits[1].get());

    double[] bottomN = FrameUtils.asDoubles(grabF.vec(0));
    Arrays.sort(bottomN);
    Frame sortedF = new water.util.ArrayUtils().frame(bottomN);
    Scope.track(sortedF);
    Frame sortedFT = DMatrix.transpose(sortedF);
    Scope.track(sortedFT);
    assertTrue(isIdenticalUpToRelTolerance(topN, sortedFT, tolerance));
  } finally {
    Scope.exit();
  }
}
示例4: FrameTask2
import water.util.FrameUtils; //导入依赖的package包/类
public FrameTask2(H2OCountedCompleter cmp, DataInfo dinfo, Key jobKey, Vec rowFilter){
super(cmp);
_dinfo = dinfo;
_jobKey = jobKey;
_rowFilter = rowFilter;
_sparse = handlesSparseData() && FrameUtils.sparseRatio(dinfo._adaptedFrame) < .5;
}
示例5: responseMean
import water.fvec.Vec;
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Compute the (weighted) mean of the response (subtracting possible offset terms).
 * <p>
 * When only one of the weight/offset columns is present, the missing one is replaced by a
 * temporary constant column (1 for weights, 0 for offsets). These temporaries are removed
 * again once the computation has finished, so repeated calls do not leak them.
 *
 * @return mean
 */
protected double responseMean() {
  final boolean weighted = hasWeightCol();
  final boolean offsetted = hasOffsetCol();
  if (!weighted && !offsetted) {
    return _response.mean();
  }
  Vec tmpWeights = null;
  Vec tmpOffset = null;
  try {
    if (!weighted) tmpWeights = _response.makeCon(1);
    if (!offsetted) tmpOffset = _response.makeCon(0);
    return new FrameUtils.WeightedMean().doAll(
            _response,
            weighted ? _weights : tmpWeights,
            offsetted ? _offset : tmpOffset
    ).weightedMean();
  } finally {
    // Only the constant fillers created here are removed; real columns are left untouched.
    if (tmpWeights != null) tmpWeights.remove();
    if (tmpOffset != null) tmpOffset.remove();
  }
}
示例6: testProstate
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Parses the prostate data set and rebalances it into several different chunk counts,
 * checking each time that row count, chunk count and content match the parsed frame.
 */
@Test public void testProstate(){
  Key rebalancedKey = Key.make("rebalanced");
  int [] trials = { 380, 1, 3, 8, 12, 256, 16, 32, 64, 11, 13 };
  for (int i : trials) {
    Frame fr = null, rebalanced = null;
    try {
      NFSFileVec nfs = NFSFileVec.make(find_test_file("smalldata/logreg/prostate.csv"));
      fr = ParseDataset.parse(Key.make(), nfs._key);
      RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, i);
      H2O.submitTask(rb);
      rb.join();
      rebalanced = DKV.get(rebalancedKey).get();
      // JUnit convention: expected value first, actual value second.
      assertEquals(fr.numRows(), rebalanced.numRows());
      assertEquals(i, rebalanced.anyVec().nChunks());
      assertTrue(isBitIdentical(fr, rebalanced));
      Log.info("Rebalanced into " + i + " chunks:");
      Log.info(FrameUtils.chunkSummary(rebalanced).toString());
    }
    catch (RuntimeException | Error e) {
      // Let assertion failures and unchecked exceptions through unwrapped so the test
      // runner reports them with their original type.
      throw e;
    }
    catch (Throwable t) {
      throw new RuntimeException(t);
    }
    finally {
      if (fr != null) fr.delete();
      if (rebalanced != null) rebalanced.delete();
    }
  }
}
示例7: GLMMultinomialGradientTask
import water.util.FrameUtils; //导入依赖的package包/类
/**
*
* @param job
* @param dinfo
* @param lambda
* @param beta coefficients as 2D array [P][K]
* @param reg
*/
public GLMMultinomialGradientTask(Job job, DataInfo dinfo, double lambda, double[][] beta, double reg) {
_currentLambda = lambda;
_reg = reg;
// need to flip the beta
_beta = new double[beta[0].length][beta.length];
for(int i = 0; i < _beta.length; ++i)
for(int j = 0; j < _beta[i].length; ++j)
_beta[i][j] = beta[j][i];
_job = job;
_sparse = FrameUtils.sparseRatio(dinfo._adaptedFrame) < .125;
_dinfo = dinfo;
if(_dinfo._offset) throw H2O.unimpl();
}
示例8: GLMScore
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Prepares a scoring task for a GLM model.
 * <p>
 * For the multinomial family the model's global multinomial coefficients are used as they
 * are. Otherwise zero coefficients (the last entry of the coefficient array is always kept)
 * are dropped and {@code dinfo} is filtered down to the remaining columns.
 *
 * @param j                   owning job
 * @param m                   model to score with
 * @param dinfo               data description; may be replaced by a column-filtered version
 * @param domain              response domain, stored as given
 * @param computeMetrics      stored flag
 * @param generatePredictions stored flag
 */
public GLMScore(Job j, GLMModel m, DataInfo dinfo, String[] domain, boolean computeMetrics, boolean generatePredictions) {
  _j = j;
  _m = m;
  _computeMetrics = computeMetrics;
  _sparse = FrameUtils.sparseRatio(dinfo._adaptedFrame) < .5;
  _domain = domain;
  _generatePredictions = generatePredictions;
  _m._parms = m._parms;
  _nclasses = m._output.nclasses();

  final boolean isMultinomial = _m._parms._family == GLMModel.GLMParameters.Family.multinomial;
  if (isMultinomial) {
    _beta = null;
    _beta_multinomial = m._output._global_beta_multinomial;
  } else {
    double[] coefs = m.beta();
    final int lastIdx = coefs.length - 1;
    // Collect the indices of the non-zero coefficients, excluding the last entry.
    int[] active = new int[lastIdx];
    int nActive = 0;
    for (int c = 0; c < lastIdx; c++) {
      if (coefs[c] != 0) {
        active[nActive] = c;
        nActive++;
      }
    }
    if (nActive < lastIdx) {
      // Some coefficients are zero: shrink both the column set and the coefficient array.
      active = Arrays.copyOf(active, nActive);
      dinfo = dinfo.filterExpandedColumns(active);
      double[] compact = MemoryManager.malloc8d(active.length + 1);
      for (int p = 0; p < active.length; p++) {
        compact[p] = coefs[active[p]];
      }
      compact[active.length] = coefs[lastIdx];
      coefs = compact;
    }
    _beta_multinomial = null;
    _beta = coefs;
  }

  _dinfo = dinfo;
  _dinfo._valid = true; // marking dinfo as validation data set disables an assert on unseen levels (which should not happen in train)
  _defaultThreshold = m.defaultThreshold();
}
示例9: createFrameOfExemplars
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Builds a frame with one row per exemplar, taken from {@code orig} in its original row
 * order, plus a trailing "counts" column filled from {@code _counts}.
 *
 * @param orig            frame the exemplar rows are selected from
 * @param destination_key key under which the result is created and put into the DKV
 * @return the exemplar frame with the columns of {@code orig} followed by "counts"
 */
public Frame createFrameOfExemplars(Frame orig, Key destination_key) {
  final long[] keep = new long[_exemplars.length];
  for (int i = 0; i < keep.length; ++i)
    keep[i] = _exemplars[i].gid;

  Vec exAssignment = _exemplar_assignment_vec_key.get();
  // preserve the original row order
  // Start from an all-zero column and set 1 at every row index listed in keep[];
  // each chunk only touches the indices that fall inside its own row range.
  Vec booleanCol = new MRTask() {
    @Override
    public void map(Chunk c2) {
      for (int i = 0; i < keep.length; ++i) {
        if (keep[i] < c2.start()) continue;
        if (keep[i] >= c2.start() + c2._len) continue;
        c2.set((int) (keep[i] - c2.start()), 1);
      }
    }
  }.doAll(new Frame(new Vec[]{exAssignment.makeZero()}))._fr.vec(0);

  // Source columns plus the indicator as the trailing "predicate" column for DeepSelect.
  Frame ff = new Frame(orig.names(), orig.vecs());
  ff.add("predicate", booleanCol);
  Frame res = new Frame.DeepSelect().doAll(orig.types(), ff).outputFrame(destination_key, orig.names(), orig.domains());
  FrameUtils.shrinkDomainsToObservedSubset(res);
  booleanCol.remove();
  assert(res.numRows()==_exemplars.length);

  // Append the per-exemplar counts as an extra column.
  Vec cnts = res.anyVec().makeZero();
  Vec.Writer vw = cnts.open();
  for (int i = 0; i < _counts.length; ++i)
    vw.set(i, _counts[i]);
  vw.close();
  res.add("counts", cnts);
  DKV.put(destination_key, res);
  return res;
}
示例10: setup
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Benchmark setup: parses the USArrests data, inserts missing values into it, configures
 * PCA parameters (GramSVD, k = 4, mean imputation) and calls {@code train()}.
 * On a RuntimeException the parsed frame is deleted and the exception is rethrown.
 */
@Setup(Level.Invocation)
public void setup() {
  water.util.Log.setLogLevel("ERRR");
  stall_till_cloudsize(1);

  trainingFrame = null;
  long seed = 12345;
  double missing_fraction = 0.75;

  try {
    trainingFrame = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

    // Add missing values to the training data
    Frame shadow = new Frame(Key.<Frame>make(), trainingFrame.names(), trainingFrame.vecs());
    // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
    DKV.put(shadow._key, shadow);
    FrameUtils.MissingInserter inserter = new FrameUtils.MissingInserter(shadow._key, seed, missing_fraction);
    // MissingInserter is non-blocking, must block here explicitly
    inserter.execImpl().get();
    // Delete the frame header (not the data)
    DKV.remove(shadow._key);

    paramsImputeMissing = new PCAParameters();
    paramsImputeMissing._train = trainingFrame._key;
    paramsImputeMissing._k = 4;
    paramsImputeMissing._transform = DataInfo.TransformType.NONE;
    paramsImputeMissing._pca_method = GramSVD;
    // Don't skip rows with NA entries, but impute using mean of column
    paramsImputeMissing._impute_missing = true;
    paramsImputeMissing._seed = seed;

    train();
  } catch (RuntimeException e) {
    if (trainingFrame != null) {
      trainingFrame.delete();
    }
    e.printStackTrace();
    throw e;
  }
}
示例11: setup
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Benchmark setup: parses the USArrests data, inserts missing values into it, configures
 * PCA parameters (GramSVD, k = 4, mean imputation) and trains the model used for scoring.
 * A failed training is reported as a RuntimeException; on any RuntimeException the parsed
 * frame is deleted and the exception is rethrown.
 */
@Setup(Level.Invocation)
public void setup() {
  water.util.Log.setLogLevel("ERRR");
  stall_till_cloudsize(1);

  trainingFrame = null;
  long seed = 12345;
  double missing_fraction = 0.75;

  try {
    trainingFrame = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

    // Add missing values to the training data
    Frame shadow = new Frame(Key.<Frame>make(), trainingFrame.names(), trainingFrame.vecs());
    // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
    DKV.put(shadow._key, shadow);
    FrameUtils.MissingInserter inserter = new FrameUtils.MissingInserter(shadow._key, seed, missing_fraction);
    // MissingInserter is non-blocking, must block here explicitly
    inserter.execImpl().get();
    // Delete the frame header (not the data)
    DKV.remove(shadow._key);

    paramsImputeMissing = new PCAParameters();
    paramsImputeMissing._train = trainingFrame._key;
    paramsImputeMissing._k = 4;
    paramsImputeMissing._transform = DataInfo.TransformType.NONE;
    paramsImputeMissing._pca_method = GramSVD;
    // Don't skip rows with NA entries, but impute using mean of column
    paramsImputeMissing._impute_missing = true;
    paramsImputeMissing._seed = seed;

    // prepare the model for scoring
    boolean trained = train();
    if (!trained) {
      throw new RuntimeException("PCA model failed to be trained.");
    }
  } catch (RuntimeException e) {
    if (trainingFrame != null) {
      trainingFrame.delete();
    }
    e.printStackTrace();
    throw e;
  }
}
示例12: testImputeMissing
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Trains a PCA model (GramSVD, k = 4, mean imputation) on the USArrests data after
 * inserting missing values, then removes the model; the parsed frame is always deleted.
 */
@Test public void testImputeMissing() throws InterruptedException, ExecutionException {
  long seed = 12345;
  double missing_fraction = 0.75;
  Frame train = null;
  try {
    train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

    // Add missing values to the training data
    if (missing_fraction > 0) {
      Frame shadow = new Frame(Key.<Frame>make(), train.names(), train.vecs());
      // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
      DKV.put(shadow._key, shadow);
      FrameUtils.MissingInserter inserter = new FrameUtils.MissingInserter(shadow._key, seed, missing_fraction);
      // MissingInserter is non-blocking, must block here explicitly
      inserter.execImpl().get();
      // Delete the frame header (not the data)
      DKV.remove(shadow._key);
    }

    PCAModel.PCAParameters pcaParms = new PCAModel.PCAParameters();
    pcaParms._train = train._key;
    pcaParms._k = 4;
    pcaParms._transform = DataInfo.TransformType.NONE;
    pcaParms._pca_method = PCAModel.PCAParameters.Method.GramSVD;
    // Don't skip rows with NA entries, but impute using mean of column
    pcaParms._impute_missing = true;
    pcaParms._seed = seed;

    PCAModel pca = new PCA(pcaParms).trainModel().get();
    if (pca != null) {
      pca.remove();
    }
  } finally {
    if (train != null) {
      train.delete();
    }
  }
}
示例13: testArrestsMissing
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Trains an SVD model (Power method, standardized data) on the USArrests data for several
 * fractions of inserted missing values and logs the resulting singular values.
 * <p>
 * Each iteration uses its own frame/model variables and always leaves its Scope, so a
 * failure in one iteration cannot leave a scope open or re-delete objects of an earlier one.
 */
@Test @Ignore public void testArrestsMissing() throws InterruptedException, ExecutionException {
  long seed = 1234;

  for (double missing_fraction : new double[]{0, 0.1, 0.25, 0.5, 0.75, 0.9}) {
    SVDModel model = null;
    Frame train = null;
    Scope.enter();
    try {
      train = parse_test_file(Key.make("arrests.hex"), "smalldata/pca_test/USArrests.csv");

      // Add missing values to the training data
      if (missing_fraction > 0) {
        Frame frtmp = new Frame(Key.<Frame>make(), train.names(), train.vecs());
        DKV.put(frtmp._key, frtmp); // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
        FrameUtils.MissingInserter j = new FrameUtils.MissingInserter(frtmp._key, seed, missing_fraction);
        j.execImpl().get(); // MissingInserter is non-blocking, must block here explicitly
        DKV.remove(frtmp._key); // Delete the frame header (not the data)
      }

      SVDParameters parms = new SVDParameters();
      parms._train = train._key;
      parms._nv = train.numCols();
      parms._transform = DataInfo.TransformType.STANDARDIZE;
      parms._svd_method = SVDParameters.Method.Power;
      parms._max_iterations = 1000;
      parms._seed = seed;
      parms._save_v_frame = false;

      model = new SVD(parms).trainModel().get();
      Log.info(100 * missing_fraction + "% missing values: Singular values = " + Arrays.toString(model._output._d));
    } finally {
      // Leave the scope on every path, then release this iteration's frame and model.
      Scope.exit();
      if (train != null) train.delete();
      if (model != null) model.delete();
    }
  }
}
示例14: testProstateMissingProb
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Inserts 25% missing values into the categorical prostate data, trains a randomized SVD
 * with imputation enabled and scores the training frame; frame, predictions and model are
 * deleted afterwards.
 */
@Test public void testProstateMissingProb() throws InterruptedException, ExecutionException {
  long seed = 1234;
  SVDModel model = null;
  Frame train = null;
  Frame score = null;
  try {
    train = parse_test_file(Key.make("prostate.hex"), "smalldata/prostate/prostate_cat.csv");

    // Add missing values to the training data
    Frame shadow = new Frame(Key.<Frame>make(), train.names(), train.vecs());
    // Need to put the frame (to be modified) into DKV for MissingInserter to pick up
    DKV.put(shadow._key, shadow);
    FrameUtils.MissingInserter inserter = new FrameUtils.MissingInserter(shadow._key, seed, 0.25);
    // MissingInserter is non-blocking, must block here explicitly
    inserter.execImpl().get();
    // Delete the frame header (not the data)
    DKV.remove(shadow._key);

    SVDParameters svdParms = new SVDParameters();
    svdParms._train = train._key;
    svdParms._nv = 8;
    svdParms._only_v = false;
    svdParms._keep_u = true;
    svdParms._svd_method = SVDParameters.Method.Randomized;
    svdParms._impute_missing = true;
    svdParms._max_iterations = 20;
    svdParms._save_v_frame = false;

    model = new SVD(svdParms).trainModel().get();
    score = model.score(train);
  } finally {
    if (train != null) {
      train.delete();
    }
    if (score != null) {
      score.delete();
    }
    if (model != null) {
      model.delete();
    }
  }
}
示例15: testProstate
import water.util.FrameUtils; //导入依赖的package包/类
/**
 * Parses several test data sets and rebalances each into a range of chunk counts, checking
 * every time that row count, chunk count and content (up to a relative tolerance of 1e-10)
 * match the parsed frame. The parsed frame is deleted even when a check fails.
 */
@Test public void testProstate(){
  NFSFileVec[] nfs = new NFSFileVec[]{
      TestUtil.makeNfsFileVec("smalldata/logreg/prostate.csv"),
      TestUtil.makeNfsFileVec("smalldata/covtype/covtype.20k.data"),
      TestUtil.makeNfsFileVec("smalldata/chicago/chicagoCrimes10k.csv.zip")};
  //NFSFileVec.make(find_test_file("bigdata/laptop/usecases/cup98VAL_z.csv"))};
  for (NFSFileVec fv : nfs) {
    Frame fr = ParseDataset.parse(Key.make(), fv._key);
    try {
      Key rebalancedKey = Key.make("rebalanced");
      int[] trials = {380, 1, 3, 8, 9, 12, 256, 16, 32, 64, 11, 13};
      for (int i : trials) {
        Frame rebalanced = null;
        Scope.enter();
        try {
          RebalanceDataSet rb = new RebalanceDataSet(fr, rebalancedKey, i);
          H2O.submitTask(rb);
          rb.join();
          rebalanced = DKV.get(rebalancedKey).get();
          ParseDataset.logParseResults(rebalanced);
          // JUnit convention: expected value first, actual value second.
          assertEquals(fr.numRows(), rebalanced.numRows());
          assertEquals(i, rebalanced.anyVec().nChunks());
          assertTrue(TestUtil.isIdenticalUpToRelTolerance(fr, rebalanced, 1e-10));
          Log.info("Rebalanced into " + i + " chunks:");
          Log.info(FrameUtils.chunkSummary(rebalanced).toString());
        } finally {
          if (rebalanced != null) rebalanced.delete();
          Scope.exit();
        }
      }
    } finally {
      // Release the parsed frame even if one of the trials above failed.
      if (fr != null) fr.delete();
    }
  }
}