This article collects typical usage examples of the Python function tests.pyunit_utils.standalone_test from the h2o-3 test suite. If you are unsure what standalone_test does, how to call it, or want to see it in context, the curated examples below should help.
15 code examples of the standalone_test function are shown below, ordered by popularity.
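Every example follows the same driver pattern: the test body is a plain function, and pyunit_utils.standalone_test wraps it only when the file is executed directly (establishing the connection to a local H2O cluster before calling it), while a test runner that imports the module calls the function itself. Below is a minimal sketch of that pattern, assuming the h2o-3 repository layout (hence the sys.path tweak) and a reachable local H2O instance; my_test is a hypothetical placeholder, and note that the excerpts that follow mostly omit this import boilerplate.

import sys
sys.path.insert(1, "../../")  # make the h2o-3 "tests" package importable
import h2o
from tests import pyunit_utils

def my_test():
    # Any pyunit body: load data, train a model, assert on the results.
    iris = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris.csv"))
    assert iris.nrow == 150, "expected 150 rows"

if __name__ == "__main__":
    # Standalone run: standalone_test() connects to H2O, then invokes the test.
    pyunit_utils.standalone_test(my_test)
else:
    # Imported by the test runner, which manages the H2O connection itself.
    my_test()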
Example 1: iris_nfolds
def iris_nfolds():
    iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))
    model = h2o.random_forest(y=iris[4], x=iris[0:4], ntrees=50, nfolds=5)
    model.show()

    # Specifying both nfolds >= 2 and a validation frame at once is allowed,
    # so this training call must not raise.
    try:
        H2ORandomForestEstimator(ntrees=50, nfolds=5).train(
            y=4, x=list(range(4)), training_frame=iris, validation_frame=iris)
    except EnvironmentError:
        assert False, "unexpected error: nfolds and validation_frame may be combined"

if __name__ == "__main__":
    pyunit_utils.standalone_test(iris_nfolds)
else:
    iris_nfolds()
Example 2: enumerate
# Excerpt from weights_check(); the setup of h2o_cars_data, doubled_weights,
# h2o_doubled_weights, and the check_same() helper is omitted in this snippet.
h2o_data_doubled_weights = h2o_cars_data.cbind(h2o_doubled_weights)

# Build a frame in which every weight-2 observation is physically duplicated.
doubled_data = h2o.as_list(h2o_cars_data, use_pandas=False)
colnames = doubled_data.pop(0)
for idx, w in enumerate(doubled_weights[0]):
    if w == 2:
        doubled_data.append(doubled_data[idx])
h2o_data_doubled = h2o.H2OFrame(doubled_data)
h2o_data_doubled.set_names(list(colnames))

h2o_data_doubled["economy_20mpg"] = h2o_data_doubled["economy_20mpg"].asfactor()
h2o_data_doubled["cylinders"] = h2o_data_doubled["cylinders"].asfactor()
h2o_data_doubled_weights["economy_20mpg"] = h2o_data_doubled_weights["economy_20mpg"].asfactor()
h2o_data_doubled_weights["cylinders"] = h2o_data_doubled_weights["cylinders"].asfactor()

print("Checking that doubling some weights is equivalent to doubling those observations:")
print()
check_same(h2o_data_doubled, h2o_data_doubled_weights, 1)

# TODO: random weights
# TODO: all zero weights???
# TODO: negative weights???

if __name__ == "__main__":
    pyunit_utils.standalone_test(weights_check)
else:
    weights_check()
Example 3: remove_obj_client
import sys
sys.path.insert(1, "../../")
import h2o
from tests import pyunit_utils
import os

def remove_obj_client():
    training_data = h2o.import_file(pyunit_utils.locate("smalldata/logreg/benign.csv"))
    Y = 3
    X = list(range(3)) + list(range(4, 11))  # all predictors except the response column
    from h2o.estimators.glm import H2OGeneralizedLinearEstimator
    model = H2OGeneralizedLinearEstimator(family="binomial", alpha=0, Lambda=1e-5)
    print(model.model_id)
    print(model)
    model.train(x=X, y=Y, training_frame=training_data)
    print(model)
    # Removing an object from the cluster should also invalidate the client-side handle.
    h2o.remove(model)
    print(model)
    h2o.remove(training_data)
    print(training_data)

if __name__ == "__main__":
    pyunit_utils.standalone_test(remove_obj_client)
else:
    remove_obj_client()
Example 4: bigcatRF
def bigcatRF():
    # Training set has 100 categories from cat001 to cat100.
    # Categories cat001, cat003, ... are perfect predictors of y = 1.
    # Categories cat002, cat004, ... are perfect predictors of y = 0.
    bigcat = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/bigcat_5000x2.csv"))
    bigcat["y"] = bigcat["y"].asfactor()
    # bigcat.summary()

    # Train an H2O DRF model with a naive split:
    # ntrees = 1, max_depth = 1, nbins = 100, nbins_cats = 10
    model = H2ORandomForestEstimator(ntrees=1, max_depth=1, nbins=100, nbins_cats=10)
    model.train(x="X", y="y", training_frame=bigcat)
    model.show()

if __name__ == "__main__":
    pyunit_utils.standalone_test(bigcatRF)
else:
    bigcatRF()
Example 5: Imputer
# Excerpt from get_modelKmeans(); connects to a pre-existing cluster at localhost:54321.
benign_h2o = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/benign.csv"))
# benign_h2o.summary()
benign_sci = np.genfromtxt(pyunit_utils.locate("smalldata/logreg/benign.csv"), delimiter=",")

# Impute missing values with the column mean (old scikit-learn Imputer API).
imp = Imputer(missing_values="NaN", strategy="mean", axis=0)
benign_sci = imp.fit_transform(benign_sci)

for i in range(2, 7):
    # H2O K-Means
    km_h2o = H2OKMeansEstimator(k=i)
    km_h2o.train(x=list(range(benign_h2o.ncol)), training_frame=benign_h2o)
    km_h2o.show()
    model = h2o.get_model(km_h2o._id)
    model.show()

    # Reference clustering from scikit-learn.
    km_sci = KMeans(n_clusters=i, init="k-means++", n_init=1)
    km_sci.fit(benign_sci)
    print("scikit-learn centers")
    print(km_sci.cluster_centers_)

if __name__ == "__main__":
    pyunit_utils.standalone_test(get_modelKmeans)
else:
    get_modelKmeans()
Example 6: H2OGeneralizedLowRankEstimator
# Excerpt from glrm_pubdev_3756_arrest(); the opening of the initial_y list and the
# loading of prostateF / prostateF_num (plus the loss_all settings) are cut off.
    [0.7297297297297298, 66.05405405405405, 2.0, 0.0, 1.0, 23.270270270270274, 9.589189189189193, 7.27027027027027],
    [0.01754385964912314, 70.35087719298245, 2.0, 1.0, -1.3877787807814457E-17, 10.078947368421053, 42.37543859649123, 6.157894736842105],
    [0.9, 65.95, 2.0, 0.0, 0.2, 81.94500000000001, 16.375, 7.4],
    [0.9999999999999989, 65.48598130841121, 2.0, 3.0, 1.3877787807814457E-16, 13.3092523364486, 13.268411214953275, 6.747663551401869]]
initial_y_h2o = h2o.H2OFrame(list(initial_y))

glrm_h2o = H2OGeneralizedLowRankEstimator(k=5, loss_by_col=loss_all, recover_svd=True,
                                          transform="STANDARDIZE", seed=12345,
                                          init="User", user_y=initial_y_h2o)
glrm_h2o.train(x=prostateF.names, training_frame=prostateF, validation_frame=prostateF)
glrm_h2o.show()

# Exercise logistic loss with numeric columns.
glrm_h2o_num = H2OGeneralizedLowRankEstimator(k=5, loss_by_col=loss_all, recover_svd=True,
                                              transform="STANDARDIZE", seed=12345,
                                              init="User", user_y=initial_y_h2o)
glrm_h2o_num.train(x=prostateF_num.names, training_frame=prostateF_num, validation_frame=prostateF_num)
glrm_h2o_num.show()

# Singular values from the GLRM models should be equal whether binary columns with
# binary loss are read in as categoricals or as numerics. If not, something is wrong.
assert pyunit_utils.equal_two_arrays(glrm_h2o._model_json["output"]["singular_vals"],
                                     glrm_h2o_num._model_json["output"]["singular_vals"], 1e-6, 1e-4), \
    "Singular values obtained from logistic loss with column type as enum and numeric do not agree. Fix it now."
sys.stdout.flush()

if __name__ == "__main__":
    pyunit_utils.standalone_test(glrm_pubdev_3756_arrest)
else:
    glrm_pubdev_3756_arrest()
Example 7: range
# Excerpt from init_err_casesKmeans(); benign_h2o, numrow, and numcol are set up
# earlier, and a previous error case is truncated above this point.

# Number of user-specified rows exceeds the training set's: must error.
start = [[random.gauss(0, 1) for c in range(numcol)] for r in range(numrow + 2)]
try:
    h2o.kmeans(x=benign_h2o, k=numrow + 2, user_points=h2o.H2OFrame(start))
    assert False, "expected an error"
except EnvironmentError:
    pass

# Nones are replaced with the mean of a column in H2O. Not sure about Inf.
# Any entry is NA, NaN, or Inf: should still run.
start = [[random.gauss(0, 1) for c in range(numcol)] for r in range(3)]
for x in ["NA", "NaN", "Inf", "-Inf"]:
    start_err = [row[:] for row in start]  # copy each row so the original stays clean
    start_err[1][random.randint(0, numcol - 1)] = x
    h2o.kmeans(x=benign_h2o, k=3, user_points=h2o.H2OFrame(start_err))

# Duplicates will affect sampling probability during initialization.
start = [[random.gauss(0, 1) for c in range(numcol)] for r in range(3)]
start[2] = start[0]
h2o.kmeans(x=benign_h2o, k=3, user_points=h2o.H2OFrame(start))

if __name__ == "__main__":
    pyunit_utils.standalone_test(init_err_casesKmeans)
else:
    init_err_casesKmeans()
Example 8: deepwater_tweets
# Tail of a make_text_cnn() helper; the embedding, convolution, and pooling layers
# that produce h_drop, cls_weight, input_y, and num_label are omitted from this excerpt.
    cls_bias = mx.sym.Variable('cls_bias')
    fc = mx.sym.FullyConnected(data=h_drop, weight=cls_weight, bias=cls_bias, num_hidden=num_label)
    # softmax output
    sm = mx.sym.SoftmaxOutput(data=fc, label=input_y, name='softmax')
    return sm

def deepwater_tweets():
    if not H2ODeepWaterEstimator.available():
        return
    tweets = h2o.import_file(pyunit_utils.locate("/home/arno/tweets.txt"), col_names=["text"], sep="|")
    labels = h2o.import_file(pyunit_utils.locate("/home/arno/labels.txt"), col_names=["label"])
    frame = tweets.cbind(labels)
    print(frame.head(5))
    # cnn = make_text_cnn(sentence_size=100, num_embed=300, batch_size=32,
    #                     vocab_size=100000, dropout=dropout, with_embedding=with_embedding)
    model = H2ODeepWaterEstimator(epochs=50000, learning_rate=1e-3, hidden=[100, 100, 100, 100, 100])
    model.train(x=[0], y=1, training_frame=frame)
    model.show()
    error = model.model_performance(train=True).mean_per_class_error()
    assert error < 0.1, "mean classification error is too high: " + str(error)

if __name__ == "__main__":
    pyunit_utils.standalone_test(deepwater_tweets)
else:
    deepwater_tweets()
Example 9:
# Excerpt from pubdev_1829(); model1, the train/valid frames, and the hyperparameters
# (ntrees2, max_depth2, min_rows2, distribution) are set up earlier. The opening of
# the first h2o.gbm() call below (presumably model2, which is compared against
# model4 in the assertions) is cut off in the source snippet.
                 ntrees=ntrees2,
                 max_depth=max_depth2,
                 min_rows=min_rows2,
                 distribution=distribution,
                 score_each_iteration=True,
                 validation_x=valid[predictors],
                 validation_y=valid[response_col],
                 checkpoint=model1._id)

# Train the same model from scratch, without a checkpoint.
model4 = h2o.gbm(x=train[predictors],
                 y=train[response_col],
                 ntrees=ntrees2,
                 max_depth=max_depth2,
                 min_rows=min_rows2,
                 distribution=distribution,
                 score_each_iteration=True,
                 validation_x=valid[predictors],
                 validation_y=valid[response_col])

assert model2.auc(valid=True) == model4.auc(valid=True), \
    "Expected Model 2 AUC: {0} to be the same as Model 4 AUC: {1}".format(model2.auc(valid=True), model4.auc(valid=True))
assert model2.giniCoef(valid=True) == model4.giniCoef(valid=True), \
    "Expected Model 2 Gini Coef {0} to be the same as Model 4 Gini Coef: {1}".format(model2.giniCoef(valid=True), model4.giniCoef(valid=True))
assert model2.logloss(valid=True) == model4.logloss(valid=True), \
    "Expected Model 2 Log Loss: {0} to be the same as Model 4 Log Loss: {1}".format(model2.logloss(valid=True), model4.logloss(valid=True))

if __name__ == "__main__":
    pyunit_utils.standalone_test(pubdev_1829)
else:
    pubdev_1829()
Example 10: zip
# Excerpt from binop_plus(); iris, rows, and cols are loaded earlier.
#     pass
# LHS: H2OFrame, RHS: H2OVec
# try:
#     res = iris + iris[0]
#     res.show()
#     assert False, "expected error. objects of different dimensions not supported."
# except EnvironmentError:
#     pass

# LHS: H2OFrame, RHS: scalar
# res = 1.2 + iris[2]
# res2 = iris + res[21, :]
# res2.show()

# LHS: H2OFrame, RHS: scalar
res = iris + 2
res_rows, res_cols = res.dim
assert res_rows == rows and res_cols == cols, "dimension mismatch"
for x, y in zip([res[c].sum() for c in range(cols - 1)], [469.9, 342.6, 266.9, 162.2]):
    assert abs(x - y) < 1e-1, "expected same values"

###################################################################

if __name__ == "__main__":
    pyunit_utils.standalone_test(binop_plus)
else:
    binop_plus()
Example 11: print
# Tail of a comparison helper used by expr_reducers(); h2o_data and np_data are
# parallel H2O and NumPy datasets created earlier.
    print("check unsuccessful! h2o computed {0} and numpy computed {1}".format(h2o_val, num_val))
    return success

h2o_val = h2o_data.min()
num_val = np.min(np_data)
assert abs(h2o_val - num_val) < 1e-06, (
    "check unsuccessful! h2o computed {0} and numpy computed {1}. expected equal min values between h2o and "
    "numpy".format(h2o_val, num_val)
)

h2o_val = h2o_data.max()
num_val = np.max(np_data)
assert abs(h2o_val - num_val) < 1e-06, (
    "check unsuccessful! h2o computed {0} and numpy computed {1}. expected equal max values between h2o and "
    "numpy".format(h2o_val, num_val)
)

h2o_val = h2o_data.sum()
num_val = np.sum(np_data)
assert abs(h2o_val - num_val) < 1e-06, (
    "check unsuccessful! h2o computed {0} and numpy computed {1}. expected equal sum values between h2o and "
    "numpy".format(h2o_val, num_val)
)

# Variance must match NumPy's sample covariance computation.
assert pyunit_utils.np_comparison_check(
    h2o_data.var(), np.cov(np_data, rowvar=0, ddof=1), 10
), "expected equal var values between h2o and numpy"

if __name__ == "__main__":
    pyunit_utils.standalone_test(expr_reducers)
else:
    expr_reducers()
Example 12:
# Excerpt from javapredict_dynamic_data(); the remaining dataset_params are set earlier.
dataset_params['randomize'] = True
dataset_params['factors'] = random.randint(2, 2000)
dataset_params['response_factors'] = random.randint(3, 100)
print("Dataset parameters: {0}".format(dataset_params))
train = h2o.create_frame(**dataset_params)
print("Training dataset:")
print(train)

# Save the dataset to the results directory.
results_dir = pyunit_utils.locate("results")
h2o.download_csv(train, os.path.join(results_dir, "nb_dynamic_training_dataset.log"))

# Generate random parameters.
params = {}
params['laplace'] = 0
if random.randint(0, 1):
    params['laplace'] = random.uniform(0, 11)
print("Parameter list: {0}".format(params))

x = train.names
x.remove("response")
y = "response"

pyunit_utils.javapredict(algo="naive_bayes", equality=None, train=train, test=None, x=x, y=y, compile_only=True, **params)

if __name__ == "__main__":
    pyunit_utils.standalone_test(javapredict_dynamic_data)
else:
    javapredict_dynamic_data()
Example 13: compare_frames
# Excerpt from test1(); badFrame, badClone, and the compare_frames() helper are
# defined earlier, and the try block preceding this except clause is cut off.
except H2OValueError:  # as designed
    pass

compare_frames(badFrame, badClone)
originalAfterOp = H2OFrame.get_frame(badFrame.frame_id)
compare_frames(badFrame, originalAfterOp)

goodFrame = H2OFrame({"one": [4, 6, 1], "two": ["a", "b", "cde"]})
goodClone = H2OFrame({"one": [4, 6, 1], "two": ["a", "b", "cde"]})
compare_frames(goodFrame, goodClone)

# asfactor() must not mutate the original frame in place.
factoredFrame = goodFrame.asfactor()
originalAfterOp = H2OFrame.get_frame(goodFrame.frame_id)
compare_frames(goodFrame, originalAfterOp)

expectedFactoredFrame = H2OFrame({"one": [4, 6, 1], "two": ["a", "b", "cde"]},
                                 column_types={"one": "categorical", "two": "enum"})
compare_frames(expectedFactoredFrame, factoredFrame)

# asfactor() on an already-factored frame should be a no-op.
refactoredFrame = expectedFactoredFrame.asfactor()
factoredAfterOp = H2OFrame.get_frame(refactoredFrame.frame_id)
compare_frames(expectedFactoredFrame, factoredAfterOp)

if __name__ == "__main__":
    pyunit_utils.standalone_test(test1)
else:
    test1()
Example 14: gbm
# Excerpt from offset_gaussian(); the insurance frame and the opening of this
# GBM constructor call are cut off in the source snippet.
          max_depth=1,
          min_rows=1,
          learn_rate=0.1,
          distribution="gaussian")
gbm.train(x=list(range(3)), y="Claims", training_frame=insurance, offset_column="offset")
predictions = gbm.predict(insurance)

# Comparison results generated from R's gbm:
#   fit2 <- gbm(Claims ~ District + Group + Age + offset(log(Holders)), interaction.depth = 1,
#               n.minobsinnode = 1, shrinkage = .1, bag.fraction = 1, train.fraction = 1,
#               data = Insurance, distribution = "gaussian", n.trees = 600)
#   pg = predict(fit2, newdata = Insurance, type = "response", n.trees = 600)
#   pr = pg - - log(Insurance$Holders)
assert abs(44.33016 - gbm._model_json['output']['init_f']) < 1e-5, \
    "expected init_f to be {0}, but got {1}".format(44.33016, gbm._model_json['output']['init_f'])
assert abs(1491.135 - gbm.mse()) < 1e-2, "expected mse to be {0}, but got {1}".format(1491.135, gbm.mse())
assert abs(49.23438 - predictions.mean()) < 1e-2, \
    "expected prediction mean to be {0}, but got {1}".format(49.23438, predictions.mean())
assert abs(-45.5720659304 - predictions.min()) < 1e-2, \
    "expected prediction min to be {0}, but got {1}".format(-45.5720659304, predictions.min())
assert abs(207.387 - predictions.max()) < 1e-2, \
    "expected prediction max to be {0}, but got {1}".format(207.387, predictions.max())

if __name__ == "__main__":
    pyunit_utils.standalone_test(offset_gaussian)
else:
    offset_gaussian()
Example 15: H2ODeepLearningEstimator
# Excerpt from offsets_and_distributions(); the cars and insurance frames are loaded earlier.
# bernoulli - offset not supported:
# dl = h2o.deeplearning(x=cars[2:8], y=cars["economy_20mpg"], distribution="bernoulli",
#                       offset_column="x1", training_frame=cars)
# predictions = dl.predict(cars)

# gamma
dl = H2ODeepLearningEstimator(distribution="gamma")
dl.train(x=list(range(3)), y="Claims", training_frame=insurance, offset_column="offset")
predictions = dl.predict(insurance)

# gaussian
dl = H2ODeepLearningEstimator(distribution="gaussian")
dl.train(x=list(range(3)), y="Claims", training_frame=insurance, offset_column="offset")
predictions = dl.predict(insurance)

# poisson
dl = H2ODeepLearningEstimator(distribution="poisson")
dl.train(x=list(range(3)), y="Claims", training_frame=insurance, offset_column="offset")
predictions = dl.predict(insurance)

# tweedie
dl = H2ODeepLearningEstimator(distribution="tweedie")
dl.train(x=list(range(3)), y="Claims", training_frame=insurance, offset_column="offset")
predictions = dl.predict(insurance)

if __name__ == "__main__":
    pyunit_utils.standalone_test(offsets_and_distributions)
else:
    offsets_and_distributions()