This page collects representative code examples of the Python function tests.locate. If you have been wondering what tests.locate does, how to call it, or what it looks like in real code, the curated examples here should help.
The sections below show 15 code examples of the locate function, sorted by popularity by default. You can upvote the examples you find useful; your feedback helps the system surface better Python samples.
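A note before the examples: tests.locate is a helper from the h2o-3 test harness, not a published API; it resolves a path relative to the repository's data directory into an absolute path. A minimal sketch of the idea — assuming it simply walks up from the current directory until the relative path exists, which matches how the examples use it but is not the verbatim h2o-3 implementation:

import os

def locate(rel_path):
    # Walk up the directory tree until rel_path is found (assumed behavior).
    cur = os.path.abspath(os.getcwd())
    while True:
        candidate = os.path.join(cur, rel_path)
        if os.path.exists(candidate):
            return candidate
        parent = os.path.dirname(cur)
        if parent == cur:  # hit the filesystem root without finding it
            raise ValueError("{0} not found".format(rel_path))
        cur = parent

The snippets below are excerpts from test files and assume those files' imports, roughly: import h2o, import numpy as np, import pandas as pd, import statsmodels.api as sm, import zipfile, from sklearn import ensemble, from sklearn.cluster import KMeans, plus the harness module tests.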
Example 1: smallcatGBM

def smallcatGBM():
    # Training set has 26 categories, from A to Z
    # Categories A, C, E, G, ... are perfect predictors of y = 1
    # Categories B, D, F, H, ... are perfect predictors of y = 0
    #Log.info("Importing alphabet_cattest.csv data...\n")
    alphabet = h2o.import_file(path=tests.locate("smalldata/gbm_test/alphabet_cattest.csv"))
    alphabet["y"] = alphabet["y"].asfactor()
    #Log.info("Summary of alphabet_cattest.csv from H2O:\n")
    #alphabet.summary()

    # Prepare the same data for scikit-learn
    trainData = np.loadtxt(tests.locate("smalldata/gbm_test/alphabet_cattest.csv"), delimiter=',', skiprows=1,
                           converters={0: lambda s: ord(s.split("\"")[1])})
    trainDataResponse = trainData[:, 1]
    trainDataFeatures = trainData[:, 0]

    # Train the H2O GBM model:
    #Log.info("H2O GBM (Naive Split) with parameters:\nntrees = 1, max_depth = 1, nbins = 100\n")
    gbm_h2o = h2o.gbm(x=alphabet[['X']], y=alphabet["y"], distribution="bernoulli", ntrees=1, max_depth=1, nbins=100)
    gbm_h2o.show()

    # Train the scikit-learn GBM model with the same parameters:
    gbm_sci = ensemble.GradientBoostingClassifier(n_estimators=1, max_depth=1, max_features=None)
    gbm_sci.fit(trainDataFeatures[:, np.newaxis], trainDataResponse)
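The excerpt stops after fitting; the original test presumably goes on to compare the two models. A minimal sketch of such a side-by-side check — the roc_auc_score comparison is an illustration, not part of the original test, and it assumes the binomial H2O model exposes auc() as in the h2o Python API:

from sklearn.metrics import roc_auc_score

# Training AUC of each model, for a side-by-side sanity check
sci_auc = roc_auc_score(trainDataResponse,
                        gbm_sci.predict_proba(trainDataFeatures[:, np.newaxis])[:, 1])
print("scikit-learn training AUC:", sci_auc)
print("H2O training AUC:", gbm_h2o.auc())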
Example 2: offsets_and_distributions

def offsets_and_distributions():
    # cars
    cars = h2o.upload_file(tests.locate("smalldata/junit/cars_20mpg.csv"))
    cars = cars[cars["economy_20mpg"].isna() == 0]
    cars["economy_20mpg"] = cars["economy_20mpg"].asfactor()
    offset = h2o.H2OFrame(python_obj=[[.5] for x in range(398)])
    offset.set_name(0, "x1")
    cars = cars.cbind(offset)

    # insurance
    insurance = h2o.import_file(tests.locate("smalldata/glm_test/insurance.csv"))
    insurance["offset"] = insurance["Holders"].log()

    # bernoulli - offset not supported
    #dl = h2o.deeplearning(x=cars[2:8], y=cars["economy_20mpg"], distribution="bernoulli", offset_column="x1",
    #                      training_frame=cars)
    #predictions = dl.predict(cars)

    # gamma
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="gamma", offset_column="offset", training_frame=insurance)
    predictions = dl.predict(insurance)

    # gaussian
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="gaussian", offset_column="offset", training_frame=insurance)
    predictions = dl.predict(insurance)

    # poisson
    dl = h2o.deeplearning(x=insurance[0:3], y=insurance["Claims"], distribution="poisson", offset_column="offset", training_frame=insurance)
    predictions = dl.predict(insurance)

    # tweedie
    dl = h2o.deeplearning(x=insurance.names[0:3], y="Claims", distribution="tweedie", offset_column="offset", training_frame=insurance)
    predictions = dl.predict(insurance)
Author: kyoren — project: https-github.com-h2oai-h2o-3 — lines: 34 — source: pyunit_offsets_and_distributionsDeeplearning.py
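A note on the offset column: with a log link (gamma, poisson, tweedie), the offset is added to the linear predictor unchanged, so using log(Holders) turns the fit into a per-holder rate model. A standalone numpy illustration of that identity (not part of the test):

import numpy as np

# log(E[Claims]) = log(Holders) + f(x)  =>  E[Claims] = Holders * exp(f(x))
holders = np.array([100.0, 200.0])
fx = np.array([0.1, 0.1])            # same predicted log-rate for both rows
print(holders * np.exp(fx))          # doubling Holders doubles the expected Claims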
Example 3: link_functions_gaussian

def link_functions_gaussian():
    print("Read in prostate data.")
    h2o_data = h2o.import_file(path=tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
    h2o_data.head()

    sm_data = pd.read_csv(zipfile.ZipFile(tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
                          .open("prostate_complete.csv")).as_matrix()
    sm_data_response = sm_data[:, 9]
    sm_data_features = sm_data[:, 1:9]

    print("Testing for family: GAUSSIAN")
    print("Set variables for h2o.")
    myY = "GLEASON"
    myX = ["ID", "AGE", "RACE", "CAPSULE", "DCAPS", "PSA", "VOL", "DPROS"]

    print("Create models with canonical link: IDENTITY")
    h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY], family="gaussian", link="identity", alpha=[0.5], Lambda=[0])
    sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features,
                      family=sm.families.Gaussian(sm.families.links.identity)).fit()

    print("Compare model deviances for link function identity")
    h2o_deviance = h2o_model.residual_deviance() / h2o_model.null_deviance()
    sm_deviance = sm_model.deviance / sm_model.null_deviance
    assert h2o_deviance - sm_deviance < 0.01, "expected h2o to have an equivalent or better deviance measure"
Example 4: link_functions_binomial

def link_functions_binomial():
    print("Read in prostate data.")
    h2o_data = h2o.import_file(path=tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
    h2o_data.head()

    sm_data = pd.read_csv(zipfile.ZipFile(tests.locate("smalldata/prostate/prostate_complete.csv.zip"))
                          .open("prostate_complete.csv")).as_matrix()
    sm_data_response = sm_data[:, 2]
    sm_data_features = sm_data[:, [1, 3, 4, 5, 6, 7, 8, 9]]

    print("Testing for family: BINOMIAL")
    print("Set variables for h2o.")
    myY = "CAPSULE"
    myX = ["ID", "AGE", "RACE", "GLEASON", "DCAPS", "PSA", "VOL", "DPROS"]

    print("Create models with canonical link: LOGIT")
    h2o_model = h2o.glm(x=h2o_data[myX], y=h2o_data[myY].asfactor(), family="binomial", link="logit", alpha=[0.5], Lambda=[0])
    sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features, family=sm.families.Binomial(sm.families.links.logit)).fit()

    print("Compare model deviances for link function logit")
    h2o_deviance = h2o_model.residual_deviance() / h2o_model.null_deviance()
    sm_deviance = sm_model.deviance / sm_model.null_deviance
    assert h2o_deviance - sm_deviance < 0.01, "expected h2o to have an equivalent or better deviance measure"
Example 5: frame_slicing
def frame_slicing():
    iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    prostate = h2o.import_file(path=tests.locate("smalldata/prostate/prostate.csv.zip"))
    airlines = h2o.import_file(path=tests.locate("smalldata/airlines/allyears2k.zip"))
    iris.show()
    prostate.show()
    airlines.show()

    ###################################################################
    # H2OFrame[int] (column slice)
    res1 = iris[0]
    assert abs(res1[8, :] - 4.4) < 1e-10, "incorrect values"

    # H2OFrame[int, int]
    res2 = prostate[13, 3]
    assert abs(res2 - 1) < 1e-10, "incorrect values"

    # H2OFrame[int, slice]
    res3 = airlines[12, 0:3]
    assert abs(res3[0, 0] - 1987) < 1e-10 and abs(res3[0, 1] - 10) < 1e-10 and abs(res3[0, 2] - 29) < 1e-10, \
        "incorrect values"

    # H2OFrame[slice, int]
    res4 = iris[5:8, 1]
    assert abs(res4[0, :] - 3.9) < 1e-10 and abs(res4[1, :] - 3.4) < 1e-10 and abs(res4[2, :] - 3.4) < 1e-10, "incorrect values"

    # H2OFrame[slice, slice]
    res5 = prostate[5:8, 0:3]
    assert abs(res5[0, 0] - 6) < 1e-10 and abs(res5[1, 1] - 0) < 1e-10 and abs(res5[2, 2] - 61) < 1e-10, "incorrect values"
Example 6: dim_checks

def dim_checks():
    # Log.info("Uploading logreg/prostate.csv")
    h2o_data = h2o.import_file(path=tests.locate("smalldata/logreg/prostate.csv"))
    np_data = np.loadtxt(tests.locate("smalldata/logreg/prostate.csv"), delimiter=',', skiprows=1)

    h2o_rows, h2o_cols = h2o_data.dim
    np_rows, np_cols = list(np_data.shape)
    print('The dimensions of the h2o frame are: {0} x {1}'.format(h2o_rows, h2o_cols))
    print('The dimensions of the numpy array are: {0} x {1}'.format(np_rows, np_cols))
    assert [h2o_rows, h2o_cols] == [np_rows, np_cols], "expected equal numbers of rows and columns"

    # Log.info("Slice out a column and data frame it, try dim on it...")
    h2o_slice = h2o_data[4]
    np_slice = np_data[:, 4]
    h2o_rows, h2o_cols = h2o_slice.dim
    np_rows = np_slice.shape[0]
    print('The dimensions of the h2o column slice are: {0} x {1}'.format(h2o_rows, h2o_cols))
    print('The dimensions of the numpy column slice are: {0} x 1'.format(np_rows))
    assert [h2o_rows, h2o_cols] == [np_rows, 1], "expected equal numbers of rows and columns"

    # Log.info("OK, now try an operator, e.g. '&', and then check dimensions again...")
    h2oColAmpFive = h2o_slice & 5
    assert h2oColAmpFive.nrow == h2o_rows, "expected the number of rows to remain unchanged"
Example 7: fiftycatGBM

def fiftycatGBM():
    # Training set has only 45 categories, cat1 through cat45
    #Log.info("Importing 50_cattest_train.csv data...\n")
    train = h2o.import_file(path=tests.locate("smalldata/gbm_test/50_cattest_train.csv"))
    train["y"] = train["y"].asfactor()
    #Log.info("Summary of 50_cattest_train.csv from H2O:\n")
    #train.summary()

    # Train the H2O GBM model:
    #Log.info("H2O GBM with parameters:\nntrees = 10, max_depth = 5, nbins = 20\n")
    model = h2o.gbm(x=train[["x1", "x2"]], y=train["y"], distribution="bernoulli", ntrees=10, max_depth=5, nbins=20)
    model.show()

    # Test dataset has all 50 categories, cat1 through cat50
    #Log.info("Importing 50_cattest_test.csv data...\n")
    test = h2o.import_file(path=tests.locate("smalldata/gbm_test/50_cattest_test.csv"))
    #Log.info("Summary of 50_cattest_test.csv from H2O:\n")
    #test.summary()

    # Predict on the test dataset with the GBM model:
    #Log.info("Performing predictions on test dataset...\n")
    predictions = model.predict(test)
    predictions.show()

    # Get the confusion matrix and AUC
    #Log.info("Confusion matrix of predictions (max accuracy):\n")
    performance = model.model_performance(test)
    test_cm = performance.confusion_matrix()
    test_auc = performance.auc()
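The excerpt computes the confusion matrix and AUC but ends before displaying them; printing them is the natural next step:

print(test_cm)
print("test AUC:", test_auc)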
Example 8: pubdev_1953

def pubdev_1953():
    # The original feature-engineering pipeline, kept as commented-out reference:
    # small_test = [tests.locate("bigdata/laptop/citibike-nyc/2013-10.csv")]
    # data = h2o.import_file(path=small_test)
    # startime = data["starttime"]
    # secsPerDay = 1000*60*60*24
    # data["Days"] = (startime/secsPerDay).floor()
    # grouped = data.group_by(["Days", "start station name"])
    # bpd = grouped.count(name="bikes").get_frame()
    # secs = bpd["Days"]*secsPerDay
    # bpd["Month"] = secs.month().asfactor()
    # bpd["DayOfWeek"] = secs.dayOfWeek()
    # wthr1 = h2o.import_file(path=[tests.locate("bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2013.csv"), tests.locate("bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2014.csv")])
    # wthr2 = wthr1[["Year Local", "Month Local", "Day Local", "Hour Local", "Dew Point (C)", "Humidity Fraction", "Precipitation One Hour (mm)", "Temperature (C)", "Weather Code 1/ Description"]]
    # wthr2.set_name(wthr2.index("Precipitation One Hour (mm)"), "Rain (mm)")
    # wthr2.set_name(wthr2.index("Weather Code 1/ Description"), "WC1")
    # wthr3 = wthr2[wthr2["Hour Local"] == 12]
    # wthr3["msec"] = h2o.H2OFrame.mktime(year=wthr3["Year Local"], month=wthr3["Month Local"]-1, day=wthr3["Day Local"]-1, hour=wthr3["Hour Local"])
    # secsPerDay = 1000*60*60*24
    # wthr3["Days"] = (wthr3["msec"]/secsPerDay).floor()
    # wthr4 = wthr3.drop("Year Local").drop("Month Local").drop("Day Local").drop("Hour Local").drop("msec")
    # rain = wthr4["Rain (mm)"]
    # rain[rain.isna()] = 0
    # bpd_with_weather = bpd.merge(wthr4, allLeft=True, allRite=False)
    # r = bpd_with_weather['Days'].runif(seed=356964763)
    # train = bpd_with_weather[r < 0.6]
    # test = bpd_with_weather[(0.6 <= r) & (r < 0.9)]

    # The test itself runs on pre-built small train/test files:
    predictors = ['DayOfWeek', 'WC1', 'start station name', 'Temperature (C)', 'Days', 'Month', 'Humidity Fraction', 'Rain (mm)', 'Dew Point (C)']
    train = h2o.import_file(tests.locate("smalldata/glm_test/citibike_small_train.csv"))
    test = h2o.import_file(tests.locate("smalldata/glm_test/citibike_small_test.csv"))
    glm0 = h2o.glm(x=train[predictors], y=train["bikes"], validation_x=test[predictors], validation_y=test["bikes"], family="poisson")
Example 9: wide_dataset_large

def wide_dataset_large():
    print("Reading in Arcene training data for binomial modeling.")
    trainDataResponse = np.genfromtxt(tests.locate("smalldata/arcene/arcene_train_labels.labels"), delimiter=' ')
    trainDataResponse = np.where(trainDataResponse == -1, 0, 1)
    trainDataFeatures = np.genfromtxt(tests.locate("smalldata/arcene/arcene_train.data"), delimiter=' ')
    trainData = h2o.H2OFrame(np.column_stack((trainDataResponse, trainDataFeatures)).tolist())

    print("Run model on 3250 columns of Arcene with strong rules off.")
    model = h2o.glm(x=trainData[1:3250], y=trainData[0].asfactor(), family="binomial", lambda_search=False, alpha=[1])

    print("Test model on validation set.")
    validDataResponse = np.genfromtxt(tests.locate("smalldata/arcene/arcene_valid_labels.labels"), delimiter=' ')
    validDataResponse = np.where(validDataResponse == -1, 0, 1)
    validDataFeatures = np.genfromtxt(tests.locate("smalldata/arcene/arcene_valid.data"), delimiter=' ')
    validData = h2o.H2OFrame(np.column_stack((validDataResponse, validDataFeatures)).tolist())
    prediction = model.predict(validData)

    print("Check performance of predictions.")
    performance = model.model_performance(validData)

    print("Check that prediction AUC is better than guessing (0.5).")
    assert performance.auc() > 0.5, "predictions should be better than pure chance"
Example 10: anomaly

def anomaly():
    print("Deep Learning Anomaly Detection MNIST")
    train = h2o.import_file(tests.locate("bigdata/laptop/mnist/train.csv.gz"))
    test = h2o.import_file(tests.locate("bigdata/laptop/mnist/test.csv.gz"))
    predictors = list(range(0, 784))
    resp = 784

    # unsupervised -> drop the response column (digit: 0-9)
    train = train[predictors]
    test = test[predictors]

    # 1) LEARN WHAT'S NORMAL
    # Train an unsupervised Deep Learning autoencoder model on the training frame
    ae_model = h2o.deeplearning(x=train[predictors], training_frame=train, activation="Tanh", autoencoder=True,
                                hidden=[50], l1=1e-5, ignore_const_cols=False, epochs=1)

    # 2) DETECT OUTLIERS
    # The anomaly app computes the per-row reconstruction error for the test data set
    # (passing it through the autoencoder model and computing the mean squared error (MSE) for each row)
    test_rec_error = ae_model.anomaly(test)

    # 3) VISUALIZE OUTLIERS
    # Look at the test set points with low/median/high reconstruction errors.
    # We can visualize the original test set points and their reconstructions obtained
    # by propagating them through the narrow neural net.
    # Convert the test data into its autoencoded representation (pass through the narrow neural net)
    test_recon = ae_model.predict(test)
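Step 3 above stops at the reconstruction; one simple way to actually look at the outliers is a histogram of the per-row reconstruction error. A sketch, assuming matplotlib is available and that the anomaly frame exposes its usual "Reconstruction.MSE" column:

import matplotlib.pyplot as plt

err = test_rec_error.as_data_frame(use_pandas=True)["Reconstruction.MSE"]  # column name assumed
plt.hist(err, bins=50)
plt.xlabel("reconstruction MSE")
plt.ylabel("number of test rows")
plt.show()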
Example 11: group_by

def group_by():
    # Connect to a pre-existing cluster
    h2o_iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    pd_iris = pd.read_csv(tests.locate("smalldata/iris/iris_wheader.csv"))

    na_handling = ["ignore", "rm", "all"]
    col_names = h2o_iris.col_names[0:4]

    print("Running smoke test")

    # smoke test
    for na in na_handling:
        grouped = h2o_iris.group_by("class")
        grouped \
            .count(na=na) \
            .min(na=na) \
            .max(na=na) \
            .mean(na=na) \
            .var(na=na) \
            .sd(na=na) \
            .ss(na=na) \
            .sum(na=na)
        print(grouped.get_frame())
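The snippet loads pd_iris but the excerpt ends before using it; presumably the original test compares the H2O results against pandas. The pandas equivalent of the aggregation above is a one-liner (note that H2O's ss, the sum of squares, has no built-in pandas shorthand):

print(pd_iris.groupby("class").agg(["count", "min", "max", "mean", "var", "std", "sum"]))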
Example 12: fiftycatRF

def fiftycatRF():
    # Training set has only 45 categories, cat1 through cat45
    #Log.info("Importing 50_cattest_train.csv data...\n")
    train = h2o.import_file(path=tests.locate("smalldata/gbm_test/50_cattest_train.csv"))
    train["y"] = train["y"].asfactor()
    #Log.info("Summary of 50_cattest_train.csv from H2O:\n")
    #train.summary()

    # Train the H2O DRF model:
    #Log.info("H2O DRF with parameters:\nclassification = TRUE, ntree = 50, depth = 20, nbins = 500\n")
    model = h2o.random_forest(x=train[["x1", "x2"]], y=train["y"], ntrees=50, max_depth=20, nbins=500)

    # Test dataset has all 50 categories, cat1 through cat50
    #Log.info("Importing 50_cattest_test.csv data...\n")
    test = h2o.import_file(path=tests.locate("smalldata/gbm_test/50_cattest_test.csv"))
    #Log.info("Summary of 50_cattest_test.csv from H2O:\n")
    #test.summary()

    # Predict on the test dataset with the DRF model:
    #Log.info("Performing predictions on test dataset...\n")
    preds = model.predict(test)
    preds.head()

    # Get the confusion matrix and AUC
    #Log.info("Confusion matrix of predictions (max accuracy):\n")
    perf = model.model_performance(test)
    perf.show()
    cm = perf.confusion_matrix()
    print(cm)
Example 13: iris_h2o_vs_sciKmeans

def iris_h2o_vs_sciKmeans():
    # Connect to a pre-existing cluster on localhost:54321
    iris_h2o = h2o.import_file(path=tests.locate("smalldata/iris/iris.csv"))
    iris_sci = np.genfromtxt(tests.locate("smalldata/iris/iris.csv"), delimiter=',')
    iris_sci = iris_sci[:, 0:4]

    s = [[4.9, 3.0, 1.4, 0.2],
         [5.6, 2.5, 3.9, 1.1],
         [6.5, 3.0, 5.2, 2.0]]
    start = h2o.H2OFrame(s)

    h2o_km = h2o.kmeans(x=iris_h2o[0:4], k=3, user_points=start, standardize=False)
    sci_km = KMeans(n_clusters=3, init=np.asarray(s), n_init=1)
    sci_km.fit(iris_sci)

    # Log.info("Cluster centers from H2O:")
    print("Cluster centers from H2O:")
    h2o_centers = h2o_km.centers()
    print(h2o_centers)

    # Log.info("Cluster centers from scikit:")
    print("Cluster centers from scikit:")
    sci_centers = sci_km.cluster_centers_.tolist()
    print(sci_centers)

    # Compare centers pointwise; abs() is needed so the check also catches negative differences
    for hcenter, scenter in zip(h2o_centers, sci_centers):
        for hpoint, spoint in zip(hcenter, scenter):
            assert abs(hpoint - spoint) < 1e-10, "expected centers to be the same"
Example 14: plot_test

def plot_test():
    kwargs = {}
    kwargs['server'] = True

    air = h2o.import_file(tests.locate("smalldata/airlines/AirlinesTrain.csv.zip"))

    # Construct train and validation sets by sampling (80/20)
    s = air[0].runif()
    air_train = air[s <= 0.8]
    air_valid = air[s > 0.8]

    myX = ["Origin", "Dest", "Distance", "UniqueCarrier", "fMonth", "fDayofMonth", "fDayOfWeek"]
    myY = "IsDepDelayed"

    air_gbm = h2o.gbm(x=air_train[myX], y=air_train[myY], validation_x=air_valid[myX], validation_y=air_valid[myY],
                      distribution="bernoulli", ntrees=100, max_depth=3, learn_rate=0.01)

    # Plot ROC for training and validation sets
    air_gbm.plot(type="roc", train=True, **kwargs)
    air_gbm.plot(type="roc", valid=True, **kwargs)

    air_test = h2o.import_file(tests.locate("smalldata/airlines/AirlinesTest.csv.zip"))
    perf = air_gbm.model_performance(air_test)

    # Plot ROC for the test set
    perf.plot(type="roc", **kwargs)
Example 15: frame_as_list
def frame_as_list():
    iris = h2o.import_file(path=tests.locate("smalldata/iris/iris_wheader.csv"))
    prostate = h2o.import_file(path=tests.locate("smalldata/prostate/prostate.csv.zip"))
    airlines = h2o.import_file(path=tests.locate("smalldata/airlines/allyears2k.zip"))

    res1 = h2o.as_list(iris, use_pandas=False)
    assert (
        abs(float(res1[9][0]) - 4.4) < 1e-10
        and abs(float(res1[9][1]) - 2.9) < 1e-10
        and abs(float(res1[9][2]) - 1.4) < 1e-10
    ), "incorrect values"

    res2 = h2o.as_list(prostate, use_pandas=False)
    assert (
        abs(float(res2[7][0]) - 7) < 1e-10
        and abs(float(res2[7][1]) - 0) < 1e-10
        and abs(float(res2[7][2]) - 68) < 1e-10
    ), "incorrect values"

    res3 = h2o.as_list(airlines, use_pandas=False)
    assert (
        abs(float(res3[4][0]) - 1987) < 1e-10
        and abs(float(res3[4][1]) - 10) < 1e-10
        and abs(float(res3[4][2]) - 18) < 1e-10
    ), "incorrect values"