Python pyunit_utils.locate函数代码示例

本文整理汇总了Python中tests.pyunit_utils.locate函数的典型用法代码示例。如果您正苦于以下问题：Python locate函数的具体用法？Python locate怎么用？Python locate使用的例子？那么, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了locate函数的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: wide_dataset_large

def wide_dataset_large():



    print("Reading in Arcene training data for binomial modeling.")
    trainDataResponse = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_train_labels.labels"), delimiter=' ')
    trainDataResponse = np.where(trainDataResponse == -1, 0, 1)
    trainDataFeatures = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_train.data"), delimiter=' ')
    trainData = h2o.H2OFrame(np.column_stack((trainDataResponse, trainDataFeatures)).tolist())

    print("Run model on 3250 columns of Arcene with strong rules off.")
    model = h2o.glm(x=trainData[1:3250], y=trainData[0].asfactor(), family="binomial", lambda_search=False, alpha=[1])

    print("Test model on validation set.")
    validDataResponse = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_valid_labels.labels"), delimiter=' ')
    validDataResponse = np.where(validDataResponse == -1, 0, 1)
    validDataFeatures = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_valid.data"), delimiter=' ')
    validData = h2o.H2OFrame(np.column_stack((validDataResponse, validDataFeatures)).tolist())
    prediction = model.predict(validData)

    print("Check performance of predictions.")
    performance = model.model_performance(validData)

    print("Check that prediction AUC better than guessing (0.5).")
    assert performance.auc() > 0.5, "predictions should be better then pure chance"

开发者ID:ndjido，项目名称:h2o-3，代码行数:25，代码来源:pyunit_DEPRECATED_wide_dataset_largeGLM.py

示例2: fiftycatRF

def fiftycatRF():
    
    

    # Training set has only 45 categories cat1 through cat45
    #Log.info("Importing 50_cattest_train.csv data...\n")
    train = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/50_cattest_train.csv"))
    train["y"] = train["y"].asfactor()

    #Log.info("Summary of 50_cattest_train.csv from H2O:\n")
    #train.summary()

    # Train H2O DRF Model:
    #Log.info(paste("H2O DRF with parameters:\nclassification = TRUE, ntree = 50, depth = 20, nbins = 500\n", sep = ""))
    model = h2o.random_forest(x=train[["x1", "x2"]], y=train["y"], ntrees=50, max_depth=20, nbins=500)

    # Test dataset has all 50 categories cat1 through cat50
    #Log.info("Importing 50_cattest_test.csv data...\n")
    test = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/50_cattest_test.csv"))

    #Log.info("Summary of 50_cattest_test.csv from H2O:\n")
    #test.summary()

    # Predict on test dataset with DRF model:
    #Log.info("Performing predictions on test dataset...\n")
    preds = model.predict(test)
    preds.head()

    # Get the confusion matrix and AUC
    #Log.info("Confusion matrix of predictions (max accuracy):\n")
    perf = model.model_performance(test)
    perf.show()
    cm = perf.confusion_matrix()
    print(cm)

开发者ID:Kaushik512，项目名称:h2o-3，代码行数:34，代码来源:pyunit_DEPRECATED_fiftycatRF.py

示例3: wide_dataset_large

def wide_dataset_large():
  print("Reading in Arcene training data for binomial modeling.")
  trainDataResponse = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_train_labels.labels"), delimiter=' ')
  trainDataResponse = np.where(trainDataResponse == -1, 0, 1)
  trainDataFeatures = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_train.data"), delimiter=' ')
  xtrain = np.transpose(trainDataFeatures).tolist()
  ytrain = trainDataResponse.tolist()
  trainData = h2o.H2OFrame.fromPython([ytrain]+xtrain)

  trainData[0] = trainData[0].asfactor()

  print("Run model on 3250 columns of Arcene with strong rules off.")
  model = H2OGeneralizedLinearEstimator(family="binomial", lambda_search=False, alpha=1)
  model.train(x=range(1,3250), y=0, training_frame=trainData)

  print("Test model on validation set.")
  validDataResponse = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_valid_labels.labels"), delimiter=' ')
  validDataResponse = np.where(validDataResponse == -1, 0, 1)
  validDataFeatures = np.genfromtxt(pyunit_utils.locate("smalldata/arcene/arcene_valid.data"), delimiter=' ')
  xvalid = np.transpose(validDataFeatures).tolist()
  yvalid = validDataResponse.tolist()
  validData = h2o.H2OFrame.fromPython([yvalid]+xvalid)
  prediction = model.predict(validData)

  print("Check performance of predictions.")
  performance = model.model_performance(validData)

  print("Check that prediction AUC better than guessing (0.5).")
  assert performance.auc() > 0.5, "predictions should be better then pure chance"

开发者ID:Vishnu24，项目名称:h2o-3，代码行数:29，代码来源:pyunit_wide_dataset_glm_large.py

示例4: user

def user():

    a = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))[0:4]
    a.head()

    print(a[0].names)  # Column header
    print(a[2,0])           # column 0, row 2 value
    print(a[2,"sepal_len"]) # Column 0, row 2 value
    (a[0] + 2).show()  # Add 2 to every element; broadcast a constant
    (a[0] + a[1]).show()  # Add 2 columns; broadcast parallel add
    sum(a).show()
    print(a["sepal_len"].mean())

    print()
    print("Rows 50 through 77 in the `sepal_len` column")
    a[50:78, "sepal_len"].show()  # print out rows 50 thru 77 inclusive
    print()

    a["sepal_len"].show()

    print(a[50:78, ["sepal_len", "sepal_wid"]].show())

    a.show()

    print("The column means: ")
    print(a.mean())
    print()

    try:
        print(a["Sepal_len"].dim)  # Error, misspelt column name
    except Exception:
        pass  # Expected error

    b = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris_wheader.csv"))[0:4]
    c = a + b
    d = c + c + sum(a)
    e = c + a + 1
    e.show()
    # Note that "d=c+..." keeps the internal C expressions alive, until "d" goes
    # out of scope even as we nuke "c"
    c.show()
    c = None
    # Internal "ExprNode(c=a+b)" not dead!

    print(1 + (a[0] + b[1]).mean())

    import collections

    c = h2o.H2OFrame(collections.OrderedDict({"A": [1, 2, 3], "B": [4, 5, 6]}))
    c.show()

    c.describe()
    c.head()

    c[0].show()
    print(c[1,0])
    c[0:2,0].show()

    sliced = a[0:51,0]
    sliced.show()

开发者ID:StevenLOL，项目名称:h2o-3，代码行数:60，代码来源:pyunit_user.py

示例5: checkpoint_new_category_in_predictor

def checkpoint_new_category_in_predictor():

  sv1 = h2o.upload_file(pyunit_utils.locate("smalldata/iris/setosa_versicolor.csv"))
  sv2 = h2o.upload_file(pyunit_utils.locate("smalldata/iris/setosa_versicolor.csv"))
  vir = h2o.upload_file(pyunit_utils.locate("smalldata/iris/virginica.csv"))
  print("checkpoint_new_category_in_predictor-1")
  m1 = H2ODeepLearningEstimator(epochs=100)
  m1.train(x=[0,1,2,4], y=3, training_frame=sv1)

  m2 = H2ODeepLearningEstimator(epochs=200, checkpoint=m1.model_id)
  m2.train(x=[0,1,2,4], y=3, training_frame=sv2)
  print("checkpoint_new_category_in_predictor-2")

  # attempt to continue building model, but with an expanded categorical predictor domain.
  # this should fail
  try:
    m3 = H2ODeepLearningEstimator(epochs=200, checkpoint=m1.model_id)
    m3.train(x=[0,1,2,4], y=3, training_frame=vir)
    assert False, "Expected continued model-building to fail with new categories introduced in predictor"
  except EnvironmentError:
    pass
  
  print("checkpoint_new_category_in_predictor-3")

  # attempt to predict on new model, but with observations that have expanded categorical predictor domain.
  predictions = m2.predict(vir)
  print("checkpoint_new_category_in_predictor-4")

开发者ID:StevenLOL，项目名称:h2o-3，代码行数:27，代码来源:pyunit_checkpoint_new_category_in_predictorDL.py

示例6: xgboost_insurance_gaussian_small

def xgboost_insurance_gaussian_small():
    assert H2OXGBoostEstimator.available()

    # Import big dataset to ensure run across multiple nodes
    training_frame = h2o.import_file(pyunit_utils.locate("smalldata/testng/insurance_train1.csv"))
    test_frame = h2o.import_file(pyunit_utils.locate("smalldata/testng/insurance_validation1.csv"))
    x = ['Age', 'District']
    y = 'Claims'

    # Model with maximum of 2 trees
    model_2_trees = H2OXGBoostEstimator(training_frame=training_frame, learn_rate=0.7,
                                        booster='gbtree', seed=1, ntrees=2, distribution='gaussian')
    model_2_trees.train(x=x, y=y, training_frame=training_frame)
    prediction_2_trees = model_2_trees.predict(test_frame)

    assert prediction_2_trees.nrows == test_frame.nrows

    # Model with 10 trees
    model_10_trees = H2OXGBoostEstimator(training_frame=training_frame, learn_rate=0.7,
                                         booster='gbtree', seed=1, ntrees=10, distribution='gaussian')
    model_10_trees.train(x=x, y=y, training_frame=training_frame)
    prediction_10_trees = model_10_trees.predict(test_frame)

    assert prediction_10_trees.nrows == test_frame.nrows

    ## Mean square error on model with lower number of decision trees should be higher
    assert model_2_trees.mse() > model_10_trees.mse()

开发者ID:StevenLOL，项目名称:h2o-3，代码行数:27，代码来源:pyunit_insurance_gaussian_small.py

示例7: table_check

def table_check():
  df = h2o.import_file(path=pyunit_utils.locate("smalldata/prostate/prostate.csv"))
  print(df[['AGE','RACE']].table(dense=True).head().as_data_frame(True))
  print(df[['AGE','RACE']].table(dense=False).head().as_data_frame(True))
  print(df[['RACE','AGE']].table(dense=True).head().as_data_frame(True))
  print(df[['RACE','AGE']].table(dense=False).head().as_data_frame(True))
  iris = h2o.import_file(path=pyunit_utils.locate("smalldata/iris/iris.csv"))

  # single column (frame)
  table1 = iris["C5"].table()
  assert table1[0,1] == 50, "Expected 50 of {0}, but got {1}".format(table1[0,0], table1[0,1])
  assert table1[1,1] == 50, "Expected 50 of {0}, but got {1}".format(table1[1,0], table1[1,1])
  assert table1[2,1] == 50, "Expected 50 of {0}, but got {1}".format(table1[2,0], table1[2,1])

  # two-column (one argument)
  
  #dense
  table2 = iris["C1"].table(iris["C5"])
  
  #not dense
  table3 = iris["C1"].table(iris["C5"],dense=False)
  
  #check same value
  assert (table3[table3['C1'] == 5,'Iris-setosa'] == table2[(table2['C1'] == 5) & (table2['C5'] == 'Iris-setosa'),'Counts']).all()
  
  assert (table2 == iris[["C1","C5"]].table()).all()
  assert (table3 == iris[["C1","C5"]].table(dense=False)).all()

  cars = h2o.import_file(path=pyunit_utils.locate("smalldata/junit/cars_20mpg.csv"))
  table = cars[2].table().as_data_frame()
  table = dict(table[1:])
  table = {k:int(v) for k,v in list(table.items())}
  expected = Counter(itertools.chain(*cars[2].as_data_frame()[1:]))
  assert table == expected, "Expected {} for table counts but got {}".format(expected, table)

开发者ID:Kaushik512，项目名称:h2o-3，代码行数:34，代码来源:pyunit_table.py

示例8: smallcat_gbm

def smallcat_gbm():
  # Training set has 26 categories from A to Z
  # Categories A, C, E, G, ... are perfect predictors of y = 1
  # Categories B, D, F, H, ... are perfect predictors of y = 0

  alphabet = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/alphabet_cattest.csv"))
  alphabet["y"] = alphabet["y"].asfactor()
  #Log.info("Summary of alphabet_cattest.csv from H2O:\n")
  #alphabet.summary()

  # Prepare data for scikit use
  trainData = np.loadtxt(pyunit_utils.locate("smalldata/gbm_test/alphabet_cattest.csv"), delimiter=',', skiprows=1, converters={0:lambda s: ord(s.decode().split("\"")[1])})
  trainDataResponse = trainData[:,1]
  trainDataFeatures = trainData[:,0]

  # Train H2O GBM Model:

  gbm_h2o = H2OGradientBoostingEstimator(distribution="bernoulli",
                                         ntrees=1,
                                         max_depth=1,
                                         nbins=100)
  gbm_h2o.train(x="X",y="y", training_frame=alphabet)
  gbm_h2o.show()

  # Train scikit GBM Model:
  # Log.info("scikit GBM with same parameters:")
  gbm_sci = ensemble.GradientBoostingClassifier(n_estimators=1, max_depth=1, max_features=None)
  gbm_sci.fit(trainDataFeatures[:,np.newaxis],trainDataResponse)

开发者ID:AllCodeNoGyaan，项目名称:h2o-3，代码行数:28，代码来源:pyunit_smallcat_gbm.py

示例9: glrm_catagorical_bug_fix

def glrm_catagorical_bug_fix():
    trainData = h2o.import_file(pyunit_utils.locate("smalldata/airlines/AirlinesTest.csv.zip"))
    testData = h2o.import_file(pyunit_utils.locate("smalldata/airlines/AirlinesTrain.csv.zip"))
    glrmModel = H2OGeneralizedLowRankEstimator(k=4)
    glrmModel.train(x=trainData.names, training_frame=trainData)
    predV = glrmModel.predict(testData)
    print(predV)

开发者ID:michalkurka，项目名称:h2o-3，代码行数:7，代码来源:pyunit_PUBDEV_5776_glrm_fix_new_enum_level.py

示例10: dim_checks

def dim_checks():
  
  

  # Log.info("Uploading logreg/princeton/cuse.dat")
  h2o_data = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
  np_data = np.loadtxt(pyunit_utils.locate("smalldata/logreg/prostate.csv"), delimiter=',', skiprows=1)

  h2o_rows, h2o_cols = h2o_data.dim
  np_rows, np_cols = list(np_data.shape)

  print('The dimensions of h2o frame is: {0} x {1}'.format(h2o_rows, h2o_cols))
  print('The dimensions of numpy array is: {0} x {1}'.format(np_rows, np_cols))

  assert [h2o_rows, h2o_cols] == [np_rows, np_cols], "expected equal number of columns and rows"

  # Log.info("Slice out a column and data frame it, try dim on it...")

  h2o_slice = h2o_data[4]
  np_slice = np_data[:,4]

  h2o_rows, h2o_cols = h2o_slice.dim
  np_rows = np_slice.shape[0]

  print('The dimensions of h2o column slice is: {0} x {1}'.format(h2o_rows, h2o_cols))
  print('The dimensions of numpy array column slice is: {0} x 1'.format(np_rows))

  assert [h2o_rows, h2o_cols] == [np_rows, 1], "expected equal number of columns and rows"

  # Log.info("OK, now try an operator, e.g. '&', and then check dimensions agao...")

  h2oColAmpFive = h2o_slice & 5

  assert h2oColAmpFive.nrow == h2o_rows, "expected the number of rows to remain unchanged"

开发者ID:AllCodeNoGyaan，项目名称:h2o-3，代码行数:34，代码来源:pyunit_dim.py

示例11: link_functions_gaussian

def link_functions_gaussian():
  print("Read in prostate data.")
  h2o_data = h2o.import_file(path=pyunit_utils.locate("smalldata/prostate/prostate_complete.csv.zip"))
  h2o_data.head()

  sm_data = pd.read_csv(zipfile.ZipFile(pyunit_utils.locate("smalldata/prostate/prostate_complete.csv.zip")).
                        open("prostate_complete.csv")).as_matrix()
  sm_data_response = sm_data[:,9]
  sm_data_features = sm_data[:,1:9]

  print("Testing for family: GAUSSIAN")
  print("Set variables for h2o.")
  myY = "GLEASON"
  myX = ["ID","AGE","RACE","CAPSULE","DCAPS","PSA","VOL","DPROS"]

  print("Create models with canonical link: IDENTITY")
  h2o_model = H2OGeneralizedLinearEstimator(family="gaussian", link="identity",alpha=0.5, Lambda=0)
  h2o_model.train(x=myX, y=myY, training_frame=h2o_data)
  sm_model = sm.GLM(endog=sm_data_response, exog=sm_data_features,
                    family=sm.families.Gaussian(sm.families.links.identity)).fit()

  print("Compare model deviances for link function identity")
  h2o_deviance = old_div(h2o_model.residual_deviance(), h2o_model.null_deviance())
  sm_deviance = old_div(sm_model.deviance, sm_model.null_deviance)
  assert h2o_deviance - sm_deviance < 0.01, "expected h2o to have an equivalent or better deviance measures"

开发者ID:AllCodeNoGyaan，项目名称:h2o-3，代码行数:25，代码来源:pyunit_link_functions_gaussian_glm.py

示例12: fiftycatGBM

def fiftycatGBM():
  
  

  # Training set has only 45 categories cat1 through cat45
  #Log.info("Importing 50_cattest_train.csv data...\n")
  train = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/50_cattest_train.csv"))
  train["y"] = train["y"].asfactor()

  #Log.info("Summary of 50_cattest_train.csv from H2O:\n")
  #train.summary()
  
  # Train H2O GBM Model:
  #Log.info(paste("H2O GBM with parameters:\nntrees = 10, max_depth = 20, nbins = 20\n", sep = ""))
  model = h2o.gbm(x=train[["x1","x2"]], y=train["y"], distribution="bernoulli", ntrees=10, max_depth=5, nbins=20)
  model.show()
 
  # Test dataset has all 50 categories cat1 through cat50
  #Log.info("Importing 50_cattest_test.csv data...\n")
  test = h2o.import_file(path=pyunit_utils.locate("smalldata/gbm_test/50_cattest_test.csv"))
  #Log.info("Summary of 50_cattest_test.csv from H2O:\n")
  #test.summary()
  
  # Predict on test dataset with GBM model:
  #Log.info("Performing predictions on test dataset...\n")
  predictions = model.predict(test)
  predictions.show()
  
  # Get the confusion matrix and AUC
  #Log.info("Confusion matrix of predictions (max accuracy):\n")
  performance = model.model_performance(test)
  test_cm = performance.confusion_matrix()
  test_auc = performance.auc()

开发者ID:Kaushik512，项目名称:h2o-3，代码行数:33，代码来源:pyunit_DEPRECATED_fiftycatGBM.py

示例13: xgboost_milsongs_gaussian_medium

def xgboost_milsongs_gaussian_medium():
    assert H2OXGBoostEstimator.available()

    # Import big dataset to ensure run across multiple nodes
    training_frame = h2o.import_file(pyunit_utils.locate("bigdata/laptop/milsongs/milsongs-train.csv.gz"))
    test_frame = h2o.import_file(pyunit_utils.locate("bigdata/laptop/milsongs/milsongs-test.csv.gz"))
    x = list(range(1,training_frame.ncol))
    y = 0

    # Model with maximum of 2 trees
    model_2_trees = H2OXGBoostEstimator(training_frame=training_frame, learn_rate=0.3,
                                        booster='gbtree', seed=1, ntrees=2, distribution='gaussian')
    model_2_trees.train(x=x, y=y, training_frame=training_frame)
    prediction_2_trees = model_2_trees.predict(test_frame)

    assert prediction_2_trees.nrows == test_frame.nrows

    # Model with 10 trees
    model_10_trees = H2OXGBoostEstimator(training_frame=training_frame, learn_rate=0.3,
                                         booster='gbtree', seed=1, ntrees=10, distribution='gaussian')
    model_10_trees.train(x=x, y=y, training_frame=training_frame)
    prediction_10_trees = model_10_trees.predict(test_frame)

    assert prediction_10_trees.nrows == test_frame.nrows

    ## Mean square error on model with lower number of decision trees should be higher
    assert model_2_trees.mse() > model_10_trees.mse()

开发者ID:StevenLOL，项目名称:h2o-3，代码行数:27，代码来源:pyunit_milsongs_gaussian_medium.py

示例14: export_file

def export_file():
    pros_hex = h2o.upload_file(pyunit_utils.locate("smalldata/prostate/prostate.csv"))
    pros_hex[1] = pros_hex[1].asfactor()
    pros_hex[3] = pros_hex[3].asfactor()
    pros_hex[4] = pros_hex[4].asfactor()
    pros_hex[5] = pros_hex[5].asfactor()
    pros_hex[8] = pros_hex[8].asfactor()

    p_sid = pros_hex.runif()
    pros_train = pros_hex[p_sid > 0.2, :]
    pros_test = pros_hex[p_sid <= 0.2, :]

    glm = H2OGeneralizedLinearEstimator(family="binomial")
    myglm = glm.train(x=list(range(2, pros_hex.ncol)), y=1, training_frame=pros_train)
    mypred = glm.predict(pros_test)

    def id_generator(size=6, chars=string.ascii_uppercase + string.digits):
        return "".join(random.choice(chars) for _ in range(size))

    fname = id_generator() + "_prediction.csv"

    path = pyunit_utils.locate("results")
    dname = path + "/" + fname

    h2o.export_file(mypred, dname)

    py_pred = pd.read_csv(dname)
    print(py_pred.head())
    h_pred = mypred.as_data_frame(True)
    print(h_pred.head())

    # Test to check if py_pred & h_pred are identical
    assert_frame_equal(py_pred, h_pred)

开发者ID:h2oai，项目名称:h2o-3，代码行数:33，代码来源:pyunit_export_file.py

示例15: anomaly

def anomaly():
  print("Deep Learning Anomaly Detection MNIST")

  train = h2o.import_file(pyunit_utils.locate("bigdata/laptop/mnist/train.csv.gz"))
  test = h2o.import_file(pyunit_utils.locate("bigdata/laptop/mnist/test.csv.gz"))

  predictors = list(range(0,784))
  resp = 784

  # unsupervised -> drop the response column (digit: 0-9)
  train = train[predictors]
  test = test[predictors]

  # 1) LEARN WHAT'S NORMAL
  # train unsupervised Deep Learning autoencoder model on train_hex

  ae_model = H2OAutoEncoderEstimator(activation="Tanh", hidden=[2], l1=1e-5, ignore_const_cols=False, epochs=1)
  ae_model.train(x=predictors,training_frame=train)

  # 2) DETECT OUTLIERS
  # anomaly app computes the per-row reconstruction error for the test data set
  # (passing it through the autoencoder model and computing mean square error (MSE) for each row)
  test_rec_error = ae_model.anomaly(test)

  # 3) VISUALIZE OUTLIERS
  # Let's look at the test set points with low/median/high reconstruction errors.
  # We will now visualize the original test set points and their reconstructions obtained
  # by propagating them through the narrow neural net.

  # Convert the test data into its autoencoded representation (pass through narrow neural net)
  test_recon = ae_model.predict(test)

开发者ID:AllCodeNoGyaan，项目名称:h2o-3，代码行数:31，代码来源:pyunit_anomaly_deeplearning_large.py

注：本文中的tests.pyunit_utils.locate函数示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。