本文整理匯總了Python中h2o.import_file方法的典型用法代碼示例。如果您正苦於以下問題:Python h2o.import_file方法的具體用法?Python h2o.import_file怎麽用?Python h2o.import_file使用的例子?那麽, 這裏精選的方法代碼示例或許可以為您提供幫助。您也可以進一步了解該方法所在類h2o
的用法示例。
在下文中一共展示了h2o.import_file方法的3個代碼示例,這些例子默認根據受歡迎程度排序。您可以為喜歡或者感覺有用的代碼點讚,您的評價將有助於係統推薦出更棒的Python代碼示例。
示例1: DSS_dataset_to_H2O_frame
# 需要導入模塊: import h2o [as 別名]
# 或者: from h2o import import_file [as 別名]
def DSS_dataset_to_H2O_frame(dataset_name): #, partition_id = None
"""This function passes the path of the data files to H2O (it does not stream the data through Python)."""
dataset = dataiku.Dataset(dataset_name)
settings = dataset.get_config()
if settings['type'] not in ['Filesystem', 'UploadedFiles', 'HDFS']:
print 'Warning: Datasets of type '+settings['type']+' are not supported for now. '
'Supported types are Filesystem, UploadedFiles and HDFS.'
separator = settings['formatParams'].get('separator',"").decode('unicode_escape')
print 'separator: <' + separator.encode('unicode_escape') + '>'
if separator == '\t':
print "Warning: H2O does not seems to support empty columns when the separator is tab."
col_names = [col['name'] for col in settings['schema']['columns']]
dataset_path = dataset.get_location_info()['info']['path'].encode('utf-8')
pathsByPartition = dataset.get_files_info()['pathsByPartition']
partitions = dataset.read_partitions if dataset.read_partitions else ['NP']
files = [file for partition in partitions for file in pathsByPartition[partition]]
filepaths = [dataset_path + file['path'] for file in files if has_data(file)]
print "filepaths:"
for f in filepaths:
print f
return h2o.import_file(
path = filepaths,
destination_frame = 'DSS.H2O_connector.dataset.' + dataset.full_name + '.' + '/'.join(partitions),
header = 0 if 'parseHeaderRow' not in settings['formatParams'] else 1 if settings['formatParams']['parseHeaderRow'] else -1,
sep = separator,
col_names = col_names,
col_types=None,
na_strings=None
# ,parse_type= 'CSV' if settings['formatType']=='csv' else None
)
示例2: test_init_read
# 需要導入模塊: import h2o [as 別名]
# 或者: from h2o import import_file [as 別名]
def test_init_read(self):
    """Smoke test: start a local H2O cluster, import the training CSV,
    and check the expected row count."""
    h2o.init()
    frame = h2o.import_file("/input/tests/data/train.csv", destination_frame="train")
    self.assertEqual(100, frame.nrow)
示例3: _prepare_one_hot
# 需要導入模塊: import h2o [as 別名]
# 或者: from h2o import import_file [as 別名]
def _prepare_one_hot(file, y, exclude_cols=None):
    """Split an H2O dataset 95/5 and one-hot encode its categorical columns.

    The encoder is fitted on the training split only (unknown test levels
    are ignored). Encoded column names are "<column>.<level>".

    :param file: data file name, resolved relative to this module's directory.
    :param y: target column name (never encoded).
    :param exclude_cols: optional list of columns to leave out entirely.
    :returns: (train, test) — train as an H2OFrame including ``y``;
        test as a pandas DataFrame.
        NOTE(review): the test split keeps only feature columns (no ``y``)
        and is not converted to an H2OFrame — mirrors original behavior,
        confirm this asymmetry is intended.
    """
    exclude = [] if exclude_cols is None else exclude_cols
    base_dir = os.path.dirname(os.path.realpath(__file__))
    frame = h2o.import_file(base_dir + "/" + file)
    train, test = frame.split_frame([0.95], seed=42)

    # Partition columns: categoricals get encoded, the rest pass through.
    categorical = []
    passthrough = []
    for col, ctype in test.types.items():
        if col == y or col in exclude:
            continue
        if ctype == "enum":
            categorical.append(col)
        else:
            passthrough.append(col)

    train_pdf = train.as_data_frame()
    enc = OneHotEncoder(categories='auto', handle_unknown='ignore')
    enc.fit(train_pdf.loc[:, categorical])

    # One output column per (source column, observed level) pair.
    encoded_names = [
        col + "." + level
        for col, levels in zip(categorical, enc.categories_)
        for level in levels
    ]

    train_encoded = pd.DataFrame(enc.transform(train_pdf.loc[:, categorical].values).toarray())
    train_encoded.columns = encoded_names
    train = H2OFrame(train_pdf.loc[:, passthrough + [y]].join(train_encoded))

    test_pdf = test.as_data_frame()
    test_encoded = pd.DataFrame(enc.transform(test_pdf.loc[:, categorical].values).toarray())
    test_encoded.columns = encoded_names
    test = test_pdf.loc[:, passthrough].join(test_encoded)
    return train, test