本文整理汇总了Python中feather.read_dataframe方法的典型用法代码示例。如果您正苦于以下问题：Python feather.read_dataframe方法的具体用法？Python feather.read_dataframe怎么用？Python feather.read_dataframe使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块feather的用法示例。
在下文中一共展示了feather.read_dataframe方法的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: load
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load(filename):
    """Load data saved with save() (or plain pickle), or read a feather file.

    The file type is chosen from the (case-insensitive) filename suffix:
    '.gz' is opened through gzip and unpickled, '.feather' / '.fthr' is read
    with feather.read_dataframe, anything else is unpickled directly.

    Keyword arguments:
    filename -- String with the relative filename of the pickle/feather to load.

    Returns the unpickled object, or the result of feather.read_dataframe.

    Raises NotImplementedError for '.feather.gz' / '.fthr.gz' files.
    """
    fl = filename.lower()
    if fl.endswith(('.feather.gz', '.fthr.gz')):
        raise NotImplementedError('Compressed feather is not supported.')
    if fl.endswith(('.feather', '.fthr')):
        # Imported only on this branch, so pickle files load without feather.
        import feather
        return feather.read_dataframe(filename)

    # NOTE(security): pickle.load can execute arbitrary code; only load
    # files that come from a trusted source.
    opener = gzip.open if fl.endswith('.gz') else open
    # The context manager closes the handle even if unpickling fails.
    with opener(filename, 'rb') as fp:
        return pickle.load(fp)
示例2: _load_table
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def _load_table(self, src, fmt, dst=None, post=None, *args, **kwargs):
    """Load a data frame from a table format (csv, hdf5 or feather).

    Parameters
    ----------
    src : passed as the first argument to the reader for `fmt`
    fmt : str
        One of 'csv', 'feather' or 'hdf5'.
    dst : forwarded to `self.load` (and to `post`, if given)
    post : callable, optional
        Called as ``post(data, src=src, fmt=fmt, dst=dst, **kwargs)``; its
        return value replaces the loaded data.
    *args, **kwargs : forwarded to the reader (``kwargs`` also go to `post`)

    Raises
    ------
    ValueError
        If `fmt` is not one of the supported formats.
    """
    if fmt == 'csv':
        _data = pd.read_csv(src, *args, **kwargs)
    elif fmt == 'feather':
        _data = feather.read_dataframe(src, *args, **kwargs)
    elif fmt == 'hdf5':
        _data = pd.read_hdf(src, *args, **kwargs)
    else:
        # Without this branch an unknown format left `_data` unbound and
        # failed later with an unhelpful UnboundLocalError.
        raise ValueError("Unsupported table format %r; expected 'csv', 'feather' or 'hdf5'" % (fmt,))

    # Put into this batch only part of it (defined by index)
    if isinstance(_data, pd.DataFrame):
        _data = _data.loc[self.indices]
    elif isinstance(_data, dd.DataFrame):
        # dask.DataFrame.loc supports advanced indexing only with lists
        _data = _data.loc[list(self.indices)].compute()

    if callable(post):
        _data = post(_data, src=src, fmt=fmt, dst=dst, **kwargs)
    self.load(src=_data, dst=dst)
示例3: __init__
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
    """Read the '<path>.meta' feather file and initialise the parent class.

    The meta file's ``ohlcv`` column and the first two entries of its
    ``adjustments`` column are handed to the parent constructor; if the
    first adjustment entry is None, None is passed instead.

    Raises FileNotFoundError (with the absolute meta path) when the meta
    file does not exist.
    """
    meta_path = path + '.meta'
    if not os.path.exists(meta_path):
        raise FileNotFoundError(os.path.abspath(meta_path))

    # pandas 0.22 has the fastest MultiIndex
    if pd.__version__.startswith('0.22'):
        import feather
        meta = feather.read_dataframe(meta_path)
    else:
        meta = pd.read_feather(meta_path)

    ohlcv_columns = meta.ohlcv.values
    adjustment_columns = meta.adjustments.values[:2]
    if adjustment_columns[0] is None:
        adjustment_columns = None

    super().__init__(path, ohlcv_columns, adjustment_columns)
    self.keep_in_memory = keep_in_memory
    self._cache = None
示例4: read_feather
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_feather(path, nthreads=1):
    """
    Read a feather-format object from `path`.

    .. versionadded 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        Only forwarded when the installed feather is at least 0.4.0.

       .. versionadded 0.21.0

    Returns
    -------
    type of object stored in file
    """
    feather = _try_import()
    path = _stringify_path(path)

    # `nthreads` is only passed to feather releases from 0.4.0 onwards.
    too_old = LooseVersion(feather.__version__) < LooseVersion('0.4.0')
    reader_kwargs = {} if too_old else {'nthreads': nthreads}
    return feather.read_dataframe(path, **reader_kwargs)
示例5: load_full_dataset
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_full_dataset(self):
    """Read the dataset located at ``self.params.dataset_key``.

    Returns:
        The dataset as a DataFrame, read from a '.csv' or '.feather' file.

    Raises:
        Exception: if the file is missing, has an unrecognized extension,
            feather support is unavailable for a '.feather' file, or the
            loaded dataset is None or empty.
    """
    path = self.params.dataset_key
    if not os.path.exists(path):
        raise Exception("Dataset file %s does not exist" % path)

    is_feather = path.endswith('.feather')
    if not is_feather and not path.endswith('.csv'):
        raise Exception('Dataset %s is not a recognized format (csv or feather)' % path)
    if is_feather and not feather_supported:
        raise Exception("feather package not installed in current environment")

    if is_feather:
        frame = feather.read_dataframe(path)
    else:
        frame = pd.read_csv(path, index_col=False)

    if frame is None:
        raise Exception("Failed to load dataset %s" % path)
    if frame.empty:
        raise Exception("Dataset %s is empty" % path)
    return frame
# ****************************************************************************************
示例6: _load
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def _load(self) -> pd.DataFrame:
    """Return the feather file at ``self._path`` indexed by (date, asset).

    A previously cached frame (``self._cache``) is returned as-is. A freshly
    read frame is stored in the cache only when ``self.keep_in_memory`` is
    truthy.
    """
    cached = self._cache
    if cached is not None:
        return cached

    # Pick the reader first, then apply it once.
    if pd.__version__.startswith('0.22'):
        import feather
        read = feather.read_dataframe
    else:
        read = pd.read_feather

    frame = read(self._path)
    frame.set_index(['date', 'asset'], inplace=True)

    if self.keep_in_memory:
        self._cache = frame
    return frame
示例7: read_feather
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_feather(path, nthreads=1):
    """
    Load a feather-format object from the file path

    .. versionadded 0.20.0

    Parameters
    ----------
    path : string file path, or file-like object
    nthreads : int, default 1
        Number of CPU threads to use when reading to pandas.DataFrame.
        Ignored when the installed feather is older than 0.4.0.

       .. versionadded 0.21.0

    Returns
    -------
    type of object stored in file
    """
    feather = _try_import()
    path = _stringify_path(path)

    # Wrap both sides in LooseVersion so the comparison is an explicit
    # version comparison rather than relying on str-vs-LooseVersion
    # reflected comparison.
    if LooseVersion(feather.__version__) < LooseVersion('0.4.0'):
        return feather.read_dataframe(path)

    return feather.read_dataframe(path, nthreads=nthreads)
示例8: load_pair_data
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_pair_data(path):
    """Load the 'pval_nominal' column indexed by 'pair_id'.

    Supports tab-separated '.txt.gz' files (read with pandas) and '.ft'
    feather files. Raises ValueError for any other suffix.
    """
    wanted = ['pair_id', 'pval_nominal']

    if path.endswith('.ft'):
        table = feather.read_dataframe(path, columns=wanted)
        table.set_index('pair_id', inplace=True)
        return table

    if not path.endswith('.txt.gz'):
        raise ValueError('Input format not recognized.')

    column_types = {'pair_id': str, 'pval_nominal': np.float64}
    return pd.read_csv(path, sep='\t', usecols=wanted, index_col=0, dtype=column_types)
示例9: read_gct
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def read_gct(gct_file, sample_ids=None, dtype=None):
    """
    Load GCT as DataFrame. The first two columns must be 'Name' and 'Description'.

    Parameters
    ----------
    gct_file : str
        Path ending in '.gct', '.gct.gz', '.parquet' or '.ft' (feather).
    sample_ids : iterable of str, optional
        Sample columns to load; 'Name' is always added. None loads everything.
    dtype : optional
        dtype applied to the sample columns of '.gct' / '.gct.gz' input.
        'Name' and 'Description' are always read as str in that case.

    Returns
    -------
    DataFrame whose index is named 'gene_id', without the 'Description' column.

    Raises
    ------
    ValueError
        If the file suffix is not one of the supported formats.
    """
    if sample_ids is not None:
        sample_ids = ['Name'] + list(sample_ids)

    if gct_file.endswith(('.gct.gz', '.gct')):
        dtypes = None
        if dtype is not None:
            # Only use gzip for compressed input; a plain '.gct' must be
            # opened as regular text.
            opener = gzip.open if gct_file.endswith('.gz') else open
            with opener(gct_file, 'rt') as gct:
                # The column header is the third line (read_csv skips 2 rows).
                gct.readline()
                gct.readline()
                # Split on tabs, matching sep='\t' used by read_csv below.
                header = gct.readline().rstrip('\r\n').split('\t')
            dtypes = {col: dtype for col in header[2:]}
            dtypes['Name'] = str
            dtypes['Description'] = str
            if sample_ids is not None:
                # Keep the caller's column selection and only declare dtypes
                # for the columns that will actually be read.
                selected = set(sample_ids)
                dtypes = {col: typ for col, typ in dtypes.items() if col in selected}
        df = pd.read_csv(gct_file, sep='\t', skiprows=2, usecols=sample_ids, index_col=0, dtype=dtypes)
    elif gct_file.endswith('.parquet'):
        df = pd.read_parquet(gct_file, columns=sample_ids)
    elif gct_file.endswith('.ft'):  # feather format
        df = feather.read_dataframe(gct_file, columns=sample_ids)
        df = df.set_index('Name')
    else:
        raise ValueError('Unsupported input format.')

    df.index.name = 'gene_id'
    if 'Description' in df.columns:
        df = df.drop('Description', axis=1)
    return df
示例10: load_pair_data
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_pair_data(path):
    """Load the 'slope' and 'slope_se' columns indexed by 'pair_id'.

    Supports tab-separated '.txt.gz' files (read with pandas, float32
    values) and '.ft' feather files. Raises ValueError for any other suffix.
    """
    wanted = ['pair_id', 'slope', 'slope_se']

    if path.endswith('.ft'):
        table = feather.read_dataframe(path, columns=wanted)
        table.set_index('pair_id', inplace=True)
        return table

    if not path.endswith('.txt.gz'):
        raise ValueError('Input format not recognized.')

    column_types = {'pair_id': str, 'slope': np.float32, 'slope_se': np.float32}
    return pd.read_csv(path, sep='\t', usecols=wanted, index_col=0, dtype=column_types)
示例11: load_df
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def load_df(path):
    """Load a data frame from `path`.

    Anything `file_format` does not report as 'feather' goes through
    `default_csv_loader`. Feather files are read via featherpmm when both
    featherpmm and feather are available (returning the ``.df`` attribute of
    the result), otherwise via feather alone. Raises Exception with install
    instructions when feather is unavailable.
    """
    if file_format(path) != 'feather':
        return default_csv_loader(path)

    if not feather:
        raise Exception('The Python feather module is not installed.\n'
                        'Use:\n pip install feather-format\n'
                        'to add capability.\n')

    if featherpmm:
        return featherpmm.read_dataframe(path).df
    return feather.read_dataframe(path)
示例12: fun
# 需要导入模块: import feather [as 别名]
# 或者: from feather import read_dataframe [as 别名]
def fun(nGPUs=1, nFolds=1, nLambdas=100, nAlphas=8, validFraction=0.2, verbose=0,family="elasticnet", print_all_errors=False, tolerance=.001):
    """Fit a GLM on ./data/bnp.feather via run_glm and check the logloss.

    The last column of the feather file is used as the target and all other
    columns as features, both converted to C-ordered float32 arrays. All
    keyword arguments are forwarded to run_glm, together with the name of the
    calling function.

    Raises AssertionError when a checked training logloss exceeds its
    threshold.
    """
    # Name of the caller (one frame up), passed to run_glm as `name`.
    name = sys._getframe(1).f_code.co_name
    t = time.time()

    print("cwd: %s" % (os.getcwd()))
    sys.stdout.flush()

    print("Reading Data")
    df = feather.read_dataframe("./data/bnp.feather")
    print(df.shape)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    print("Y")
    print(y)

    t1 = time.time()

    logloss_train, logloss_test = run_glm(X, y, nGPUs=nGPUs, nlambda=nLambdas, nfolds=nFolds, nalpha=nAlphas,
                                          validFraction=validFraction, verbose=verbose,family=family,print_all_errors=print_all_errors,tolerance=tolerance, name=name)

    # check logloss
    print(logloss_train[0, 0])
    print(logloss_train[0, 1])
    print(logloss_train[0, 2])
    print(logloss_test[0, 2])
    sys.stdout.flush()

    #Always checking the first 3 alphas with specific logloss scores (.48,.44)
    if validFraction==0.0 and nFolds > 0:
        assert logloss_train[0, 0] < .49
        assert logloss_train[0, 1] < .49
        assert logloss_train[1, 0] < .52
        assert logloss_train[1, 1] < .52
        assert logloss_train[2, 0] < .49
        assert logloss_train[2, 1] < .49

    if validFraction > 0.0:
        assert logloss_train[0, 0] < .49
        assert logloss_train[0, 1] < .49
        assert logloss_train[0, 2] < .49
        assert logloss_train[1, 0] < .50
        assert logloss_train[1, 1] < .51
        assert logloss_train[1, 2] < .51
        assert logloss_train[2, 0] < .49
        assert logloss_train[2, 1] < .49
        assert logloss_train[2, 2] < .49

    sys.stdout.flush()

    # '\n' (not '/n') so the message starts on a fresh line.
    print('\n Total execution time:%d' % (time.time() - t1))

    print("TEST PASSED")
    sys.stdout.flush()

    print("Time taken: {}".format(time.time() - t))

    print("DONE.")
    sys.stdout.flush()