This article collects typical usage examples of the Python method pandas.read_feather. If you are unsure what pandas.read_feather does, how to call it, or what real-world uses look like, the curated code samples below should help. You can also explore further usage examples from the pandas module itself.
The following 15 code examples of pandas.read_feather are sorted by popularity by default. You can upvote the examples you like or find useful; your feedback helps the system recommend better Python code samples.
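As a quick orientation before the examples, here is a minimal, self-contained round trip with pandas.read_feather. It assumes pyarrow is installed, since pandas' Feather support is backed by it:

import pandas as pd

# Write a small frame to Feather. Note that DataFrame.to_feather requires a
# default RangeIndex; frames with a custom index must call reset_index() first.
df = pd.DataFrame({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
df.to_feather('example.feather')

# Read it back and confirm the round trip is lossless.
restored = pd.read_feather('example.feather')
pd.testing.assert_frame_equal(df, restored)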
Example 1: load_feature
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_feature(feature_name: Union[int, str], directory: str = './features/',
                 ignore_columns: List[str] = None) -> pd.DataFrame:
    """
    Load a feature as a pandas DataFrame.

    Args:
        feature_name:
            The name of the feature (used in ``save_feature``).
        directory:
            The directory where the feature is stored.
        ignore_columns:
            The list of columns that will be dropped from the loaded dataframe.
    Returns:
        The feature dataframe.
    """
    path = os.path.join(directory, str(feature_name) + '.f')
    df = pd.read_feather(path)
    if ignore_columns:
        return df.drop([c for c in ignore_columns if c in df.columns], axis=1)
    else:
        return df
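The docstring above refers to a ``save_feature`` counterpart that is not included in this listing. Inferred from the path convention in load_feature, a hypothetical sketch might look like this (the name and signature are assumptions, not from the source):

def save_feature(df: pd.DataFrame, feature_name: Union[int, str],
                 directory: str = './features/') -> None:
    # Hypothetical counterpart: mirror the '<feature_name>.f' naming scheme
    # that load_feature reads from.
    df.to_feather(os.path.join(directory, str(feature_name) + '.f'))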
Example 2: get_proteome_percentages
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_percentages(counts_df, outpath, force_rerun=False):
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_percents_df = pd.DataFrame(columns=counts_df.columns)
        for strain in counts_df.columns:
            totals = list(filter(lambda x: x.endswith('total'), counts_df[strain].index))
            for t in totals:
                counts = t.rsplit('_', 1)[0]
                aa_counts = list(filter(lambda x: (x.startswith(counts) and x not in totals), counts_df[strain].index))
                for aa_count in aa_counts:
                    big_strain_percents_df.at[aa_count.replace('count', '%'), strain] = counts_df[strain][aa_count] / counts_df[strain][t]
        big_strain_percents_df.astype(float).reset_index().to_feather(outpath)
    else:
        big_strain_percents_df = pd.read_feather(outpath).set_index('index')
        big_strain_percents_df.index.name = None
    return big_strain_percents_df
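Examples 2, 14, and 15 share a caching idiom worth noting: Feather cannot serialize a non-default index, so the index is materialized as a column via reset_index() before writing and restored via set_index('index') after reading. Distilled into a standalone sketch (the function names here are illustrative, not from the source):

import pandas as pd

def cache_frame(df: pd.DataFrame, path: str) -> None:
    # Feather rejects non-default indexes, so store the index as a column.
    df.reset_index().to_feather(path)

def load_cached_frame(path: str) -> pd.DataFrame:
    # Recover the original index from the materialized 'index' column.
    return pd.read_feather(path).set_index('index')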
Example 3: load_dataset
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_dataset(paths, index=None) -> pd.DataFrame:
    assert len(paths) > 0
    feature_datasets = []
    for path in paths:
        if index is None:
            feature_datasets.append(pd.read_feather(path))
        else:
            feature_datasets.append(pd.read_feather(path).loc[index])
        gc.collect()
    # Check that all feature datasets share the same index.
    index = feature_datasets[0].index
    for feature_dataset in feature_datasets[1:]:
        pd.testing.assert_index_equal(index, feature_dataset.index)
    return pd.concat(feature_datasets, axis=1)
Example 4: __init__
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
    if not os.path.exists(path + '.meta'):
        raise FileNotFoundError(os.path.abspath(path + '.meta'))
    # pandas 0.22 has the fastest MultiIndex
    if pd.__version__.startswith('0.22'):
        import feather
        cols = feather.read_dataframe(path + '.meta')
    else:
        cols = pd.read_feather(path + '.meta')
    ohlcv = cols.ohlcv.values
    adjustments = cols.adjustments.values[:2]
    if adjustments[0] is None:
        adjustments = None
    super().__init__(path, ohlcv, adjustments)
    self.keep_in_memory = keep_in_memory
    self._cache = None
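Note: feather.read_dataframe in the version-gated branch comes from the standalone feather-format package, which predates pandas' built-in reader; this code falls back to it only when running under pandas 0.22 (kept, per the comment, for its faster MultiIndex). On any modern pandas, pd.read_feather alone is sufficient.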
Example 5: _make_train_test_split
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def _make_train_test_split(self, seed=1234):
    from sklearn.model_selection import train_test_split
    np.random.seed(seed)
    covariates = pd.read_feather(self._path_dir / 'covariates.feather')

    def train_test_split_customer(df, col_customer, test_size):
        tr, te = train_test_split(df[[col_customer]].drop_duplicates(), test_size=test_size)
        train = df.merge(tr, how='right', on=col_customer)
        test = df.merge(te, how='right', on=col_customer)
        return train, test

    train, test = train_test_split_customer(covariates, 'msno', 0.25)
    train, val = train_test_split_customer(train, 'msno', 0.1)
    assert train.merge(test, how='inner', on='msno').shape[0] == 0
    assert train.merge(val, how='inner', on='msno').shape[0] == 0
    assert test.merge(val, how='inner', on='msno').shape[0] == 0
    train.to_feather(self._path_dir / 'train.feather')
    test.to_feather(self._path_dir / 'test.feather')
    val.to_feather(self._path_dir / 'val.feather')
Example 6: check_round_trip
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def check_round_trip(self, df, expected=None, **kwargs):
    if expected is None:
        expected = df
    with ensure_clean() as path:
        to_feather(df, path)
        result = read_feather(path, **kwargs)
        assert_frame_equal(result, expected)
Example 7: test_path_pathlib
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_path_pathlib(self):
    df = tm.makeDataFrame().reset_index()
    result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
    tm.assert_frame_equal(df, result)
Example 8: test_path_localpath
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_path_localpath(self):
    df = tm.makeDataFrame().reset_index()
    result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
    tm.assert_frame_equal(df, result)
Example 9: load
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load(self):
    self.train = pd.read_feather(str(self.train_path))
    self.test = pd.read_feather(str(self.test_path))
Example 10: load_datasets
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_datasets(feats):
    dfs = [pd.read_feather(f'features/{f}_train.feather') for f in feats]
    X_train = pd.concat(dfs, axis=1, sort=False)
    dfs = [pd.read_feather(f'features/{f}_test.feather') for f in feats]
    X_test = pd.concat(dfs, axis=1, sort=False)
    return X_train, X_test
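A hypothetical call, assuming feature files such as features/price_train.feather and features/price_test.feather exist on disk:

X_train, X_test = load_datasets(['price', 'volume'])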
Example 11: check_round_trip
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def check_round_trip(self, df, **kwargs):
    with ensure_clean() as path:
        to_feather(df, path)
        with catch_warnings(record=True):
            result = read_feather(path, **kwargs)
        assert_frame_equal(result, df)
Example 12: _deserialize
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def _deserialize(data: bytes, msgpacked_cols: List[str]) -> pd.DataFrame:
    """
    Data are returned as feather-packed pandas DataFrames.
    Due to limitations in pyarrow, some objects are msgpacked inside the DataFrame.
    """
    import pyarrow

    df = pd.read_feather(pyarrow.BufferReader(data))
    for col in msgpacked_cols:
        df[col] = df[col].apply(lambda element: deserialize(element, "msgpack-ext"))
    # pandas.to_feather does not support indexes, so indexless frames are sent
    # over the wire and the index is restored here.
    if "index" in df.columns:
        df.set_index("index", inplace=True)
    return df
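The sending side is not shown in this listing. Given the constraint noted in the comment (Feather cannot carry a custom index), a plausible counterpart would reset the index into a column before packing. A hypothetical sketch, assuming a pandas version whose to_feather accepts a file-like object:

import io
import pandas as pd

def _serialize(df: pd.DataFrame) -> bytes:
    # Hypothetical mirror of _deserialize: materialize the index as an
    # 'index' column, then Feather-pack the frame into an in-memory buffer.
    buf = io.BytesIO()
    df.reset_index().to_feather(buf)
    return buf.getvalue()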
Example 13: test_read_feather
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_read_feather(self):
    data = pd.read_feather("/input/tests/data/feather-0_3_1.feather")
    self.assertEqual(10, data.size)
Example 14: get_proteome_counts_simple
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_counts_simple(prots_filtered_feathers, outpath, length_filter_pid=None,
                               copynum_scale=False, copynum_df=None,
                               force_rerun=False):
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_counts_df = pd.DataFrame()
        first = True
        for feather in prots_filtered_feathers:
            loaded = load_feather(protein_feather=feather, length_filter_pid=length_filter_pid,
                                  copynum_scale=copynum_scale,
                                  copynum_df=copynum_df)
            if first:
                big_strain_counts_df = pd.DataFrame(index=loaded.index, columns=loaded.columns)
                first = False
            big_strain_counts_df = big_strain_counts_df.add(loaded, fill_value=0)
        if len(big_strain_counts_df) > 0:
            big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
        return big_strain_counts_df
    else:
        return pd.read_feather(outpath).set_index('index')

# def get_proteome_counts_simple_sc(sc, prots_filtered_feathers, outpath, length_filter_pid=None,
#                                   copynum_scale=False, copynum_df=None,
#                                   force_rerun=False):
#     import ssbio.utils
#     if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
#         protein_feathers_final_rdd = sc.parallelize(prots_filtered_feathers)
#         mapper = protein_feathers_final_rdd.map(lambda x: load_feather(protein_feather=x,
#                                                                        length_filter_pid=None,
#                                                                        copynum_scale=copynum_scale,
#                                                                        copynum_df=copynum_df))
#         big_strain_counts_df = mapper.reduce(lambda df1, df2: df1.add(df2, fill_value=0))
#         big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
#         return big_strain_counts_df
#     else:
#         return pd.read_feather(outpath).set_index('index')
Example 15: get_proteome_counts_impute_missing
# Required module: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_counts_impute_missing(prots_filtered_feathers, outpath, length_filter_pid=None,
                                       copynum_scale=False, copynum_df=None,
                                       force_rerun=False):
    """Get counts, using the mean feature vector to fill in missing proteins for a strain."""
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_counts_df = pd.DataFrame()
        first = True
        for feather in prots_filtered_feathers:
            loaded = load_feather(protein_feather=feather, length_filter_pid=length_filter_pid,
                                  copynum_scale=copynum_scale,
                                  copynum_df=copynum_df)
            if first:
                big_strain_counts_df = pd.DataFrame(index=_all_counts, columns=loaded.columns)
                first = False
            new_columns = list(set(loaded.columns.tolist()).difference(big_strain_counts_df.columns))
            if new_columns:
                for col in new_columns:
                    big_strain_counts_df[col] = big_strain_counts_df.mean(axis=1)
            not_in_loaded = list(set(big_strain_counts_df.columns).difference(loaded.columns.tolist()))
            if not_in_loaded:
                for col in not_in_loaded:
                    big_strain_counts_df[col] = big_strain_counts_df[col] + loaded.mean(axis=1)
            big_strain_counts_df = big_strain_counts_df.add(loaded, fill_value=0)
        if len(big_strain_counts_df) > 0:
            big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
        return big_strain_counts_df
    else:
        return pd.read_feather(outpath).set_index('index')