

Python pandas.read_feather Method Code Examples

This article collects and summarizes typical code examples of the pandas.read_feather method in Python. If you are wondering what pandas.read_feather does, how to call it, or what it looks like in real code, the curated examples below may help. You can also explore further usage examples from the pandas library that this method belongs to.


The following presents 15 code examples of pandas.read_feather, sorted by popularity by default.
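
Before going through the examples, here is a minimal round-trip sketch of writing and reading the Feather format (the file name demo.feather is purely illustrative, and pandas needs the pyarrow backend installed for to_feather/read_feather):

import pandas as pd

df = pd.DataFrame({'id': [1, 2, 3], 'value': [0.1, 0.2, 0.3]})
df.to_feather('demo.feather')                  # write the frame in Feather format
restored = pd.read_feather('demo.feather')     # read it back as a DataFrame
pd.testing.assert_frame_equal(df, restored)    # the default RangeIndex survives the round trip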

Example 1: load_feature

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_feature(feature_name: Union[int, str], directory: str = './features/',
                 ignore_columns: List[str] = None) -> pd.DataFrame:
    """
    Load feature as pandas DataFrame.

    Args:
        feature_name:
            The name of the feature (used in ``save_feature``).
        directory:
            The directory where the feature is stored.
        ignore_columns:
            The list of columns that will be dropped from the loaded dataframe.
    Returns:
        The feature dataframe
    """
    path = os.path.join(directory, str(feature_name) + '.f')

    df = pd.read_feather(path)
    if ignore_columns:
        return df.drop([c for c in ignore_columns if c in df.columns], axis=1)
    else:
        return df 
Author: nyanp, Project: nyaggle, Lines: 24, Source: feature_store.py
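
A hypothetical call of load_feature, assuming a feature named 'age_stats' was previously written to ./features/age_stats.f (for example by the matching save_feature mentioned in the docstring), and that 'target' is a column we want to drop:

df = load_feature('age_stats', directory='./features/', ignore_columns=['target'])
print(df.head())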

Example 2: get_proteome_percentages

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_percentages(counts_df, outpath, force_rerun=False):
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_percents_df = pd.DataFrame(columns=counts_df.columns)
        for strain in counts_df.columns:
            totals = list(filter(lambda x: x.endswith('total'), counts_df[strain].index))
            for t in totals:
                counts = t.rsplit('_', 1)[0]
                aa_counts = list(filter(lambda x: (x.startswith(counts) and x not in totals), counts_df[strain].index))
                for aa_count in aa_counts:
                    big_strain_percents_df.at[aa_count.replace('count', '%'), strain] = counts_df[strain][aa_count]/counts_df[strain][t]

        big_strain_percents_df.astype(float).reset_index().to_feather(outpath)
    else:
        big_strain_percents_df = pd.read_feather(outpath).set_index('index')

    big_strain_percents_df.index.name = None
    return big_strain_percents_df 
Author: SBRG, Project: ssbio, Lines: 19, Source: atlas3.py

Example 3: load_dataset

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_dataset(paths, index=None) -> pd.DataFrame:
    assert len(paths) > 0

    feature_datasets = []
    for path in paths:
        if index is None:
            feature_datasets.append(pd.read_feather(path))
        else:
            feature_datasets.append(pd.read_feather(path).loc[index])
        gc.collect()
    # check that all feature datasets share the same index
    index = feature_datasets[0].index
    for feature_dataset in feature_datasets[1:]:
        pandas.testing.assert_index_equal(index, feature_dataset.index)

    return pd.concat(feature_datasets, axis=1) 
Author: flowlight0, Project: talkingdata-adtracking-fraud-detection, Lines: 18, Source: run.py

Example 4: __init__

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def __init__(self, path: str = None, keep_in_memory: bool = True) -> None:
        if not os.path.exists(path + '.meta'):
            raise FileNotFoundError(os.path.abspath(path + '.meta'))

        # pandas 0.22 has the fastest MultiIndex
        if pd.__version__.startswith('0.22'):
            import feather
            cols = feather.read_dataframe(path + '.meta')
        else:
            cols = pd.read_feather(path + '.meta')

        ohlcv = cols.ohlcv.values
        adjustments = cols.adjustments.values[:2]
        if adjustments[0] is None:
            adjustments = None
        super().__init__(path, ohlcv, adjustments)
        self.keep_in_memory = keep_in_memory
        self._cache = None 
Author: Heerozh, Project: spectre, Lines: 20, Source: arrow.py

Example 5: _make_train_test_split

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def _make_train_test_split(self, seed=1234):
        from sklearn.model_selection import train_test_split
        np.random.seed(seed)
        covariates = pd.read_feather(self._path_dir / 'covariates.feather')

        def train_test_split_customer(df, col_customer, test_size):
            tr, te = train_test_split(df[[col_customer]].drop_duplicates(), test_size=test_size)
            train = df.merge(tr, how='right', on=col_customer)
            test = df.merge(te, how='right', on=col_customer)
            return train, test

        train, test = train_test_split_customer(covariates, 'msno', 0.25)
        train, val = train_test_split_customer(train, 'msno', 0.1)

        assert train.merge(test, how='inner', on='msno').shape[0] == 0
        assert train.merge(val, how='inner', on='msno').shape[0] == 0
        assert test.merge(val, how='inner', on='msno').shape[0] == 0

        train.to_feather(self._path_dir / 'train.feather')
        test.to_feather(self._path_dir / 'test.feather')
        val.to_feather(self._path_dir / 'val.feather') 
Author: havakv, Project: pycox, Lines: 23, Source: from_kkbox.py

Example 6: check_round_trip

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def check_round_trip(self, df, expected=None, **kwargs):

        if expected is None:
            expected = df

        with ensure_clean() as path:
            to_feather(df, path)

            result = read_feather(path, **kwargs)
            assert_frame_equal(result, expected) 
Author: Frank-qlu, Project: recruit, Lines: 12, Source: test_feather.py

Example 7: test_path_pathlib

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_path_pathlib(self):
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_pathlib(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result) 
Author: Frank-qlu, Project: recruit, Lines: 6, Source: test_feather.py

Example 8: test_path_localpath

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_path_localpath(self):
        df = tm.makeDataFrame().reset_index()
        result = tm.round_trip_localpath(df.to_feather, pd.read_feather)
        tm.assert_frame_equal(df, result) 
Author: Frank-qlu, Project: recruit, Lines: 6, Source: test_feather.py

Example 9: load

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load(self):
        self.train = pd.read_feather(str(self.train_path))
        self.test = pd.read_feather(str(self.test_path)) 
Author: upura, Project: ml-competition-template-titanic, Lines: 5, Source: base.py

Example 10: load_datasets

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def load_datasets(feats):
    dfs = [pd.read_feather(f'features/{f}_train.feather') for f in feats]
    X_train = pd.concat(dfs, axis=1, sort=False)
    dfs = [pd.read_feather(f'features/{f}_test.feather') for f in feats]
    X_test = pd.concat(dfs, axis=1, sort=False)
    return X_train, X_test 
Author: upura, Project: ml-competition-template-titanic, Lines: 8, Source: __init__.py

Example 11: check_round_trip

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def check_round_trip(self, df, **kwargs):

        with ensure_clean() as path:
            to_feather(df, path)

            with catch_warnings(record=True):
                result = read_feather(path, **kwargs)
            assert_frame_equal(result, df) 
Author: birforce, Project: vnpy_crypto, Lines: 10, Source: test_feather.py

Example 12: _deserialize

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def _deserialize(data: bytes, msgpacked_cols: List[str]) -> pd.DataFrame:
        """
        Data are returned as feather-packed pandas DataFrames.
        Due to limitations in pyarrow, some objects are msgpacked inside the DataFrame.
        """
        import pyarrow

        df = pd.read_feather(pyarrow.BufferReader(data))
        for col in msgpacked_cols:
            df[col] = df[col].apply(lambda element: deserialize(element, "msgpack-ext"))

        if "index" in df.columns:
            df.set_index("index", inplace=True)  # pandas.to_feather does not support indexes,
            # so we have to send indexless frames over the wire, and set the index here.
        return df 
Author: MolSSI, Project: QCPortal, Lines: 17, Source: dataset_view.py
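
A short sketch of the index-less round trip that the snippet above describes (all names are illustrative): Feather does not preserve a DataFrame index, so the sender resets it into an 'index' column and the reader restores it with set_index after pd.read_feather:

import pandas as pd
import pyarrow
import pyarrow.feather as feather

df = pd.DataFrame({'value': [1.0, 2.0]}, index=pd.Index(['a', 'b'], name='index'))

sink = pyarrow.BufferOutputStream()
feather.write_feather(df.reset_index(), sink)   # serialize an index-less frame
data = sink.getvalue().to_pybytes()             # raw bytes, as received by _deserialize

restored = pd.read_feather(pyarrow.BufferReader(data)).set_index('index')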

Example 13: test_read_feather

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def test_read_feather(self):
        data = pd.read_feather("/input/tests/data/feather-0_3_1.feather")

        self.assertEqual(10, data.size) 
Author: Kaggle, Project: docker-python, Lines: 6, Source: test_pandas.py

Example 14: get_proteome_counts_simple

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_counts_simple(prots_filtered_feathers, outpath, length_filter_pid=None,
                               copynum_scale=False, copynum_df=None,
                               force_rerun=False):
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_counts_df = pd.DataFrame()
        first = True
        for feather in prots_filtered_feathers:
            loaded = load_feather(protein_feather=feather, length_filter_pid=length_filter_pid,
                                  copynum_scale=copynum_scale,
                                  copynum_df=copynum_df)
            if first:
                big_strain_counts_df = pd.DataFrame(index=loaded.index, columns=loaded.columns)
                first = False
            big_strain_counts_df = big_strain_counts_df.add(loaded, fill_value=0)
        if len(big_strain_counts_df) > 0:
            big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
        return big_strain_counts_df
    else:
        return pd.read_feather(outpath).set_index('index')


# def get_proteome_counts_simple_sc(sc, prots_filtered_feathers, outpath, length_filter_pid=None,
#                                     copynum_scale=False, copynum_df=None,
#                                     force_rerun=False):
#     import ssbio.utils
#     if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
#         protein_feathers_final_rdd = sc.parallelize(prots_filtered_feathers)
#         mapper = protein_feathers_final_rdd.map(lambda x: load_feather(protein_feather=x, length_filter_pid=None,
#                                                                        copynum_scale=copynum_scale,
#                                                                        copynum_df=copynum_df))
#         big_strain_counts_df = mapper.reduce(lambda df1, df2: df1.add(df2, fill_value=0))
#         big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
#         return big_strain_counts_df
#     else:
#         return pd.read_feather(outpath).set_index('index') 
Author: SBRG, Project: ssbio, Lines: 37, Source: atlas3.py

Example 15: get_proteome_counts_impute_missing

# Required import: import pandas [as alias]
# Or: from pandas import read_feather [as alias]
def get_proteome_counts_impute_missing(prots_filtered_feathers, outpath, length_filter_pid=None,
                                       copynum_scale=False, copynum_df=None,
                                       force_rerun=False):
    """Get counts, uses the mean feature vector to fill in missing proteins for a strain"""
    if ssbio.utils.force_rerun(flag=force_rerun, outfile=outpath):
        big_strain_counts_df = pd.DataFrame()
        first = True
        for feather in prots_filtered_feathers:
            loaded = load_feather(protein_feather=feather, length_filter_pid=length_filter_pid,
                                  copynum_scale=copynum_scale,
                                  copynum_df=copynum_df)
            if first:
                big_strain_counts_df = pd.DataFrame(index=_all_counts, columns=loaded.columns)
                first = False

            new_columns = list(set(loaded.columns.tolist()).difference(big_strain_counts_df.columns))
            if new_columns:
                for col in new_columns:
                    big_strain_counts_df[col] = big_strain_counts_df.mean(axis=1)

            not_in_loaded = list(set(big_strain_counts_df.columns).difference(loaded.columns.tolist()))
            if not_in_loaded:
                for col in not_in_loaded:
                    big_strain_counts_df[col] = big_strain_counts_df[col] + loaded.mean(axis=1)

            big_strain_counts_df = big_strain_counts_df.add(loaded, fill_value=0)

        if len(big_strain_counts_df) > 0:
            big_strain_counts_df.astype(float).reset_index().to_feather(outpath)
        return big_strain_counts_df
    else:
        return pd.read_feather(outpath).set_index('index') 
Author: SBRG, Project: ssbio, Lines: 34, Source: atlas3.py


Note: The pandas.read_feather examples in this article were compiled by 纯净天空 from GitHub, MSDocs, and other open-source code and documentation platforms. The snippets were selected from open-source projects contributed by their respective developers; copyright of the source code remains with the original authors. Please consult each project's license before using or redistributing the code, and do not reproduce this article without permission.