当前位置: 首页>>代码示例>>Python>>正文


Python pandas.read_hdf方法代码示例

本文整理汇总了Python中pandas.read_hdf方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.read_hdf方法的具体用法?Python pandas.read_hdf怎么用?Python pandas.read_hdf使用的例子?那么, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在pandas的用法示例。


在下文中一共展示了pandas.read_hdf方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: iMain

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def iMain():
    """
    Read an hdf file generated by us to make sure
    we can recover its content and structure.
    Give the name of an hdf5 file as a command-line argument.
    """
    assert sys.argv, __doc__
    sFile = sys.argv[1]
    assert os.path.isfile(sFile)
    oHdfStore = pandas.HDFStore(sFile, mode='r')
    print oHdfStore.groups()
    # bug - no return value
    # oSignals = pandas.read_hdf(oHdfStore, '/servings/signals')
    mSignals = oHdfStore.select('/recipe/servings/mSignals', auto_close=False)    
    print mSignals
    print oHdfStore.get_node('/recipe')._v_attrs.metadata[0]['sUrl'] 
开发者ID:OpenTrading,项目名称:OpenTrader,代码行数:18,代码来源:Omlette.py

示例2: test_write_fspath_hdf5

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def test_write_fspath_hdf5(self):
        # Same test as write_fspath_all, except HDF5 files aren't
        # necessarily byte-for-byte identical for a given dataframe, so we'll
        # have to read and compare equality
        pytest.importorskip('tables')

        df = pd.DataFrame({"A": [1, 2]})
        p1 = tm.ensure_clean('string')
        p2 = tm.ensure_clean('fspath')

        with p1 as string, p2 as fspath:
            mypath = CustomFSPath(fspath)
            df.to_hdf(mypath, key='bar')
            df.to_hdf(string, key='bar')

            result = pd.read_hdf(fspath, key='bar')
            expected = pd.read_hdf(string, key='bar')

        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:21,代码来源:test_common.py

示例3: load_hdf5_data

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def load_hdf5_data(file_path, **kwargs):
    key = kwargs.get('key', None)
    pandas_format = kwargs.get('pandas_format', True)
    mode = kwargs.get('mode', 'r')
    logger.info("Opening HDF5 file {} to read...".format(file_path))
    try:
        if pandas_format:
            data = pd.read_hdf(file_path, key=key, mode=mode)
        else:
            with h5py.File(file_path, mode) as f:
                data = f[key][()]
    except KeyError as e:
        logger.exception("Dataset {} does not exist".format(dataset))
        raise exceptions.FileLoadError("Dataset does not exist")
    except Exception as e:
        logger.exception("Problem loading dataset: {0}".format(e))
        raise exceptions.FileLoadError
    logger.info("Successfully loaded HDF5 data")
    return data 
开发者ID:autodeepnet,项目名称:autodeepnet,代码行数:21,代码来源:data_utils.py

示例4: test_write_data_frame

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def test_write_data_frame(hdf_file_path):
    key = hdf.EntityKey('cause.test.prevalence')
    data = build_table([lambda *args, **kwargs: random.choice([0, 1]), "Kenya", 1],
                       2005, 2010, columns=('age', 'year', 'sex', 'draw', 'location', 'value'))

    non_val_columns = data.columns.difference({'value'})
    data = data.set_index(list(non_val_columns))

    hdf._write_pandas_data(hdf_file_path, key, data)

    written_data = pd.read_hdf(hdf_file_path, key.path)
    assert written_data.equals(data)

    filter_terms = ['draw == 0']
    written_data = pd.read_hdf(hdf_file_path, key.path, where=filter_terms)
    assert written_data.equals(data.xs(0, level='draw', drop_level=False)) 
开发者ID:ihmeuw,项目名称:vivarium,代码行数:18,代码来源:test_hdf.py

示例5: participation_to_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def participation_to_list(h5in, outlist):
    trade_df = pd.read_hdf(h5in, 'trades')
    trade_df = trade_df.assign(trader_id = trade_df.resting_order_id.str.split('_').str[0])
    lt_df = pd.DataFrame(trade_df.groupby(['trader_id']).quantity.count())
    lt_df.rename(columns={'quantity': 'trade'}, inplace=True)
    if 'p999999' in lt_df.index:
        lt_df.drop('p999999', inplace=True)
    ltsum_df = pd.DataFrame(trade_df.groupby(['trader_id']).quantity.sum())
    ltsum_df.rename(columns={'quantity': 'trade_vol'}, inplace=True)
    ltsum_df = ltsum_df.assign(Participation = 100*ltsum_df.trade_vol/ltsum_df.trade_vol.sum())
    providers = ltsum_df.index.unique()
    market_makers = [x for x in providers if x.startswith('m')]
    market_makers.append('j0')
    ltsum_df = ltsum_df.ix[market_makers]
    part_dict = {'MCRun': j, 'MM_Participation': ltsum_df.loc['m0', 'Participation']}
    if 'j0' in providers:
        part_dict.update({'PJ_Participation': ltsum_df.loc['j0', 'Participation']})
    outlist.append(part_dict) 
开发者ID:JackBenny39,项目名称:pyziabm,代码行数:20,代码来源:runwrapper2017mpi_r3.py

示例6: profit_to_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def profit_to_list(h5in, outlist):
    trade_df = pd.read_hdf(h5in, 'trades')
    trade_df = trade_df.assign(trader_id = trade_df.resting_order_id.str.split('_').str[0])
    buy_trades = trade_df[trade_df.side=='buy']
    buy_trades = buy_trades.assign(BuyCashFlow = buy_trades.price*buy_trades.quantity)
    buy_trades = buy_trades.assign(BuyVol = buy_trades.groupby('trader_id').quantity.cumsum(),
                                   CumulBuyCF = buy_trades.groupby('trader_id').BuyCashFlow.cumsum()
                                  )
    buy_trades.rename(columns={'timestamp': 'buytimestamp'}, inplace=True)
    sell_trades = trade_df[trade_df.side=='sell']
    sell_trades = sell_trades.assign(SellCashFlow = -sell_trades.price*sell_trades.quantity)
    sell_trades = sell_trades.assign(SellVol = sell_trades.groupby('trader_id').quantity.cumsum(),
                                     CumulSellCF = sell_trades.groupby('trader_id').SellCashFlow.cumsum()
                                    )
    sell_trades.rename(columns={'timestamp': 'selltimestamp'}, inplace=True)
    buy_trades = buy_trades[['trader_id', 'BuyVol', 'CumulBuyCF', 'buytimestamp']]
    sell_trades = sell_trades[['trader_id', 'SellVol', 'CumulSellCF', 'selltimestamp']]
    cash_flow = pd.merge(buy_trades, sell_trades, left_on=['trader_id', 'BuyVol'], right_on=['trader_id', 'SellVol'])
    cash_flow = cash_flow.assign(NetCashFlow = cash_flow.CumulBuyCF + cash_flow.CumulSellCF)
    temp_df = cash_flow.groupby('trader_id')['NetCashFlow', 'BuyVol'].last()
    temp_df = temp_df.assign(NetCFPerShare = temp_df.NetCashFlow/temp_df.BuyVol)
    temp_df = temp_df[['NetCashFlow', 'NetCFPerShare']]
    outlist.append(temp_df) 
开发者ID:JackBenny39,项目名称:pyziabm,代码行数:25,代码来源:runwrapper2017mpi_r3.py

示例7: tradesrets_to_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def tradesrets_to_list(h5in, outlist):
    indf = pd.read_hdf(h5in, 'trades')
    trades = indf.price.count()
    minprice = indf.price.min()
    maxprice = indf.price.max()
    
    indf = indf.assign(ret = 100*indf.price.pct_change())
    indf = indf.assign(abs_ret = np.abs(indf.ret))
    lags = []
    autocorr = []
    abs_autocorr = []
    for i in range(1,51):
        ac = indf.ret.autocorr(lag = i)
        aac = indf.abs_ret.autocorr(lag = i)
        lags.append(i)
        autocorr.append(ac)
        abs_autocorr.append(aac)
    ar_df = pd.DataFrame({'lag': lags, 'autocorrelation': autocorr, 'autocorrelation_abs': abs_autocorr})
    ar_df.set_index('lag', inplace=True)
    clustering_constant = np.abs(ar_df.autocorrelation_abs.sum()/ar_df.autocorrelation.sum())
    
    returns_dict = {'Trades': trades, 'MinPrice': minprice, 'MaxPrice': maxprice, 'ClusteringConstant': clustering_constant,
                    'MeanRet': indf.ret.mean(), 'StdRet': indf.ret.std(), 'SkewRet': indf.ret.skew(),
                    'KurtosisRet': indf.ret.kurtosis(), 'MCRun': j}
    outlist.append(returns_dict) 
开发者ID:JackBenny39,项目名称:pyziabm,代码行数:27,代码来源:runwrapper2017mpi_r3.py

示例8: tradesrets_to_list

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def tradesrets_to_list(h5in, outlist):
    indf = pd.read_hdf(h5in, 'trades')
    trades = indf.price.count()
    minprice = indf.price.min()
    maxprice = indf.price.max()
    indf = indf.assign(ret = 100*indf.price.pct_change())
    indf = indf.assign(abs_ret = np.abs(indf.ret))
    lags = []
    autocorr = []
    abs_autocorr = []
    for i in range(1,51):
        ac = indf.ret.autocorr(lag = i)
        aac = indf.abs_ret.autocorr(lag = i)
        lags.append(i)
        autocorr.append(ac)
        abs_autocorr.append(aac)
    ar_df = pd.DataFrame({'lag': lags, 'autocorrelation': autocorr, 'autocorrelation_abs': abs_autocorr})
    ar_df.set_index('lag', inplace=True)
    clustering_constant = np.abs(ar_df.autocorrelation_abs.sum()/ar_df.autocorrelation.sum())
    returns_dict = {'Trades': trades, 'MinPrice': minprice, 'MaxPrice': maxprice, 'ClusteringConstant': clustering_constant,
                    'MeanRet': indf.ret.mean(), 'StdRet': indf.ret.std(), 'SkewRet': indf.ret.skew(),
                    'KurtosisRet': indf.ret.kurtosis(), 'MCRun': j}
    outlist.append(returns_dict) 
开发者ID:JackBenny39,项目名称:pyziabm,代码行数:25,代码来源:runwrapper2017mpi_r3x.py

示例9: _load_table

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def _load_table(self, src, fmt, dst=None, post=None, *args, **kwargs):
        """ Load a data frame from table formats: csv, hdf5, feather """
        if fmt == 'csv':
            _data = pd.read_csv(src, *args, **kwargs)
        elif fmt == 'feather':
            _data = feather.read_dataframe(src, *args, **kwargs)
        elif fmt == 'hdf5':
            _data = pd.read_hdf(src, *args, **kwargs)

        # Put into this batch only part of it (defined by index)
        if isinstance(_data, pd.DataFrame):
            _data = _data.loc[self.indices]
        elif isinstance(_data, dd.DataFrame):
            # dask.DataFrame.loc supports advanced indexing only with lists
            _data = _data.loc[list(self.indices)].compute()

        if callable(post):
            _data = post(_data, src=src, fmt=fmt, dst=dst, **kwargs)

        self.load(src=_data, dst=dst) 
开发者ID:analysiscenter,项目名称:batchflow,代码行数:22,代码来源:batch.py

示例10: write_to_hdf5

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def write_to_hdf5(self, list_buildings, locator):
        """read in the temporary results files and append them to the Totals.csv file."""
        df = None
        for name in list_buildings:
            temporary_file = locator.get_temporary_file('%(name)sT.hdf' % locals())
            if df is None:
                df = pd.read_hdf(temporary_file, key='dataset')
            else:
                df = df.append(pd.read_hdf(temporary_file, key='dataset'))
        df.to_hdf(locator.get_total_demand('hdf'), key='dataset')

        """read saved data of monthly values and return as totals"""
        monthly_data_buildings = [pd.read_hdf(locator.get_demand_results_file(building_name, 'hdf'), key=building_name)
                                  for building_name in
                                  list_buildings]
        return df, monthly_data_buildings 
开发者ID:architecture-building-systems,项目名称:CityEnergyAnalyst,代码行数:18,代码来源:demand_writers.py

示例11: _iter_native_dataset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def _iter_native_dataset(self, native_filters=None):
        current_fname = None
        for meta_tract in self._metadata:
            for meta_patch in meta_tract['patches']:
                tract_patch = {'tract': meta_tract['tract'], 'patch': meta_patch['patch']}
                if native_filters and not native_filters.check_scalar(tract_patch):
                    continue

                if current_fname != meta_tract['filename']:
                    current_fname = meta_tract['filename']
                    df = pd.read_hdf(os.path.join(self.base_dir, current_fname), 'df')

                slice_this = slice(*meta_patch['slice'])
                def native_quantity_getter(native_quantity):
                    # pylint: disable=W0640
                    # variables (df and slice_this) intentionally defined in loop
                    if native_quantity == '_FULL_PDF':
                        return df.iloc[slice_this, :self._n_pdf_bins].values
                    return df[native_quantity].values[slice_this]
                yield native_quantity_getter

    # Native quantity names in the photo-z catalog are too uninformative
    # Since native quantities will become regular quantities in composite catalog,
    # let us hide them all. 
开发者ID:LSSTDESC,项目名称:gcr-catalogs,代码行数:26,代码来源:photoz.py

示例12: resample_eICU_patient

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def resample_eICU_patient(pid, resample_factor_in_min, variables, upto_in_minutes):
    """
    Resample a *single* patient.
    """
    pat_df = pd.read_hdf(paths.eICU_hdf_dir + '/vitalPeriodic.h5',
                         where='patientunitstayid = ' + str(pid),
                         columns=['observationoffset', 'patientunitstayid'] + variables,
                         mode='r')
    # sometimes it's empty
    if pat_df.empty:
        return None
    if not upto_in_minutes is None:
        pat_df = pat_df.loc[0:upto_in_minutes*60]
    # convert the offset to a TimedeltaIndex (necessary for resampling)
    pat_df.observationoffset = pd.TimedeltaIndex(pat_df.observationoffset, unit='m')
    pat_df.set_index('observationoffset', inplace=True)
    pat_df.sort_index(inplace=True)
    # resample by time
    pat_df_resampled = pat_df.resample(str(resample_factor_in_min) + 'T').median()  # pandas ignores NA in median by default
    # rename pid, cast to int
    pat_df_resampled.rename(columns={'patientunitstayid': 'pid'}, inplace=True)
    pat_df_resampled['pid'] = np.int32(pat_df_resampled['pid'])
    # get offsets in minutes from index
    pat_df_resampled['offset'] = np.int32(pat_df_resampled.index.total_seconds()/60)
    return pat_df_resampled 
开发者ID:ratschlab,项目名称:RGAN,代码行数:27,代码来源:data_utils.py

示例13: get_twitter_sentiment_multilabel_classification_dataset

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def get_twitter_sentiment_multilabel_classification_dataset():

    file_name = os.path.join('tests', 'twitter_sentiment.h5')

    try:
        df_twitter = pd.read_hdf(file_name)
    except Exception as e:
        print('Error')
        print(e)
        dataset_url = 'https://raw.githubusercontent.com/ClimbsRocks/sample_datasets/master/twitter_airline_sentiment.csv'
        df_twitter = pd.read_csv(dataset_url, encoding='latin-1')
        # Do not write the index that pandas automatically creates

        df_twitter.to_hdf(file_name, key='df', format='fixed')

    # Grab only 10% of the dataset- runs much faster this way
    df_twitter = df_twitter.sample(frac=0.1)

    df_twitter['tweet_created'] = pd.to_datetime(df_twitter.tweet_created)

    df_twitter_train, df_twitter_test = train_test_split(df_twitter, test_size=0.33, random_state=42)
    return df_twitter_train, df_twitter_test 
开发者ID:ClimbsRocks,项目名称:auto_ml,代码行数:24,代码来源:utils_testing.py

示例14: test_hdf5

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def test_hdf5():
    df = pd.DataFrame({'i': [0, 0], 'j': [1, 2], 't': [0, 1]})
    tnet = teneto.TemporalNetwork(from_df=df, hdf5=True)
    if not tnet.network == './teneto_temporalnetwork.h5':
        raise AssertionError()
    df2 = pd.read_hdf('./teneto_temporalnetwork.h5')
    if not (df == df2).all().all():
        raise AssertionError()
    tnet.add_edge([0, 2, 2])
    df3 = pd.read_hdf('./teneto_temporalnetwork.h5')
    if not (df3.iloc[2].values == [0, 2, 2]).all():
        raise AssertionError()
    tnet.drop_edge([0, 2, 2])
    df4 = pd.read_hdf('./teneto_temporalnetwork.h5')
    if not (df == df4).all().all():
        raise AssertionError() 
开发者ID:wiheto,项目名称:teneto,代码行数:18,代码来源:test_temporalnetwork.py

示例15: _hdf2csv

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_hdf [as 别名]
def _hdf2csv(path):
    from glob import glob
    import pandas as pd
    from tqdm import tqdm as _tqdm
    from os.path import isdir

    if isdir(path):
        paths = glob(path + "/*.hdf5")
    else:
        paths = glob(path)
    if paths:
        import os.path

        for path in _tqdm(paths):
            base, ext = os.path.splitext(path)
            if ext == ".hdf5":
                print("Converting {}".format(path))
                out_path = base + ".csv"
                locs = pd.read_hdf(path)
                print("A total of {} rows loaded".format(len(locs)))
                locs.to_csv(out_path, sep=",", encoding="utf-8")
    print("Complete.") 
开发者ID:jungmannlab,项目名称:picasso,代码行数:24,代码来源:__main__.py


注:本文中的pandas.read_hdf方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。