本文整理汇总了Python中pandas.read_pickle方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.read_pickle方法的具体用法？Python pandas.read_pickle怎么用？Python pandas.read_pickle使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.read_pickle方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_write_explicit
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def test_write_explicit(self, compression, get_random_path):
    """Check that a pickle written with explicit ``compression`` round-trips.

    The frame is pickled with the given compression, the file is
    decompressed by hand into a second file, and that raw file is read
    back without compression and compared to the original frame.
    """
    compressed_path = get_random_path + ".compressed"
    raw_path = get_random_path + ".raw"
    with tm.ensure_clean(compressed_path) as p1:
        with tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # Write the frame using the requested compression.
            expected.to_pickle(p1, compression=compression)

            # Copy the decompressed byte stream into the raw file.
            with tm.decompress_file(p1, compression=compression) as src, \
                    open(p2, "wb") as dst:
                dst.write(src.read())

            # The raw file must load as an ordinary, uncompressed pickle.
            result = pd.read_pickle(p2, compression=None)
            tm.assert_frame_equal(expected, result)
示例2: test_write_infer
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def test_write_infer(self, ext, get_random_path):
    """Check that ``to_pickle`` called without ``compression`` honours ``ext``.

    The compression name used for manual decompression is the first key of
    ``self._compression_to_extension`` whose value equals ``ext`` (``None``
    when no key matches).
    """
    compressed_path = get_random_path + ext
    raw_path = get_random_path + ".raw"

    # First compression name mapped to this extension, or None.
    mapping = self._compression_to_extension
    compression = next((name for name in mapping if mapping[name] == ext), None)

    with tm.ensure_clean(compressed_path) as p1:
        with tm.ensure_clean(raw_path) as p2:
            expected = tm.makeDataFrame()

            # No compression argument: only the file name is available to
            # to_pickle here.
            expected.to_pickle(p1)

            # Copy the decompressed byte stream into the raw file.
            with tm.decompress_file(p1, compression=compression) as src, \
                    open(p2, "wb") as dst:
                dst.write(src.read())

            # The raw file must load as an ordinary, uncompressed pickle.
            result = pd.read_pickle(p2, compression=None)
            tm.assert_frame_equal(expected, result)
示例3: round_trip_pickle
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def round_trip_pickle(obj, path=None):
    """
    Write an object to a pickle file and load it straight back.

    Parameters
    ----------
    obj : pandas object
        The object to pickle.
    path : str, default None
        File used for the round trip. When None, a name of the form
        ``__<rands(10)>__.pickle`` is generated.

    Returns
    -------
    round_trip_pickled_object : pandas object
        The result of reading the freshly written pickle.
    """
    target = path
    if target is None:
        target = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10)))

    # Both the write and the read happen inside the ensure_clean context.
    with ensure_clean(target) as clean_path:
        pd.to_pickle(obj, clean_path)
        restored = pd.read_pickle(clean_path)
    return restored
示例4: load_guesses
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def load_guesses(directory: str, output_type='char', folds=c.GUESSER_GENERATION_FOLDS) -> pd.DataFrame:
    """
    Load the pickled guesses of every requested fold and stack them.

    :param directory: where to load guesses from (passed to
        ``AbstractGuesser.guess_path``)
    :param output_type: One of: char, full, first
    :param folds: folds to load, by default ``c.GUESSER_GENERATION_FOLDS``;
        must not be empty
    :return: guesses across all folds for given directory, in fold order
    """
    assert len(folds) > 0

    # Read every fold first and concatenate once: calling pd.concat inside
    # the loop copies the accumulated frame again for each additional fold.
    frames = [
        pd.read_pickle(AbstractGuesser.guess_path(directory, fold, output_type))
        for fold in folds
    ]
    if len(frames) == 1:
        # A single fold is returned as loaded, without a concat copy.
        return frames[0]
    return pd.concat(frames)
示例5: split_dataset
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def split_dataset(data_path='data-multi-visit.pkl'):
    """Split the unique ``SUBJECT_ID`` values into train/eval/test id files.

    The pickled frame at ``data_path`` is loaded and its unique
    ``SUBJECT_ID`` values are cut, in order of first appearance, into the
    first 2/3 (train), the next 1/6 (eval) and the remainder (test). Each
    part is written one id per line to ``train-id.txt``, ``eval-id.txt``
    and ``test-id.txt`` in the current working directory, and the three
    sizes are printed.

    NOTE: the split is deterministic; no shuffling is performed.

    :param data_path: path of the pickled frame holding a ``SUBJECT_ID`` column
    """
    data = pd.read_pickle(data_path)
    sample_id = data['SUBJECT_ID'].unique()

    # Integer split points, computed once.
    total = len(sample_id)
    train_end = total * 2 // 3
    eval_end = total * 5 // 6

    train_id = sample_id[:train_end]
    eval_id = sample_id[train_end:eval_end]
    test_id = sample_id[eval_end:]

    def ls2file(list_data, file_name):
        # One item per line, newline-terminated.
        with open(file_name, 'w') as fout:
            fout.writelines(str(item) + '\n' for item in list_data)

    ls2file(train_id, 'train-id.txt')
    ls2file(eval_id, 'eval-id.txt')
    ls2file(test_id, 'test-id.txt')

    print('train size: %d, eval size: %d, test size: %d' %
          (len(train_id), len(eval_id), len(test_id)))
示例6: daily_stats
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def daily_stats(data: (pd.Series, pd.DataFrame), **kwargs) -> pd.DataFrame:
    """
    Daily stats for given data

    Rows are grouped by their index floored to the day and summarised with
    ``describe``. Any keyword arguments are forwarded to ``describe``; when
    ``percentiles`` is not supplied, ``[.1, .25, .5, .75, .9]`` is used.
    An empty input yields an empty ``DataFrame``.

    Examples:
        >>> pd.set_option('display.precision', 2)
        >>> (
        ...     pd.concat([
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib0.pkl'),
        ...         pd.read_pickle('xbbg/tests/data/sample_rms_ib1.pkl'),
        ...     ], sort=False)
        ...     .pipe(get_series, col='close')
        ...     .pipe(daily_stats)
        ... )['RMS FP Equity'].iloc[:, :5]
                                   count    mean   std    min    10%
        2020-01-16 00:00:00+00:00  434.0  711.16  1.11  708.6  709.6
        2020-01-17 00:00:00+00:00  437.0  721.53  1.66  717.0  719.0
    """
    if data.empty:
        return pd.DataFrame()

    # Only fill in the default when the caller did not pass percentiles.
    kwargs.setdefault('percentiles', [.1, .25, .5, .75, .9])

    # 'D' is the canonical day alias; the lowercase spelling is deprecated
    # in recent pandas releases.
    return data.groupby(data.index.floor('D')).describe(**kwargs)
示例7: simulate_walks
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def simulate_walks(self, num_walks, walk_length, stay_prob=0.3, workers=1, verbose=0):
    """Run ``self._simulate_walks`` in parallel and return all walks in one list.

    Four pickles are loaded from ``self.temp_path`` (``layers_adj.pkl``,
    ``layers_alias.pkl``, ``layers_accept.pkl`` and ``gamma.pkl``). One job
    is scheduled for each value produced by
    ``partition_num(num_walks, workers)``.

    :param num_walks: total number of walks, split up by ``partition_num``
    :param walk_length: forwarded to ``self._simulate_walks``
    :param stay_prob: forwarded to ``self._simulate_walks``
    :param workers: ``n_jobs`` for ``Parallel`` and the partition count
    :param verbose: verbosity forwarded to ``Parallel``
    :return: the results of all jobs chained into one list
    """
    # temp_path is used as a plain string prefix.
    layers_adj = pd.read_pickle(self.temp_path+'layers_adj.pkl')
    layers_alias = pd.read_pickle(self.temp_path+'layers_alias.pkl')
    layers_accept = pd.read_pickle(self.temp_path+'layers_accept.pkl')
    gamma = pd.read_pickle(self.temp_path+'gamma.pkl')

    nodes = self.idx  # list(self.g.nodes())

    results = Parallel(n_jobs=workers, verbose=verbose, )(
        delayed(self._simulate_walks)(nodes, num, walk_length, stay_prob,
                                      layers_adj, layers_accept, layers_alias, gamma)
        for num in partition_num(num_walks, workers))

    # Each job returns an iterable of walks; flatten them into one list.
    return list(itertools.chain(*results))
示例8: test_legacy_pickle
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def test_legacy_pickle(self, datapath):
    """Load the ``multiindex_v1.pickle`` fixture and check its indexer behaviour.

    Skipped when ``PY3`` is true.
    """
    if PY3:
        pytest.skip("testing for legacy pickles not "
                    "support on py3")

    legacy = pd.read_pickle(datapath('indexes', 'data', 'multiindex_v1.pickle'))
    rebuilt = MultiIndex.from_tuples(legacy.values)
    assert legacy.equals(rebuilt)

    # Indexing the index with itself must give 0..n-1.
    positions = legacy.get_indexer(legacy)
    expected_positions = np.arange(len(legacy), dtype=np.intp)
    assert_almost_equal(positions, expected_positions)

    # Reversed lookups must agree between the loaded and rebuilt index.
    cross = legacy.get_indexer(rebuilt[::-1])
    from_legacy = legacy.get_indexer(legacy[::-1])
    from_rebuilt = rebuilt.get_indexer(rebuilt[::-1])
    assert_almost_equal(cross, from_legacy)
    assert_almost_equal(from_legacy, from_rebuilt)
示例9: test_legacy_v2_unpickle
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def test_legacy_v2_unpickle(self, datapath):
    """Load the ``mindex_073.pickle`` fixture and check its indexer behaviour."""
    # 0.7.3 -> 0.8.0 format manage
    legacy = pd.read_pickle(datapath('indexes', 'data', 'mindex_073.pickle'))
    rebuilt = MultiIndex.from_tuples(legacy.values)
    assert legacy.equals(rebuilt)

    # Indexing the index with itself must give 0..n-1.
    positions = legacy.get_indexer(legacy)
    expected_positions = np.arange(len(legacy), dtype=np.intp)
    assert_almost_equal(positions, expected_positions)

    # Reversed lookups must agree between the loaded and rebuilt index.
    cross = legacy.get_indexer(rebuilt[::-1])
    from_legacy = legacy.get_indexer(legacy[::-1])
    from_rebuilt = rebuilt.get_indexer(rebuilt[::-1])
    assert_almost_equal(cross, from_legacy)
    assert_almost_equal(from_legacy, from_rebuilt)
示例10: test_read_explicit
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def test_read_explicit(self, compression, get_random_path):
    """Check that ``read_pickle`` with explicit ``compression`` reads a compressed file.

    The frame is pickled uncompressed, the file is compressed with
    ``self.compress_file``, and the compressed copy is read back with the
    same compression and compared to the original frame.
    """
    raw_path = get_random_path + ".raw"
    compressed_path = get_random_path + ".compressed"
    with tm.ensure_clean(raw_path) as p1:
        with tm.ensure_clean(compressed_path) as p2:
            expected = tm.makeDataFrame()

            # Plain pickle first ...
            expected.to_pickle(p1, compression=None)

            # ... then compress it into the second file.
            self.compress_file(p1, p2, compression=compression)

            # Reading with the matching compression must restore the frame.
            result = pd.read_pickle(p2, compression=compression)
            tm.assert_frame_equal(expected, result)
示例11: top_k_similar_items
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def top_k_similar_items(movies, ratings_df, k, TRAINED=False):
    """
    Return the k most correlated movies for one movie or for each movie in a list.

    INPUTS :
        movies : a single movie id, or a list of movie ids
        ratings_df : rating dataframe (not read by this function)
        k : natural number, how many similar movies to return per movie
        TRAINED : True or False, whether to load "user_item_table_train.pkl"
                  instead of "user_item_table.pkl"
    OUTPUT:
        For a single movie id: array of the k index labels with the highest
        correlation to that movie, the movie itself excluded.
        For a list: dict mapping each movie id to such an array. In this
        case the dict is also written to "movie_top_k.csv".
    """
    table_path = "user_item_table_train.pkl" if TRAINED else "user_item_table.pkl"
    df = pd.read_pickle(table_path)
    corr_matrix = item_item_correlation(df, TRAINED)

    def _top_k(movie):
        # Rank by correlation, drop the movie itself, keep the first k labels.
        ranked = corr_matrix[movie].sort_values(ascending=False)
        return ranked.drop(movie).index.values[0:k]

    if not isinstance(movies, list):
        return _top_k(movies)

    top_k_by_movie = {movie: _top_k(movie) for movie in movies}
    pd.DataFrame(top_k_by_movie).to_csv("movie_top_k.csv")
    return top_k_by_movie
示例12: user_user_pearson_corr
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def user_user_pearson_corr(ratings_df, TRAINED):
    """Return the user-user correlation matrix, cached as a pickle.

    The cached matrix lives in "user_user_corr_train.pkl" (TRAINED) or
    "user_user_corr.pkl" (otherwise) in the current working directory. If
    that file exists it is loaded; otherwise the matching
    "user_item_table[_train].pkl" is loaded, transposed, correlated with
    ``DataFrame.corr`` and the result is saved to the cache file.

    :param ratings_df: rating dataframe (not read by this function)
    :param TRAINED: whether to use the "_train" variants of both files
    :return: the correlation DataFrame
    """
    suffix = "_train" if TRAINED else ""
    corr_path = "user_user_corr" + suffix + ".pkl"

    # Check the same path that is read and written below. Testing a
    # different location ("model/...") meant the cache was never found.
    if os.path.isfile(corr_path):
        return pd.read_pickle(corr_path)

    table = pd.read_pickle("user_item_table" + suffix + ".pkl")
    # Transpose so that corr() correlates the table's rows.
    df_corr = table.T.corr()
    df_corr.to_pickle(corr_path)
    return df_corr
示例13: pd_cache
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def pd_cache(func):
    """Decorator that caches the DataFrame returned by ``func`` in ``.pd_cache/``.

    The cache file is ``.pd_cache/<func name>_<hash>.pkl``, where ``<hash>``
    is the first 6 hex digits of the MD5 of the function's source code.
    Editing the function therefore invalidates its cache.

    NOTE: the call arguments are not part of the cache key, so every call
    of the decorated function returns the same cached frame.

    The ``.pd_cache`` directory is created (relative to the current working
    directory) when the decorator is applied.
    """
    try:
        os.mkdir('.pd_cache')
    except FileExistsError:
        pass

    @wraps(func)
    def cache(*args, **kw):
        # Get raw code of function as str and hash it
        func_code = ''.join(inspect.getsourcelines(func)[0]).encode('utf-8')
        hsh = hashlib.md5(func_code).hexdigest()[:6]
        f = '.pd_cache/' + func.__name__ + '_' + hsh + '.pkl'
        if os.path.exists(f):
            return pd.read_pickle(f)

        # Cache miss: delete stale `<func_name>_<6 hex chars>.pkl` files left
        # by earlier versions of this function. The part between the prefix
        # and the extension must be exactly a 6-digit hex tag, so caches of
        # other functions whose names merely start with `<func_name>_` are
        # left alone.
        prefix = func.__name__ + '_'
        extension = '.pkl'
        for cached in glob(f'./.pd_cache/{func.__name__}_*.pkl'):
            tag = os.path.basename(cached)[len(prefix):-len(extension)]
            if len(tag) == 6 and all(ch in '0123456789abcdef' for ch in tag):
                os.remove(cached)

        # Write new
        df = func(*args, **kw)
        df.to_pickle(f)
        return df
    return cache
示例14: __init__
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def __init__(self, ticker, provider):
    """Load or refresh the price history of ``ticker`` for one provider.

    The history is stored as a pickle at
    ``<current_path()>/thewarden/pricing_engine/pricing_data/<TICKER>_<provider.name>.price``.
    If that file was modified today it is loaded; otherwise (or when it
    does not exist) ``self.update_history()`` is called.

    :param ticker: ticker symbol; stored upper-cased
    :param provider: pricing provider object; its ``name`` attribute is
        used in the file name
    """
    self.ticker = ticker.upper()
    self.provider = provider
    self.filename = ("thewarden/pricing_engine/pricing_data/" +
                     self.ticker + "_" + provider.name + ".price")
    self.filename = os.path.join(current_path(), self.filename)
    self.errors = []
    # Make sure the directory of the pricing file exists
    os.makedirs(os.path.dirname(self.filename), exist_ok=True)

    # Try to read from file and check how recent it is.
    # Freshness is judged by modification time: getctime is the creation
    # time on Windows and the metadata-change time on Unix, neither of
    # which says when the data was last written.
    try:
        today = datetime.now().date()
        filetime = datetime.fromtimestamp(os.path.getmtime(self.filename))
        if filetime.date() == today:
            self.df = pd.read_pickle(self.filename)
        else:
            self.df = self.update_history()
    except FileNotFoundError:
        self.df = self.update_history()

    try:
        self.last_update = self.df.index.max()
        self.first_update = self.df.index.min()
        # Close of the first row; .iloc makes the positional lookup explicit
        # (integer keys on a non-integer index are deprecated in pandas).
        self.last_close = self.df.close.iloc[0]
    except (AttributeError, IndexError) as e:
        # AttributeError: df is None or has no `close` column.
        # IndexError: df has no rows.
        self.errors.append(e)
        self.last_update = self.first_update = self.last_close = None
示例15: update_history
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import read_pickle [as 别名]
def update_history(self, force=False):
    """Return the price history, reusing today's pickle when possible.

    :param force: when True, skip the saved-file check and always request
        new data from the provider
    :return: ``None`` when ``self.provider.name`` contains ``'realtime'`` or
        when ``self.price_parser`` returns ``None`` (an error message is then
        appended to ``self.errors``); otherwise a DataFrame with a datetime
        index sorted in descending order. Newly fetched data is also pickled
        to ``self.filename``.
    """
    # Skip history for providers that have realtime in name
    if 'realtime' in self.provider.name:
        return None

    if not force:
        try:
            # Check if saved file is recent enough to be used:
            # the local file has to have a modified time in today.
            today = datetime.now().date()
            filetime = datetime.fromtimestamp(
                os.path.getmtime(self.filename))
            if filetime.date() == today:
                return pd.read_pickle(self.filename)
        except FileNotFoundError:
            pass

    # File not found or not new: request fresh data from the provider
    price_request = self.provider.request_data(self.ticker)

    # Parse and save
    df = self.price_parser(price_request, self.provider)
    if df is None:
        self.errors.append(
            f"Empty df for {self.ticker} using {self.provider.name}")
        return None

    # Convert the index before sorting so rows are ordered by actual date
    # rather than by the ordering of the raw index values.
    df.index = pd.to_datetime(df.index)
    df.sort_index(ascending=False, inplace=True)
    df.to_pickle(self.filename)
    return df