本文整理汇总了Python中pandas.HDFStore.get方法的典型用法代码示例。如果您正苦于以下问题：Python HDFStore.get方法的具体用法？Python HDFStore.get怎么用？Python HDFStore.get使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.HDFStore的用法示例。
在下文中一共展示了HDFStore.get方法的9个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: final_check
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def final_check(year=2006):
    """Print which expected survey variables are missing from the test table.

    Opens ``test.h5`` and ``survey.h5`` under ``DATA_SOURCES_DIR``, reads the
    ``survey_2006`` table from each, prints the ``survey.h5`` store, prints the
    expected variable names that are not columns of the test table, and then
    prints the identifier columns of the rows whose ``idfoy`` is 603018901.

    :param year: currently unused; the table name is hard-coded to
        ``survey_2006``.  NOTE(review): presumably meant to select the
        table -- confirm before wiring it in.
    :returns: None
    """
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    # Close both stores even when a read fails, so no HDF5 handle is leaked.
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get('survey_2006')
            print(survey)
            # The result is not used, but the read is kept so that a missing
            # table in survey.h5 still raises here.
            survey.get('survey_2006')
        finally:
            survey.close()
    finally:
        store.close()

    # Every entry needs its own trailing comma: adjacent string literals are
    # concatenated silently, which previously fused 'idfoy' and 'noi' into
    # the single bogus name 'idfoynoi'.
    expected = set([
        'adeben',
        'adfdap',
        'amois',
        'ancchom',
        'ancentr',
        'anciatm',
        'ancrech',
        'anref',
        'contra',
        'datant',
        'dimtyp',
        'ident',
        'idfoy',
        'noi',
        'nondic',
        'rabs',
        'RABSP',
        'RAISTP',
        'raistp',
        'rdem',
        'retrai',
        'sitant',
        'sp10',
        'sp11',
        'stc',
        'TXTPPB',
    ])
    # sp00 .. sp09
    expected.update('sp0' + str(i) for i in range(0, 10))

    print(expected.difference(set(final2.columns)))
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']
    ].to_string())
示例2: final_check
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def final_check(year=2006):
    """Print which expected survey variables are missing from the test table.

    Reads the ``survey_2006`` table from ``test.h5`` and from ``survey.h5``
    (both under ``DATA_SOURCES_DIR``), prints the ``survey.h5`` store, prints
    the expected variable names that are not columns of the test table, and
    prints the identifier columns of the rows whose ``idfoy`` is 603018901.

    :param year: currently unused; the table name is hard-coded to
        ``survey_2006``.
    :returns: None
    """
    test_filename = os.path.join(DATA_SOURCES_DIR, "test.h5")
    survey_filename = os.path.join(DATA_SOURCES_DIR, "survey.h5")

    # Both stores are closed in ``finally`` so a failed read does not leave
    # the HDF5 files open.
    store = HDFStore(test_filename)
    try:
        survey = HDFStore(survey_filename)
        try:
            final2 = store.get('survey_2006')
            print(survey)
            # Only referenced by the disabled diagnostics at the end.
            finalT = survey.get('survey_2006')
        finally:
            survey.close()
    finally:
        store.close()

    varlist = [
        'anref', 'sitant', 'adeben', 'stc', 'retrai', 'contra', 'datant',
        'rabs', 'nondic', 'TXTPPB', 'ancrech', 'RAISTP', 'amois', 'adfdap',
        'ancentr', 'anciatm', 'ancchom', 'ident', 'noi', 'dimtyp', 'RABSP',
        'raistp', 'rdem', 'sp10', 'sp11', 'idfoy',
    ]
    # sp00 .. sp09
    varlist.extend('sp0' + str(i) for i in range(0, 10))

    missing = set(varlist).difference(set(final2.columns))
    print(missing)
    print(final2.loc[
        final2.idfoy == 603018901,
        ['idfoy', 'quifoy', 'idfam', 'quifam', 'idmen', 'quimen', 'noi']
    ].to_string())

    # Disabled diagnostics, kept for reference:
    # print final2
    # print finalT
    # # control(final2, debug=True, verbose=True, verbose_columns=['idfam', 'quifam'])
    # # control(finalT, debug=True, verbose=True, verbose_columns=['idfam', 'quifam'])
    # print 'FAMILLE--------------'
    # print final2.quifam.value_counts()
    # print finalT.quifam.value_counts()
    # print ''
    # print 'FOYER------------------'
    # print final2.quifoy.value_counts()
    # print finalT.quifoy.value_counts()
    # print ''
    # print 'MENAGES-----------------'
    # print final2.quimen.value_counts()
    # print finalT.quimen.value_counts()
    #
    # print ''
    # print final2.age.describe()
    # print finalT.age.describe()
    # # age_data = final2['age'].value_counts().reset_index()
    # # age_data = age_data.sort_index(by='index', ascending='True')
    # # print age_data.to_string()
    # # print final2.loc[final2['quifam']==2, ['quifam', 'age']].describe()
    return
示例3: test
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def test():
    """Validate check_consistency.

    Reads the ``test12`` and ``test_simu`` tables from ``fichiertest.h5`` and
    prints the result of ``check_consistency(test_simu, datatable)``.
    """
    # Disabled debugging aid, kept for reference:
    # from pandas import DataFrame
    # res = DataFrame({af_col.name: simulation.output_table.get_value(af_col.name, af_col.entity)})
    # print res

    # dirname() drops the trailing 'erf' component, so the file is looked up
    # in <SRC_PATH>/countries/france/data.
    erf_dir = os.path.join(SRC_PATH, 'countries', 'france', 'data', 'erf')
    filename = os.path.join(os.path.dirname(erf_dir), 'fichiertest.h5')

    store = HDFStore(filename)
    try:
        datatable = store.get('test12')
        test_simu = store.get('test_simu')
    finally:
        # Release the HDF5 handle even if one of the tables is missing.
        store.close()

    print(check_consistency(test_simu, datatable))
示例4: load_df
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def load_df(path, default=None):
    """Load the 'logs' DataFrame from the HDF5 store at *path*.

    The store's keys are printed before the table is read.  This is a
    best-effort loader: any ordinary error (store cannot be opened, table
    missing, ...) yields *default* instead of propagating.

    :param path: path of the HDF5 store file.
    :param default: value returned when the table cannot be loaded.
    :returns: the object stored under 'logs', or *default* on failure.
    """
    # ``except Exception`` (rather than a bare ``except``) keeps the
    # best-effort contract while letting KeyboardInterrupt / SystemExit
    # through.
    try:
        store = HDFStore(path)
    except Exception:
        return default

    try:
        print(store.keys())
        return store.get('logs')
    except Exception:
        return default
    finally:
        # Always release the file handle, including on the failure path.
        store.close()
示例5: build_comparison
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def build_comparison():
    """Print the openfisca and insee tables and their relative differences.

    Reads the ``openfisca`` and ``insee`` tables from ``H5_FILENAME`` located
    next to this module, prints both, drops the row labelled 0 from the
    openfisca table and resets its index, then prints the relative difference
    of the column sums and the element-wise relative difference.

    :returns: None
    """
    directory = os.path.dirname(__file__)
    fname = os.path.join(directory, H5_FILENAME)

    store = HDFStore(fname)
    try:
        openfisca = store.get("openfisca")
        insee = store.get("insee")
    finally:
        # Both tables are in memory now; do not keep the file open.
        store.close()

    print(openfisca)
    print(insee)
    print(openfisca.head())

    openfisca.drop(0, axis=0, inplace=True)
    openfisca.reset_index(inplace=True)

    # Parenthesise the whole expression: ``print (a - b)/c`` only means
    # "print the quotient" as a Python 2 print statement.
    print((openfisca.sum() - insee.sum()) / insee.sum())
    df = (openfisca - insee) / insee
    print(df)
    print(df.to_string())
示例6: HDFStoreDataFrame
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
class HDFStoreDataFrame(BaseIO):
    """Timing benchmarks for DataFrame reads, writes and queries on an HDFStore."""

    def setup(self):
        """Create the sample frames and seed ``__test__.h5`` with them."""
        nrows = 25000
        labels = tm.makeStringIndex(nrows)

        float_columns = {'float1': np.random.randn(nrows),
                         'float2': np.random.randn(nrows)}
        self.df = DataFrame(float_columns, index=labels)

        mixed_columns = {'float1': np.random.randn(nrows),
                         'float2': np.random.randn(nrows),
                         'string1': ['foo'] * nrows,
                         'bool1': [True] * nrows,
                         'int1': np.random.randint(0, nrows, size=nrows)}
        self.df_mixed = DataFrame(mixed_columns, index=labels)

        # Wide frame on the default integer index, plus query bounds.
        self.df_wide = DataFrame(np.random.randn(nrows, 100))
        wide_index = self.df_wide.index
        self.start_wide = wide_index[10000]
        self.stop_wide = wide_index[15000]

        # Date-indexed float frame, plus query bounds.
        dated_columns = {'float1': np.random.randn(nrows),
                         'float2': np.random.randn(nrows)}
        self.df2 = DataFrame(dated_columns,
                             index=date_range('1/1/2000', periods=nrows))
        dated_index = self.df2.index
        self.start = dated_index[10000]
        self.stop = dated_index[15000]

        self.df_wide2 = DataFrame(np.random.randn(nrows, 100),
                                  index=date_range('1/1/2000', periods=nrows))

        column_names = ['C%03d' % pos for pos in range(10)]
        self.df_dc = DataFrame(np.random.randn(nrows, 10),
                               columns=column_names)

        self.fname = '__test__.h5'
        self.store = HDFStore(self.fname)

        # Fixed-format entries are written with put(), table-format ones
        # with append().
        for key, frame in (('fixed', self.df),
                           ('fixed_mixed', self.df_mixed)):
            self.store.put(key, frame)
        for key, frame in (('table', self.df2),
                           ('table_mixed', self.df_mixed),
                           ('table_wide', self.df_wide),
                           ('table_wide2', self.df_wide2)):
            self.store.append(key, frame)

    def teardown(self):
        """Close the store and delete its backing file."""
        self.store.close()
        self.remove(self.fname)

    def time_read_store(self):
        """Read the 'fixed' entry."""
        self.store.get('fixed')

    def time_read_store_mixed(self):
        """Read the 'fixed_mixed' entry."""
        self.store.get('fixed_mixed')

    def time_write_store(self):
        """put() the float frame."""
        self.store.put('fixed_write', self.df)

    def time_write_store_mixed(self):
        """put() the mixed-dtype frame."""
        self.store.put('fixed_mixed_write', self.df_mixed)

    def time_read_store_table_mixed(self):
        """select() the whole 'table_mixed' entry."""
        self.store.select('table_mixed')

    def time_write_store_table_mixed(self):
        """append() the mixed-dtype frame."""
        self.store.append('table_mixed_write', self.df_mixed)

    def time_read_store_table(self):
        """select() the whole 'table' entry."""
        self.store.select('table')

    def time_write_store_table(self):
        """append() the float frame."""
        self.store.append('table_write', self.df)

    def time_read_store_table_wide(self):
        """select() the whole 'table_wide' entry."""
        self.store.select('table_wide')

    def time_write_store_table_wide(self):
        """append() the wide frame."""
        self.store.append('table_wide_write', self.df_wide)

    def time_write_store_table_dc(self):
        """append() with data_columns=True."""
        self.store.append('table_dc_write', self.df_dc, data_columns=True)

    def time_query_store_table_wide(self):
        """select() 'table_wide' rows strictly between the wide bounds."""
        # The query text names ``self``, so the select() call stays in this
        # method where that name is in scope.
        query = "index > self.start_wide and index < self.stop_wide"
        self.store.select('table_wide', where=query)

    def time_query_store_table(self):
        """select() 'table' rows strictly between the date bounds."""
        query = "index > self.start and index < self.stop"
        self.store.select('table', where=query)

    def time_store_repr(self):
        """repr() of the store."""
        repr(self.store)

    def time_store_str(self):
        """str() of the store."""
        str(self.store)

    def time_store_info(self):
        """info() of the store."""
        self.store.info()
示例7: PlotTestLogsAll
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
def PlotTestLogsAll(myfile):
""" Reads the specified TestLogsAll HDF5 file, derives extra columns and plots histograms.
Input: File name with the results, e.g.: "aLabView2\\TestLogsAll.h5"
Output: Plots saved as pages of a PDF whose path is derived from the input file name
NOTE(review): the indentation of this excerpt was lost and the end of the
function is omitted, so the notes below cover only the visible statements.
"""
import pandas as pd # multidimensional data analysis
import numpy as np # python numerical library
# from os import listdir
# from os.path import isdir, isfile, join
import matplotlib.pyplot as plt
# from matplotlib.backends.backend_pdf import PdfPages
# Wei's advice ===
import matplotlib.backends.backend_pdf as dpdf
# NOTE(review): pd, read_hdf and ExcelWriter are not used in the visible part.
from pandas import read_hdf, HDFStore, ExcelWriter
# Switches for the optional plots; only 'PlotIslDurHist' is consulted in the visible part.
CONFIG = {'PlotIslDurHist':True, # Island durations histogram
'PlotCorDur2Pen':True, # Correlation of duration to penetration
'PlotCorDur2fStd':True, # Correlation of duration to standard deviation of frequency
}
# Derive the PDF path from the input path.
# NOTE(review): joining the split pieces with "" removes every "." in the path,
# not only the one before "h5" (e.g. "a.b.h5" -> "ab.pdf") -- confirm this is intended.
if myfile.endswith(".h5"):
mypdffile = "".join(myfile.split(".")[0:-1] + ['.pdf'])
else:
mypdffile = myfile + '.pdf'
print "Opening: " + myfile
# NOTE(review): no h5store.close() appears in the visible part of the function.
h5store = HDFStore(myfile)
TestLog = h5store.get('TestLogsAll')
print "Opening: " + mypdffile
pltPdf = dpdf.PdfPages(mypdffile)
# Filtering TestLog into df1: positive island duration, NrmlFlg == 'y',
# and not coming from the two excluded workbook names.
df1 = TestLog[(TestLog['tIslDur'] > 0.) &
(TestLog['NrmlFlg'] == 'y') &
(TestLog['FileName'] != 'TestLogMotorBr.xlsx') &
(TestLog['FileName'] != 'TestLogSummer01.xlsx')]
# Adding details to df1
# NOTE(review): df1 is a boolean-mask selection of TestLog; assigning new columns to it
# may raise pandas' SettingWithCopyWarning -- confirm whether a .copy() is wanted.
# QCload0 is QCload with negative values replaced by 0.0.
df1['QCload0']=df1['QCload']
df1.loc[(df1[df1['QCload0']<0].index), ('QCload0')] =0.0 # df1['QCload0'][df1['QCload0']<0] = 0.0
# PFact = LabViewP / sqrt(LabViewP**2 + (QCload0 - GEAmpQ)**2)
df1['PFact']=df1['LabViewP']/(df1['LabViewP']**2 + (-df1['GEAmpQ']+df1['QCload0'])**2).apply(np.sqrt)
# PFactsign is 'cap' where QCload0 > GEAmpQ and 'ind' everywhere else.
df1['PFactsign']='ind'
df1.loc[(df1[df1['QCload0']>df1['GEAmpQ']].index), ('PFactsign')]='cap' # df1['PFactsign'][df1['QCload0']>df1['GEAmpQ']]='cap'
# Rows whose FileName contains 'Summer' / 'Winter'; not used in the visible part.
df1s = df1[df1['FileName'].str.contains('Summer')]
df1w = df1[df1['FileName'].str.contains('Winter')]
if CONFIG['PlotIslDurHist']: # Island duration histogram
# Fig: Island duration histogram
fig, (ax0)= plt.subplots(nrows=1, ncols=1, figsize=(8,6))
# provision for a label
# fig.suptitle(myfile) # This titles the figure
# File info output to page top
# label= file_info[file_info.index==fname][['fiComment']].values[0][0]
# label = myfile
# ax0.annotate(label,
# xy=(0.2/6.4, 4.6/4.8), # (0.2,-0.2)inch from top left corner
# xycoords='figure fraction',
# horizontalalignment='left',
# verticalalignment='top',
# fontsize=10)
# subplots_adjust(top=4./4.8)
# This histogram uses every TestLog row with tIslDur > 0, not the narrower df1 selection.
df2a = TestLog['tIslDur'][TestLog['tIslDur'] > 0.]
ax0.set_title('Island Duration Histogram')
df2a.plot(kind='hist', bins=20, ax=ax0, alpha=0.5) # legend=True
# df2.plot(kind='hist', bins=20, ax=ax0, alpha=0.5, legend=True)
# ax0.set_xlim([-1.5,1.5])
# ax0.set_ylim([-1.2,1.2])
ax0.grid(True, which='both')
ax0.set_xlabel('Island duration (sec)')
ax0.set_ylabel('Number of observations')
# ax0.set_aspect('equal')
# ax1.set_title('Currents Al/Be')
# ax1.plot(df2['pvIal']/1.5, df2['pvIbe']/1.5)
# ax1.set_xlim([-300,300])
# ax1.set_ylim([-240,240])
# ax1.grid(True, which='both')
# ax1.set_aspect('equal')
# ax1.set_title('Island Voltage Al/Be')
# ax1.plot(df2['Time'], df2['Island Val']/1.5/sqrt(2)/BASE['Vln'])
# ax1.plot(df2['Time'], df2['Island Vbe']/1.5/sqrt(2)/BASE['Vln'])
# ax1.set_ylim([-1.2,1.2])
# ax1.grid(True, which='both')
pltPdf.savefig() # saves fig to pdf
plt.close() # Closes fig to clean up memory
# Fig: PF actual histogram
# NOTE(review): with the indentation lost it is unclear whether this figure is still
# inside the `if CONFIG['PlotIslDurHist']` block -- check against the original file.
fig, (ax0)= plt.subplots(nrows=1, ncols=1, figsize=(8,6))
df2a = df1['PFact']
ax0.set_title('Load PF actual')
df2a.plot(kind='hist', bins=20, ax=ax0, alpha=0.5) # legend=True
ax0.grid(True, which='both')
ax0.set_xlabel('PF actual')
ax0.set_ylabel('Number of observations')
#.........part of the code is omitted here.........
示例8: HDFStore
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
from pandas import HDFStore

# Step 1: map the monitored, store-writing fetch function over every date.
# NOTE(review): `p` is presumably a worker pool and `args` parsed CLI options;
# both are defined outside this excerpt.
store = HDFStore('store.h5', complevel=9)
fetch_one = partial(fetch_safe, rse=args.rse)
write_one = wrap_write(fetch_one, store, overwrite=args.overwrite)
fmap = wrap_monitor(write_one, monitor)
p.map(fmap, datelist)
monitor.close()
logging.info("closing file")
store.close()

# Step 2: reopen the file and collect every table that can be read, tagging
# each with the date parsed from the part of its key after the first "_".
logging.info("trying to open output")
store = HDFStore('store.h5')
data = []
for key in store.keys():
    try:
        frame = store.get(key)
        frame['timestamp'] = pd.to_datetime(key.split("_")[1], format='%d%m%Y')
    except Exception as e:
        # Unreadable or oddly named tables are reported and skipped.
        print("%s %s" % ("Problem reading", key))
        print(e)
    else:
        data.append(frame)
store.close()

# Step 3: one frame indexed by (timestamp, owner); pivot 'size' so that each
# owner becomes a column, with gaps filled by 0.
data = pd.concat(data).set_index(['timestamp', 'owner'])
data_to_plot = data['size'].unstack().fillna(0)

# Step 4: build the area figure, then attach hover text to every trace.
dataplot = data_to_plot.iplot(kind='area', fill=True, asFigure=True)
for trace in dataplot['data']:
    sizes = data_to_plot[trace['name']].tolist()
    trace['hoverinfo'] = 'text+x+name'
    trace['text'] = ["%.2f Gb" % size for size in sizes]
data.iplot(data=dataplot['data'])
示例9: normalize
# 需要导入模块: from pandas import HDFStore [as 别名]
# 或者: from pandas.HDFStore import get [as 别名]
class LogSaver:
    """Gather the DataFrames of several server.log files into one HDF5 table.

    self.directory : Directory structure for temp and saved files
    self.log_list : List of server.log files to process
    self.extra : True if log messages and thread ids are to be saved too
    self.history_path : History of server.log conversions saved here
    self.progress_store_path : HDF5 file that holds one DataFrame for each server.log file
    self.store_path : Final DataFrame of all server.log entries saved here
    self.history : History of server.log conversions
    """

    FINAL = 'logs'
    PROGRESS = 'progress'
    HISTORY = 'history'

    @staticmethod
    def normalize(name):
        """Return *name* with every character outside [a-zA-Z0-9] replaced by '_'."""
        return re.sub(r'[^a-zA-Z0-9]', '_', name)

    @staticmethod
    def make_name(base_name, extra):
        """Return *base_name*, suffixed with '.extra' when *extra* is truthy."""
        if extra:
            return base_name + '.extra'
        return base_name

    #@staticmethod
    #def temp_name(log_list, extra):
    #    hsh = hash(log_list)
    #    sgn = 'n' if hsh < 0 else 'p'
    #    temp = 'temp_%s%08X' % (sgn, abs(hsh))
    #    return LogSaver.make_name(temp, extra)

    def __init__(self, store_path, log_list, extra):
        """Resolve all working paths and load any existing conversion history.

        :param store_path: location handed to ObjectDirectory.
        :param log_list: iterable of server.log paths; stored as a sorted tuple.
        :param extra: stored as self.extra and used to name the final store.
        """
        self.directory = ObjectDirectory(store_path)
        self.log_list = tuple(sorted(log_list))
        self.extra = extra
        self.history_path = self.directory.get_path(LogSaver.HISTORY, temp=True)
        self.progress_store_path = self.directory.get_path(LogSaver.PROGRESS, temp=True, is_df=True)
        self.store_path = self.directory.get_path(LogSaver.make_name(LogSaver.FINAL, extra),
                                                  is_df=True)
        # Falls back to an empty dict when no history object can be loaded.
        self.history = ObjectDirectory.load_object(self.history_path, {})
        self.saved = False

    def __repr__(self):
        """Return one 'attribute: value' line per instance attribute."""
        return '\n'.join('%s: %s' % (k, v) for k, v in self.__dict__.items())

    def __str__(self):
        """Return repr(self) followed by the number of log files."""
        return '\n'.join([repr(self), '%d log files' % len(self.log_list)])

    def save_all_logs(self, force=False):
        """Convert every log in self.log_list and store the combined DataFrame.

        If the final store already exists it is only printed and nothing is
        converted.  Otherwise each log is saved through self.save_log into
        the progress store, self.check() is called, the per-log DataFrames
        are concatenated and written to the final store under 'logs', and
        the history is saved.

        :param force: when false, refuse to run if a history file exists
            without a final store (a conversion appears to be in progress).
        :raises AssertionError: if that guard trips.
        """
        if os.path.exists(self.store_path):
            final_store = HDFStore(self.store_path)
            try:
                print('Keys: %s' % final_store)
            finally:
                final_store.close()
            return

        # Raised explicitly (same exception type as the former ``assert``) so
        # the guard still runs when Python is started with -O.
        if not force and os.path.exists(self.history_path):
            raise AssertionError(
                '\n'
                '%s exists but %s does not.\n'
                'There appears to be a conversion in progress.\n'
                '-f forces conversion to complete.\n'
                % (self.history_path, self.store_path))

        self.directory.make_dir_if_necessary(self.progress_store_path)
        self.progress_store = HDFStore(self.progress_store_path)

        def get_log(path):
            """Fetch the DataFrame saved for *path*, naming the path on failure."""
            try:
                return self.progress_store.get(LogSaver.normalize(path))
            except Exception:
                print('')
                print(path)
                # Bare ``raise`` keeps the original traceback; ``raise e``
                # would restart it from this line on Python 2.
                raise

        try:
            for path in self.log_list:
                self.save_log(path)
            self.check()
            print('--------')
            print('All tables in %s' % self.progress_store_path)
            print(self.progress_store.keys())
            print('--------')
            df_list = [get_log(path) for path in self.log_list]
        finally:
            # Never leave the progress store open, even when a log fails.
            self.progress_store.close()
        print('Closed %s' % self.progress_store_path)

        df_all = pd.concat(df_list)
        print('Final list has %d entries' % len(df_all))

        final_store = HDFStore(self.store_path)
        try:
            final_store.put(LogSaver.FINAL, df_all)
            print('Keys: %s' % final_store)
        finally:
            final_store.close()
        print('Closed %s' % self.store_path)

        # Save the history in a corresponding file
        self.directory.save(LogSaver.HISTORY, self.history)
        print('Saved history')
#.........这里部分代码省略.........