This article collects typical usage examples of the pandas.core.frame.DataFrame class in Python. If you are wondering what the DataFrame class is for and how to use it, the curated examples below should help.
The following shows 14 code examples of the DataFrame class, ordered by popularity.
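Before the examples, a minimal sketch of the construction pattern most of them rely on (a dict of columns plus an index); the column name and dates here are illustrative only:

import pandas as pd

# Columns come from a dict of equal-length sequences; the index labels the rows.
df = pd.DataFrame({"price": [10.0, 10.5, 10.2]},
                  index=pd.date_range("2024-01-01", periods=3, freq="D"))
print(df.head())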
Example 1: plot

def plot(self):
    """
    Plots two graphs: one for the N-period moving average with the lower
    and upper bands, and one for P/L and position.
    """
    # Assumes: from pandas import DataFrame, Series; import matplotlib.pyplot as plt
    columns = {"Upper Bands": self.upper_bands,
               "Lower Bands": self.lower_bands,
               "Moving Means": self.moving_means,
               "Opening Prices": self.prices}
    df = DataFrame(columns, index=self.dates)
    df.plot()

    fig = plt.figure(num=None, figsize=(18, 10), dpi=80, facecolor='w', edgecolor='k')
    fig.add_subplot(121)
    trans_dates = [tran.date for tran in self.transactions]
    # We negate the value here to show profit/loss.
    trans = Series([-tran.value() for tran in self.transactions], index=trans_dates)
    position = Series([tran.units for tran in self.transactions], index=trans_dates)
    position.cumsum().plot(label="Position")
    plt.xlabel("Date")
    plt.ylabel("Position")
    plt.title("Position over Time")
    plt.legend(loc="best")

    fig.add_subplot(122)
    trans.cumsum().plot(label="P/L")
    plt.xlabel("Date")
    plt.ylabel("Profit/Loss")
    plt.title("Profit and Loss over Time")
    plt.legend(loc="best")
    plt.show()
Example 2: get_daily_normals

def get_daily_normals(self, start_date=None, end_date=None, stamp_year=2001):
    """
    Average the series by calendar day over [start_date, end_date].

    :type start_date: datetime.datetime
    :type end_date: datetime.datetime
    :rtype: list, list
    """
    # Legacy pandas API: the DatetimeIndex(start=..., end=...) constructor and
    # pandas.datetools have since been replaced by pandas.date_range.
    self.stamp_day_dates = pandas.DatetimeIndex(start=datetime(stamp_year, 1, 1),
                                                end=date(stamp_year, 12, 31),
                                                freq=pandas.datetools.offsets.Day())
    if start_date is None:
        start_date = self.time[0]
    if end_date is None:
        end_date = self.time[-1]

    di = pandas.DatetimeIndex(data=self.time)
    df = DataFrame(data=self.data, index=di, columns=["values"])
    # DataFrame.select and .ix are also legacy (removed in modern pandas).
    df = df.select(lambda d: start_date <= d <= end_date)
    df_mean = df.groupby(by=lambda d: (d.day, d.month)).mean()
    return self.stamp_day_dates, df_mean.ix[[(d.day, d.month) for d in self.stamp_day_dates], "values"]
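Since that snippet leans on removed APIs, here is a minimal sketch of the same daily-normals computation against current pandas; time and data are assumed stand-ins for the object's attributes:

import pandas as pd

def daily_normals(time, data, stamp_year=2001):
    # One timestamp per calendar day of the reference year.
    stamp_days = pd.date_range(start=f"{stamp_year}-01-01",
                               end=f"{stamp_year}-12-31", freq="D")
    df = pd.DataFrame({"values": data}, index=pd.DatetimeIndex(time))
    # Average every observation that shares a (month, day) pair.
    means = df.groupby([df.index.month, df.index.day])["values"].mean()
    return stamp_days, [means[(d.month, d.day)] for d in stamp_days]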
Example 3: getList

def getList(self, week, club, colList, filename):
    # read_csv already returns a DataFrame; the extra DataFrame(...) wrappers
    # below are kept from the original but are redundant.
    s = pd.read_csv(filename)
    df2 = DataFrame(s)
    df3 = DataFrame(s)
    columns = df2.columns
    xlist = list()
    for c in columns:
        if c.upper().find("PRICE ADJUSTMENT") == -1:
            if c.find(week) != -1:
                xlist.append(str(c))
    indexList = list()
    for xcolumn in xlist:
        colist = list()
        colist.append(xcolumn)
        df4 = DataFrame(df3, columns=colist)[~df3[xcolumn].isnull()]
        for row in df4.iterrows():
            if row[1][0] == club:
                indexList.append(row[0])
    fin = DataFrame(df2, index=indexList, columns=colList)
    if fin.empty:
        return
    fin["Camp"] = club
    fin["Week"] = week
    return fin
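The per-column row scan above can also be expressed vectorized; a hedged sketch reusing the example's names (df2, week, club, colList) and assuming the same CSV layout:

week_cols = [c for c in df2.columns
             if "PRICE ADJUSTMENT" not in c.upper() and week in c]
# Rows where any matching week column holds this club.
mask = df2[week_cols].eq(club).any(axis=1)
fin = df2.loc[mask, colList]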
Example 4: pivot

def pivot(self, index=None, columns=None, values=None):
    """
    See DataFrame.pivot
    """
    # Internal pandas code from an older release: Index still supported
    # "-" for set difference, and sortlevel/is_lexsorted were current API.
    index_vals = self[index]
    column_vals = self[columns]
    mindex = MultiIndex.from_arrays([index_vals, column_vals],
                                    names=[index, columns])

    if values is None:
        items = self.columns - [index, columns]
        mat = self.reindex(columns=items).values
    else:
        items = [values]
        mat = np.atleast_2d(self[values].values).T

    stacked = DataFrame(mat, index=mindex, columns=items)

    if not mindex.is_lexsorted():
        stacked = stacked.sortlevel(level=0)

    unstacked = stacked.unstack()
    if values is not None:
        unstacked.columns = unstacked.columns.droplevel(0)
    return unstacked
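This helper is pandas-internal; what it implements is exposed publicly as DataFrame.pivot. A quick usage sketch with made-up data:

import pandas as pd

long_df = pd.DataFrame({
    "date": ["2024-01-01", "2024-01-01", "2024-01-02", "2024-01-02"],
    "ticker": ["AAA", "BBB", "AAA", "BBB"],
    "price": [10.0, 20.0, 11.0, 19.5],
})
# Unique (index, columns) pairs become the cells of a wide frame.
wide = long_df.pivot(index="date", columns="ticker", values="price")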
Example 5: _unstack_frame

def _unstack_frame(obj, level):
    # Internal pandas routine built on the (legacy) BlockManager API.
    from pandas.core.internals import BlockManager, make_block

    if obj._is_mixed_type:
        unstacker = _Unstacker(np.empty(obj.shape, dtype=bool),  # dummy
                               obj.index, level=level,
                               value_columns=obj.columns)

        new_columns = unstacker.get_new_columns()
        new_index = unstacker.get_new_index()
        new_axes = [new_columns, new_index]

        new_blocks = []
        mask_blocks = []
        for blk in obj._data.blocks:
            bunstacker = _Unstacker(blk.values.T, obj.index, level=level,
                                    value_columns=blk.items)
            new_items = bunstacker.get_new_columns()
            new_values, mask = bunstacker.get_new_values()

            mblk = make_block(mask.T, new_items, new_columns)
            mask_blocks.append(mblk)

            newb = make_block(new_values.T, new_items, new_columns)
            new_blocks.append(newb)

        result = DataFrame(BlockManager(new_blocks, new_axes))
        mask_frame = DataFrame(BlockManager(mask_blocks, new_axes))
        # .ix is the legacy indexer of that era; keep only non-empty columns.
        return result.ix[:, mask_frame.sum(0) > 0]
    else:
        unstacker = _Unstacker(obj.values, obj.index, level=level,
                               value_columns=obj.columns)
        return unstacker.get_result()
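The public entry point for this machinery is DataFrame.unstack; a short usage sketch:

import pandas as pd

df = pd.DataFrame(
    {"value": [1, 2, 3, 4]},
    index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]],
                                     names=["outer", "inner"]),
)
# Move the "inner" index level into the columns, which is the operation
# _unstack_frame performs blockwise for mixed-dtype frames.
wide = df.unstack(level="inner")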
Example 6: _wrap_applied_output

def _wrap_applied_output(self, keys, values, not_indexed_same=False):
    # Internal pandas groupby code: decide how to reassemble the pieces
    # returned by an applied function.
    if len(keys) == 0:
        return Series([])

    key_names = [ping.name for ping in self.groupings]

    if isinstance(values[0], Series):
        if not_indexed_same:
            data_dict = dict(zip(keys, values))
            result = DataFrame(data_dict).T
            if len(self.groupings) > 1:
                result.index = MultiIndex.from_tuples(keys, names=key_names)
            return result
        else:
            cat_values = np.concatenate([x.values for x in values])
            cat_index = values[0].index
            if len(values) > 1:
                cat_index = cat_index.append([x.index for x in values[1:]])
            return Series(cat_values, index=cat_index)
    elif isinstance(values[0], DataFrame):
        # possible that Series -> DataFrame by applied function
        return self._wrap_frames(keys, values,
                                 not_indexed_same=not_indexed_same)
    else:
        if len(self.groupings) > 1:
            index = MultiIndex.from_tuples(keys, names=key_names)
            return Series(values, index)
        else:
            return Series(values, keys)
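A usage sketch of the public behavior this wrapper backs (GroupBy.apply reassembling per-group Series):

import pandas as pd

df = pd.DataFrame({"g": ["a", "a", "b"], "x": [1.0, 2.0, 3.0]})
# Each group's function result is a Series; the wrapper above decides
# whether the pieces concatenate back into a Series or a DataFrame.
out = df.groupby("g")["x"].apply(lambda s: s - s.mean())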
Example 7: get_result

def get_result(self):
    # Internal pandas concat code: Series inputs take the fast paths,
    # everything else goes through the BlockManager machinery.
    if self._is_series:
        if self.axis == 0:
            new_data = com._concat_compat([x.get_values() for x in self.objs])
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name)
                    .__finalize__(self, method='concat'))
        else:
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)
            if columns is not None:
                tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')
    else:
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue
                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]
            mgrs_indexers.append((obj._data, indexers))

        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy)
        if not self.copy:
            new_data._consolidate_inplace()

        return (self.objs[0]._from_axes(new_data, self.new_axes)
                .__finalize__(self, method='concat'))
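pd.concat is the public face of this code path; a minimal usage sketch:

import pandas as pd

a = pd.DataFrame({"x": [1, 2]})
b = pd.DataFrame({"x": [3, 4]})
# Row-wise concatenation; axis=1 would instead align on the index.
both = pd.concat([a, b], axis=0, ignore_index=True)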
Example 8: export_converted_values

def export_converted_values(self):
    """
    Converts per-100g values to per-serving values.
    Invoking it generates a new file which then serves as the database,
    so it only needs to be called once.
    :return:
    """
    import math

    file_converted = self.file_converted_values
    data_file = self.file_database
    data = self.read_csv(data_file)
    converted_data = list()
    for item in data.values:
        converted_list = list(item[0:2])
        sub_item = item[2:50]
        for nutrient in sub_item:
            if math.isnan(nutrient):
                nutrient = 0
            # sub_item[47] is presumably the serving size in grams (per the docstring).
            converted_list.append(nutrient * sub_item[47] / 100)
        converted_list.append(item[50])
        converted_data.append(converted_list)
    if len(self.cols) == 0:
        # data._info_axis._data is the columns Index's underlying array.
        for col_name in list(data._info_axis._data):
            self.cols.append(col_name)
    df = DataFrame(data=converted_data, columns=self.cols)
    df.to_csv(file_converted, index=False)
    print('File has been exported')
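A hedged vectorized sketch of the same conversion, assuming the original layout holds (columns 2 through 49 are per-100g values, with the last column of that slice the serving size):

import pandas as pd

def convert(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    nutrients = out.iloc[:, 2:50].fillna(0)
    # Scale each row by its serving size / 100, as in the loop above.
    out.iloc[:, 2:50] = nutrients.mul(nutrients.iloc[:, 47] / 100, axis=0)
    return out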
Example 9: feature_engineering

def feature_engineering(raw_data):
    input_data = raw_data[['Date', 'AdjClose', 'AdjVolume']].dropna()
    train_ratio = 0.8
    savedata = DataFrame(input_data)
    savedata.to_csv('/home/peng/workspace/datafortrainCao.csv', header=0)
    #===========================================================================
    # Vol_5 = index_cal().VOL_n(input_data, 5)
    # Vol_10 = index_cal().VOL_n(input_data, 10)
    # Vol_15 = index_cal().VOL_n(input_data, 15)
    # Vol_20 = index_cal().VOL_n(input_data, 20)
    # RDV_5 = index_cal().RDV_n(input_data, 5)
    # RDV_10 = index_cal().RDV_n(input_data, 10)
    # RDV_15 = index_cal().RDV_n(input_data, 15)
    # RDV_20 = index_cal().RDV_n(input_data, 20)
    #===========================================================================
    EMA15 = index_cal().EMAn(input_data, 15)
    RDP_5 = index_cal().RDP_n(input_data, 5)
    RDP_10 = index_cal().RDP_n(input_data, 10)
    RDP_15 = index_cal().RDP_n(input_data, 15)
    RDP_20 = index_cal().RDP_n(input_data, 20)
    RDP_plus_5 = index_cal().RDP_plus_n(input_data, 5)
    all_data = mergeColumnByDate(RDP_5, RDP_10, RDP_15, RDP_20, EMA15, RDP_plus_5)
    features = all_data[['RDP-5', 'RDP-10', 'RDP-15', 'RDP-20', 'EMA15']]
    features = PCA().fit_transform(features.values)
    (x_train, x_test) = divideTrainTest(features, train_ratio)
    objectives = all_data['RDP+5'].values
    (y_train, y_real) = divideTrainTest(objectives, train_ratio)
    return (x_train, y_train, x_test, y_real)
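index_cal, mergeColumnByDate, and divideTrainTest are project-local helpers whose definitions the snippet omits; the PCA call matches scikit-learn's API. A minimal sketch of that dimensionality-reduction-plus-chronological-split step under those assumptions:

import numpy as np
from sklearn.decomposition import PCA

features = np.random.rand(100, 5)        # stand-in for the RDP/EMA feature matrix
reduced = PCA().fit_transform(features)  # same call pattern as in the example
split = int(len(reduced) * 0.8)          # assumed chronological 80/20 split
x_train, x_test = reduced[:split], reduced[split:]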
Example 10: classifyTestData

def classifyTestData(testFilePath, modelRoot):
    """
    Calls traverseDecisionTreeModel() to classify the test data on the trained
    model, then prints the confusion matrix and the classification counts.
    :param testFilePath: path to the test file
    :param modelRoot: root node of the decision tree of the trained model
    """
    correctlyClassifiedInstances = 0
    incorrectlyClassifiedInstances = 0
    input_file = open(testFilePath, 'r', newline='')  # 'rU' mode is deprecated
    csvObject = csv.reader(input_file)
    label = featureList[len(featureList) - 1]
    classLabels = featureAndValueMapping.get(label)
    classLabelCount = len(classLabels)
    ConfusionMatrix = [[0 for x in range(int(classLabelCount))] for x in range(int(classLabelCount))]
    for row in csvObject:
        # The original passed a global `root` here; the modelRoot parameter is used instead.
        predictedLabel = traverseDecisionTreeModel(row, modelRoot)
        ConfusionMatrix[int(row[len(row) - 1]) - 1][int(predictedLabel) - 1] += 1
        if predictedLabel == row[len(row) - 1]:
            correctlyClassifiedInstances += 1
        else:
            incorrectlyClassifiedInstances += 1
    df = DataFrame(ConfusionMatrix)
    df.columns = classLabels
    df.index = classLabels
    print("Confusion Matrix ::\n")
    print(df)
    print("Correctly Classified Instances", correctlyClassifiedInstances)
    print("Incorrectly Classified Instances", incorrectlyClassifiedInstances)
Example 11: test_missing_value_generator

def test_missing_value_generator(self):
    types = ("b", "h", "l")
    df = DataFrame([[0.0]], columns=["float_"])
    with tm.ensure_clean() as path:
        df.to_stata(path)
        with StataReader(path) as rdr:
            valid_range = rdr.VALID_RANGE
    expected_values = ["." + chr(97 + i) for i in range(26)]
    expected_values.insert(0, ".")
    for t in types:
        offset = valid_range[t][1]
        for i in range(0, 27):
            val = StataMissingValue(offset + 1 + i)
            self.assertTrue(val.string == expected_values[i])

    # Test extremes for floats
    val = StataMissingValue(struct.unpack("<f", b"\x00\x00\x00\x7f")[0])
    self.assertTrue(val.string == ".")
    val = StataMissingValue(struct.unpack("<f", b"\x00\xd0\x00\x7f")[0])
    self.assertTrue(val.string == ".z")

    # Test extremes for doubles
    val = StataMissingValue(struct.unpack("<d", b"\x00\x00\x00\x00\x00\x00\xe0\x7f")[0])
    self.assertTrue(val.string == ".")
    val = StataMissingValue(struct.unpack("<d", b"\x00\x00\x00\x00\x00\x1a\xe0\x7f")[0])
    self.assertTrue(val.string == ".z")
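A usage sketch of the public round-trip this test exercises (the file path is invented):

import pandas as pd

df = pd.DataFrame({"float_": [0.0]})
df.to_stata("example.dta")            # write a Stata .dta file
back = pd.read_stata("example.dta")   # read it back as a DataFrame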
Example 12: pivot

def pivot(self, index=None, columns=None, values=None):
    """
    See DataFrame.pivot
    """
    # A variant of the internal pivot shown in Example 4: the MultiIndex
    # is unnamed and uniqueness is checked explicitly.
    index_vals = self[index]
    column_vals = self[columns]
    mindex = MultiIndex.from_arrays([index_vals, column_vals])

    try:
        mindex._verify_integrity()
    except Exception:
        raise Exception("duplicate index/column pairs!")

    if values is None:
        items = self.columns - [index, columns]
        mat = self.reindex(columns=items).values
    else:
        items = [values]
        mat = np.atleast_2d(self[values].values).T

    stacked = DataFrame(mat, index=mindex, columns=items)

    if not mindex.is_lexsorted():
        stacked = stacked.sortlevel(level=0)

    unstacked = stacked.unstack()
    if values is not None:
        unstacked.columns = unstacked.columns.droplevel(0)
    return unstacked
Example 13: test_read_write_dta12

def test_read_write_dta12(self):
    original = DataFrame([(1, 2, 3, 4, 5, 6)],
                         columns=['astringwithmorethan32characters_1',
                                  'astringwithmorethan32characters_2',
                                  '+',
                                  '-',
                                  'short',
                                  'delete'])
    formatted = DataFrame([(1, 2, 3, 4, 5, 6)],
                          columns=['astringwithmorethan32characters_',
                                   '_0astringwithmorethan32character',
                                   '_',
                                   '_1_',
                                   '_short',
                                   '_delete'])
    formatted.index.name = 'index'
    formatted = formatted.astype(np.int32)

    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path, None)
            # Expect a single warning about the invalid column names
            # being sanitized for the Stata format.
            tm.assert_equal(len(w), 1)

        written_and_read_again = self.read_dta(path)
        tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted)
Example 14: stack_sparse_frame

def stack_sparse_frame(frame):
    """
    Only makes sense when fill_value is NaN
    """
    lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)]
    nobs = sum(lengths)

    # this is pretty fast
    minor_labels = np.repeat(np.arange(len(frame.columns)), lengths)

    inds_to_concat = []
    vals_to_concat = []
    # TODO: Figure out whether this can be reached.
    # I think this currently can't be reached because you can't build a
    # SparseDataFrame with a non-np.NaN fill value (it fails earlier).
    for _, series in compat.iteritems(frame):
        if not np.isnan(series.fill_value):
            raise TypeError('This routine assumes NaN fill value')

        int_index = series.sp_index.to_int_index()
        inds_to_concat.append(int_index.indices)
        vals_to_concat.append(series.sp_values)

    major_labels = np.concatenate(inds_to_concat)
    stacked_values = np.concatenate(vals_to_concat)
    index = MultiIndex(levels=[frame.index, frame.columns],
                       labels=[major_labels, minor_labels],
                       verify_integrity=False)

    lp = DataFrame(stacked_values.reshape((nobs, 1)), index=index,
                   columns=['foo'])
    return lp.sortlevel(level=0)
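For a dense frame, the public analogue of this long-format stacking is DataFrame.stack; a closing sketch:

import pandas as pd

df = pd.DataFrame({"a": [1.0, None], "b": [3.0, 4.0]})
# Produces a Series indexed by (row, column) pairs with the missing
# value dropped, the same shape the sparse routine builds by hand.
stacked = df.stack()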