本文整理汇总了Python中pandas.dataframe方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.dataframe方法的具体用法？Python pandas.dataframe怎么用？Python pandas.dataframe使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas的用法示例。
在下文中一共展示了pandas.dataframe方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: has_all_feature_columns
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def has_all_feature_columns(self, dset_df):
    """
    Check whether dset_df already holds every feature column required by the
    current featurization.

    Args:
        dset_df (DataFrame): Feature matrix

    Returns:
        (Boolean): True when every column reported by
        self.featurization.get_feature_columns() is present in dset_df,
        False when at least one of them is missing.
    """
    required_cols = set(self.featurization.get_feature_columns())
    available_cols = set(dset_df.columns.values)
    # Nothing missing <=> the required set is contained in the available set
    return required_cols.issubset(available_cols)
# *************************************************************************************
示例2: load_featurized_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def load_featurized_data(self):
    """Load prefeaturized data for the current dataset.

    The full dataset is loaded first. If it already contains all required
    feature columns it is returned directly and self.dataset_key is set to
    self.params.dataset_key. Otherwise a CSV is read from a path built from
    the directory of self.params.dataset_key, the featurization's data
    subdirectory and its featurized dataset name; self.dataset_key is then
    set to that path.

    Returns:
        featurized_dset_df (pd.DataFrame): dataframe of the prefeaturized
        data. The original notes say it needs further processing by
        featurization.extract_prefeaturized_data().
    """
    # Fast path: the raw dataset may already carry the feature columns
    full_df = self.load_full_dataset()
    if self.has_all_feature_columns(full_df):
        self.dataset_key = self.params.dataset_key
        return full_df

    # Slow path: locate the featurized file next to the original dataset
    file_name = self.featurization.get_featurized_dset_name(self.dataset_name)
    base_dir = os.path.dirname(self.params.dataset_key)
    featurized_dir = os.path.join(base_dir, self.featurization.get_featurized_data_subdir())
    featurized_path = os.path.join(featurized_dir, file_name)

    featurized_df = pd.read_csv(featurized_path)
    # Only record the new key once the file has actually been read
    self.dataset_key = featurized_path
    return featurized_df
# ****************************************************************************************
示例3: query_account
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_account(self, format=""):
    """
    Query account data through the "oms.query_account" remote call.

    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result (a pd.DataFrame according to the original
            notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {}
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("oms.query_account", rpc_params)
    return utils.extract_result(call_result, data_format=data_format, class_name="Account")
示例4: query_position
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_position(self, mode="all", securities="", format=""):
    """
    Query positions through the "oms.query_position" remote call.

    mode: sent to the server unchanged as "mode"
    securities: separated by ","; sent to the server as "security"
    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result (a pd.DataFrame according to the original
            notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    rpc_params = {"mode": mode, "security": securities}
    data_format = self._get_format(format, "pandas")
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("oms.query_position", rpc_params)
    return utils.extract_result(call_result, data_format=data_format, class_name="Position")
示例5: query_net_position
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_net_position(self, mode="all", securities="", format=""):
    """
    Query net positions through the "oms.query_net_position" remote call.

    mode: sent to the server unchanged as "mode"
    securities: separated by ","; sent to the server as "security"
    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result (a pd.DataFrame according to the original
            notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    rpc_params = {"mode": mode, "security": securities}
    data_format = self._get_format(format, "pandas")
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("oms.query_net_position", rpc_params)
    return utils.extract_result(call_result, data_format=data_format, class_name="NetPosition")
示例6: query_task
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_task(self, task_id=-1, format=""):
    """
    Query tasks through the "oms.query_task" remote call.

    task_id: sent to the server as "task_id"; -1 means all (per the original
             notes)
    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result (a pd.DataFrame according to the original
            notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    rpc_params = {"task_id": task_id}
    data_format = self._get_format(format, "pandas")
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("oms.query_task", rpc_params)
    return utils.extract_result(call_result, data_format=data_format, class_name="Task")
示例7: query_trade
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_trade(self, task_id=-1, format=""):
    """
    Query trades through the "oms.query_trade" remote call.

    task_id: sent to the server as "task_id"; -1 means all (per the original
             notes)
    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result (a pd.DataFrame according to the original
            notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    rpc_params = {"task_id": task_id}
    data_format = self._get_format(format, "pandas")
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("oms.query_trade", rpc_params)
    return utils.extract_result(call_result, data_format=data_format, class_name="Trade")
示例8: query_portfolio
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def query_portfolio(self, format=""):
    """
    Query the portfolio through the "pms.query_portfolio" remote call.

    format: requested result format, resolved via self._get_format with
            "pandas" as its second argument
    return: (None, msg) when the session check fails; otherwise the value of
            utils.extract_result, called with index_column="security"
            (a pd.DataFrame according to the original notes)
    """
    session_ok, session_msg = self._check_session()
    if not session_ok:
        return (None, session_msg)

    data_format = self._get_format(format, "pandas")
    rpc_params = {}
    # Ask the server for a column-oriented payload when a DataFrame is wanted
    if data_format == "pandas":
        rpc_params["format"] = "columnset"

    call_result = self._remote.call("pms.query_portfolio", rpc_params)
    # NOTE(review): class_name is "NetPosition", the same value used by
    # query_net_position - confirm this is intended for portfolio rows.
    return utils.extract_result(call_result, index_column="security", data_format=data_format, class_name="NetPosition")
示例9: calc_worst_hour
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def calc_worst_hour(latitude, weather_data, solar_window_solstice):
    """
    Find the first hour of the solar window on the winter solstice, used for panel spacing.
    http://www.affordable-solar.com/learning-center/building-a-system/calculating-tilted-array-spacing/

    The solstice day is 21 December when latitude > 0 and 21 June otherwise
    (a latitude of exactly 0 takes the June branch).

    :param latitude: latitude of the site [degree]
    :type latitude: float
    :param weather_data: weather data of the site; must provide 'month', 'day' and 'hour' columns
    :type weather_data: pd.DataFrame
    :param solar_window_solstice: the desired hours of shade-free solar window on the winter solstice.
    :type solar_window_solstice: float
    :return worst_hour: index label of the first solstice-day row whose hour equals
        12 - round(solar_window_solstice / 2)
    :rtype worst_hour: same type as the labels of weather_data.index
    """
    if latitude > 0:
        solstice_query = 'month == 12 & day == 21'
    else:
        solstice_query = 'month == 6 & day == 21'

    solstice_day = weather_data.query(solstice_query)
    window_start_hour = 12 - round(solar_window_solstice / 2)
    matching_rows = solstice_day[solstice_day.hour == window_start_hour]
    return matching_rows.index[0]
示例10: transform
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def transform(self, X, end_index_list=None):
    """Majority-vote the labels in X over consecutive segments.

    Segment k covers rows end_index_list[k-1] (0 for the first segment) up to,
    but not including, end_index_list[k]. For each segment the most frequent
    label is selected.

    Args:
        X (list, np.ndarray or pd.DataFrame): labels to vote on. Lists and
            DataFrames are converted to numpy arrays first.
        end_index_list (list of int, optional): exclusive end index of each
            segment. Falls back to self.end_index_list when omitted.

    Returns:
        X unchanged when no end_index_list is available; otherwise an
        np.ndarray holding one voted label per segment.
    """
    if end_index_list is None:
        end_index_list = self.end_index_list  # in case the end_index_list was set as meta_data
    if end_index_list is None:
        return X

    if isinstance(X, list):
        X = np.array(X)
    elif isinstance(X, pd.DataFrame):
        # pd.dataframe does not exist and as_matrix() was removed in pandas 1.0
        X = X.to_numpy()

    voted_labels = []
    prev_index = 0
    for index in end_index_list:
        labels = X[prev_index:index]
        values, counts = np.unique(labels, return_counts=True)
        # np.unique returns sorted values, so a tie resolves to the smallest label.
        # Store the label itself, not its position in `values`.
        voted_labels.append(values[np.argmax(counts)])
        # Advance so the next segment starts where this one ended
        prev_index = index
    return np.array(voted_labels)
示例11: from_dict_of_values_to_df
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def from_dict_of_values_to_df(data_dict, ts_index, columns=None):
    """
    Build a pd.DataFrame in which every column is one scalar repeated along ts_index

    :param data_dict: A dict of scalars
    :param ts_index: A timeseries index
    :param columns: (optional) A list of str selecting which keys of data_dict become columns
                    [every entry must be a key of data_dict]; defaults to all keys
    :return: pd.DataFrame indexed by ts_index, column names from data_dict, values repeated scalars
    """
    column_names = list(data_dict.keys() if columns is None else columns)

    repeated_values = {
        name: [data_dict[name]] * len(ts_index)
        for name in column_names
    }
    return pd.DataFrame(repeated_values, ts_index)
示例12: dataframe_pad
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def dataframe_pad(starting_df, column_list, padwith=0.0):
    """
    Takes a dataframe and adds extra columns if necessary so we end up with columns named column_list

    Columns of starting_df that are not in column_list are dropped; columns in
    column_list that starting_df lacks are filled with padwith.

    :param starting_df: A pd.DataFrame with named columns
    :param column_list: A list of column names
    :param padwith: The value to pad missing columns with
    :return: pd.DataFrame with the index of starting_df and exactly the columns in column_list
    """
    def _pad_column(column_name):
        if column_name in starting_df.columns:
            return starting_df[column_name]
        # Missing column: fill with the requested pad value (was hard-coded to 0.0)
        return pd.Series([padwith] * len(starting_df.index), starting_df.index)

    new_data = [_pad_column(column_name) for column_name in column_list]
    if not new_data:
        # pd.concat refuses an empty list; return a column-less frame on the same index
        return pd.DataFrame(index=starting_df.index)

    new_df = pd.concat(new_data, axis=1)
    new_df.columns = column_list
    return new_df
示例13: check_params_with_data
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def check_params_with_data(self, df, actual_field, predicted_field):
    """ Check parameters against ground-truth values.

    This implementation performs no check: it always raises, reporting that
    the scoring method named by self.scoring_name does not support the
    operation. The original notes describe the intended contract as handling
    errors regarding cardinality of ground-truth labels and checking the
    pos_label param, on data that is already cleaned and categorical, and
    say it is overwritten as needed.

    Args:
        df (pd.DataFrame): input dataframe
        actual_field (str): name of ground-truth field
        predicted_field (str): name of predicted field

    Raises:
        MLSPLNotImplementedError: always, in this implementation
    """
    template = 'Scoring method {} does not support "check_params_with_data" method.'
    error_text = template.format(self.scoring_name)
    raise MLSPLNotImplementedError(error_text)
示例14: score
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def score(self, df, options):
    """ Compute the score.

    Runs three steps in order: self.prepare_input_data extracts the actual
    and predicted labels, self.scoring_function scores them using
    self.params as keyword arguments, and self.create_output wraps the
    result.

    Args:
        df (pd.DataFrame): input dataframe
        options (dict): passed options

    Returns:
        df_output (pd.DataFrame): output dataframe
    """
    # Step 1: ground-truth and predicted labels
    prepared = self.prepare_input_data(df, self.actual_field, self.predicted_field, options)
    actual_labels, predicted_labels = prepared

    # Step 2: raw scoring result
    raw_score = self.scoring_function(actual_labels, predicted_labels, **self.params)

    # Step 3: wrap the result for output
    return self.create_output(self.scoring_name, raw_score)
示例15: create_output
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import dataframe [as 别名]
def create_output(self, scoring_name, result):
    """ Create output dataframe

    The frame has a single row holding result. Its columns are
    self.params['labels'] when that entry exists and is not None, otherwise
    the single column scoring_name.

    Args:
        scoring_name (str): scoring function name
        result (float, dict or array): output of sklearn scoring function

    Returns:
        output_df (pd.DataFrame): output dataframe
    """
    class_labels = self.params.get('labels', None)
    # labels is the union of predicted & actual classes (eg. average=none,
    # confusion matrix); without it, fall back to the scoring name
    column_names = class_labels if class_labels is not None else [scoring_name]
    return pd.DataFrame(data=[result], columns=column_names)