本文整理汇总了Python中pandas.DataFrame的典型用法代码示例。如果您正苦于以下问题:Python pandas.DataFrame的具体用法?Python pandas.DataFrame怎么用?Python pandas.DataFrame使用的例子?那么,这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pandas的用法示例。
在下文中一共展示了pandas.DataFrame的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: preprocess
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def preprocess(self, data) -> pd.DataFrame:
    """Tokenize every text in ``data`` and gather the results in a dataframe.

    Each text is handed to ``self.generate_tokens``, which must return a
    ``(token_ids, mask)`` pair. The number of word tokens of a text is
    taken to be the sum of its mask.

    :param data: list of strings (e.g. sentences)
    :type data: list
    :return: dataframe with one row per input text and the columns
        ``tokens`` (token ids), ``mask`` (pad/word mask) and ``counts``
        (sum of the mask)
    :rtype: pandas.DataFrame
    """
    token_lists, masks = [], []
    for token_ids, mask in map(self.generate_tokens, data):
        token_lists.append(token_ids)
        masks.append(mask)
    # The count of a text is simply the sum over its mask entries.
    counts = [np.sum(mask) for mask in masks]
    return pd.DataFrame(
        {"tokens": token_lists, "mask": masks, "counts": counts}
    )
示例2: read_selig
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def read_selig(path):
    """Read a Selig-style airfoil file.

    The first line of the file is treated as a title line and skipped;
    every following line is parsed as a whitespace-separated pair of
    coordinates.

    Parameters
    ----------
    path : str
        Path to the Selig-style .dat file.

    Returns
    -------
    air_df : pd.DataFrame
        DataFrame with the columns ``x`` and ``y`` holding the airfoil
        coordinates.
    """
    # Skip the title line explicitly instead of parsing it as a header:
    # a title with a token count other than two (e.g. "CLARK Y AIRFOIL")
    # would otherwise produce the wrong number of columns.
    # ``sep=r"\s+"`` replaces the deprecated ``delim_whitespace=True``.
    air_df = pd.read_csv(
        path,
        sep=r"\s+",
        skiprows=1,
        header=None,
        names=['x', 'y'],
    )
    return air_df
示例3: add_group_component
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def add_group_component(self, components, name, group):
    """Add a component called ``name`` that covers all components in ``group``.

    Every distinct ``col`` value belonging to a component listed in
    ``group`` gets one additional row whose ``component`` is ``name``.

    Parameters
    ----------
    components: DataFrame with the columns ``col`` and ``component``.
    name: Name of new group component.
    group: List of components that form the group.

    Returns
    -------
    DataFrame with components. When no row matches ``group`` the input
    is returned unchanged.
    """
    in_group = components['component'].isin(set(group))
    group_cols = components.loc[in_group, 'col'].unique()
    if len(group_cols) > 0:
        new_comp = pd.DataFrame({'col': group_cols, 'component': name})
        # DataFrame.append was removed in pandas 2.0; pd.concat keeps the
        # same row order and index labels as the old append call.
        components = pd.concat([components, new_comp])
    return components
示例4: predictive_samples
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def predictive_samples(self, df):
    """Sample from the posterior predictive distribution.

    Parameters
    ----------
    df: DataFrame with dates for predictions (column ds), and capacity
        (column cap) if logistic growth. A copy is prepared, so the
        caller's frame is not passed to ``setup_dataframe`` directly.

    Returns
    -------
    Whatever ``self.sample_posterior_predictive`` returns for the
    prepared frame; per the original documentation, a dictionary with
    keys "trend" and "yhat" holding posterior predictive samples.
    """
    prepared = self.setup_dataframe(df.copy())
    return self.sample_posterior_predictive(prepared)
示例5: predict_uncertainty
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def predict_uncertainty(self, df):
    """Compute prediction intervals for yhat and trend.

    The interval bounds are the ``(1 - interval_width) / 2`` and
    ``(1 + interval_width) / 2`` quantiles (expressed as percentages) of
    the posterior predictive samples, taken along axis 1.

    Parameters
    ----------
    df: Prediction dataframe.

    Returns
    -------
    DataFrame with the columns ``yhat_lower``, ``yhat_upper``,
    ``trend_lower`` and ``trend_upper``.
    """
    samples = self.sample_posterior_predictive(df)
    lower_p = 100 * (1.0 - self.interval_width) / 2
    upper_p = 100 * (1.0 + self.interval_width) / 2

    intervals = {}
    for component in ('yhat', 'trend'):
        draws = samples[component]
        intervals['{}_lower'.format(component)] = self.percentile(
            draws, lower_p, axis=1)
        intervals['{}_upper'.format(component)] = self.percentile(
            draws, upper_p, axis=1)
    return pd.DataFrame(intervals)
示例6: _dataframe_to_html
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def _dataframe_to_html(df, precision, **kwargs):
""" Makes HTML table from provided dataframe.
Removes HTML5 non-compliant attributes (ex: `border`).
Parameters
----------
df: pandas.Dataframe
Dataframe to be converted into HTML table.
precision: int
The display precision for float values in the table.
**kwargs: keyworded arguments
Supplies keyworded arguments for func: pandas.Dataframe.to_html()
Returns
-------
html_table: String
Code for HTML table.
"""
with pd.option_context('display.precision', precision):
html_table = df.to_html(**kwargs)
html_table = html_table.replace('border="1" ', '')
return html_table
示例7: __init__
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def __init__(self, DataFrame):
    """Stock Transaction

    Arguments:
        DataFrame {pd.DataFrame} -- [input is one/multi day transaction]

    When the frame has no ``amount`` column, one is derived as
    ``volume * price * 100`` from the ``vol`` column or, failing that,
    the ``volume`` column. An ``_id`` column, if present, is dropped.
    """
    self.type = 'stock_transaction'
    self.data = DataFrame

    columns = DataFrame.columns
    if 'amount' not in columns:
        # NOTE: self.data is the caller's frame at this point, so the
        # new column is added to the passed-in DataFrame as well.
        for volume_column in ('vol', 'volume'):
            if volume_column in columns:
                self.data['amount'] = (
                    self.data[volume_column] * self.data.price * 100
                )
                break

    if '_id' in DataFrame.columns:
        self.data = self.data.drop(["_id"], axis=1)

    self.mongo_coll = DATABASE.stock_transaction
示例8: get_jobs
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def get_jobs(self, recursive=True, columns=None):
    """
    Internal function to return the jobs as dictionary rather than a pandas.DataFrame

    A ``FileTable`` database answers the query itself; for any other
    database the module-level ``get_jobs`` helper is called.

    Args:
        recursive (bool): search subprojects [True/False]
        columns (list): by default only the columns ['id', 'project'] are selected, but the user can select a subset
                        of ['id', 'status', 'chemicalformula', 'job', 'subjob', 'project', 'projectpath',
                        'timestart', 'timestop', 'totalcputime', 'computer', 'hamilton', 'hamversion', 'parentid',
                        'masterid']

    Returns:
        dict: columns are used as keys and point to a list of the corresponding values
    """
    if isinstance(self.db, FileTable):
        return self.db.get_jobs(project=self.project_path, recursive=recursive, columns=columns)

    query_arguments = dict(
        database=self.db,
        sql_query=self.sql_query,
        user=self.user,
        project_path=self.project_path,
        recursive=recursive,
        columns=columns,
    )
    return get_jobs(**query_arguments)
示例9: queue_table
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def queue_table(self, project_only=True, recursive=True, full_table=False):
    """
    Display the queuing system table as pandas.DataFrame

    The job ids of this project are collected first and then passed to
    the module-level ``queue_table`` helper.

    Args:
        project_only (bool): Query only for jobs within the current project - True by default
        recursive (bool): Include jobs from sub projects
        full_table (bool): Whether to show the entire pandas table

    Returns:
        pandas.DataFrame: Output from the queuing system, as returned by the helper
    """
    project_job_ids = self.get_job_ids(recursive=recursive)
    return queue_table(
        job_ids=project_job_ids,
        project_only=project_only,
        full_table=full_table,
    )
示例10: queue_table_global
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def queue_table_global(self, full_table=False):
    """
    Display the queuing system table as pandas.DataFrame

    Only queue entries whose job name starts with ``pi_`` are considered;
    the job id is parsed from the name by removing ``pi_`` and ``.sh``,
    and the matching database item is looked up for each of them.

    Args:
        full_table (bool): Whether to show the entire pandas table

    Returns:
        pandas.DataFrame: one row per database item found for the queued jobs,
        or None when the queue table is empty or no database is available
    """
    queue_df = queue_table(job_ids=[], project_only=False, full_table=full_table)
    if len(queue_df) == 0 or self.db is None:
        return None

    items = []
    for queue_name in queue_df["jobname"]:
        label = str(queue_name)
        if not label.startswith("pi_"):
            continue
        job_id = int(label.replace("pi_", "").replace(".sh", ""))
        items.append(self.db.get_item_by_id(job_id))
    return pandas.DataFrame(items)
示例11: get_from_table
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def get_from_table(self, path, name):
    """
    Get a specific value from a table with "Parameter" and "Value" columns

    The table returned by ``self.get(path)`` may be a pandas.DataFrame or
    a plain mapping of column name to list; the value is selected by the
    position of ``name`` within the "Parameter" column.

    Args:
        path (str): relative path to the data object
        name (str): parameter key

    Returns:
        dict, list, float, int: the value associated to the specific parameter key

    Raises:
        ValueError: if ``name`` does not occur in the "Parameter" column
    """
    df_table = self.get(path)
    # Materialise the keys as a list: on a pandas Series ``in`` tests the
    # index labels rather than the values, and ``.index`` is not a method.
    keys = list(df_table["Parameter"])
    if name not in keys:
        raise ValueError("Unknown name: {0}".format(name))
    position = keys.index(name)
    values = df_table["Value"]
    if hasattr(values, "iloc"):
        # pandas objects: select by position, independent of index labels
        return values.iloc[position]
    return values[position]
示例12: output_to_pandas
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def output_to_pandas(self, sort_by=None, h5_path="output"):
    """
    Convert output of all child jobs to a pandas DataFrame object.

    Every node found under ``h5_path`` is copied into ``self._output``
    before the dataframe is built from it.

    Args:
        sort_by (str): sort the output using pandas.DataFrame.sort_values(by=sort_by)
        h5_path (str): select child output to include - default='output'

    Returns:
        pandas.DataFrame: output as dataframe
    """
    # TODO: The output to pandas function should no longer be required
    with self.project_hdf5.open(h5_path) as hdf:
        for node in hdf.list_nodes():
            self._output[node] = hdf[node]
    frame = pandas.DataFrame(self._output)
    if sort_by is None:
        return frame
    return frame.sort_values(by=sort_by)
# TODO: make it more general and move it then into genericJob
示例13: validate
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def validate(self, df):
    """Check that the DataFrame conforms to the schema.

    The set of column labels of ``df`` is compared with the column names
    in ``self.schema["schema"]["columns"]``. When the schema lists
    ``PIPE0`` (or ``NODE0``), all columns whose name starts with ``PIPE``
    (or ``NODE``) are collapsed into that single placeholder before the
    comparison. A mismatch does not raise; it emits a warning that lists
    the missing and the extra columns.

    :param df: DataFrame whose columns are checked
    :return: None
    """
    expected_columns = set(self.schema["schema"]["columns"].keys())
    found_columns = set(df.columns.values)

    # handle some extra cases: numbered PIPE*/NODE* columns are represented
    # in the schema by their "0" entry only
    for prefix in ("PIPE", "NODE"):
        placeholder = prefix + "0"
        if placeholder in expected_columns:
            # Non-string labels cannot carry the prefix; keep them so they
            # are reported as extra columns instead of raising here.
            found_columns = {
                c for c in found_columns
                if not (isinstance(c, str) and c.startswith(prefix))
            }
            found_columns.add(placeholder)

    if found_columns != expected_columns:
        missing_columns = expected_columns - found_columns
        extra_columns = found_columns - expected_columns
        warnings.warn(
            "Dataframe does not conform to schemas.yml specification for {lm} "
            "(missing: {missing_columns}, extra: {extra_columns})".format(
                lm=self.lm,
                missing_columns=missing_columns,
                extra_columns=extra_columns,
            )
        )
示例14: predict_proba
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def predict_proba(self, X):
    """Predict probabilities of class membership for logistic regression.

    The stored coefficients (``self.statistics_["coefs"]``) are used to
    build a linear predictor: the first coefficient is the intercept and
    the remaining ones are multiplied with the predictors. Per the
    original documentation these are parameters pooled over multiply
    imputed datasets. ``self._sigmoid`` is then applied to the linear
    predictor and its result is returned as the probabilities.

    Args:
        X (pd.DataFrame): predictors to predict response

    Returns:
        np.array: prob of class membership for predicted observations.
    """
    # run validation first; the validator is called with the instance as
    # an explicit first argument, exactly as in the original code
    validated = self._predict_strategy_validator(self, X)

    # first coefficient is the intercept, the rest belong to the predictors
    coefficients = self.statistics_["coefs"].values
    intercept, slopes = coefficients[0], coefficients[1:]
    linear_predictor = intercept + np.dot(validated, slopes)
    return self._sigmoid(linear_predictor)
示例15: dataframe_pad
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import DataFrame [as 别名]
def dataframe_pad(starting_df, column_list, padwith=0.0):
    """
    Build a dataframe whose columns are exactly ``column_list``.

    Columns that exist in ``starting_df`` are taken over; columns that do
    not exist are created and filled with ``padwith``. The row index of
    ``starting_df`` is preserved.

    :param starting_df: A pd.DataFrame with named columns
    :param column_list: A list of column names
    :param padwith: The value to pad missing columns with
    :return: pd.DataFrame
    """
    def _pad_column(column_name):
        """Return the existing column, or a constant column of ``padwith``."""
        if column_name in starting_df.columns:
            return starting_df[column_name]
        # Honour the requested fill value (it used to be hard-coded to 0.0).
        return pd.Series(
            [padwith] * len(starting_df.index), index=starting_df.index
        )

    column_list = list(column_list)
    if not column_list:
        # pd.concat refuses an empty list; return a frame with no columns.
        return pd.DataFrame(index=starting_df.index)

    new_df = pd.concat(
        [_pad_column(column_name) for column_name in column_list], axis=1
    )
    new_df.columns = column_list
    return new_df