本文整理汇总了Python中pandas.concat方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.concat方法的具体用法?Python pandas.concat怎么用?Python pandas.concat使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块pandas的用法示例。
在下文中一共展示了pandas.concat方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: saveTimingInfo
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def saveTimingInfo(summary):
    """Append the current timing summary to ``test_timings.csv`` and print it.

    The ``"timing"`` column of ``summary`` is transposed into a single row,
    stamped with ``datetime.datetime.now()`` and the output of
    ``git describe``, then appended to the existing CSV.  When the CSV does
    not exist yet, the new row becomes the whole file.

    Args:
        summary: Table-like object supporting ``summary[["timing"]].T``
            (presumably a pandas DataFrame -- confirm against the caller).

    Raises:
        subprocess.CalledProcessError: If ``git describe`` exits non-zero.
    """
    timingsPath = "test_timings.csv"
    # check_output returns bytes on Python 3; decode so the CSV stores the
    # plain version text instead of the repr "b'...'".
    raw_version = subprocess.check_output(["git", "describe"])
    git_version = raw_version.decode("utf-8").strip()

    new_row = summary[["timing"]].T
    new_row["date"] = [datetime.datetime.now()]
    new_row["version"] = git_version

    if os.path.exists(timingsPath):
        timings = pandas.read_csv(timingsPath, index_col=0)
        timings = pandas.concat([timings, new_row])
    else:
        timings = new_row

    timings.to_csv(timingsPath)
    print(timings)
示例2: get_table
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def get_table(results, letter):
    """Combine measured values and a fitted curve into one table.

    ``results`` is read positionally: items 0, 1 and 2 fill the "δ",
    "Δ<letter>" and "Δ<letter>ₑᵣᵣ" columns; items 3 and 4 are the fit
    abscissa and the fit values.  Fit points are kept only when their
    abscissa lies strictly between ``x0 - pad`` and ``x1 + pad``, where
    ``x0``/``x1`` are the first/last δ values and ``pad`` is 15% of
    ``x1 - x0``.

    Returns:
        The row-wise concatenation of both tables, sorted by δ, with missing
        cells replaced by empty strings and columns ordered as
        δ, value, error, fit.
    """
    value_col = "Δ{}".format(letter)
    error_col = value_col + "ₑᵣᵣ"
    fit_col = value_col + " Fit"

    measured = Table(
        RecursiveDict(
            [("δ", results[0]), (value_col, results[1]), (error_col, results[2])]
        )
    )

    # Window of the fit curve shown around the measured δ range.
    lower, upper = map(float, measured["δ"].iloc[[0, -1]])
    margin = 0.15 * (upper - lower)
    in_window = (results[3] > lower - margin) & (results[3] < upper + margin)
    fit_x = results[3][in_window]
    fit_y = results[4][in_window]

    measured.set_index("δ", inplace=True)
    fitted = pd.DataFrame(RecursiveDict([("δ", fit_x), (fit_col, fit_y)]))
    fitted.set_index("δ", inplace=True)

    merged = pd.concat([measured, fitted], sort=True)
    merged = merged.sort_index()
    merged = merged.reset_index()
    merged = merged.rename(columns={"index": "δ"})
    merged = merged.fillna("")
    return merged[["δ", value_col, error_col, fit_col]]
示例3: gen_feat_dict
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def gen_feat_dict(self):
    """Assign feature indices to every non-ignored column of train + test.

    Train and test frames are taken from ``self.dfTrain`` / ``self.dfTest``
    when set, otherwise read from ``self.trainfile`` / ``self.testfile``.
    Columns listed in ``self.numeric_cols`` receive a single index; every
    other column receives one index per distinct value.  The mapping is
    stored in ``self.feat_dict`` and the total number of indices in
    ``self.feat_dim``.
    """
    train_df = pd.read_csv(self.trainfile) if self.dfTrain is None else self.dfTrain
    test_df = pd.read_csv(self.testfile) if self.dfTest is None else self.dfTest
    combined = pd.concat([train_df, test_df])

    feat_dict = {}
    self.feat_dict = feat_dict
    next_index = 0
    for column in combined.columns:
        if column in self.ignore_cols:
            continue
        if column in self.numeric_cols:
            # A numeric column maps to a single index.
            feat_dict[column] = next_index
            next_index += 1
            continue
        # Any other column gets one consecutive index per distinct value.
        distinct = combined[column].unique()
        feat_dict[column] = {
            value: next_index + offset for offset, value in enumerate(distinct)
        }
        next_index += len(distinct)
    self.feat_dim = next_index
示例4: prepro_pos_table
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def prepro_pos_table(pos_tables):
    """Extract the unique positions per chromosome and sort them.

    Args:
        pos_tables: A table, or a list of tables, each providing at least
            the columns ``chromo`` and ``pos``.

    Returns:
        DataFrame with the columns ``chromo`` and ``pos`` that lists every
        distinct position of each chromosome once, sorted by ``chromo`` and
        then ``pos``.

    Raises:
        ValueError: If ``pos_tables`` is an empty list.
    """
    if not isinstance(pos_tables, list):
        pos_tables = [pos_tables]
    if not pos_tables:
        raise ValueError('pos_tables must contain at least one table')

    # Concatenate once instead of re-copying the growing table for every
    # additional input; a single table is used as is.
    if len(pos_tables) == 1:
        pos_table = pos_tables[0]
    else:
        pos_table = pd.concat(pos_tables)

    pos_table = pos_table.groupby('chromo').apply(
        lambda df: pd.DataFrame({'pos': np.unique(df['pos'])}))
    pos_table = pos_table.reset_index()
    # Sort out of place: sorting the column selection in place can trigger
    # pandas' SettingWithCopyWarning.
    pos_table = pos_table[['chromo', 'pos']].sort_values(['chromo', 'pos'])
    return pos_table
示例5: concat
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def concat(*plots):
    """Merge the series of several plots into one new chart.

    The new chart is a ``DateTimeLine`` when the first plot passed in is
    one, otherwise a ``Line``.  Every series found in each plot's ``y_data``
    is added to the chart; the chart's ``y_data`` attribute is set to the
    merged mapping, in which a later plot overrides an earlier one for the
    same series name.
    """
    chart = DateTimeLine() if isinstance(plots[0], DateTimeLine) else Line()

    merged = {}
    for plot in plots:
        for name, values in plot.y_data.items():
            merged[name] = values
            chart.add(name, values)

    chart.y_data = merged
    return chart
示例6: transform
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def transform(self, numerical_feature_list, categorical_feature_list):
    """Join numerical and categorical feature frames column-wise.

    Args:
        numerical_feature_list: list of numerical features
        categorical_feature_list: list of categorical features

    Returns:
        Dictionary with following keys:
            features: DataFrame with the concatenated features cast to
                float32, with the index turned back into a column
            feature_names: list of the column names of ``features``
            categorical_features: result of ``self._get_feature_names`` for
                the categorical feature list
    """
    frames = numerical_feature_list + categorical_feature_list
    for frame in frames:
        formatted = self._format_target(frame)
        # NOTE(review): the index is set on whatever _format_target returns;
        # the entries of `frames` only change if that is the same object --
        # confirm against _format_target.
        formatted.set_index(self.id_column, drop=True, inplace=True)

    combined = pd.concat(frames, axis=1)
    combined = combined.astype(np.float32)
    combined = combined.reset_index()

    return {
        'features': combined,
        'feature_names': list(combined.columns),
        'categorical_features': self._get_feature_names(categorical_feature_list),
    }
示例7: test_weighted_mean
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def test_weighted_mean(dim, level, wgts_name):
    """Compare ``esmlab.weighted_mean`` with a pandas/numpy reference.

    With a falsy ``dim`` the result is compared against a NaN-masked
    weighted average of the ``da1`` and ``da2`` columns.  Otherwise it is
    compared against ``wavg`` applied per group of index level ``level``.
    """
    res = esmlab.weighted_mean(dset, dim=dim, weights=wgts[wgts_name])
    data_frame = dset.to_dataframe()
    weight_series = wgts.to_dataframe()[wgts_name]

    if dim:
        reference = data_frame.groupby(level=level).apply(
            wavg, weights=wgts[wgts_name].data, col_names=['da1', 'da2']
        )
        assert_frame_equal(res.to_dataframe().sort_index(), reference.sort_index())
        return

    actual = res.to_array().data
    combined = pd.concat([data_frame, weight_series], axis=1)

    def _masked_average(column):
        # NaN entries are masked out before averaging.
        # NOTE(review): the weights always come from the `t_s_wgts` column,
        # not from `wgts_name` -- confirm this is intended.
        masked = np.ma.MaskedArray(column, mask=np.isnan(column))
        return np.ma.average(masked, weights=combined.t_s_wgts)

    reference = combined.apply(_masked_average)[['da1', 'da2']]
    reference = reference.to_xarray().data
    np.testing.assert_allclose(actual, reference)
示例8: observe
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def observe(self) -> np.ndarray:
    """Return the rows to be observed by the agent as a NumPy array.

    A copy of ``self.rows`` is used.  When it holds fewer than
    ``self.window_size`` rows, zero-filled rows are prepended until the
    window is full.  NaN values are replaced by 0 before returning.

    Returns:
        The (possibly padded) rows, passed through ``np.nan_to_num``.
    """
    rows = self.rows.copy()

    missing = self.window_size - len(rows)
    if missing > 0:
        # Pad at the top so the available rows stay at the end of the window.
        padding = pd.DataFrame(
            np.zeros((missing, rows.shape[1])), columns=self.rows.columns
        )
        rows = pd.concat([padding, rows], ignore_index=True, sort=False)

    if isinstance(rows, pd.DataFrame):
        rows = rows.fillna(0, axis=1).values

    return np.nan_to_num(rows)
示例9: test_bert_explain_local
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def test_bert_explain_local(self):
    """Fit the BERT-based rationale explainer and explain one sentence.

    Passes when the local explanation of ``SENTENCE`` holds exactly one
    importance value per whitespace-separated token.
    """
    train_data = get_ssts_dataset('train')
    test_data = get_ssts_dataset('test')
    train_text, test_text = train_data[TEXT_COL], test_data[TEXT_COL]
    preprocessor = BertPreprocessor()

    def _labelled_frame(split, text):
        # Label column side by side with the preprocessed text.
        return pd.concat([split[LABEL_COL], preprocessor.preprocess(text)], axis=1)

    df_train = _labelled_frame(train_data, train_text)
    df_test = _labelled_frame(test_data, test_text)

    explainer = IntrospectiveRationaleExplainer(
        classifier_type=CLASSIFIER_TYPE_BERT, cuda=CUDA
    )
    explainer.build_model_config(BERT_MODEL_CONFIG)
    explainer.set_preprocessor(preprocessor)
    explainer.load()
    explainer.fit(df_train, df_test)

    explanation = explainer.explain_local(SENTENCE)
    # BERT wraps a sentence in [CLS] ... [SEP]; those markers are removed, so
    # the number of importance values must equal the plain token count.
    assert len(explanation.local_importance_values) == len(SENTENCE.split())
示例10: test_rnn_explain_local
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def test_rnn_explain_local(self):
    """Fit the RNN-based rationale explainer and explain one sentence.

    The GloVe preprocessor vocabulary is built from the text of the train
    and test splits together.  Passes when the local explanation of
    ``SENTENCE`` holds exactly one importance value per
    whitespace-separated token.
    """
    train_data = get_ssts_dataset('train')
    test_data = get_ssts_dataset('test')
    all_data = pd.concat([train_data, test_data])
    train_text, test_text = train_data[TEXT_COL], test_data[TEXT_COL]

    preprocessor = GlovePreprocessor(
        count_threshold=TOKEN_COUNT_THRESHOLD, token_cutoff=MAX_SENT_COUNT
    )
    preprocessor.build_vocab(all_data[TEXT_COL])

    def _labelled_frame(split, text):
        # Label column side by side with the preprocessed text.
        return pd.concat([split[LABEL_COL], preprocessor.preprocess(text)], axis=1)

    df_train = _labelled_frame(train_data, train_text)
    df_test = _labelled_frame(test_data, test_text)

    explainer = IntrospectiveRationaleExplainer(
        classifier_type=CLASSIFIER_TYPE_RNN, cuda=CUDA
    )
    explainer.build_model_config(RNN_MODEL_CONFIG)
    explainer.set_preprocessor(preprocessor)
    explainer.load()
    explainer.fit(df_train, df_test)

    explanation = explainer.explain_local(SENTENCE)
    assert len(explanation.local_importance_values) == len(SENTENCE.split())
示例11: calc_allroiidx_distances
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def calc_allroiidx_distances(vertices_df, roi, surfL, surfR, pvertex_colname):
    '''
    Loop over all subjects calculating distances for one roi.

    Args:
        vertices_df: table with at least the columns roiidx, hemi and subid
        roi: value of roiidx selecting the rows to process
        surfL: surface used when the roi's hemi value is "L"
        surfR: surface used when the roi's hemi value is "R"
        pvertex_colname: passed through to calc_subdistances_distances

    Returns:
        The results of calc_subdistances_distances for every unique subid in
        vertices_df, concatenated row-wise with a fresh integer index.

    Raises:
        ValueError: if the hemi value of the roi is neither "L" nor "R".
    '''
    ## subset the dataframe (done once; also used for the hemisphere lookup)
    roidf = vertices_df.loc[vertices_df.roiidx == roi, :]

    ## determine the surface for measurement
    hemi = roidf['hemi'].values[0]
    if hemi == "L":
        surf = surfL
    elif hemi == "R":
        surf = surfR
    else:
        # Fail here with a clear message instead of hitting an unbound
        # `surf` further down.
        raise ValueError(
            'hemi must be "L" or "R" for roi {!r}, got {!r}'.format(roi, hemi)
        )

    ## run all the subjects lazily and concatenate all the results
    all_dfs = (
        calc_subdistances_distances(roidf, surf, subid, pvertex_colname)
        for subid in vertices_df.subid.unique()
    )
    return pd.concat(all_dfs, ignore_index=True)
示例12: match_arrivals_with_schedule
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def match_arrivals_with_schedule(estimated_trips, schedule_direction):
    """Match estimated arrivals against the schedule, one stop at a time.

    Both input frames get a ``datetime_utc`` column written in place, parsed
    from their ``datetime`` column as UTC.  The schedule is then indexed and
    sorted by that timestamp, and ``match_times`` is called for each
    ``stop_id`` group of ``estimated_trips`` with the schedule rows of the
    same stop.  Groups for which ``match_times`` returns ``None`` are
    skipped.

    Args:
        estimated_trips: DataFrame with ``datetime`` and ``stop_id`` columns.
        schedule_direction: DataFrame with ``datetime`` and ``stop_id``
            columns.

    Returns:
        The concatenated ``match_times`` results with an added
        ``since_scheduled`` column (``datetime_utc`` minus
        ``closest_scheduled``; the latter is presumably supplied by
        ``match_times`` -- confirm).

    Raises:
        ValueError: From ``pd.concat`` when no stop produced a match.
    """
    schedule_direction.loc[:, "datetime_utc"] = pd.to_datetime(
        schedule_direction["datetime"], utc=True
    )
    estimated_trips.loc[:, "datetime_utc"] = pd.to_datetime(
        estimated_trips["datetime"], utc=True
    )
    schedule_direction = schedule_direction.set_index(
        pd.DatetimeIndex(schedule_direction["datetime_utc"])
    ).sort_index()

    # Group by the bare column name: grouping by the one-element list
    # ["stop_id"] makes recent pandas yield 1-tuple keys, which do not
    # compare correctly against the scalar stop ids below.
    matched_estimates = []
    for stop_id, stop_estimates in estimated_trips.groupby("stop_id"):
        matched = match_times(
            stop_id,
            stop_estimates,
            schedule_direction[schedule_direction["stop_id"] == stop_id],
        )
        if matched is not None:
            matched_estimates.append(matched)

    matched_estimates = pd.concat(matched_estimates)
    matched_estimates["since_scheduled"] = (
        matched_estimates["datetime_utc"] - matched_estimates["closest_scheduled"]
    )
    return matched_estimates
示例13: _ecg_delineate_check
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def _ecg_delineate_check(waves, rpeaks):
    """Replace delineated features with np.nan when their standardized
    distance from the R-peaks is more than 3.

    Args:
        waves: dict mapping each feature name to its sequence of values.
        rpeaks: sequence stored next to the features as the ``ECG_R_Peaks``
            column.

    Returns:
        dict of lists with the same keys as ``waves``; entries whose
        ``Dist_R_<feature>`` value exceeds 3 are replaced by ``np.nan``.
    """
    df = pd.DataFrame.from_dict(waves)
    features_columns = df.columns

    df = pd.concat([df, pd.DataFrame({"ECG_R_Peaks": rpeaks})], axis=1)

    # loop through all columns to calculate the z distance
    # NOTE(review): every iteration passes the same arguments, so one call
    # may be sufficient -- confirm against _calculate_abs_z.
    for column in features_columns:  # pylint: disable=W0612
        df = _calculate_abs_z(df, features_columns)

    # Replace with nan if distance > 3.  One vectorised step per column
    # instead of chained `df[col][i] = ...`, which may write to a temporary
    # copy and leave `df` untouched.
    for col in features_columns:
        too_far = df["Dist_R_" + col] > 3
        df[col] = df[col].mask(too_far)

    # Return df without distance columns
    df = df[features_columns]
    waves = df.to_dict("list")
    return waves
示例14: movie_preprocessing
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def movie_preprocessing(movie):
    """One-hot encode the '|'-separated ``tag`` column of a movie table.

    Args:
        movie: DataFrame with a ``tag`` column of strings such as
            ``"Action|Comedy"``.

    Returns:
        A new DataFrame with the original columns except ``tag``, followed
        by one 0/1 column per distinct tag, named ``tag0``, ``tag1``, ...
        in alphabetical tag order.
    """
    movie_col = list(movie.columns)
    movie_tags = [doc.split('|') for doc in movie['tag']]

    # Sort the distinct tags so the column order does not depend on set
    # iteration order, which varies between interpreter runs.
    tags = sorted(set(itertools.chain.from_iterable(movie_tags)))
    tag_position = {tag: j for j, tag in enumerate(tags)}

    # use one-hot encoding for movie genres (here called tag)
    tag_dummy = np.zeros([len(movie), len(tags)])
    for i, row_tags in enumerate(movie_tags):
        for tag in row_tags:
            tag_dummy[i, tag_position[tag]] = 1

    # combine the tag_dummy one-hot encoding table to original movie files;
    # sharing movie's index keeps the rows aligned for any index.
    dummy_frame = pd.DataFrame(tag_dummy, index=movie.index)
    movie = pd.concat([movie, dummy_frame], axis=1)
    movie_col.extend('tag' + str(j) for j in range(len(tags)))
    movie.columns = movie_col
    return movie.drop('tag', axis=1)
示例15: balance_dataset
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import concat [as 别名]
def balance_dataset(data):
    """Downsample every emotion class to the size of the smallest class.

    Args:
        data: DataFrame with an ``emotion`` column.  The last column is
            returned as the target ``y``, all other columns as ``X``.

    Returns:
        Tuple ``(balanced_data, X, y, column_names)``:
            balanced_data: the randomly sampled rows of all classes,
                concatenated with the emotion label as outer index level
            X: all columns of ``balanced_data`` except the last
            y: the last column of ``balanced_data`` as a one-column
                DataFrame of dtype ``category``
            column_names: list of the column names of ``data``
    """
    # define column names
    column_names = list(data.columns)

    # ensure an equal number of samples per class
    emotions = list(data.emotion.unique())
    per_emotion = [data[data.emotion == emotion] for emotion in emotions]
    n_samples = min(len(subset) for subset in per_emotion)

    # Key each piece by its emotion: there is one piece per emotion, so the
    # column names match the pieces neither in number nor in meaning.
    balanced_data = pd.concat(
        [subset.sample(n_samples) for subset in per_emotion],
        axis=0,
        keys=emotions,
    )

    # split data
    X = balanced_data.iloc[:, :-1]
    y = balanced_data.iloc[:, -1:].astype('category')
    return balanced_data, X, y, column_names