本文整理汇总了Python中pandas.MultiIndex类的典型用法代码示例。如果您正苦于以下问题：Python pandas.MultiIndex的具体用法？Python pandas.MultiIndex怎么用？Python pandas.MultiIndex使用的例子？那么恭喜您，这里精选的代码示例或许可以为您提供帮助。您也可以进一步了解该类所在模块pandas的用法示例。
在下文中一共展示了pandas.MultiIndex方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: _link_index
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def _link_index(self, df_a, df_b):
"""Build an index for linking two datasets.
Parameters
----------
df_a : (tuple of) pandas.Series
The data of the left DataFrame to build the index with.
df_b : (tuple of) pandas.Series
The data of the right DataFrame to build the index with.
Returns
-------
pandas.MultiIndex
A pandas.MultiIndex with record pairs. Each record pair
contains the index values of two records.
"""
raise NotImplementedError(
"Not possible to call index for the BaseEstimator"
)
示例2: fit_predict
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def fit_predict(self, comparison_vectors, match_index=None):
    """Train the classifier, then classify the same comparison vectors.

    Parameters
    ----------
    comparison_vectors : pandas.DataFrame
        The comparison vectors; passed to ``fit`` and then to ``predict``.
    match_index : pandas.MultiIndex, optional
        The true matches; forwarded unchanged to ``fit``.

    Returns
    -------
    pandas.Series
        The value returned by ``predict``: labels 1 (for the matches) and
        0 (for the non-matches).

    Notes
    -----
    ``return_type`` is not a parameter of this method. It is deprecated;
    use ``recordlinkage.set_option('classification.return_type', 'index')``
    instead.
    """
    # Fitting comes first so that predict() runs against the trained state.
    self.fit(comparison_vectors, match_index)
    return self.predict(comparison_vectors)
示例3: _febrl_links
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def _febrl_links(df):
"""Get the links of a FEBRL dataset."""
index = df.index.to_series()
keys = index.str.extract(r'rec-(\d+)', expand=True)[0]
index_int = numpy.arange(len(df))
df_helper = pandas.DataFrame({
'key': keys,
'index': index_int
})
# merge the two frame and make MultiIndex.
pairs_df = df_helper.merge(
df_helper, on='key'
)[['index_x', 'index_y']]
pairs_df = pairs_df[pairs_df['index_x'] > pairs_df['index_y']]
return pandas.MultiIndex(
levels=[df.index.values, df.index.values],
codes=[pairs_df['index_x'].values, pairs_df['index_y'].values],
names=[None, None],
verify_integrity=False
)
示例4: true_positives
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def true_positives(links_true, links_pred):
    """Count the number of True Positives.

    Returns the number of correctly predicted links, also called the number
    of True Positives (TP).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of correctly predicted links.
    """
    links_true = _get_multiindex(links_true)
    links_pred = _get_multiindex(links_pred)

    # Use the explicit set method: the `&` operator on an Index no longer
    # means set intersection in current pandas releases.
    return len(links_true.intersection(links_pred))
示例5: false_positives
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def false_positives(links_true, links_pred):
    """Count the number of False Positives.

    A false positive is a pair that was predicted as a link but is not
    among the true links (a true non-link predicted as a link). The count
    of such pairs is known as the number of False Positives (FP).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false positives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # Predicted pairs that are absent from the true links.
    wrongly_predicted = predicted.difference(actual)
    return len(wrongly_predicted)
示例6: false_negatives
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def false_negatives(links_true, links_pred):
    """Count the number of False Negatives.

    A false negative is a true link that does not appear among the
    predicted links (a true link predicted as a non-link). The count of
    such pairs is known as the number of False Negatives (FN).

    Parameters
    ----------
    links_true: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The true (or actual) links.
    links_pred: pandas.MultiIndex, pandas.DataFrame, pandas.Series
        The predicted links.

    Returns
    -------
    int
        The number of false negatives.
    """
    actual = _get_multiindex(links_true)
    predicted = _get_multiindex(links_pred)

    # True pairs that the prediction missed.
    missed = actual.difference(predicted)
    return len(missed)
示例7: test_iterative
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_iterative(self):
    """Test the iterative behaviour.

    Indexing ``self.a`` against ``self.b`` in one call must give the same
    pairs as indexing two slices of ``self.a`` separately and appending
    the results.
    """

    def as_sorted_frame(multi_index):
        # Wrap the MultiIndex in an empty frame so that sort_index() can
        # normalise the pair order before the two results are compared.
        return pd.DataFrame(index=multi_index).sort_index()

    # SINGLE STEP
    single_step = as_sorted_frame(Full().index((self.a, self.b)))

    # MULTI STEP
    indexer = Full()
    chunk_one = indexer.index((self.a[0:50], self.b))
    chunk_two = indexer.index((self.a[50:100], self.b))
    multi_step = as_sorted_frame(chunk_one.append(chunk_two))

    pdt.assert_frame_equal(single_step, multi_step)
示例8: test_index_names_pandas023
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_index_names_pandas023(self, index_class):
    """Check that the two levels of a generated pair index get distinct names.

    Pandas changed the behaviour of MultiIndex names:
    https://github.com/pandas-dev/pandas/pull/18882
    https://github.com/J535D165/recordlinkage/issues/55
    This test checks compatibility with that change for both the linking
    and the deduplication code paths.
    """
    # Give both frames an index with the same explicit name ('index').
    index_a = pd.Index(self.a.index, name='index')
    df_a = pd.DataFrame(self.a, index=index_a)
    index_b = pd.Index(self.b.index, name='index')
    df_b = pd.DataFrame(self.b, index=index_b)

    # Linking: when the levels are named, the names must differ.
    pairs_link = index_class._link_index(df_a, df_b)
    if pairs_link.names[0] is not None:
        assert pairs_link.names[0] != pairs_link.names[1]

    # Deduplication: the guard must inspect the dedup result itself.
    # Guarding on pairs_link here would skip this assertion whenever the
    # link index happens to be unnamed.
    pairs_dedup = index_class._dedup_index(df_a)
    if pairs_dedup.names[0] is not None:
        assert pairs_dedup.names[0] != pairs_dedup.names[1]
示例9: test_lower_triangular
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_lower_triangular(self, index_class):
    """Check that every generated pair lies in the strict lower triangle."""
    # Rebuild the frame with an explicitly named index.
    named_index = pd.Index(self.a.index, name='index')
    frame = pd.DataFrame(self.a, index=named_index)

    observed = index_class.index(frame)

    # Expected: all (row, col) positions with row > col, expressed as codes
    # into the frame's own index values.
    labels = frame.index.values
    lower_codes = np.tril_indices(len(frame.index), k=-1)
    expected = pd.MultiIndex(
        levels=[labels, labels],
        codes=lower_codes,
        verify_integrity=False,
    )

    # No observed pair may fall outside the lower triangle.
    outside = observed.difference(expected)
    assert len(outside) == 0
示例10: test_krebs_dataset_download
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_krebs_dataset_download():
    """Load the krebsregister dataset from a clean data home and verify it.

    Checks that the ten block archives exist on disk afterwards, and that
    the returned objects have the expected types and sizes.
    """
    # remove downloaded datasets
    clear_data_home()

    krebs_data, krebs_matches = load_krebsregister()

    for i in range(1, 11):
        assert Path(get_data_home(), "krebsregister",
                    "block_{}.zip".format(i)).is_file()

    # Check the types for real: `assert type(x), T` always passes because
    # T is interpreted as the assertion message.
    assert isinstance(krebs_data, pandas.DataFrame)
    assert isinstance(krebs_matches, pandas.MultiIndex)

    # count the number of records
    assert len(krebs_data) == 5749132
    assert len(krebs_matches) == 20931
示例11: _infer_choices_with_experience
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def _infer_choices_with_experience(params, options):
"""Infer choices with experiences.
Example
-------
>>> options = {"covariates": {"a": "exp_white_collar + exp_a", "b": "exp_b >= 2"}}
>>> index = pd.MultiIndex.from_product([["category"], ["a", "b"]])
>>> params = pd.Series(index=index, dtype="object")
>>> _infer_choices_with_experience(params, options)
['a', 'b', 'white_collar']
"""
covariates = options["covariates"]
parameters = params.index.get_level_values(1)
used_covariates = [cov for cov in covariates if cov in parameters]
matches = []
for param in parameters:
matches += re.findall(r"\bexp_([A-Za-z_]+)\b", str(param))
for cov in used_covariates:
matches += re.findall(r"\bexp_([A-Za-z_]+)\b", covariates[cov])
return sorted(set(matches))
示例12: multi_index_insert_row
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def multi_index_insert_row(df, index_row, values_row):
    """Return a new dataframe with a row inserted for a multi-index dataframe.

    The result is ordered by the multi-index levels. The input frame is
    not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a MultiIndex on its rows.
    index_row : sequence
        One value per index level, identifying the new row.
    values_row : object
        Data for the new row, passed to the ``pandas.DataFrame``
        constructor together with ``df.columns`` (e.g. ``[[v1, v2]]``).

    Returns
    -------
    pandas.DataFrame
        ``df`` plus the new row, in index order.
    """
    # from_tuples builds the one-row index without the constructor keyword
    # that was renamed from `labels` to `codes` in pandas 0.24, so no
    # version check is needed.
    row_index = pd.MultiIndex.from_tuples([tuple(index_row)])
    row = pd.DataFrame(values_row, index=row_index, columns=df.columns)
    df = pd.concat((df, row))

    if df.index.is_monotonic_increasing:
        # We've just appended a row to an already-sorted dataframe
        return df

    # The df wasn't sorted or the row has to be put in the middle somewhere
    return df.sort_index()
示例13: test_data_info_cols
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_data_info_cols(library):
    """Write a MultiIndex-ed frame and check the metadata from get_info."""
    index = MultiIndex.from_tuples([(1, "ab"), (2, "bb"), (3, "cb")])
    frame = DataFrame(data=[100, 200, 300], index=index)
    library.write('test_data', frame)
    info = library.get_info('test_data')

    # Sample of the metadata returned:
    # {'dtype': [('level_0', '<i8'), ('level_1', 'S2'), ('0', '<i8')],
    #  'col_names': {u'index': [u'level_0', u'level_1'], u'columns': [u'0'], 'index_tz': [None, None]},
    #  'type': u'pandasdf',
    #  'handler': 'PandasDataFrameStore',
    #  'rows': 3,
    #  'segment_count': 1,
    #  'size': 50}
    assert 'size' in info
    assert info['segment_count'] == 1
    assert info['rows'] == 3
    assert info['handler'] == 'PandasDataFrameStore'
    assert info['type'] == 'pandasdf'

    expected_col_names = {'index': ['level_0', u'level_1'], 'columns': [u'0'], 'index_tz': [None, None]}
    assert info['col_names'] == expected_col_names

    # Two index levels plus one data column, in that order.
    dtype_fields = info['dtype']
    assert len(dtype_fields) == 3
    for position, field_name in enumerate(('level_0', 'level_1', '0')):
        assert dtype_fields[position][0] == field_name
示例14: setup_method
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def setup_method(self, method):
    """Build the ``self.indices`` fixture mapping, then run setup_indices().

    The mapping holds one sample index per kind, keyed by a descriptive
    name. ``method`` is not used here.
    """
    size = 100
    self.indices = {
        'unicodeIndex': tm.makeUnicodeIndex(size),
        'strIndex': tm.makeStringIndex(size),
        'dateIndex': tm.makeDateIndex(size),
        'periodIndex': tm.makePeriodIndex(size),
        'tdIndex': tm.makeTimedeltaIndex(size),
        'intIndex': tm.makeIntIndex(size),
        'uintIndex': tm.makeUIntIndex(size),
        'rangeIndex': tm.makeRangeIndex(size),
        'floatIndex': tm.makeFloatIndex(size),
        'boolIndex': Index([True, False]),
        'catIndex': tm.makeCategoricalIndex(size),
        'empty': Index([]),
        'tuples': MultiIndex.from_tuples(
            lzip(['foo', 'bar', 'baz'], [1, 2, 3])
        ),
        'repeats': Index([0, 0, 1, 1, 2, 2]),
    }
    self.setup_indices()
示例15: test_droplevel
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import MultiIndex [as 别名]
def test_droplevel(self, indices):
    """Check droplevel() on flat indexes (GH 21115).

    Dropping no levels returns an equal index; dropping the index's own
    name raises ValueError; dropping an unknown name raises KeyError.
    """
    if isinstance(indices, MultiIndex):
        # Tested separately in test_multi.py
        return

    unchanged = indices.droplevel([])
    assert unchanged.equals(indices)

    own_name = indices.name
    for candidate in (own_name, [own_name]):
        # GH 21121 : droplevel with tuple name
        passed_bare_tuple = isinstance(own_name, tuple) and candidate is own_name
        if passed_bare_tuple:
            continue
        with pytest.raises(ValueError):
            indices.droplevel(candidate)

    for unknown in ('wrong', ['wrong']):
        with pytest.raises(KeyError):
            indices.droplevel(unknown)