本文整理汇总了Python中pandas.MultiIndex.from_arrays方法的典型用法代码示例。如果您正苦于以下问题:Python MultiIndex.from_arrays方法的具体用法?Python MultiIndex.from_arrays怎么用?Python MultiIndex.from_arrays使用的例子?那么恭喜您, 这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.MultiIndex
的用法示例。
在下文中一共展示了MultiIndex.from_arrays方法的15个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的Python代码示例。
示例1: test_to_excel_multiindex
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_to_excel_multiindex(self, merge_cells, engine, ext):
frame = self.frame
arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
new_index = MultiIndex.from_arrays(arrays,
names=['first', 'second'])
frame.index = new_index
frame.to_excel(self.path, 'test1', header=False)
frame.to_excel(self.path, 'test1', columns=['A', 'B'])
# round trip
frame.to_excel(self.path, 'test1', merge_cells=merge_cells)
reader = ExcelFile(self.path)
df = read_excel(reader, 'test1', index_col=[0, 1])
tm.assert_frame_equal(frame, df)
# GH13511
示例2: test_compare_custom_instance_type
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_compare_custom_instance_type(self):
A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
B = DataFrame({'col': ['abc', 'abd', 'abc', 'abc', '123']})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
def call(s1, s2):
# this should raise on incorrect types
assert isinstance(s1, np.ndarray)
assert isinstance(s2, np.ndarray)
return np.ones(len(s1), dtype=np.int)
comp = recordlinkage.Compare()
comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int),
'col', 'col')
result = comp.compute(ix, A, B)
expected = DataFrame([1, 1, 1, 1, 1], index=ix)
pdt.assert_frame_equal(result, expected)
示例3: test_compare_custom_vectorized_dedup
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_compare_custom_vectorized_dedup(self):
A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
ix = MultiIndex.from_arrays([[0, 1, 2, 3, 4], [1, 2, 3, 4, 0]])
# test without label
comp = recordlinkage.Compare()
comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=np.int),
'col', 'col')
result = comp.compute(ix, A)
expected = DataFrame([1, 1, 1, 1, 1], index=ix)
pdt.assert_frame_equal(result, expected)
# test with label
comp = recordlinkage.Compare()
comp.compare_vectorized(
lambda s1, s2: np.ones(len(s1), dtype=np.int),
'col',
'col',
label='test')
result = comp.compute(ix, A)
expected = DataFrame([1, 1, 1, 1, 1], index=ix, columns=['test'])
pdt.assert_frame_equal(result, expected)
示例4: test_indexing_types
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_indexing_types(self):
# test the two types of indexing
# this test needs improvement
A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
B_reversed = B[::-1].copy()
ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])
# test with label indexing type
comp_label = recordlinkage.Compare(indexing_type='label')
comp_label.exact('col', 'col')
result_label = comp_label.compute(ix, A, B_reversed)
# test with position indexing type
comp_position = recordlinkage.Compare(indexing_type='position')
comp_position.exact('col', 'col')
result_position = comp_position.compute(ix, A, B_reversed)
assert (result_position.values == 1).all(axis=0)
pdt.assert_frame_equal(result_label, result_position)
示例5: test_pass_list_of_features
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_pass_list_of_features(self):
from recordlinkage.compare import FrequencyA, VariableA, VariableB
# setup datasets and record pairs
A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
B = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
ix = MultiIndex.from_arrays([np.arange(5), np.arange(5)])
# test with label indexing type
features = [
VariableA('col', label='y1'),
VariableB('col', label='y2'),
FrequencyA('col', label='y3')
]
comp_label = recordlinkage.Compare(features=features)
result_label = comp_label.compute(ix, A, B)
assert list(result_label) == ["y1", "y2", "y3"]
示例6: test_feature_multicolumn_input
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_feature_multicolumn_input(self):
# test using classes and the base class
A = DataFrame({
'col1': ['abc', 'abc', 'abc', 'abc', 'abc'],
'col2': ['abc', 'abc', 'abc', 'abc', 'abc']
})
B = DataFrame({
'col1': ['abc', 'abd', 'abc', 'abc', '123'],
'col2': ['abc', 'abd', 'abc', 'abc', '123']
})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
feature = BaseCompareFeature(['col1', 'col2'], ['col1', 'col2'])
feature._f_compare_vectorized = \
lambda s1_1, s1_2, s2_1, s2_2: np.ones(len(s1_1))
feature.compute(ix, A, B)
示例7: test_numeric
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_numeric(self):
A = DataFrame({'col': [1, 1, 1, nan, 0]})
B = DataFrame({'col': [1, 2, 3, nan, nan]})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
comp = recordlinkage.Compare()
comp.numeric('col', 'col', 'step', offset=2)
comp.numeric('col', 'col', method='step', offset=2)
comp.numeric('col', 'col', 'step', 2)
result = comp.compute(ix, A, B)
# Basics
expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=0)
pdt.assert_series_equal(result[0], expected)
# Basics
expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=1)
pdt.assert_series_equal(result[1], expected)
# Basics
expected = Series([1.0, 1.0, 1.0, 0.0, 0.0], index=ix, name=2)
pdt.assert_series_equal(result[2], expected)
示例8: test_dates
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_dates(self):
A = DataFrame({
'col':
to_datetime(
['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
})
B = DataFrame({
'col':
to_datetime([
'2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
'2010/9/30'
])
})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
comp = recordlinkage.Compare()
comp.date('col', 'col')
result = comp.compute(ix, A, B)[0]
expected = Series([1, 0, 0, 0.5, 0.5], index=ix, name=0)
pdt.assert_series_equal(result, expected)
示例9: test_date_incorrect_dtype
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_date_incorrect_dtype(self):
A = DataFrame({
'col':
['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
})
B = DataFrame({
'col': [
'2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
'2010/9/30'
]
})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
A['col1'] = to_datetime(A['col'])
B['col1'] = to_datetime(B['col'])
comp = recordlinkage.Compare()
comp.date('col', 'col1')
pytest.raises(ValueError, comp.compute, ix, A, B)
comp = recordlinkage.Compare()
comp.date('col1', 'col')
pytest.raises(ValueError, comp.compute, ix, A, B)
示例10: test_geo
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_geo(self):
# Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
A = DataFrame({
'lat': [52.0842455, 52.3747388, 51.9280573],
'lng': [5.0124516, 4.7585305, 4.4203581]
})
B = DataFrame({
'lat': [52.3747388, 51.9280573, 52.0842455],
'lng': [4.7585305, 4.4203581, 5.0124516]
})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
comp = recordlinkage.Compare()
comp.geo(
'lat', 'lng', 'lat', 'lng', method='step',
offset=50) # 50 km range
result = comp.compute(ix, A, B)
# Missing values as default [36.639460, 54.765854, 44.092472]
expected = Series([1.0, 0.0, 1.0], index=ix, name=0)
pdt.assert_series_equal(result[0], expected)
示例11: test_defaults
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_defaults(self):
# default algorithm is levenshtein algorithm
# test default values are indentical to levenshtein
A = DataFrame({
'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
})
B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
comp = recordlinkage.Compare()
comp.string('col', 'col', label='default')
comp.string('col', 'col', method='levenshtein', label='with_args')
result = comp.compute(ix, A, B)
pdt.assert_series_equal(
result['default'].rename(None),
result['with_args'].rename(None)
)
示例12: test_fuzzy
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_fuzzy(self):
A = DataFrame({
'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
})
B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
comp = recordlinkage.Compare()
comp.string('col', 'col', method='jaro', missing_value=0)
comp.string('col', 'col', method='q_gram', missing_value=0)
comp.string('col', 'col', method='cosine', missing_value=0)
comp.string('col', 'col', method='jaro_winkler', missing_value=0)
comp.string('col', 'col', method='dameraulevenshtein', missing_value=0)
comp.string('col', 'col', method='levenshtein', missing_value=0)
result = comp.compute(ix, A, B)
print(result)
assert result.notnull().all(1).all(0)
assert (result[result.notnull()] >= 0).all(1).all(0)
assert (result[result.notnull()] <= 1).all(1).all(0)
示例13: test_freq_nan
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_freq_nan(self, missing_value):
# data
array_repeated = np.repeat(np.arange(10, dtype=np.float64), 10)
array_repeated[90:] = np.nan
array_tiled = np.tile(np.arange(20, dtype=np.float64), 5)
# convert to pandas data
A = DataFrame({'col': array_repeated})
B = DataFrame({'col': array_tiled})
ix = MultiIndex.from_arrays([A.index.values, B.index.values])
# the part to test
from recordlinkage.compare import Frequency
comp = recordlinkage.Compare()
comp.add(Frequency(left_on='col', missing_value=missing_value))
result = comp.compute(ix, A, B)
expected_np = np.ones((100, )) / 10
expected_np[90:] = missing_value
expected = DataFrame(expected_np, index=ix)
pdt.assert_frame_equal(result, expected)
示例14: _index_from_records
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def _index_from_records(self, recarr):
index = recarr.dtype.metadata['index']
if len(index) == 1:
rtn = Index(np.copy(recarr[str(index[0])]), name=index[0])
if isinstance(rtn, DatetimeIndex) and 'index_tz' in recarr.dtype.metadata:
rtn = rtn.tz_localize('UTC').tz_convert(recarr.dtype.metadata['index_tz'])
else:
level_arrays = []
index_tz = recarr.dtype.metadata.get('index_tz', [])
for level_no, index_name in enumerate(index):
# build each index level separately to ensure we end up with the right index dtype
level = Index(np.copy(recarr[str(index_name)]))
if level_no < len(index_tz):
tz = index_tz[level_no]
if tz is not None:
if not isinstance(level, DatetimeIndex) and len(level) == 0:
# index type information got lost during save as the index was empty, cast back
level = DatetimeIndex([], tz=tz)
else:
level = level.tz_localize('UTC').tz_convert(tz)
level_arrays.append(level)
rtn = MultiIndex.from_arrays(level_arrays, names=index)
return rtn
示例15: test_isin
# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_isin():
values = [('foo', 2), ('bar', 3), ('quux', 4)]
idx = MultiIndex.from_arrays([
['qux', 'baz', 'foo', 'bar'],
np.arange(4)
])
result = idx.isin(values)
expected = np.array([False, False, True, True])
tm.assert_numpy_array_equal(result, expected)
# empty, return dtype bool
idx = MultiIndex.from_arrays([[], []])
result = idx.isin(values)
assert len(result) == 0
assert result.dtype == np.bool_