当前位置: 首页>>代码示例>>Python>>正文


Python MultiIndex.from_arrays方法代码示例

本文整理汇总了Python中pandas.MultiIndex.from_arrays方法的典型用法代码示例。如果您正苦于以下问题：Python MultiIndex.from_arrays方法的具体用法？Python MultiIndex.from_arrays怎么用？Python MultiIndex.from_arrays使用的例子？那么恭喜您，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类pandas.MultiIndex的用法示例。


在下文中一共展示了MultiIndex.from_arrays方法的15个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的Python代码示例。

示例1: test_to_excel_multiindex

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_to_excel_multiindex(self, merge_cells, engine, ext):
        """Round-trip a frame with a two-level row index through Excel.

        ``self.frame`` gets a named two-level ``MultiIndex`` assigned as its
        index (the fixture object itself is modified, not a copy).  The frame
        is then written to ``self.path`` three times -- without a header,
        with a column subset, and finally in full using the requested
        ``merge_cells`` setting -- and the last write is read back and
        compared with the in-memory frame.

        ``engine`` and ``ext`` are accepted but not referenced in this body.
        """
        frame = self.frame
        # Two rows of consecutive integers, one row per index level.
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays,
                                           names=['first', 'second'])
        frame.index = new_index

        # The sheet name is given by keyword: newer pandas releases deprecate
        # passing anything but the target path positionally to ``to_excel``.
        frame.to_excel(self.path, sheet_name='test1', header=False)
        frame.to_excel(self.path, sheet_name='test1', columns=['A', 'B'])

        # round trip
        frame.to_excel(self.path, sheet_name='test1', merge_cells=merge_cells)
        # Use the reader as a context manager so the workbook handle is
        # closed even when reading or the comparison below fails.
        with ExcelFile(self.path) as reader:
            df = read_excel(reader, 'test1', index_col=[0, 1])
        tm.assert_frame_equal(frame, df)

    # GH13511 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_excel.py

示例2: test_compare_custom_instance_type

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_compare_custom_instance_type(self):
        """Run a custom vectorized comparison over two five-row frames.

        A comparison callable that returns a vector of integer ones is
        registered on column ``'col'`` of both frames, and the computed
        result is expected to be a single column of ones indexed by the
        candidate pairs.
        """

        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        B = DataFrame({'col': ['abc', 'abd', 'abc', 'abc', '123']})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        # NOTE(review): this helper is defined but never handed to the
        # comparer below, so its isinstance checks do not run -- confirm
        # whether it was meant to be registered instead of the lambda.
        def call(s1, s2):

            # this should raise on incorrect types
            assert isinstance(s1, np.ndarray)
            assert isinstance(s2, np.ndarray)

            # Builtin ``int`` replaces the ``np.int`` alias, which was
            # removed from NumPy in 1.24 and raises AttributeError there.
            return np.ones(len(s1), dtype=int)

        comp = recordlinkage.Compare()
        comp.compare_vectorized(lambda s1, s2: np.ones(len(s1), dtype=int),
                                'col', 'col')
        result = comp.compute(ix, A, B)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix)
        pdt.assert_frame_equal(result, expected)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:22,代码来源:test_compare.py

示例3: test_compare_custom_vectorized_dedup

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_compare_custom_vectorized_dedup(self):
        """Run a custom vectorized comparison with a single frame.

        Only ``A`` is passed to ``compute`` and the candidate pairs link rows
        of ``A`` to other rows of ``A``.  The comparison is exercised twice:
        once without a label (default column naming) and once with
        ``label='test'`` (the result column must carry that name).
        """

        A = DataFrame({'col': ['abc', 'abc', 'abc', 'abc', 'abc']})
        ix = MultiIndex.from_arrays([[0, 1, 2, 3, 4], [1, 2, 3, 4, 0]])

        # Builtin ``int`` replaces the ``np.int`` alias, which was removed
        # from NumPy in 1.24 and raises AttributeError there.
        def all_ones(s1, s2):
            return np.ones(len(s1), dtype=int)

        # test without label
        comp = recordlinkage.Compare()
        comp.compare_vectorized(all_ones, 'col', 'col')
        result = comp.compute(ix, A)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix)
        pdt.assert_frame_equal(result, expected)

        # test with label
        comp = recordlinkage.Compare()
        comp.compare_vectorized(
            all_ones,
            'col',
            'col',
            label='test')
        result = comp.compute(ix, A)
        expected = DataFrame([1, 1, 1, 1, 1], index=ix, columns=['test'])
        pdt.assert_frame_equal(result, expected)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:25,代码来源:test_compare.py

示例4: test_indexing_types

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_indexing_types(self):
        """Compare the 'label' and 'position' indexing types of ``Compare``.

        Both variants run an exact comparison of column ``'col'`` between
        ``A`` and a row-reversed copy of ``B``; every value of the
        position-based result must equal 1 and both results must be equal
        frames.
        """
        # this test needs improvement

        A = DataFrame({'col': ['abc'] * 5})
        B = DataFrame({'col': ['abc'] * 5})
        B_reversed = B[::-1].copy()
        pairs = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

        def run_exact(indexing_type):
            # One fresh comparer per indexing type, same single feature.
            comparer = recordlinkage.Compare(indexing_type=indexing_type)
            comparer.exact('col', 'col')
            return comparer.compute(pairs, A, B_reversed)

        by_label = run_exact('label')
        by_position = run_exact('position')

        assert (by_position.values == 1).all(axis=0)

        pdt.assert_frame_equal(by_label, by_position)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:25,代码来源:test_compare.py

示例5: test_pass_list_of_features

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_pass_list_of_features(self):
        """Pass ready-made feature objects to ``Compare(features=...)``.

        Three labelled features are handed over as a list; the columns of
        the computed result must be exactly those labels, in list order.
        """
        from recordlinkage.compare import FrequencyA, VariableA, VariableB

        # Two identical five-row frames and the diagonal record pairs.
        values = ['abc'] * 5
        A = DataFrame({'col': values})
        B = DataFrame({'col': values})
        pairs = MultiIndex.from_arrays([np.arange(5), np.arange(5)])

        comparer = recordlinkage.Compare(features=[
            VariableA('col', label='y1'),
            VariableB('col', label='y2'),
            FrequencyA('col', label='y3'),
        ])
        outcome = comparer.compute(pairs, A, B)

        # Iterating a frame yields its column labels.
        assert list(outcome) == ["y1", "y2", "y3"]
开发者ID:J535D165,项目名称:recordlinkage,代码行数:22,代码来源:test_compare.py

示例6: test_feature_multicolumn_input

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_feature_multicolumn_input(self):
        """Feed two columns per side into a ``BaseCompareFeature``.

        The feature's vectorized hook is replaced by a four-argument callable
        (two columns from each frame).  The test only runs ``compute``; it
        makes no assertion on the returned value.
        """
        left_values = ['abc', 'abc', 'abc', 'abc', 'abc']
        right_values = ['abc', 'abd', 'abc', 'abc', '123']
        A = DataFrame({'col1': list(left_values), 'col2': list(left_values)})
        B = DataFrame({'col1': list(right_values), 'col2': list(right_values)})
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        def all_ones(s1_1, s1_2, s2_1, s2_2):
            # One float 1.0 per candidate pair, whatever the inputs are.
            return np.ones(len(s1_1))

        feature = BaseCompareFeature(['col1', 'col2'], ['col1', 'col2'])
        feature._f_compare_vectorized = all_ones
        feature.compute(pairs, A, B)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:19,代码来源:test_compare.py

示例7: test_numeric

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_numeric(self):
        """Numeric 'step' comparison, configured three equivalent ways.

        The same comparison is registered with the method and offset passed
        positionally, by keyword, and as a mix of both; all three result
        columns (named 0, 1 and 2) must hold the same expected scores.
        """
        A = DataFrame({'col': [1, 1, 1, nan, 0]})
        B = DataFrame({'col': [1, 2, 3, nan, nan]})
        ix = MultiIndex.from_arrays([A.index.values, B.index.values])

        comp = recordlinkage.Compare()
        comp.numeric('col', 'col', 'step', offset=2)
        comp.numeric('col', 'col', method='step', offset=2)
        comp.numeric('col', 'col', 'step', 2)
        result = comp.compute(ix, A, B)

        # Basics: every variant yields the same scores; only the default
        # integer column name differs.
        scores = [1.0, 1.0, 1.0, 0.0, 0.0]
        for column in (0, 1, 2):
            expected = Series(scores, index=ix, name=column)
            pdt.assert_series_equal(result[column], expected)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:25,代码来源:test_compare.py

示例8: test_dates

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_dates(self):
        """Date comparison on two datetime columns with default settings.

        The first result column is checked against the expected scores
        ``[1, 0, 0, 0.5, 0.5]``; the second left-hand entry is a missing
        value.
        """
        left_dates = to_datetime(
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30'])
        right_dates = to_datetime([
            '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
            '2010/9/30'
        ])
        A = DataFrame({'col': left_dates})
        B = DataFrame({'col': right_dates})
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        comparer = recordlinkage.Compare()
        comparer.date('col', 'col')
        scores = comparer.compute(pairs, A, B)[0]

        pdt.assert_series_equal(
            scores, Series([1, 0, 0, 0.5, 0.5], index=pairs, name=0))
开发者ID:J535D165,项目名称:recordlinkage,代码行数:25,代码来源:test_compare.py

示例9: test_date_incorrect_dtype

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_date_incorrect_dtype(self):
        """Date comparison must raise when one side is not datetime-typed.

        Each frame carries the raw strings in ``'col'`` and their datetime
        conversion in ``'col1'``.  Pairing a string column with a datetime
        column, in either order, is expected to raise ``ValueError`` from
        ``compute``.
        """
        A = DataFrame({
            'col':
            ['2005/11/23', nan, '2004/11/23', '2010/01/10', '2010/10/30']
        })
        B = DataFrame({
            'col': [
                '2005/11/23', '2010/12/31', '2005/11/23', '2010/10/01',
                '2010/9/30'
            ]
        })
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        for frame in (A, B):
            frame['col1'] = to_datetime(frame['col'])

        # (left column, right column): string vs datetime, then the reverse.
        for left_col, right_col in (('col', 'col1'), ('col1', 'col')):
            comparer = recordlinkage.Compare()
            comparer.date(left_col, right_col)
            with pytest.raises(ValueError):
                comparer.compute(pairs, A, B)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:26,代码来源:test_compare.py

示例10: test_geo

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_geo(self):
        """Geographic 'step' comparison with an offset of 50.

        ``A`` lists three Dutch cities and ``B`` lists the same coordinates
        rotated by one position; the first result column must equal
        ``[1.0, 0.0, 1.0]``.
        """
        # Utrecht, Amsterdam, Rotterdam (Cities in The Netherlands)
        utrecht = (52.0842455, 5.0124516)
        amsterdam = (52.3747388, 4.7585305)
        rotterdam = (51.9280573, 4.4203581)

        left = [utrecht, amsterdam, rotterdam]
        right = [amsterdam, rotterdam, utrecht]
        A = DataFrame({
            'lat': [lat for lat, _ in left],
            'lng': [lng for _, lng in left]
        })
        B = DataFrame({
            'lat': [lat for lat, _ in right],
            'lng': [lng for _, lng in right]
        })
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        comparer = recordlinkage.Compare()
        comparer.geo(
            'lat', 'lng', 'lat', 'lng', method='step',
            offset=50)  # 50 km range
        outcome = comparer.compute(pairs, A, B)

        # NOTE(review): the figures [36.639460, 54.765854, 44.092472] were
        # attached to this check originally; presumably they are the pairwise
        # distances in km (only the middle one exceeds 50) -- confirm.
        expected = Series([1.0, 0.0, 1.0], index=pairs, name=0)
        pdt.assert_series_equal(outcome[0], expected)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:24,代码来源:test_compare.py

示例11: test_defaults

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_defaults(self):
        """String comparison defaults must match explicit 'levenshtein'.

        The same column pair is compared once with no method given and once
        with ``method='levenshtein'``; after dropping the series names both
        result columns have to be equal.
        """
        # The default algorithm is the levenshtein algorithm, so the default
        # values should be identical to the explicit levenshtein values.
        left_strings = [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        right_strings = [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]
        A = DataFrame({'col': left_strings})
        B = DataFrame({'col': right_strings})
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        comparer = recordlinkage.Compare()
        comparer.string('col', 'col', label='default')
        comparer.string('col', 'col', method='levenshtein', label='with_args')
        outcome = comparer.compute(pairs, A, B)

        # Names are cleared so that only index and values are compared.
        implicit = outcome['default'].rename(None)
        explicit = outcome['with_args'].rename(None)
        pdt.assert_series_equal(implicit, explicit)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:22,代码来源:test_compare.py

示例12: test_fuzzy

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_fuzzy(self):
        """Run every listed string similarity method with ``missing_value=0``.

        Six methods are registered on the same column pair.  The combined
        result is printed and must contain no nulls, with every value inside
        the closed interval [0, 1].
        """
        A = DataFrame({
            'col': [u'str_abc', u'str_abc', u'str_abc', nan, u'hsdkf']
        })
        B = DataFrame({'col': [u'str_abc', u'str_abd', u'jaskdfsd', nan, nan]})
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        methods = (
            'jaro',
            'q_gram',
            'cosine',
            'jaro_winkler',
            'dameraulevenshtein',
            'levenshtein',
        )
        comparer = recordlinkage.Compare()
        for method in methods:
            comparer.string('col', 'col', method=method, missing_value=0)
        result = comparer.compute(pairs, A, B)

        print(result)

        assert result.notnull().all(1).all(0)
        # Mask out nulls before the range checks.
        present = result[result.notnull()]
        assert (present >= 0).all(1).all(0)
        assert (present <= 1).all(1).all(0)
开发者ID:J535D165,项目名称:recordlinkage,代码行数:24,代码来源:test_compare.py

示例13: test_freq_nan

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_freq_nan(self, missing_value):
        """``Frequency`` feature on a column whose last ten rows are NaN.

        The left column holds each of the values 0..9 ten times, with rows
        90..99 overwritten by NaN.  The expected result is 0.1 for the first
        90 pairs and ``missing_value`` for the rest.
        """
        from recordlinkage.compare import Frequency

        # Left data: 0,0,...,9,9 with the tail blanked out.
        # Right data: 0..19 repeated five times.
        left_values = np.repeat(np.arange(10, dtype=np.float64), 10)
        left_values[90:] = np.nan
        right_values = np.tile(np.arange(20, dtype=np.float64), 5)

        A = DataFrame({'col': left_values})
        B = DataFrame({'col': right_values})
        pairs = MultiIndex.from_arrays([A.index.values, B.index.values])

        # the part to test
        comparer = recordlinkage.Compare()
        comparer.add(Frequency(left_on='col', missing_value=missing_value))
        outcome = comparer.compute(pairs, A, B)

        wanted = np.ones((100, )) / 10
        wanted[90:] = missing_value
        pdt.assert_frame_equal(outcome, DataFrame(wanted, index=pairs))
开发者ID:J535D165,项目名称:recordlinkage,代码行数:25,代码来源:test_compare.py

示例14: _index_from_records

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def _index_from_records(self, recarr):
        """Rebuild a pandas index from the fields of a record array.

        The index field names are read from ``recarr.dtype.metadata['index']``
        and optional timezone information from the ``'index_tz'`` metadata
        entry.

        With exactly one index name a plain ``Index`` named after that field
        is returned; if it turns out to be a ``DatetimeIndex`` and
        ``'index_tz'`` is present, it is localized to UTC and converted to
        that timezone.  With any other number of names a ``MultiIndex`` is
        returned, where ``'index_tz'`` is treated as a per-level sequence
        (``None`` entries and missing trailing entries mean "no conversion").
        """
        meta = recarr.dtype.metadata
        names = meta['index']

        if len(names) == 1:
            sole_name = names[0]
            single = Index(np.copy(recarr[str(sole_name)]), name=sole_name)
            if 'index_tz' in meta and isinstance(single, DatetimeIndex):
                single = single.tz_localize('UTC').tz_convert(meta['index_tz'])
            return single

        zones = meta.get('index_tz', [])
        levels = []
        for position, field in enumerate(names):
            # Each level gets its own Index so that its dtype is inferred
            # independently of the other levels.
            level = Index(np.copy(recarr[str(field)]))
            zone = zones[position] if position < len(zones) else None
            if zone is not None:
                if len(level) == 0 and not isinstance(level, DatetimeIndex):
                    # An empty level lost its datetime type when it was
                    # saved; recreate it as an empty tz-aware index.
                    level = DatetimeIndex([], tz=zone)
                else:
                    level = level.tz_localize('UTC').tz_convert(zone)
            levels.append(level)
        return MultiIndex.from_arrays(levels, names=names)
开发者ID:man-group,项目名称:arctic,代码行数:26,代码来源:numpy_records.py

示例15: test_isin

# 需要导入模块: from pandas import MultiIndex [as 别名]
# 或者: from pandas.MultiIndex import from_arrays [as 别名]
def test_isin():
    """Check ``MultiIndex.isin`` against a list of tuples.

    For a populated two-level index the result must flag exactly the entries
    whose (label, number) tuple is among the candidates; for an empty index
    the result must be a zero-length boolean array.
    """
    candidates = [('foo', 2), ('bar', 3), ('quux', 4)]

    labels = ['qux', 'baz', 'foo', 'bar']
    populated = MultiIndex.from_arrays([labels, np.arange(4)])
    # ('foo', 2) and ('bar', 3) are present; ('quux', 4) has no counterpart.
    tm.assert_numpy_array_equal(
        populated.isin(candidates),
        np.array([False, False, True, True]),
    )

    # empty, return dtype bool
    empty_mask = MultiIndex.from_arrays([[], []]).isin(candidates)
    assert len(empty_mask) == 0
    assert empty_mask.dtype == np.bool_
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_contains.py


注：本文中的pandas.MultiIndex.from_arrays方法示例由纯净天空整理自GitHub/MSDocs等开源代码及文档管理平台，相关代码片段筛选自各路编程大神贡献的开源项目，源码版权归原作者所有，传播和使用请参考对应项目的License；未经允许，请勿转载。