当前位置: 首页>>代码示例>>Python>>正文


Python pandas.merge方法代码示例

本文整理汇总了 Python 中 pandas.merge 方法的典型用法代码示例。如果您正苦于以下问题：Python pandas.merge 方法的具体用法？Python pandas.merge 怎么用？Python pandas.merge 使用的例子？那么，这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在模块 pandas 的用法示例。


在下文中一共展示了 pandas.merge 方法的 15 个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于系统推荐出更棒的 Python 代码示例。

示例1: transform

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def transform(self, df):
    """Apply the fitted preprocessing steps to ``df`` and return the result.

    In order: run ``number_preprocess`` over the two text columns, map every
    categorical value missing from ``self.cat_vocab`` to ``"rarecategory"``
    and encode it with ``self.le``, pass the text through ``cut`` with
    ``self.voc``, store the ``self.tok_raw.texts_to_sequences`` output in the
    ``seq_*`` columns, and left-merge the frequency tables in ``self.freqs``
    to build scaled ``*_freq`` columns.

    Args:
        df: DataFrame with ``name``, ``item_description``, ``name_ori``,
            ``item_description_ori`` and every column named in
            ``self.cat_cols``.  It is modified in place until the first
            merge, which produces a new frame.

    Returns:
        The transformed DataFrame.
    """
    text_cols = ("name", "item_description")

    for text_col in text_cols:
        df[text_col] = df[text_col].apply(number_preprocess)

    for cat in self.cat_cols:
        def keep_known(value, col=cat):
            # Values outside the vocabulary share one placeholder label.
            return value if value in self.cat_vocab[col] else "rarecategory"

        df[cat] = df[cat].apply(keep_known)
        df[cat] = self.le[cat].transform(df[cat])

    for text_col in text_cols:
        df[text_col] = df[text_col].apply(lambda text: cut(text, self.voc))

    # The description sequence column is created before the name one.
    description_values = df["item_description"].values
    df['seq_item_description'] = self.tok_raw.texts_to_sequences(description_values)
    name_values = df["name"].values
    df['seq_name'] = self.tok_raw.texts_to_sequences(name_values)

    for key in ("name_ori", "item_description_ori"):
        freq_col = key + "_freq"
        df = pd.merge(df, self.freqs[key], how="left", on=key)
        # Rows without a match in the frequency table count as frequency 0.
        filled = df[freq_col].fillna(0)
        df[freq_col] = filled / (self.max_freqs[key] + 1)

    return df
开发者ID:aerdem4,项目名称:mercari-price-suggestion,代码行数:23,代码来源:preprocess_for_nn.py

示例2: df_fx

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def df_fx(self, currency, fx_provider):
    """Return price data extended with FX-converted closing prices.

    For ``'USD'`` the columns ``fx_close`` (constant 1) and
    ``close_converted`` are added to ``self.df`` in place and ``self.df`` is
    returned.  For any other currency a ``PriceData(currency, fx_provider)``
    object is built, its ``close`` column is renamed to ``fx_close`` and
    coerced to numeric, and the result is inner-merged with ``self.df`` on
    ``date``; ``close_converted`` is then ``close * fx_close``.

    Args:
        currency: Currency code; ``'USD'`` skips the conversion.
        fx_provider: Passed through to ``PriceData``.

    Returns:
        The resulting DataFrame, or ``None`` if any step raised.  The
        exception is appended to ``self.errors`` rather than propagated.
    """
    try:
        if currency == 'USD':
            # Prices are already in USD, so no conversion is needed.
            self.df['fx_close'] = 1
            self.df['close_converted'] = self.df['close'].astype(float)
            return self.df

        # Fetch the FX series for this currency and line it up by date.
        fx = PriceData(currency, fx_provider)
        fx.df = fx.df.rename(columns={'close': 'fx_close'})
        fx.df["fx_close"] = pd.to_numeric(fx.df.fx_close, errors='coerce')

        merged = pd.merge(self.df, fx.df, on='date', how='inner')
        merged['close'] = merged['close'].astype(float)
        merged['close_converted'] = merged['close'] * merged['fx_close']
        return merged
    except Exception as e:
        self.errors.append(e)
        return None
开发者ID:pxsocs,项目名称:thewarden,代码行数:23,代码来源:pricing.py

示例3: main

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def main():
    """Build the tab-separated experiment files from the raw ``.dat`` files.

    Reads ``movies.dat`` and ``ratings.dat`` (``::``-separated), merges them
    on ``movie_id``, runs ``feature_extraction`` and writes
    ``streaming_batch.csv``, ``user_feature.csv``, ``actions.csv``,
    ``reward_list.csv`` and ``action_context.csv`` to the working directory.
    """
    tab = '\t'

    # Movie metadata, run through the project's preprocessing step.
    movie_columns = ['movie_id', 'movie_name', 'tag']
    movie = movie_preprocessing(
        pd.read_table('movies.dat', sep='::', names=movie_columns, engine='python'))

    # Ratings, joined with the movie metadata.
    rating_columns = ["user_id", "movie_id", "rating", "timestamp"]
    rating = pd.read_table("ratings.dat", sep="::", names=rating_columns,
                           engine='python')
    data = pd.merge(rating, movie, on="movie_id")

    streaming_batch, user_feature, actions, reward_list = feature_extraction(data)

    streaming_batch.to_csv("streaming_batch.csv", sep=tab, index=False)
    # user_feature is the only output written with its index.
    user_feature.to_csv("user_feature.csv", sep=tab)
    actions_frame = pd.DataFrame(actions, columns=['movie_id'])
    actions_frame.to_csv("actions.csv", sep=tab, index=False)
    reward_list.to_csv("reward_list.csv", sep=tab, index=False)

    # Context rows: only the movies that appear among the actions.
    is_action = movie['movie_id'].isin(actions)
    movie[is_action].to_csv("action_context.csv", sep=tab, index=False)
开发者ID:ntucllab,项目名称:striatum,代码行数:21,代码来源:movielens_preprocess.py

示例4: test_merge

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge(self, data, na_value):
        # GH-20743
        df1 = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
                            'key': [0, 1, 2]})
        df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})

        res = pd.merge(df1, df2)
        exp = pd.DataFrame(
            {'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
             'ext': data._from_sequence([data[0], data[0], data[1]],
                                        dtype=data.dtype)})
        self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])

        res = pd.merge(df1, df2, how='outer')
        exp = pd.DataFrame(
            {'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4],
             'key': [0, 0, 1, 2, 3],
             'ext': data._from_sequence(
                 [data[0], data[0], data[1], data[2], na_value],
                 dtype=data.dtype)})
        self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:23,代码来源:reshaping.py

示例5: test_merge_on_extension_array

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_extension_array(self, data):
        # GH 23020
        a, b = data[:2]
        key = type(data)._from_sequence([a, b], dtype=data.dtype)

        df = pd.DataFrame({"key": key, "val": [1, 2]})
        result = pd.merge(df, df, on='key')
        expected = pd.DataFrame({"key": key,
                                 "val_x": [1, 2],
                                 "val_y": [1, 2]})
        self.assert_frame_equal(result, expected)

        # order
        result = pd.merge(df.iloc[[1, 0]], df, on='key')
        expected = expected.iloc[[1, 0]].reset_index(drop=True)
        self.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:reshaping.py

示例6: test_merge_on_multikey

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_multikey(self, left, right, join_type):
        on_cols = ['key1', 'key2']
        result = (left.join(right, on=on_cols, how=join_type)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type)

        tm.assert_frame_equal(result, expected)

        result = (left.join(right, on=on_cols, how=join_type, sort=True)
                  .reset_index(drop=True))

        expected = pd.merge(left, right.reset_index(),
                            on=on_cols, how=join_type, sort=True)

        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_multi.py

示例7: test_single_common_level

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_single_common_level(self):
        index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
                                                ('K1', 'X2')],
                                               names=['key', 'X'])

        left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
                             'B': ['B0', 'B1', 'B2']},
                            index=index_left)

        index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
                                                 ('K2', 'Y2'), ('K2', 'Y3')],
                                                names=['key', 'Y'])

        right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
                              'D': ['D0', 'D1', 'D2', 'D3']},
                             index=index_right)

        result = left.join(right)
        expected = (pd.merge(left.reset_index(), right.reset_index(),
                             on=['key'], how='inner')
                    .set_index(['key', 'X', 'Y']))

        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:25,代码来源:test_multi.py

示例8: test_merge_index_singlekey_right_vs_left

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_index_singlekey_right_vs_left(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=False)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=False)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns])

        merged1 = merge(left, right, left_on='key',
                        right_index=True, how='left', sort=True)
        merged2 = merge(right, left, right_on='key',
                        left_index=True, how='right', sort=True)
        assert_frame_equal(merged1, merged2.loc[:, merged1.columns]) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_merge.py

示例9: test_merge_index_singlekey_inner

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_index_singlekey_inner(self):
        left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
                          'v1': np.random.randn(7)})
        right = DataFrame({'v2': np.random.randn(4)},
                          index=['d', 'b', 'c', 'a'])

        # inner join
        result = merge(left, right, left_on='key', right_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected)

        result = merge(right, left, right_on='key', left_index=True,
                       how='inner')
        expected = left.join(right, on='key').loc[result.index]
        assert_frame_equal(result, expected.loc[:, result.columns]) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_merge.py

示例10: test_merge_misspecified

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_misspecified(self):
    """Check the errors raised for inconsistent key arguments to ``merge``.

    Covers: an index flag on one side with no key on the other, ``on``
    combined with ``left_on``, and ``left_on``/``right_on`` lists of
    different lengths.  Reads the ``left``, ``right``, ``df`` and ``df2``
    frames from the test instance.
    """
    merge_error = pd.errors.MergeError

    # An index flag on one side needs a key or index flag on the other.
    need_right = "Must pass right_on or right_index=True"
    with pytest.raises(merge_error, match=need_right):
        merge(self.left, self.right, left_index=True)

    need_left = "Must pass left_on or left_index=True"
    with pytest.raises(merge_error, match=need_left):
        merge(self.left, self.right, right_index=True)

    # `on` cannot be combined with the side-specific key arguments.
    on_conflict = ('Can only pass argument "on" OR "left_on" and "right_on", not'
                   ' a combination of both')
    with pytest.raises(merge_error, match=on_conflict):
        merge(self.left, self.left, left_on='key', on='key')

    # Mismatched key-list lengths raise a plain ValueError.
    length_mismatch = r"len\(right_on\) must equal len\(left_on\)"
    with pytest.raises(ValueError, match=length_mismatch):
        merge(self.df, self.df2, left_on=['key1'],
              right_on=['key1', 'key2'])
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_merge.py

示例11: test_merge_join_key_dtype_cast

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_join_key_dtype_cast(self):
        # #8596

        df1 = DataFrame({'key': [1], 'v1': [10]})
        df2 = DataFrame({'key': [2], 'v1': [20]})
        df = merge(df1, df2, how='outer')
        assert df['key'].dtype == 'int64'

        df1 = DataFrame({'key': [True], 'v1': [1]})
        df2 = DataFrame({'key': [False], 'v1': [0]})
        df = merge(df1, df2, how='outer')

        # GH13169
        # this really should be bool
        assert df['key'].dtype == 'object'

        df1 = DataFrame({'val': [1]})
        df2 = DataFrame({'val': [2]})
        lkey = np.array([1])
        rkey = np.array([2])
        df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
        assert df['key_0'].dtype == 'int64' 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:24,代码来源:test_merge.py

示例12: test_other_datetime_unit

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_other_datetime_unit(self):
        # GH 13389
        df1 = pd.DataFrame({'entity_id': [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name='days')

        for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
                      'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
                      'datetime64[ns]']:

            df2 = s.astype(dtype).to_frame('days')
            # coerces to datetime64[ns], thus sholuld not be affected
            assert df2['days'].dtype == 'datetime64[ns]'

            result = df1.merge(df2, left_on='entity_id', right_index=True)

            exp = pd.DataFrame({'entity_id': [101, 102],
                                'days': np.array(['nat', 'nat'],
                                                 dtype='datetime64[ns]')},
                               columns=['entity_id', 'days'])
            tm.assert_frame_equal(result, exp) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:22,代码来源:test_merge.py

示例13: test_other_timedelta_unit

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_other_timedelta_unit(self, unit):
        # GH 13389
        df1 = pd.DataFrame({'entity_id': [101, 102]})
        s = pd.Series([None, None], index=[101, 102], name='days')

        dtype = "m8[{}]".format(unit)
        df2 = s.astype(dtype).to_frame('days')
        assert df2['days'].dtype == 'm8[ns]'

        result = df1.merge(df2, left_on='entity_id', right_index=True)

        exp = pd.DataFrame({'entity_id': [101, 102],
                            'days': np.array(['nat', 'nat'],
                                             dtype=dtype)},
                           columns=['entity_id', 'days'])
        tm.assert_frame_equal(result, exp) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:18,代码来源:test_merge.py

示例14: test_merge_on_datetime64tz_empty

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_datetime64tz_empty(self):
        # https://github.com/pandas-dev/pandas/issues/25014
        dtz = pd.DatetimeTZDtype(tz='UTC')
        right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)],
                              'value': [4.0],
                              'date2': [pd.Timestamp('2019', tz=dtz.tz)]},
                             columns=['date', 'value', 'date2'])
        left = right[:0]
        result = left.merge(right, on='date')
        expected = pd.DataFrame({
            'value_x': pd.Series(dtype=float),
            'date2_x': pd.Series(dtype=dtz),
            'date': pd.Series(dtype=dtz),
            'value_y': pd.Series(dtype=float),
            'date2_y': pd.Series(dtype=dtz),
        }, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y'])
        tm.assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:19,代码来源:test_merge.py

示例15: test_merge_datetime64tz_with_dst_transition

# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_datetime64tz_with_dst_transition(self):
        # GH 18885
        df1 = pd.DataFrame(pd.date_range(
            '2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'),
            columns=['date'])
        df1['value'] = 1
        df2 = pd.DataFrame({
            'date': pd.to_datetime([
                '2017-10-29 03:00:00', '2017-10-29 04:00:00',
                '2017-10-29 05:00:00'
            ]),
            'value': 2
        })
        df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert(
            'Europe/Madrid')
        result = pd.merge(df1, df2, how='outer', on='date')
        expected = pd.DataFrame({
            'date': pd.date_range(
                '2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'),
            'value_x': [1] * 4 + [np.nan] * 3,
            'value_y': [np.nan] * 4 + [2] * 3
        })
        assert_frame_equal(result, expected) 
开发者ID:Frank-qlu,项目名称:recruit,代码行数:25,代码来源:test_merge.py


注:本文中的pandas.merge方法示例由纯净天空整理自Github/MSDocs等开源代码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。