本文整理汇总了 Python 中 pandas.merge 方法的典型用法代码示例。如果您正苦于以下问题:Python pandas.merge 方法的具体用法?Python pandas.merge 怎么用?Python pandas.merge 使用的例子?那么,这里精选的方法代码示例或许可以为您提供帮助。您也可以进一步了解该方法所在类 pandas 的用法示例。
在下文中一共展示了 pandas.merge 方法的 15 个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于系统推荐出更棒的 Python 代码示例。
示例1: transform
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def transform(self, df):
    """Apply the fitted preprocessing steps to ``df`` and return the result.

    Steps, in order:
      1. run ``number_preprocess`` over the ``name`` and
         ``item_description`` text columns;
      2. for every column in ``self.cat_cols``, replace values missing from
         ``self.cat_vocab[col]`` with ``"rarecategory"`` and encode the
         column with ``self.le[col]``;
      3. run ``cut(text, self.voc)`` over both text columns and turn them
         into sequences with ``self.tok_raw``;
      4. left-merge the frequency tables in ``self.freqs`` and scale the
         resulting ``*_freq`` columns by ``self.max_freqs[col] + 1``.

    The columns of steps 1-3 are assigned on the frame that was passed in;
    the merges of step 4 rebind the local name, so the returned frame is a
    new object.
    ``number_preprocess`` and ``cut`` are helpers defined outside this
    block -- NOTE(review): their exact semantics are not visible here.
    """
    text_columns = ("name", "item_description")

    for text_col in text_columns:
        df[text_col] = df[text_col].apply(number_preprocess)

    for cat in self.cat_cols:
        def keep_known(value, vocab_key=cat):
            # Values outside the fitted vocabulary share one bucket.
            if value in self.cat_vocab[vocab_key]:
                return value
            return "rarecategory"

        df[cat] = df[cat].apply(keep_known)
        df[cat] = self.le[cat].transform(df[cat])

    for text_col in text_columns:
        df[text_col] = df[text_col].apply(lambda text: cut(text, self.voc))

    # Same column creation order as before: description first, then name.
    for text_col in ("item_description", "name"):
        df["seq_" + text_col] = self.tok_raw.texts_to_sequences(
            df[text_col].values)

    for source_col in ("name_ori", "item_description_ori"):
        freq_col = source_col + "_freq"
        df = pd.merge(df, self.freqs[source_col], how="left", on=source_col)
        # Rows without a frequency entry count as 0 before scaling.
        filled = df[freq_col].fillna(0)
        df[freq_col] = filled / (self.max_freqs[source_col] + 1)

    return df
示例2: df_fx
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def df_fx(self, currency, fx_provider):
    """Return price data with a ``close_converted`` column.

    Args:
        currency: Currency code of the prices held in ``self.df``.
        fx_provider: Passed through to ``PriceData`` to load the FX series.

    Returns:
        For ``'USD'``: ``self.df`` itself, updated in place with
        ``fx_close`` set to 1 and ``close_converted`` equal to ``close``
        cast to float.
        Otherwise: a new frame, the inner merge of ``self.df`` with the FX
        frame on ``date``, where ``close_converted`` is
        ``close * fx_close``.
        ``None`` if anything raises; the exception is appended to
        ``self.errors`` instead of propagating.
    """
    try:
        if currency == 'USD':
            # Prices are already in USD, so the conversion rate is 1.
            self.df['fx_close'] = 1
            self.df['close_converted'] = self.df['close'].astype(float)
            return self.df

        # Load the FX series and expose its close price as ``fx_close``.
        fx = PriceData(currency, fx_provider)
        fx.df = fx.df.rename(columns={'close': 'fx_close'})
        # Unparseable rates become NaN rather than raising.
        fx.df["fx_close"] = pd.to_numeric(fx.df.fx_close, errors='coerce')

        # Keep only dates present in both frames.
        merged = pd.merge(self.df, fx.df, on='date', how='inner')
        merged['close'] = merged['close'].astype(float)
        merged['close_converted'] = merged['close'] * merged['fx_close']
        return merged
    except Exception as err:
        # Best effort: record the failure and signal it with None.
        self.errors.append(err)
        return None
示例3: main
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def main():
    """Build the CSV inputs from ``movies.dat`` and ``ratings.dat``.

    Reads both ``::``-separated files, merges ratings with the preprocessed
    movie table on ``movie_id``, runs ``feature_extraction`` on the merged
    data and writes five tab-separated CSV files into the working
    directory.
    ``movie_preprocessing`` and ``feature_extraction`` are defined outside
    this block.
    """
    dat_options = {'sep': '::', 'engine': 'python'}

    # read and preprocess the movie data
    movie = movie_preprocessing(
        pd.read_table('movies.dat',
                      names=['movie_id', 'movie_name', 'tag'],
                      **dat_options))

    # read the ratings data and merge it with movie data
    rating = pd.read_table(
        "ratings.dat",
        names=["user_id", "movie_id", "rating", "timestamp"],
        **dat_options)
    data = pd.merge(rating, movie, on="movie_id")

    # extract feature from our data set
    streaming_batch, user_feature, actions, reward_list = \
        feature_extraction(data)

    streaming_batch.to_csv("streaming_batch.csv", sep='\t', index=False)
    # user_feature is the only table written with its index.
    user_feature.to_csv("user_feature.csv", sep='\t')
    action_frame = pd.DataFrame(actions, columns=['movie_id'])
    action_frame.to_csv("actions.csv", sep='\t', index=False)
    reward_list.to_csv("reward_list.csv", sep='\t', index=False)

    # Context rows are the movies whose id appears among the actions.
    is_action = movie['movie_id'].isin(actions)
    movie[is_action].to_csv("action_context.csv", sep='\t', index=False)
示例4: test_merge
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge(self, data, na_value):
# GH-20743
df1 = pd.DataFrame({'ext': data[:3], 'int1': [1, 2, 3],
'key': [0, 1, 2]})
df2 = pd.DataFrame({'int2': [1, 2, 3, 4], 'key': [0, 0, 1, 3]})
res = pd.merge(df1, df2)
exp = pd.DataFrame(
{'int1': [1, 1, 2], 'int2': [1, 2, 3], 'key': [0, 0, 1],
'ext': data._from_sequence([data[0], data[0], data[1]],
dtype=data.dtype)})
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
res = pd.merge(df1, df2, how='outer')
exp = pd.DataFrame(
{'int1': [1, 1, 2, 3, np.nan], 'int2': [1, 2, 3, np.nan, 4],
'key': [0, 0, 1, 2, 3],
'ext': data._from_sequence(
[data[0], data[0], data[1], data[2], na_value],
dtype=data.dtype)})
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']])
示例5: test_merge_on_extension_array
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_extension_array(self, data):
# GH 23020
a, b = data[:2]
key = type(data)._from_sequence([a, b], dtype=data.dtype)
df = pd.DataFrame({"key": key, "val": [1, 2]})
result = pd.merge(df, df, on='key')
expected = pd.DataFrame({"key": key,
"val_x": [1, 2],
"val_y": [1, 2]})
self.assert_frame_equal(result, expected)
# order
result = pd.merge(df.iloc[[1, 0]], df, on='key')
expected = expected.iloc[[1, 0]].reset_index(drop=True)
self.assert_frame_equal(result, expected)
示例6: test_merge_on_multikey
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_multikey(self, left, right, join_type):
on_cols = ['key1', 'key2']
result = (left.join(right, on=on_cols, how=join_type)
.reset_index(drop=True))
expected = pd.merge(left, right.reset_index(),
on=on_cols, how=join_type)
tm.assert_frame_equal(result, expected)
result = (left.join(right, on=on_cols, how=join_type, sort=True)
.reset_index(drop=True))
expected = pd.merge(left, right.reset_index(),
on=on_cols, how=join_type, sort=True)
tm.assert_frame_equal(result, expected)
示例7: test_single_common_level
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_single_common_level(self):
index_left = pd.MultiIndex.from_tuples([('K0', 'X0'), ('K0', 'X1'),
('K1', 'X2')],
names=['key', 'X'])
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'],
'B': ['B0', 'B1', 'B2']},
index=index_left)
index_right = pd.MultiIndex.from_tuples([('K0', 'Y0'), ('K1', 'Y1'),
('K2', 'Y2'), ('K2', 'Y3')],
names=['key', 'Y'])
right = pd.DataFrame({'C': ['C0', 'C1', 'C2', 'C3'],
'D': ['D0', 'D1', 'D2', 'D3']},
index=index_right)
result = left.join(right)
expected = (pd.merge(left.reset_index(), right.reset_index(),
on=['key'], how='inner')
.set_index(['key', 'X', 'Y']))
tm.assert_frame_equal(result, expected)
示例8: test_merge_index_singlekey_right_vs_left
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_index_singlekey_right_vs_left(self):
left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
'v1': np.random.randn(7)})
right = DataFrame({'v2': np.random.randn(4)},
index=['d', 'b', 'c', 'a'])
merged1 = merge(left, right, left_on='key',
right_index=True, how='left', sort=False)
merged2 = merge(right, left, right_on='key',
left_index=True, how='right', sort=False)
assert_frame_equal(merged1, merged2.loc[:, merged1.columns])
merged1 = merge(left, right, left_on='key',
right_index=True, how='left', sort=True)
merged2 = merge(right, left, right_on='key',
left_index=True, how='right', sort=True)
assert_frame_equal(merged1, merged2.loc[:, merged1.columns])
示例9: test_merge_index_singlekey_inner
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_index_singlekey_inner(self):
left = DataFrame({'key': ['a', 'b', 'c', 'd', 'e', 'e', 'a'],
'v1': np.random.randn(7)})
right = DataFrame({'v2': np.random.randn(4)},
index=['d', 'b', 'c', 'a'])
# inner join
result = merge(left, right, left_on='key', right_index=True,
how='inner')
expected = left.join(right, on='key').loc[result.index]
assert_frame_equal(result, expected)
result = merge(right, left, right_on='key', left_index=True,
how='inner')
expected = left.join(right, on='key').loc[result.index]
assert_frame_equal(result, expected.loc[:, result.columns])
示例10: test_merge_misspecified
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_misspecified(self):
    """Inconsistent key arguments to ``merge`` must raise with a clear message.

    Each case is ``(expected exception, message pattern, call)``. The call
    is wrapped in a lambda so that it only runs inside ``pytest.raises``.
    The frames come from ``self.left``, ``self.right``, ``self.df`` and
    ``self.df2``, which are set up outside this block.
    """
    cases = [
        # Index on one side only, nothing given for the other side.
        (pd.errors.MergeError,
         "Must pass right_on or right_index=True",
         lambda: merge(self.left, self.right, left_index=True)),
        (pd.errors.MergeError,
         "Must pass left_on or left_index=True",
         lambda: merge(self.left, self.right, right_index=True)),
        # ``on`` combined with ``left_on``.
        (pd.errors.MergeError,
         ('Can only pass argument "on" OR "left_on" and "right_on", not'
          ' a combination of both'),
         lambda: merge(self.left, self.left, left_on='key', on='key')),
        # Key lists of different length.
        (ValueError,
         r"len\(right_on\) must equal len\(left_on\)",
         lambda: merge(self.df, self.df2, left_on=['key1'],
                       right_on=['key1', 'key2'])),
    ]
    for error_type, msg, call in cases:
        with pytest.raises(error_type, match=msg):
            call()
示例11: test_merge_join_key_dtype_cast
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_join_key_dtype_cast(self):
# #8596
df1 = DataFrame({'key': [1], 'v1': [10]})
df2 = DataFrame({'key': [2], 'v1': [20]})
df = merge(df1, df2, how='outer')
assert df['key'].dtype == 'int64'
df1 = DataFrame({'key': [True], 'v1': [1]})
df2 = DataFrame({'key': [False], 'v1': [0]})
df = merge(df1, df2, how='outer')
# GH13169
# this really should be bool
assert df['key'].dtype == 'object'
df1 = DataFrame({'val': [1]})
df2 = DataFrame({'val': [2]})
lkey = np.array([1])
rkey = np.array([2])
df = merge(df1, df2, left_on=lkey, right_on=rkey, how='outer')
assert df['key_0'].dtype == 'int64'
示例12: test_other_datetime_unit
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_other_datetime_unit(self):
# GH 13389
df1 = pd.DataFrame({'entity_id': [101, 102]})
s = pd.Series([None, None], index=[101, 102], name='days')
for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]',
'datetime64[s]', 'datetime64[ms]', 'datetime64[us]',
'datetime64[ns]']:
df2 = s.astype(dtype).to_frame('days')
# coerces to datetime64[ns], thus sholuld not be affected
assert df2['days'].dtype == 'datetime64[ns]'
result = df1.merge(df2, left_on='entity_id', right_index=True)
exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype='datetime64[ns]')},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)
示例13: test_other_timedelta_unit
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_other_timedelta_unit(self, unit):
# GH 13389
df1 = pd.DataFrame({'entity_id': [101, 102]})
s = pd.Series([None, None], index=[101, 102], name='days')
dtype = "m8[{}]".format(unit)
df2 = s.astype(dtype).to_frame('days')
assert df2['days'].dtype == 'm8[ns]'
result = df1.merge(df2, left_on='entity_id', right_index=True)
exp = pd.DataFrame({'entity_id': [101, 102],
'days': np.array(['nat', 'nat'],
dtype=dtype)},
columns=['entity_id', 'days'])
tm.assert_frame_equal(result, exp)
示例14: test_merge_on_datetime64tz_empty
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_on_datetime64tz_empty(self):
# https://github.com/pandas-dev/pandas/issues/25014
dtz = pd.DatetimeTZDtype(tz='UTC')
right = pd.DataFrame({'date': [pd.Timestamp('2018', tz=dtz.tz)],
'value': [4.0],
'date2': [pd.Timestamp('2019', tz=dtz.tz)]},
columns=['date', 'value', 'date2'])
left = right[:0]
result = left.merge(right, on='date')
expected = pd.DataFrame({
'value_x': pd.Series(dtype=float),
'date2_x': pd.Series(dtype=dtz),
'date': pd.Series(dtype=dtz),
'value_y': pd.Series(dtype=float),
'date2_y': pd.Series(dtype=dtz),
}, columns=['value_x', 'date2_x', 'date', 'value_y', 'date2_y'])
tm.assert_frame_equal(result, expected)
示例15: test_merge_datetime64tz_with_dst_transition
# 需要导入模块: import pandas [as 别名]
# 或者: from pandas import merge [as 别名]
def test_merge_datetime64tz_with_dst_transition(self):
# GH 18885
df1 = pd.DataFrame(pd.date_range(
'2017-10-29 01:00', periods=4, freq='H', tz='Europe/Madrid'),
columns=['date'])
df1['value'] = 1
df2 = pd.DataFrame({
'date': pd.to_datetime([
'2017-10-29 03:00:00', '2017-10-29 04:00:00',
'2017-10-29 05:00:00'
]),
'value': 2
})
df2['date'] = df2['date'].dt.tz_localize('UTC').dt.tz_convert(
'Europe/Madrid')
result = pd.merge(df1, df2, how='outer', on='date')
expected = pd.DataFrame({
'date': pd.date_range(
'2017-10-29 01:00', periods=7, freq='H', tz='Europe/Madrid'),
'value_x': [1] * 4 + [np.nan] * 3,
'value_y': [np.nan] * 4 + [2] * 3
})
assert_frame_equal(result, expected)