import numpy as np
import pandas as pd
df1=pd.DataFrame({"key":["A","B","C","D"],
"value":np.random.randn(4)})
df2=pd.DataFrame({"key":["B","D","D","E"],
"value":np.random.randn(4)})
df1
'''
key value
0 A 0.491384
1 B -1.412033
2 C -1.435534
3 D 0.281810
'''
df2
'''
key value
0 B 0.609666
1 D -1.871337
2 D 0.405684
3 E -0.133232
'''
#内连接 join
'''
select * from df1 f1 inner join df2 f2 on f1.key = f2.key
'''
pd.merge(df1,df2,on="key",how="inner")
'''
key value-1 value-2
0 B -1.412033 0.609666
1 D 0.281810 -1.871337
2 D 0.281810 0.405684
'''
#suffixes=("-1","-2") 指定列名的后缀名
pd.merge(df1,df2,on="key",how="inner",suffixes=("-1","-2"))
'''
key value-1 value-2
0 B -1.412033 0.609666
1 D 0.281810 -1.871337
2 D 0.281810 0.405684
'''
#左外连接
'''
select * from df1 f1 left join df2 f2 on f1.key = f2.key
'''
pd.merge(df1,df2,on="key",how="left",suffixes=("-1","-2"))
'''
key value-1 value-2
0 A 0.491384 NaN
1 B -1.412033 0.609666
2 C -1.435534 NaN
3 D 0.281810 -1.871337
4 D 0.281810 0.405684
'''
#右外连接
'''
select * from df1 f1 right join df2 f2 on f1.key = f2.key
'''
pd.merge(df1,df2,on="key",how="right",suffixes=("-1","-2"))
'''
key value-1 value-2
0 B -1.412033 0.609666
1 D 0.281810 -1.871337
2 D 0.281810 0.405684
3 E NaN -0.133232
'''
#全连接
'''
select * from df1 f1 outer join df2 f2 on f1.key = f2.key
'''
pd.merge(df1,df2,on="key",how="outer",suffixes=("-1","-2"))
'''
key value-1 value-2
0 A 0.491384 NaN
1 B -1.412033 0.609666
2 C -1.435534 NaN
3 D 0.281810 -1.871337
4 D 0.281810 0.405684
5 E NaN -0.133232
'''
#union连接两张表自动去重,union all连接两张表不去重
df3=pd.DataFrame({"city":["A","B","C"],
"rank":range(1,4)})
df4=pd.DataFrame({"city":["A","D","E"],
"rank":range(1,4)})
df3
'''
city rank
0 A 1
1 B 2
2 C 3
'''
df4
'''
city rank
0 A 1
1 D 2
2 E 3
'''
#union 连接
pd.concat([df3,df4])
'''
city rank
0 A 1
1 B 2
2 C 3
0 A 1
1 D 2
2 E 3
'''
#union all连接
pd.concat([df3,df4]).drop_duplicates()
'''
city rank
0 A 1
1 B 2
2 C 3
1 D 2
2 E 3
'''
网友评论