利用Python数据分析:数据的规整化(一)

来源:互联网 发布:哈登身体数据 编辑:程序博客网 时间:2024/06/10 16:18
# Database-style joins with pandas.merge: a walk-through of the `on`,
# `left_on`/`right_on`, `how` and `suffixes` options.
#
# The bare expressions below (e.g. `df1`, `pd.merge(...)`) come from an
# interactive session, where each one displays its value; run as a plain
# script they are evaluated and discarded.
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# --- Many-to-one join on a shared column -----------------------------------
df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                 'data1': range(7)})
df2 = DataFrame({'key': ['a', 'b', 'd'],
                 'data2': range(3)})
df1
df2

# Only rows whose key appears in both frames are kept ('c' and 'd' drop out).
pd.merge(df1, df2)
# Output recorded in the original article (row and column order may differ
# between pandas versions):
#
#    data1 key  data2
# 0      0   b      1
# 1      1   b      1
# 2      6   b      1
# 3      2   a      0
# 4      4   a      0
# 5      5   a      0

# Name the join column explicitly; when `on` is omitted, the column names
# common to both frames are used as the join keys.
pd.merge(df1, df2, on='key')

# --- Join columns with different names on each side ------------------------
df3 = DataFrame({'lkey': ['b', 'b', 'a', 'c', 'a', 'a', 'b'],
                 'data1': range(7)})
df4 = DataFrame({'rkey': ['a', 'b', 'd'],
                 'data2': range(3)})

# By default merge performs an inner join: the intersection of the keys.
pd.merge(df3, df4, left_on='lkey', right_on='rkey')

# An outer join takes the union of the keys instead.
pd.merge(df1, df2, how='outer')

# --- Many-to-many joins -----------------------------------------------------
df1 = DataFrame({'key': ['b', 'b', 'a', 'c', 'a', 'b'],
                 'data1': range(6)})
df2 = DataFrame({'key': ['a', 'b', 'a', 'b', 'd'],
                 'data2': range(5)})
df1
df2

# Many-to-many join that keeps every row of the left frame.
pd.merge(df1, df2, on='key', how='left')

# A many-to-many join yields the Cartesian product of the matching rows.
pd.merge(df1, df2, how='inner')

# --- Joining on several keys, and overlapping column names -----------------
left = DataFrame({'key1': ['foo', 'foo', 'bar'],
                  'key2': ['one', 'two', 'one'],
                  'lval': [1, 2, 3]})
right = DataFrame({'key1': ['foo', 'foo', 'bar', 'bar'],
                   'key2': ['one', 'one', 'one', 'two'],
                   'rval': [4, 5, 6, 7]})

# To merge on multiple keys, pass a list of column names.
pd.merge(left, right, on=['key1', 'key2'], how='outer')

# 'key2' exists in both frames but is not a join key, so the result carries
# two copies of it, disambiguated by pandas' default suffixes.
pd.merge(left, right, on='key1')

# `suffixes` sets the strings appended to overlapping column names from the
# left and right frames respectively.
pd.merge(left, right, on='key1', suffixes=('_left', '_right'))

0 0
原创粉丝点击