R语言中实现表的链接-merge函数

来源:互联网 发布:最好的网络营销软件 编辑:程序博客网 时间:2024/06/05 08:42
authors <- data.frame(
  surname = I(c("Tukey", "Venables", "Tierney", "Ripley", "McNeil")),
  nationality = c("US", "Australia", "US", "UK", "Australia"),
  deceased = c("yes", rep("no", 4)))
books <- data.frame(
  name = I(c("Tukey", "Venables", "Tierney",
             "Ripley",  "McNeil", "R Core")),
  title = c("Exploratory Data Analysis",
            "Modern Applied Statistics",
            "LISP-STAT",
            "Spatial Statistics", 
            "Interactive Data Analysis",
            "An Introduction to R"),
  other.author = c(NA, "Ripley", NA, NA, NA, 

                   "Venables & Smith"))

> authors
   surname nationality deceased
1    Tukey          US      yes
2 Venables   Australia       no
3  Tierney          US       no
4   Ripley          UK       no
5   McNeil   Australia       no
> books
      name                     title     other.author
1    Tukey Exploratory Data Analysis             <NA>
2 Venables Modern Applied Statistics           Ripley
3  Tierney                 LISP-STAT             <NA>
4   Ripley        Spatial Statistics             <NA>
5   McNeil Interactive Data Analysis             <NA>
6   R Core      An Introduction to R Venables & Smith

#如果要实现类似sql里面的inner join 功能,则用代码
m1 <- merge(authors, books, by.x = "surname", by.y = "name")
#如果要实现left join功能则用代码
m2 <- merge(authors, books, by.x = "surname", by.y = "name",all.x=TRUE)
#right join功能代码
m3 <- merge(authors, books, by.x = "surname", by.y = "name",all.y=TRUE)
#all join功能代码
m4 <- merge(authors, books, by.x = "surname", by.y = "name",all=TRUE)

> m1
   surname nationality deceased                     title other.author
1   McNeil   Australia       no Interactive Data Analysis         <NA>
2   Ripley          UK       no        Spatial Statistics         <NA>
3  Tierney          US       no                 LISP-STAT         <NA>
4    Tukey          US      yes Exploratory Data Analysis         <NA>
5 Venables   Australia       no Modern Applied Statistics       Ripley
> m2
   surname nationality deceased                     title other.author
1   McNeil   Australia       no Interactive Data Analysis         <NA>
2   Ripley          UK       no        Spatial Statistics         <NA>
3  Tierney          US       no                 LISP-STAT         <NA>
4    Tukey          US      yes Exploratory Data Analysis         <NA>
5 Venables   Australia       no Modern Applied Statistics       Ripley
> m3
   surname nationality deceased                     title     other.author
1   McNeil   Australia       no Interactive Data Analysis             <NA>
2   R Core        <NA>     <NA>      An Introduction to R Venables & Smith
3   Ripley          UK       no        Spatial Statistics             <NA>
4  Tierney          US       no                 LISP-STAT             <NA>
5    Tukey          US      yes Exploratory Data Analysis             <NA>
6 Venables   Australia       no Modern Applied Statistics           Ripley
> m4
   surname nationality deceased                     title     other.author
1   McNeil   Australia       no Interactive Data Analysis             <NA>
2   R Core        <NA>     <NA>      An Introduction to R Venables & Smith
3   Ripley          UK       no        Spatial Statistics             <NA>
4  Tierney          US       no                 LISP-STAT             <NA>
5    Tukey          US      yes Exploratory Data Analysis             <NA>
6 Venables   Australia       no Modern Applied Statistics           Ripley

#关于单变量匹配的总结就是这些,但对于多变量匹配呢,例如下面两个表,需要对k1,k2两个变量都相等的情况下匹配
x <- data.frame(k1 = c(NA,NA,3,4,5), k2 = c(1,NA,NA,4,5), data = 1:5)

y <- data.frame(k1 = c(NA,2,NA,4,5), k2 = c(NA,NA,3,4,5), data = 1:5)

> x
  k1 k2 data
1 NA  1    1
2 NA NA    2
3  3 NA    3
4  4  4    4
5  5  5    5
> y
  k1 k2 data
1 NA NA    1
2  2 NA    2
3 NA  3    3
4  4  4    4
5  5  5    5

#匹配代码如下inner join

merge(x, y, by = c("k1","k2")) 

 k1 k2 data.x data.y
1  4  4      4      4
2  5  5      5      5
3 NA NA      2      1


0 0
原创粉丝点击