我有这样的数据:
df <-
a b c
1 2 3
1 2 4
1 2 5
1 2 9
2 3 3
2 3 4
2 3 5
2 3 9
3 4 3
3 4 4
3 4 5
3 4 9

我希望基于列 a 删除重复行,但把列 c 中的值保留为新的列,如下所示:
df2 <-
a b c c1 c2 c3
1 2 3 4 5 9
2 3 3 4 5 9
3 4 3 4 5 9

我知道如何删除重复项,如下所示:
df2 <-df[!(df$a=="1"),]
但是不知道如何将值添加到保留的行中。
发布于 2020-03-17 03:07:14
我们可以先取出去重后的行(排除 c 列)并对其 unlist,然后与整个 'c' 列拼接:
# Flatten the first row per value of `a` (columns a and b only) into a named
# vector, then append every value of column c; c() numbers the names c1, c2, ...
c(
  unlist(df[!duplicated(df[["a"]]), c("a", "b")]),
  c = df[["c"]]
)
# a b c1 c2 c3 c4
# 1 2 3 4 5 9

如果我们需要与预期输出相同的名称:
# Same as the previous snippet, but name the c values c, c1, c2, ... by making
# a repeated "c" label unique with an empty separator.
c(
  unlist(df[!duplicated(df[["a"]]), c("a", "b")]),
  setNames(df[["c"]], make.unique(rep("c", nrow(df)), sep = ""))
)
# a b c c1 c2 c3
# 1 2 3 4 5 9

使用新的示例:
library(dplyr)
library(tidyr)
# Collapse df2 to one row per value of `a`: keep the first `b` of each group
# and spread that group's `c` values across columns c1, c2, ...
df2 %>%
  group_by(a) %>%
  summarise(b = first(b), c = list(as.list(c))) %>%
  # names_sep = "" names the unnamed list elements c1, c2, ... directly, so no
  # follow-up rename is needed (the previous rename used stringr::str_c, but
  # stringr is never loaded here). Newer tidyr releases also refuse to unnest
  # unnamed elements unless names_sep is supplied.
  unnest_wider(c, names_sep = "")
# A tibble: 2 x 6
# a b c1 c2 c3 c4
# <int> <int> <int> <int> <int> <int>
#1 1 2 3 4 5 9
#2 2 2 3 4 5 9

或使用再次更新的示例:
# Collapse df3 to one row per value of `a`: keep the first `b` of each group
# and spread that group's `c` values across columns c1, c2, ...
df3 %>%
  group_by(a) %>%
  summarise(b = first(b), c = list(as.list(c))) %>%
  # names_sep = "" names the unnamed list elements c1, c2, ... directly, so no
  # follow-up rename is needed (the previous rename used stringr::str_c, but
  # stringr is never loaded here). Newer tidyr releases also refuse to unnest
  # unnamed elements unless names_sep is supplied.
  unnest_wider(c, names_sep = "")
# A tibble: 3 x 6
# a b c1 c2 c3 c4
# <int> <int> <int> <int> <int> <int>
#1 1 2 3 4 5 9
#2 2 3 3 4 5 9
#3 3 4 3 4 5 9

或使用 data.table:
# Load data.table for the grouped `[` syntax used in this snippet.
library(data.table)
# setDT() converts df3 to a data.table by reference. For each group of `a`,
# build one row: the first `b`, followed by that group's `c` values spread
# across columns. Every `c` value is given the same name 'c'; the printed
# result shows the duplicated names coming out as c, c.1, c.2, c.3.
setDT(df3)[, c(.(b = first(b)),
as.data.frame.list(setNames(c, rep('c', .N)))), a]
# a b c c.1 c.2 c.3
#1: 1 2 3 4 5 9
#2: 2 3 3 4 5 9
#3: 3 4 3 4 5 9

数据:
# Example data for the base R snippets: a single `a` group with four rows.
df <- data.frame(
  a = rep(1L, 4L),
  b = c(2L, 3L, 3L, 4L),
  c = c(3L, 4L, 5L, 9L)
)

# Example data with two `a` groups of four rows each.
df2 <- data.frame(
  a = rep(1:2, each = 4L),
  b = rep(c(2L, 3L, 3L, 4L), times = 2L),
  c = rep(c(3L, 4L, 5L, 9L), times = 2L)
)
# Example data with three `a` groups of four rows each; `b` is constant within
# a group and every group has the same four `c` values.
df3 <- data.frame(
  a = rep(1:3, each = 4L),
  b = rep(2:4, each = 4L),
  c = rep(c(3L, 4L, 5L, 9L), times = 3L)
)
# https://stackoverflow.com/questions/60711842
复制相似问题