假设这是我的数据集:
# Example data: six rows identified by `id`, four integer columns (w, x, y, z)
# that each contain at least one NA, and a character `Group` label.
# The tibble classes are attached directly through the class attribute.
df <- structure(
  list(
    id = 1:6,
    w = c(NA, 11L, 12L, 13L, 14L, 15L),
    x = c(20L, 21L, NA, 23L, 24L, 25L),
    y = c(30L, 31L, 32L, NA, 34L, 35L),
    z = c(40L, NA, 42L, 43L, 44L, NA),
    Group = c("Group1", "Group2", "Group2", "Group2", "Group3", "Group3")
  ),
  row.names = c(NA, -6L),
  class = c("tbl_df", "tbl", "data.frame")
)
id w x y z Group
1 NA 20 30 40 Group1
2 11 21 31 NA Group2
3 12 NA 32 42 Group2
4 13 23 NA 43 Group2
5 14 24 34 44 Group3
6 15 25 35 NA Group3
我想按组对 w、x、y、z 列中的观测值求和(每个组使用的列不同),为此使用了如下的 case_when 语句:
# NOTE: this is the asker's original attempt, kept exactly as posted.
# It does not run; the answers further down show working alternatives.
df <- df %>%
mutate(
# Problems visible in the three lines below:
# - `<-` inside mutate() is an assignment expression, not a `name = value`
#   argument, so the new column would not be named group1_total etc.
# - `Group = "Group1" ~ ...` passes a named argument to case_when() instead
#   of testing `Group == "Group1"`.
# - rowSums() takes a single matrix-like object as its first argument; here
#   `w` is a bare column vector and the remaining positional arguments are
#   matched to rowSums()'s other parameters (or are unused) -- presumably the
#   source of the rowSums() error the asker reports.
group1_total <- case_when( Group = "Group1" ~ rowSums(w,x, na.rm = TRUE)),
group2_total <- case_when( Group = "Group2" ~ rowSums(w,x,z, na.rm = TRUE)),
group3_total <- case_when( Group = "Group3" ~ rowSums(w,x,y,z, na.rm = TRUE))
)我收到一个错误的说法,rowSums()中的错误。不知道我做错了什么。任何建议或帮助都是非常感谢的,谢谢。
发布于 2022-09-18 10:38:03
我将创建一个命名列表,在该列表中,我们将为每个组指定要求和的变量。
cur_data()返回当前组的数据,group_cols[[unique(Group)]]为每个组选择适当的列。
library(dplyr)
# Lookup table: for each value of `Group`, the names of the columns to sum.
group_cols <- list(
  Group1 = c("w", "x"),
  Group2 = c("w", "x", "z"),
  Group3 = c("w", "x", "y", "z")
)
# Per-group row sums: inside each group, unique(Group) is that group's single
# label, which is used to look up the column names in `group_cols`; only those
# columns of the current group's data are passed to rowSums().
# NOTE(review): cur_data() is deprecated as of dplyr 1.1.0 in favour of pick();
# confirm that a replacement still resolves the column set per group before
# switching.
df %>%
  group_by(Group) %>%
  mutate(
    total = rowSums(
      select(cur_data(), group_cols[[unique(Group)]]),
      na.rm = TRUE
    )
  ) %>%
  ungroup()
# id w x y z Group total
# <int> <int> <int> <int> <int> <chr> <dbl>
#1 1 NA 20 30 40 Group1 20
#2 2 11 21 31 NA Group2 32
#3 3 12 NA 32 42 Group2 54
#4 4 13 23 NA 43 Group2 79
#5 5 14 24 34 44 Group3 116
#6 6 15 25 35 NA Group3 75
这样做的好处是,计算是按组进行的,而不是逐行进行的。
发布于 2022-09-18 09:00:16
您可以将rowwise()、c_across() (c_across()设计为与rowwise()一起使用以便于执行逐行聚合)和sum()结合使用。
library(dplyr)
# Row-by-row sums: rowwise() makes each row its own group, so c_across()
# gathers the selected columns of that single row for sum().
# Each *_total column is filled only for rows of its own group; case_when()
# has no fallback branch, so every other row gets NA.
df %>%
  rowwise() %>%
  mutate(
    group1_total = case_when(
      Group == "Group1" ~ sum(c_across(w:x), na.rm = TRUE)
    ),
    group2_total = case_when(
      Group == "Group2" ~ sum(c_across(c(w, x, z)), na.rm = TRUE)
    ),
    group3_total = case_when(
      Group == "Group3" ~ sum(c_across(w:z), na.rm = TRUE)
    )
  )
输出
id w x y z Group group1_total group2_total group3_total
<int> <int> <int> <int> <int> <chr> <int> <int> <int>
1 1 NA 20 30 40 Group1 20 NA NA
2 2 11 21 31 NA Group2 NA 32 NA
3 3 12 NA 32 42 Group2 NA 54 NA
4 4 13 23 NA 43 Group2 NA 79 NA
5 5 14 24 34 44 Group3 NA NA 116
6 6 15 25 35 NA Group3 NA NA 75
如果您只需要一个总计列,可以使用 coalesce(),例如:
# Same per-group row sums as the three-column version, then collapsed into a
# single `total` column.
df %>%
  rowwise() %>%
  mutate(
    group1_total = case_when(
      Group == "Group1" ~ sum(c_across(w:x), na.rm = TRUE)
    ),
    group2_total = case_when(
      Group == "Group2" ~ sum(c_across(c(w, x, z)), na.rm = TRUE)
    ),
    group3_total = case_when(
      Group == "Group3" ~ sum(c_across(w:z), na.rm = TRUE)
    )
  ) %>%
  # Exactly one of the three helper columns is non-NA per row, so coalesce()
  # picks that value.
  mutate(total = coalesce(group1_total, group2_total, group3_total)) %>%
  # Drop the helpers; "total" itself does not contain "_total" and is kept.
  select(-contains("_total"))
导致
# A tibble: 6 x 7
# Rowwise:
id w x y z Group total
<int> <int> <int> <int> <int> <chr> <int>
1 1 NA 20 30 40 Group1 20
2 2 11 21 31 NA Group2 32
3 3 12 NA 32 42 Group2 54
4 4 13 23 NA 43 Group2 79
5 5 14 24 34 44 Group3 116
6 6 15 25 35 NA Group3 75
发布于 2022-09-18 19:12:17
一种做法是与键/值数据集连接,将名称不匹配的列替换为 NA,然后用 rowSums 求和
library(dplyr)
library(stringr)
# Key/value table: for each Group, a regex alternation of the column names
# that should contribute to that group's row sum.
keydat <- tibble(
  Group = c("Group1", "Group2", "Group3"),
  nm = c("w|x", "w|x|z", "w|x|y|z")
)
# Attach each row's column pattern (`nm`) from `keydat`, blank out the columns
# whose name does not match that pattern, then sum what is left per row.
df %>%
  # Name the join key explicitly: without `by`, left_join() does a natural
  # join on every shared column name and prints a message about it.
  left_join(keydat, by = "Group") %>%
  mutate(
    total = rowSums(
      across(
        w:z,
        # Keep the value only where the current column's name matches the
        # row's pattern; case_when() has no fallback, so other cells become NA.
        ~ case_when(str_detect(cur_column(), nm) ~ .x)
      ),
      na.rm = TRUE
    ),
    # Drop the helper pattern column from the result.
    nm = NULL
  )
-output
# A tibble: 6 × 7
id w x y z Group total
<int> <int> <int> <int> <int> <chr> <dbl>
1 1 NA 20 30 40 Group1 20
2 2 11 21 31 NA Group2 32
3 3 12 NA 32 42 Group2 54
4 4 13 23 NA 43 Group2 79
5 5 14 24 34 44 Group3 116
6 6 15 25 35 NA Group3 75
https://stackoverflow.com/questions/73761234
复制相似问题