我需要根据各科成绩(数学、语言、sci等)生成名为grades_{subject}的新变量(例如grades_math):当2016年的成绩有效时(validity_2016 == "yes"),取2016年的值;否则取2015年的值。
df<-tibble(person = c("Alice", "Bob", "Mary"),
validity_2016 = c(NA, "yes", NA),
likes_ham = c("no", "yes", "yes"),
grades_math_2015=c(6,2,4),
grades_math_2016=c(3,5,7),
grades_language_2015=c(7,1,9),
grades_language_2016=c(3,6,7),
grades_sci_2015=c(7,1,9),
grades_sci_2016=c(3,6,7))我想知道以以下方式使用dplyr的mutate_at或mutate(across的可行性:
dplyr::mutate(across(grades_math_2016, grades_language_2016,grades_sci_2016),
~dplyr::case_when(!is.na(validity_2016)~list(grades_math_2015,grades_language_2015,grades_sci_2015)~.),
.names="{col}"))结果应该如下所示:
df<-tibble(person = c("Alice", "Bob", "Mary"),
validity_2016 = c(NA, "yes", NA),
likes_ham = c("no", "yes", "yes"),
grades_math_2015=c(6,2,4),
grades_math_2016=c(3,5,7),
grades_language_2015=c(7,1,9),
grades_language_2016=c(3,6,7),
grades_sci_2015=c(7,1,9),
grades_sci_2016=c(3,6,7),
grades_math=c(6,5,4),
grades_language=c(7,6,7),
grades_sci=c(7,6,9))发布于 2020-08-07 04:42:17
我建议对每个主题使用mutate和ifelse。类似于:
# Take the 2016 grade where validity_2016 is "yes"; otherwise use the 2015 one.
# `%in%` (unlike `==`) never returns NA, so rows whose validity flag is missing
# fall back to the 2015 grade instead of becoming NA.
df2 <- df %>%
  mutate(
    grades_math = ifelse(
      validity_2016 %in% "yes",
      grades_math_2016,
      grades_math_2015
    )
  )
这种方法的缺点是,您需要对每个主题重复此操作。这可以通过以下方式实现自动化:
# Base names of the output columns; each has `<name>_2015` and `<name>_2016`
# input columns in `df`.
out_cols <- c("grades_math", "grades_language", "grades_sci")
for (col in out_cols) {
  c15 <- paste0(col, "_2015")
  c16 <- paste0(col, "_2016")
  # `%in%` treats a missing validity flag as "not valid" rather than yielding
  # NA, so those rows receive the 2015 value.
  df <- df %>%
    mutate(
      !!sym(col) := ifelse(validity_2016 %in% "yes", !!sym(c16), !!sym(c15))
    )
}
其中,!!sym(x)获取保存在变量x中的文本,并将其转换为变量名(例如,如果为x = "sci",则!!sym(x)将提供变量sci,而不是文本"sci"或变量x)。
发布于 2020-08-07 07:08:53
tidyverse和rlang示例:
此示例使用mutate和case_when为您描述的变量赋值。我将其封装在一个函数中,以防这是您经常要做的事情。
library(tidyverse)
library(rlang)
#' Build combined grade columns from two consecutive years
#'
#' For every column whose name contains `year_view` (excluding `condition_col`
#' and any column with "validity" in its name), a new column is added whose
#' name is the source name with the `_YYYY` part removed. Each row takes the
#' `year_view` value when `condition_col` equals "yes" and the previous
#' year's value otherwise (including when the condition is `NA`).
#'
#' @param df A data frame or tibble.
#' @param condition_col Name of the condition column, as a single string.
#' @param year_view The preferred year, e.g. "2016".
#' @return `df` with one added column per matched `year_view` column.
make_grade_columns <- function(df, condition_col, year_view) {
  stopifnot(
    is.data.frame(df),
    is.character(condition_col),
    length(condition_col) == 1L,
    condition_col %in% colnames(df)
  )

  year_view <- as.character(year_view)
  prior_year <- as.character(as.numeric(year_view) - 1)
  all_cols <- colnames(df)

  is_year_col <- str_detect(all_cols, fixed(year_view)) &
    all_cols != condition_col &
    !str_detect(all_cols, "validity")
  year_column_names <- all_cols[is_year_col]

  # Derive each prior-year column from its current-year name so the pairing
  # does not depend on the order of the columns in `df`.
  year_prior_column_names <- str_replace(
    year_column_names, fixed(year_view), prior_year
  )
  return_col_names <- str_remove(year_column_names, "_\\d\\d\\d\\d")

  missing_prior <- setdiff(year_prior_column_names, all_cols)
  if (length(missing_prior) > 0) {
    stop(
      "Missing prior-year column(s): ",
      paste(missing_prior, collapse = ", "),
      call. = FALSE
    )
  }

  # One mutate per subject, so any number of subjects is supported.
  for (i in seq_along(return_col_names)) {
    df <- df %>%
      mutate(
        !!return_col_names[i] := case_when(
          # `.data[[...]]` yields a plain vector; an NA condition falls
          # through to the TRUE branch below.
          .data[[condition_col]] == "yes" ~ !!sym(year_column_names[i]),
          TRUE ~ !!sym(year_prior_column_names[i])
        )
      )
  }
  df
}
make_grade_columns(df, "validity_2016", "2016") %>%
select(person, validity_2016, grades_math, grades_sci, grades_language)
# # A tibble: 3 x 5
# person validity_2016 grades_math grades_sci grades_language
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Alice NA 6 7 7
# 2 Bob yes 5 6 6
# 3 Mary NA 4 9 9
假设您想换一个条件,例如当likes_ham的回答为"yes"时使用所选年份的成绩,只需将该列作为函数的条件列传入即可。
make_grade_columns(df, "likes_ham", "2016")%>%
select(person, likes_ham, grades_math, grades_sci, grades_language)
# # A tibble: 3 x 5
# person likes_ham grades_math grades_sci grades_language
# <chr> <chr> <dbl> <dbl> <dbl>
# 1 Alice no 6 7 7
# 2 Bob yes 5 6 6
# 3 Mary yes 7 7 7
当条件列的值为"yes"时,该函数返回所选年份的值;否则(回答为"no"或为NA时)返回前一年的值。
https://stackoverflow.com/questions/63290366
复制相似问题