基于下面定义的arms,我在dplyr::mutate函数下编写了这些语句来定义事件。我想知道你是否可以帮我写一个函数,可以减少代码行,因为有些代码是重复的?谢谢
Arm- 1 (September 1, 2017- August 31, 2018)
arm_1_event_1 <- c(as.Date("2017-09-01"), as.Date("2017-11-30"))
arm_1_event_2 <- c(as.Date("2017-12-01"), as.Date("2018-02-28"))
arm_1_event_3 <- c(as.Date("2018-03-01"), as.Date("2018-05-31"))
arm_1_event_4 <- c(as.Date("2018-06-01"), as.Date("2018-08-31"))
Arm- 2 (September 1, 2018- August 31, 2019)
arm_2_event_1 <- c(as.Date("2018-09-01"), as.Date("2018-11-30"))
arm_2_event_2 <- c(as.Date("2018-12-01"), as.Date("2019-02-28"))
arm_2_event_3 <- c(as.Date("2019-03-01"), as.Date("2019-05-31"))
arm_2_event_4 <- c(as.Date("2019-06-01"), as.Date("2019-08-31"))
Arm- 3 (September 1, 2019- August 31, 2020)
arm_3_event_1 <- c(as.Date("2019-09-01"), as.Date("2019-11-30"))
arm_3_event_2 <- c(as.Date("2019-12-01"), as.Date("2020-02-29"))
arm_3_event_3 <- c(as.Date("2020-03-01"), as.Date("2020-05-31"))
arm_3_event_4 <- c(as.Date("2020-06-01"), as.Date("2020-08-31"))dplyr::mutate(
arms_and_events = NA_character_,
arms_and_events = dplyr::case_when(
((date_application_received >= arm_1_event_1[1] &
date_application_received <= arm_1_event_1[2]) ~ "arm_1_event_1"),
((date_application_received >= arm_1_event_2[1] &
date_application_received <= arm_1_event_2[2]) ~ "arm_1_event_2"),
((date_application_received >= arm_1_event_3[1] &
date_application_received <= arm_1_event_3[2]) ~ "arm_1_event_3"),
((date_application_received >= arm_1_event_4[1] &
date_application_received <= arm_1_event_4[2]) ~ "arm_1_event_4"),
((date_application_received >= arm_2_event_1[1] &
date_application_received <= arm_2_event_1[2]) ~ "arm_2_event_1"),
((date_application_received >= arm_2_event_2[1] &
date_application_received <= arm_2_event_2[2]) ~ "arm_2_event_2"),
((date_application_received >= arm_2_event_3[1] &
date_application_received <= arm_2_event_3[2]) ~ "arm_2_event_3"),
((date_application_received >= arm_2_event_4[1]
& date_application_received <= arm_2_event_4[2]) ~ "arm_2_event_4"),
((date_application_received >= arm_3_event_1[1] &
date_application_received <= arm_3_event_1[2]) ~ "arm_3_event_1"),
((date_application_received >= arm_3_event_2[1] &
date_application_received <= arm_3_event_2[2]) ~ "arm_3_event_2"),
((date_application_received >= arm_3_event_3[1] &
date_application_received <= arm_3_event_3[2]) ~ "arm_3_event_3"),
((date_application_received >= arm_3_event_4[1] &
date_application_received <= arm_3_event_4[2]) ~ "arm_3_event_4"),
((date_voucher_issued >= arm_1_event_1[1] &
date_voucher_issued <= arm_1_event_1[2]) ~ "arm_1_event_1"),
((date_voucher_issued >= arm_1_event_2[1] &
date_voucher_issued <= arm_1_event_2[2]) ~ "arm_1_event_2"),
((date_voucher_issued >= arm_1_event_3[1] &
date_voucher_issued <= arm_1_event_3[2]) ~ "arm_1_event_3"),
((date_voucher_issued >= arm_1_event_4[1] &
date_voucher_issued <= arm_1_event_4[2]) ~ "arm_1_event_4"),
((date_voucher_issued >= arm_2_event_1[1] &
date_voucher_issued <= arm_2_event_1[2]) ~ "arm_2_event_1"),
((date_voucher_issued >= arm_2_event_2[1] &
date_voucher_issued <= arm_2_event_2[2]) ~ "arm_2_event_2"),
((date_voucher_issued >= arm_2_event_3[1] &
date_voucher_issued <= arm_2_event_3[2]) ~ "arm_2_event_3"),
((date_voucher_issued >= arm_2_event_4[1] &
date_voucher_issued <= arm_2_event_4[2]) ~ "arm_2_event_4"),
((date_voucher_issued >= arm_3_event_1[1] &
date_voucher_issued <= arm_3_event_1[2]) ~ "arm_3_event_1"),
((date_voucher_issued >= arm_3_event_2[1] &
date_voucher_issued <= arm_3_event_2[2]) ~ "arm_3_event_2"),
((date_voucher_issued >= arm_3_event_3[1] &
date_voucher_issued <= arm_3_event_3[2]) ~ "arm_3_event_3"),
((date_voucher_issued >= arm_3_event_4[1] &
date_voucher_issued <= arm_3_event_4[2]) ~ "arm_3_event_4")
)发布于 2020-10-09 01:43:02
当我们考虑NA时,这个函数起作用:
arms_and_events_function <- function(d)
{
df <- data.frame(arm = rep(1:3, each = 4),
event = rep(1:4, 3),
start = as.Date(c("2017-09-01", "2017-12-01", "2018-03-01",
"2018-06-01", "2018-09-01", "2018-12-01",
"2019-03-01", "2019-06-01", "2019-09-01",
"2019-12-01", "2020-03-01", "2020-06-01")),
stop = as.Date(c("2017-11-30", "2018-02-28", "2018-05-31",
"2018-08-31", "2018-11-30", "2019-02-28",
"2019-05-31", "2019-08-31", "2019-11-30",
"2020-02-29", "2020-05-31", "2020-08-31"))
)
sapply(d, function(x) if(is.na(x)) NA else {
inds <- which(x >= df$start & x<= df$stop)
paste0("arm_", df$arm[inds], "_event_", df$event[inds])
})
}谢谢!
发布于 2020-10-08 05:22:25
您的整个代码(包括第一个块中的赋值)可以简化为一个简单的函数:
arm_and_event <- function(d)
{
df <- data.frame(arm = rep(1:3, each = 4),
event = rep(1:4, 3),
start = as.Date(c("2017-09-01", "2017-12-01", "2018-03-01",
"2018-06-01", "2018-09-01", "2018-12-01",
"2019-03-01", "2019-06-01", "2019-09-01",
"2019-12-01", "2020-03-01", "2020-06-01")),
stop = as.Date(c("2017-11-30", "2018-02-28", "2018-05-31",
"2018-08-31", "2018-11-30", "2019-02-28",
"2019-05-31", "2019-08-31", "2019-11-30",
"2020-02-29", "2020-05-31", "2020-08-31")))
matches <- sapply(d, function(x) which(x >= df$start & x <= df$stop))
paste0("arm", df$arm[matches], "_event", df$event[matches])
}因此,例如,如果我们有一个日期向量,如下所示:
date_application_received <- as.Date(c("2017-09-30", "2019-01-05"))然后我们可以这样做:
arm_and_event(date_application_received)
#> [1] "arm1_event1" "arm2_event2"额外的好处是我们的全局工作区不会被很多变量污染,代码也更容易维护。
https://stackoverflow.com/questions/64251907
复制相似问题