我试图将一段dplyr代码改写为data.table,但发现很难得到完全一致的结果。非常感谢任何帮助!
数据
# Example input: a single `brk` group whose estimate `est` steps upward
# (900 -> 1000 -> 1100) as the time `t` increases.
DT <- data.table(
  brk = 1L,
  est = c(900, 900, 1000, 1000, 1100),
  t = c(0, 12, 15, 22, 30)
)

期望结果
# Expected output: the input columns plus, for every row, the previous distinct
# estimate (`prev_est`) and `prev_est_age`, i.e. `t` minus the time at which
# that previous estimate first appeared. Rows without a previous estimate
# hold NA in both new columns.
Desired_DT <- data.table(
  brk = 1L,
  est = c(900, 900, 1000, 1000, 1100),
  t = c(0, 12, 15, 22, 30),
  prev_est = c(NA, NA, 900, 900, 1000),
  prev_est_age = c(NA, NA, 15, 22, 15)
)

一种可能的dplyr方式
# Keep the first row of each `est`, lag it to obtain the previous estimate and
# the time it first appeared, join that lookup back onto every row, carry the
# values down to the rows that were not the first of their `est`, and finally
# compute how long ago the previous estimate first appeared.
DT %>%
  left_join(
    DT %>%
      group_by(est) %>%
      slice(1) %>%
      ungroup() %>%
      mutate(
        # Qualified so that `stats::lag()` can never be picked up by accident.
        prev_est = dplyr::lag(est, n = 1L),
        prev_t = dplyr::lag(t, n = 1L)
      ),
    # Spell out the keys instead of relying on the implicit natural join,
    # which emits a message and changes silently if columns are added.
    by = c("brk", "est", "t")
  ) %>%
  # `fill()` selects columns with tidyselect, where bare column names are the
  # supported form; `.data$col` is deprecated in selection contexts.
  tidyr::fill(prev_est, prev_t) %>%
  mutate(prev_est_age = t - prev_t)

发布于 2019-04-15 16:27:29
这里有一个选项,它的语法与tidyverse中的写法类似:
library(data.table)
library(zoo)
# Take the first row of every `est` group, then add `prev_est` / `prev_t` as
# the one-step lag of the `est` and `t` columns of that reduced table.
dt1 <- DT[, .SD[1L], by = est][
  ,
  c("prev_est", "prev_t") := shift(.SD),
  .SDcols = c("est", "t")
][]
# Join the lookup onto every row of `DT` by `brk`, `est` and `t`; rows that
# were not the first of their `est` come back as NA and are filled with the
# last observed value via `zoo::na.locf0()`. The age of the previous estimate
# is then `t - prev_t`.
dt1[DT, on = c("brk", "est", "t")][
  ,
  c("prev_est", "prev_t") := lapply(.SD, zoo::na.locf0),
  .SDcols = c("prev_est", "prev_t")
][
  ,
  prev_est_age := t - prev_t
][]

我们也可以利用滚动连接。
# Build the lookup of first rows per `est` together with the lagged
# `est` / `t` values. The source and target columns are named explicitly
# (rather than via `names(DT)[2:3]` and the range `est:t`) so the result does
# not depend on the column order of `DT`.
dt1 <- unique(DT, by = "est")[
  ,
  c("prev_est", "prev_t") := shift(.SD),
  .SDcols = c("est", "t")
][]
# Rolling join: `brk` and `est` must match exactly, while `roll = Inf` rolls on
# the last join column `t`, so each row of `DT` picks up the lookup row of its
# own `est` carried forward in time. No separate fill step is needed.
out <- dt1[DT, on = .(brk, est, t), roll = Inf][
  ,
  prev_est_age := t - prev_t
][
  ,
  # `prev_t` was only a helper for the age calculation.
  prev_t := NULL
][]
identical(Desired_DT, out)
#[1] TRUE
https://stackoverflow.com/questions/55693411
复制相似问题