library(data.table)
HAVE = data.table(STUDENT=c(1,1,1,1,2,2,2,2,3,3,3,4,4,4,4,4),
TIME=c(1,2,3,4,1,2,3,4,1,2,3,1,2,3,4,5),
SCORE=c(1,0,1,1,1,0,0,1,1,1,0,1,0,1,1,1))
WANT = data.table(TIMES=c(12,23,34,45),
SCORE00=c(0,1,0,0),
SCORE01=c(0,2,1,0),
SCORE10=c(3,1,0,0),
SCORE11=c(1,0,2,1))我有一个data.table ' have‘,它显示了学生在一段时间内的重复分数。对于每个时间切换(1-2,2-3,3-4,4-5),我希望计算从0-0,0-1,1-0,1-1变化的学生数量,如“WANT”中所示。
发布于 2021-04-20 21:41:31
一种data.table方法
# Set keys
setkey(HAVE, STUDENT, TIME)
HAVE[, `:=`(SCORE2 = paste0(shift(SCORE, type = "lag"), SCORE),
TIMES = paste0(shift(TIME, type = "lag"), TIME)),
by = .(STUDENT)]
dcast(HAVE[!grepl("NA", SCORE2), ], TIMES ~ paste0("SCORE", SCORE2), fill = 0,
value.var = "TIMES", fun.aggregate = length)
# TIMES SCORE00 SCORE01 SCORE10 SCORE11
# 1: 12 0 0 3 1
# 2: 23 1 2 1 0
# 3: 34 0 1 0 2
# 4: 45 0 0 0 1发布于 2021-04-20 21:29:52
当然不是最好的解决方案:
library(purrr)
library(dplyr)
WANT <- HAVE %>%
split(.$STUDENT) %>%
map(~map2_chr(
head(.x$SCORE, -1), tail(.x$SCORE, -1), str_c)
) %>%
data.table::transpose() %>%
map(~as_tibble(as.list(table(.x)))) %>%
map(~setNames(.x, str_c("SCORE", names(.x)))) %>%
bind_rows() %>%
select(sort(names(.)))
WANT[is.na(WANT)] <- 0
WANT$TIMES <- unique(HAVE$TIME) %>%
sort() %>%
{map2_chr(head(., -1), tail(., -1), str_c)}
WANT <- WANT %>% select(TIMES, everything())发布于 2021-04-20 22:15:38
另一个基本的、基于管道的替代方案
WANTED <- split.data.frame(HAVE, HAVE$STUDENT) %>%
lapply(., function(x) {x$Score2 <- c(x$SCORE[2:nrow(x)], NA); return(x)}) %>%
rbindlist %>%
.[complete.cases(.), ] %>%
unite("ZF", SCORE, Score2, sep = ",") %>%
select(-STUDENT) %>%
table带输出
> split.data.frame(HAVE, HAVE$STUDENT) %>%
+ lapply(., function(x) {x$Score2 <- c(x$SCORE[2:nrow(x)], NA); return(x)}) %>%
+ rbindlist %>%
+ .[complete.cases(.), ] %>%
+ unite("ZF", SCORE, Score2, sep = ",") %>%
+ select(-STUDENT) %>%
+ table
ZF
TIME 0,0 0,1 1,0 1,1
1 0 0 3 1
2 1 2 1 0
3 0 1 0 2
4 0 0 0 1https://stackoverflow.com/questions/67179416
复制相似问题