首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >R DataTable汇总更改

R DataTable汇总更改
EN

Stack Overflow用户
提问于 2021-04-20 20:57:10
回答 3查看 33关注 0票数 0
代码语言:r
复制
library(data.table)

# Input in long format: one row per (STUDENT, TIME) observation with a 0/1
# SCORE. The students have 4, 4, 3 and 5 observations respectively.
HAVE <- data.table(
  STUDENT = rep(c(1, 2, 3, 4), times = c(4, 4, 3, 5)),
  TIME = c(
    1, 2, 3, 4,
    1, 2, 3, 4,
    1, 2, 3,
    1, 2, 3, 4, 5
  ),
  SCORE = c(
    1, 0, 1, 1,
    1, 0, 0, 1,
    1, 1, 0,
    1, 0, 1, 1, 1
  )
)

# Expected result: one row per pair of consecutive time points (TIMES 12 is
# the step from TIME 1 to TIME 2) and one column per score transition
# (SCORE10 counts the students whose score went from 1 to 0 on that step).
WANT <- data.table(
  TIMES = c(12, 23, 34, 45),
  SCORE00 = c(0, 1, 0, 0),
  SCORE01 = c(0, 2, 1, 0),
  SCORE10 = c(3, 1, 0, 0),
  SCORE11 = c(1, 0, 2, 1)
)

我有一个 data.table 'HAVE',它记录了学生在一段时间内的多次分数。对于每一次时间切换(1-2、2-3、3-4、4-5),我希望统计分数按 0-0、0-1、1-0、1-1 变化的学生数量,如 'WANT' 中所示。

EN

回答 3

Stack Overflow用户

发布于 2021-04-20 21:41:31

一种data.table方法

代码语言:r
复制
# Key HAVE by student and time. This sorts the table by reference, so within
# each student the previous row is the previous time point.
setkeyv(HAVE, c("STUDENT", "TIME"))

# Add two label columns to HAVE by reference: the lagged value pasted in front
# of the current one, per student. A student's first row has no predecessor,
# so its labels begin with "NA".
HAVE[
  ,
  c("SCORE2", "TIMES") := list(
    paste0(shift(SCORE, n = 1L, type = "lag"), SCORE),
    paste0(shift(TIME, n = 1L, type = "lag"), TIME)
  ),
  by = "STUDENT"
]

# Discard rows whose score label contains "NA", then count the remaining rows
# in each (time step, score transition) cell; empty cells are filled with 0.
dcast(
  HAVE[!grepl("NA", SCORE2)],
  TIMES ~ paste0("SCORE", SCORE2),
  value.var = "TIMES",
  fun.aggregate = length,
  fill = 0
)

#    TIMES SCORE00 SCORE01 SCORE10 SCORE11
# 1:    12       0       0       3       1
# 2:    23       1       2       1       0
# 3:    34       0       1       0       2
# 4:    45       0       0       0       1
票数 2
EN

Stack Overflow用户

发布于 2021-04-20 21:29:52

当然不是最好的解决方案:

代码语言:r
复制
library(purrr)
library(dplyr)

# Count score transitions per time step with purrr/dplyr.
#
# Labels are built with base paste0() rather than str_c(): str_c() is exported
# by stringr, which this snippet never attaches, so calling it here stops with
# "could not find function". paste0() is vectorised, so no map2_chr() wrapper
# is needed either.
#
# NOTE(review): assumes the rows of HAVE are already in TIME order within each
# student; nothing in this snippet sorts them.
WANT <- HAVE %>%
  split(.$STUDENT) %>%
  # Per student: consecutive score pairs as labels, e.g. "10" for 1 -> 0.
  map(~ paste0(head(.x$SCORE, -1), tail(.x$SCORE, -1))) %>%
  # Regroup so that element k holds every student's k-th transition. Students
  # with fewer observations are padded with NA, which table() ignores.
  data.table::transpose() %>%
  map(~ as_tibble(as.list(table(.x)))) %>%
  map(~ setNames(.x, paste0("SCORE", names(.x)))) %>%
  bind_rows() %>%
  select(all_of(sort(names(.))))

# bind_rows() leaves NA where a transition never occurred at a time step.
WANT[is.na(WANT)] <- 0

# Label each row with its pair of consecutive time points ("12", "23", ...).
WANT$TIMES <- unique(HAVE$TIME) %>%
  sort() %>%
  {
    paste0(head(., -1), tail(., -1))
  }

WANT <- WANT %>% select(TIMES, everything())
票数 0
EN

Stack Overflow用户

发布于 2021-04-20 22:15:38

另一个基于 base R 和管道的替代方案

代码语言:r
复制
# Cross-tabulate TIME against the "current,next" score pair of each student.
#
# Changes from the original snippet:
# * The next score is taken with x$SCORE[-1] instead of x$SCORE[2:nrow(x)].
#   For a student with a single row, 2:nrow(x) is 2:1 and counts down, giving
#   a replacement of the wrong length and an error.
# * The pair label and the final table are built with base paste()/table()
#   inside with(), instead of unite() + select(). unite() comes from tidyr,
#   which is not attached in any of the snippets shown here.
WANTED <- split.data.frame(HAVE, HAVE$STUDENT) %>%
  lapply(function(x) {
    # Lead of SCORE within the student; the last observation has no successor.
    x$Score2 <- c(x$SCORE[-1], NA)
    x
  }) %>%
  rbindlist() %>%
  # Drop each student's last row (no next score to pair with).
  .[complete.cases(.), ] %>%
  with(table(TIME, ZF = paste(SCORE, Score2, sep = ",")))

输出如下:

代码语言:r
复制
> split.data.frame(HAVE, HAVE$STUDENT) %>% 
+      lapply(., function(x) {x$Score2 <- c(x$SCORE[2:nrow(x)], NA);  return(x)}) %>% 
+            rbindlist %>% 
+              .[complete.cases(.), ] %>% 
+                unite("ZF", SCORE, Score2, sep = ",") %>% 
+                select(-STUDENT) %>% 
+                   table
    ZF
TIME 0,0 0,1 1,0 1,1
   1   0   0   3   1
   2   1   2   1   0
   3   0   1   0   2
   4   0   0   0   1
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/67179416

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档