对于每个参与者和每个试验,我需要检查对于CURRENT_ID中的所有连续行,第一行在列A中的值为0,最后一行在列B中的值为0。如果这两个条件都满足,我希望新列C中的值为0,如果不是,我希望值为1。
# Preview the first 10 rows of mydf; the printed result follows in the #> lines.
head(mydf, 10)
#> # A tibble: 10 x 6
#> A B participant trial CURRENT_ID C
#> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
#> 1 0 1 ppt01 45 3 0
#> 2 1 0 ppt01 45 4 0
#> 3 0 1 ppt01 45 10 0
#> 4 0 0 ppt01 45 11 0
#> 5 1 0 ppt01 45 12 0
#> 6 0 1 ppt01 87 2 0
#> 7 1 0 ppt01 87 3 0
#> 8 1 1 ppt01 87 4 1
#> 9 1 1 ppt01 87 5 1
#> 10 0 1 ppt01 34 6 0
我需要考虑每个参与者和试验的每一对连续行(基于CURRENT_ID的值是连续的)。在上面的示例中,第8行和第9行在新列C中的值为1,因为第8行在列A中的值为1(而不是0),第9行在列B中的值为1(而不是0)。
下面是如何比较行的示例,参与者ppt01和trial 87
A B participant trial CURRENT_ID C
0 1 ppt01 87 2 0
1 0 ppt01 87 3 0
1 0 ppt01 87 3 0
1 1 ppt01 87 4 1
1 1 ppt01 87 4 1
1 1 ppt01 87 5 1
数据:
# Example data for the question, as produced by dput(): 37 rows
# (row.names = c(NA, -37L)) with columns A, B, participant, trial, CURRENT_ID
# and C. C appears to hold the result the asker expects for each row.
# The "spec_tbl_df" class and the `spec` attribute are carried over from the
# original object; presumably it was read with readr -- confirm before relying
# on that.
mydf <- structure(list(A = c(0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0,
1, 1), B = c(1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1
), participant = c("ppt01", "ppt01", "ppt01", "ppt01", "ppt01",
"ppt01", "ppt01", "ppt01", "ppt01", "ppt01", "ppt01", "ppt01",
"ppt01", "ppt01", "ppt01", "ppt01", "ppt01", "ppt01", "ppt01",
"ppt01", "ppt01", "ppt02", "ppt02", "ppt02", "ppt02", "ppt02",
"ppt02", "ppt02", "ppt02", "ppt02", "ppt02", "ppt02", "ppt02",
"ppt02", "ppt02", "ppt02", "ppt02"), trial = c(45, 45, 45, 45,
45, 87, 87, 87, 87, 34, 34, 34, 34, 34, 34, 8, 8, 8, 8, 8, 8,
87, 87, 87, 87, 55, 55, 55, 55, 55, 55, 22, 22, 22, 22, 22, 22
), CURRENT_ID = c(3, 4, 10, 11, 12, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 5, 6, 9, 10, 11, 12, 2, 3, 4, 5, 5, 6, 9, 10, 11, 12, 2,
3, 4, 10, 11, 12), C = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
1, 0, 1, 1)), class = c("spec_tbl_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -37L), spec = structure(list(cols = list(
A = structure(list(), class = c("collector_double", "collector"
)), B = structure(list(), class = c("collector_double", "collector"
)), participant = structure(list(), class = c("collector_character",
"collector")), trial = structure(list(), class = c("collector_double",
"collector")), CURRENT_ID = structure(list(), class = c("collector_double",
"collector")), C = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))发布于 2017-06-15 05:05:03
如果您希望将AB对分组在participant-trial组中,这应该是可行的:
# Pair rows by position inside each participant/trial (rows 1-2, 3-4, ...).
# A pair gets newC = 0 when its first row has A == 0 and its second row has
# B == 0; an unpaired trailing row always gets 0; every other pair gets 1.
d %>%
  group_by(participant, trial) %>%
  # AB is the index of the pair each row belongs to within its group.
  mutate(AB = ceiling(row_number() / 2)) %>%
  group_by(participant, trial, AB) %>%
  mutate(
    newC = ifelse(n() == 1 | (A[1] == 0 & B[2] == 0), 0, 1)
  )
# The helper column AB is kept so that you can see how this was done.
输出:
# A tibble: 15 x 8
A B participant trial CURRENT_ID C AB newC
<int> <int> <chr> <int> <int> <int> <dbl> <dbl>
1 0 1 ppt01 45 3 0 1 0
2 1 0 ppt01 45 4 0 1 0
3 0 1 ppt01 45 10 0 2 0
4 0 0 ppt01 45 11 0 2 0
5 1 0 ppt01 45 12 0 3 0
6 0 1 ppt01 87 2 0 1 0
7 1 0 ppt01 87 3 0 1 0
8 1 1 ppt01 87 4 1 2 1
9 1 1 ppt01 87 5 1 2 1
10 0 1 ppt01 34 6 0 1 0
11 0 0 ppt01 34 7 0 1 0
12 0 0 ppt01 34 8 0 2 0
13 0 0 ppt01 34 9 0 2 0
14 0 0 ppt01 34 10 0 3 0
15 1 0 ppt01 34 11 0 3 0
否则,如最初所述:
# library() stops with an error when dplyr is not installed; require() would
# only return FALSE and let the pipeline below fail later with a less helpful
# message.
library(dplyr)

# As originally stated: within each participant/trial, newC is 0 for every row
# when the group's first row has A == 0 and its last row has B == 0, otherwise
# 1. The result stays grouped by participant and trial.
d %>%
  group_by(participant, trial) %>%
  mutate(newC = ifelse(A[1] == 0 & B[n()] == 0, 0, 1))
# Output:
Source: local data frame [15 x 7]
Groups: participant, trial [3]
# A tibble: 15 x 7
A B participant trial CURRENT_ID C newC
<int> <int> <chr> <int> <int> <int> <dbl>
1 0 1 ppt01 45 3 0 0
2 1 0 ppt01 45 4 0 0
3 0 1 ppt01 45 10 0 0
4 0 0 ppt01 45 11 0 0
5 1 0 ppt01 45 12 0 0
6 0 1 ppt01 87 2 0 1
7 1 0 ppt01 87 3 0 1
8 1 1 ppt01 87 4 1 1
9 1 1 ppt01 87 5 1 1
10 0 1 ppt01 34 6 0 0
11 0 0 ppt01 34 7 0 0
12 0 0 ppt01 34 8 0 0
13 0 0 ppt01 34 9 0 0
14 0 0 ppt01 34 10 0 0
15 1 0 ppt01 34 11 0 0
我通过dput()使用了您的数据子集:
# Subset of the question's data used by the dplyr answers: the first 15 rows
# (participant ppt01, trials 45, 87 and 34), stored as a plain data.frame with
# integer columns.
d <- structure(
  list(
    A = c(0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L),
    B = c(1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L),
    participant = rep("ppt01", 15L),
    trial = rep(c(45L, 87L, 34L), times = c(5L, 4L, 6L)),
    CURRENT_ID = c(3:4, 10:12, 2:11),
    C = rep(c(0L, 1L, 0L), times = c(7L, 2L, 6L))
  ),
  class = "data.frame",
  row.names = c(NA, -15L)
)
# Posted on 2020-05-16 15:37:53
Base R解决方案:
# Base R: label every run of consecutive CURRENT_ID values within a
# participant/trial, then set C to 0 for a run whose first row has A == 0 and
# whose last row has B == 0, and to 1 otherwise.
#
# Side effect: adds the helper column `grouping_vec` to mydf.
# Result (not assigned): a data.frame with mydf's columns (the old C dropped)
# plus the recomputed C, in the same row order as mydf.
mydf$grouping_vec <- with(mydf, paste(
  participant, trial,
  ave(CURRENT_ID, participant, trial, FUN = function(x) {
    # A new run starts wherever the step from the previous ID is not exactly 1
    # (same rule as the tidyverse version of this answer).
    cumsum(c(1, diff(x) != 1))
  }),
  sep = " - "
))
data.frame(
  mydf[, names(mydf) != "C"],
  # ave() writes each run's value back to the run's own row positions, so the
  # input order is preserved; split() + rbind() would sort the runs by key.
  C = ave(seq_len(nrow(mydf)), mydf$grouping_vec, FUN = function(idx) {
    # Scalar test on the run's first and last row, hence && rather than &.
    if (mydf$A[idx[1]] == 0 && mydf$B[idx[length(idx)]] == 0) 0 else 1
  }),
  row.names = NULL
)
# Tidyverse solution:
library(tidyverse)

# Tidyverse version: build a key per run of consecutive CURRENT_ID values
# within a participant/trial, then set C to 0 for a run whose first row has
# A == 0 and whose last row has B == 0, and to 1 otherwise. The helper key is
# dropped again at the end and the result is ungrouped.
mydf %>%
  mutate(
    grouping_vec = str_c(
      participant,
      trial,
      # Run counter over the whole table: it increases whenever the step from
      # the previous row's ID is not exactly 1. participant and trial are part
      # of the key, so runs from different groups never share a key.
      cumsum(c(1, diff(CURRENT_ID) != 1)),
      # `sep` belongs to str_c(); the original closed str_c() one parenthesis
      # too early, which left an unmatched ")" and a syntax error.
      sep = " - "
    )
  ) %>%
  group_by(grouping_vec) %>%
  mutate(C = if_else(first(A) == 0 & last(B) == 0, 0, 1)) %>%
  ungroup() %>%
  select(-grouping_vec)
# Source: https://stackoverflow.com/questions/44554080
复制相似问题