我有一个数据表。对于每个 datatable$Ppt 和每个 datatable$nitem,当 datatable$Region 中出现 "fffword" 时,我需要提取 "fffword" 后面的数值,并将其与紧随其后的 "word" 的数值进行比较。如果这两个数值相同,datatable$Output 应为 0;如果不同,datatable$Output 应为 1。
我试过:
datatable %>% group_by(Ppt, nitem) %>%
mutate(Output = ifelse(as.numeric(gsub("fffword([0-9]+).*","\\1",Region) == lag(as.numeric(gsub("word([0-9]+).*","\\1",Region)), 0L,ifelse(as.numeric(gsub("fffword([0-9]+).*","\\1",Region) != lag(as.numeric(gsub("word([0-9]+).*","\\1",Region)), 1L)
但不起作用。
#Ppt Region nitem Output
#1 "fffword8" 93 0 (current fffword n=8, following word n=8)
#1 "word8" 93 0 (previous fffword n=8, current word n=8)
#1 "fffword9" 93 1 (current fffword n=9, no following word for this ppt and this nitem)
#1 "word2" 122 1 (no previous fffword for this ppt and this nitem and this n Region)
#1 "fffword3" 122 0 (current fffword n=3, following word n=3)
#1 "word3" 122 0 (previous fffword n=3, current word n=3)
#1 "word6" 122 1 (no previous fffword for this ppt and this nitem and this n Region)
#1 "fffword7" 122 0
#1 "word7" 122 0
#1 "fffword8" 122 0
#1 "word8" 122 0
#54 "fffword8" 4 0
#54 "word8" 4 0
#54 "fffword9" 4 1
#54 "word2" 4 1
#54 "fffword2" 10 0
#54 "word4" 10 1
#54 "word6" 10 1
#54 "fffword23" 10 0
#54 "word23" 10 0
#54 "fffword24" 5 0
#54 "word24" 5 0
发布于 2018-02-09 22:12:46
使用嵌套的 ifelse 配合 dplyr 可以得到您想要的结果。思路如下:
如果当前行的 Region 是 fffword,则应将其数字与下一行(lead)进行比较;如果当前行的 Region 是 word,则应将其数字与上一行(lag)进行比较。
如果下一行或上一行不存在,则 Output 应取 1。
Region 中的数字直接以 character(字符)格式进行比较;在比较之前,没有明显的理由将其转换为 numeric。
library(dplyr)
# Sample data from the question: one observation per row, with participant
# (Ppt), region label (Region) and item number (nitem).
#
# The last data row must NOT start with "#": read.table() uses "#" as its
# default comment character and would silently drop that line, leaving
# "fffword24" without its matching "word24" row (21 rows instead of 22).
# NOTE: a result listing that shows only 21 rows, with Output 1 for
# "fffword24", was produced while that row was still commented out; with all
# 22 rows present, both rows of the (Ppt 54, nitem 5) group compare equal.
#
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
datatable <- read.table(text = 'Ppt Region nitem
1 "fffword8" 93
1 "word8" 93
1 "fffword9" 93
1 "word2" 122
1 "fffword3" 122
1 "word3" 122
1 "word6" 122
1 "fffword7" 122
1 "word7" 122
1 "fffword8" 122
1 "word8" 122
54 "fffword8" 4
54 "word8" 4
54 "fffword9" 4
54 "word2" 4
54 "fffword2" 10
54 "word4" 10
54 "word6" 10
54 "fffword23" 10
54 "word23" 10
54 "fffword24" 5
54 "word24" 5 ', header = TRUE, stringsAsFactors = FALSE)
# Within each (Ppt, nitem) group, flag every row with Output = 0L when its
# number matches the paired neighbouring row and 1L otherwise:
#   * a "fffword<n>" row is paired with the NEXT row (lead),
#   * any other row is paired with the PREVIOUS row (lag).
# The digits are compared as character strings; a neighbour of the wrong kind
# never matches because its prefix is not stripped by the other pattern.
datatable %>%
  group_by(Ppt, nitem) %>%
  mutate(
    Output = ifelse(
      !grepl("^fffword", Region),
      # "word" row: compare with the preceding row, which should be a
      # "fffword" row; no preceding row in the group means no match.
      ifelse(
        is.na(lag(Region)),
        1L,
        ifelse(
          (gsub("fffword([0-9]+).*", "\\1", lag(Region))) ==
            (gsub("word([0-9]+).*", "\\1", Region)),
          0L,
          1L
        )
      ),
      # "fffword" row: compare with the following row, which should be a
      # "word" row; no following row in the group means no match.
      ifelse(
        is.na(lead(Region)),
        1L,
        ifelse(
          (gsub("word([0-9]+).*", "\\1", lead(Region))) ==
            (gsub("fffword([0-9]+).*", "\\1", Region)),
          0L,
          1L
        )
      )
    )
  ) %>%
  as.data.frame()
#Result
Ppt Region nitem Output
1 1 fffword8 93 0
2 1 word8 93 0
3 1 fffword9 93 1
4 1 word2 122 1
5 1 fffword3 122 0
6 1 word3 122 0
7 1 word6 122 1
8 1 fffword7 122 0
9 1 word7 122 0
10 1 fffword8 122 0
11 1 word8 122 0
12 54 fffword8 4 0
13 54 word8 4 0
14 54 fffword9 4 1
15 54 word2 4 1
16 54 fffword2 10 1
17 54 word4 10 1
18 54 word6 10 1
19 54 fffword23 10 0
20 54 word23 10 0
21 54 fffword24 5 1
https://stackoverflow.com/questions/48710736
复制相似问题