我有一个长格式的数据框(data frame),如下所示:
ID Frame.No ROI.No Flux.med
01 1 1 78
01 1 2 76
01 2 1 80
01 2 2 80
01 3 1 89
01 3 2 80
27 1 1 60
27 1 2 68
27 4 1 80
27 4 2 89
对于每个 "ID",我希望获得 ROI 1 和 ROI 2 的第一个以及最大的 Flux.med 值,并将它们全部放入一个新的数据框 want 中。如果数据中只有一个受试者(例如 ID 01),我可以使用以下代码找出所需的 Flux.med 值:
# Single-subject version. For each ROI this keeps the Flux.Med column of
#   * the rows whose Frame.No equals the smallest Frame.No in mydata
#     (baseline), and
#   * the rows whose Flux.Med equals the largest Flux.Med within that ROI (max).

# ROI 1
ROI1.baseline <- mydata %>%
  filter(ROI.No == "ROI 1", Frame.No == min(Frame.No)) %>%
  select(Flux.Med)

ROI1.max <- mydata %>%
  filter(ROI.No == "ROI 1") %>%
  # Separate step: max() has to be evaluated on the ROI 1 rows only.
  filter(Flux.Med == max(Flux.Med)) %>%
  select(Flux.Med)

# ROI 2
ROI2.baseline <- mydata %>%
  filter(ROI.No == "ROI 2", Frame.No == min(Frame.No)) %>%
  select(Flux.Med)

# NOTE(review): this object is called ROI.max rather than ROI2.max, which
# looks like a typo next to the other three names; confirm before renaming.
ROI.max <- mydata %>%
  filter(ROI.No == "ROI 2") %>%
  # Separate step: max() has to be evaluated on the ROI 2 rows only.
  filter(Flux.Med == max(Flux.Med)) %>%
  select(Flux.Med)
但是我需要对每个ID这样做,并将结果保存在一个dataframe中。
我可以用一个for循环来完成这个任务吗?
发布于 2020-04-20 06:09:31
谢谢你的建议。最后我就是这样做的:
# Per-ID baseline and peak Flux.Med for ROI 1 and ROI 2.
#
# Each of the four results is a plain data.frame with one row per ID and two
# columns: ID plus the renamed value column, so they can be merged on ID.
#
# Notes on the approach:
#   * The ROI filter runs before grouping, so the baseline is the lowest
#     Frame.No recorded for that ROI. Taking min(Frame.No) over every ROI of
#     an ID silently drops the ID when its lowest frame exists only for the
#     other ROI.
#   * summarise() returns exactly one row per ID. Filtering on
#     Flux.Med == max(Flux.Med) returns one row per tie, which multiplies
#     rows when the four tables are merged.
#   * as.data.frame() takes no extra argument: the pipe already supplies the
#     data, so naming the result object there would pass it as `row.names`.
#   * summarise() is namespaced like rename(), because plyr exports functions
#     with the same names and would ignore the grouping if it masked dplyr.

# Baseline = Flux.Med at the lowest-numbered frame available for the ROI
# (not necessarily frame 1 if it was excluded).
ROI1.baseline <- ldi_data %>%
  filter(ROI.No == "ROI 1") %>%
  group_by(ID) %>%
  dplyr::summarise(ROI1_baseline = Flux.Med[which.min(Frame.No)]) %>%
  as.data.frame()

# Peak = largest Flux.Med recorded for the ROI.
ROI1.max <- ldi_data %>%
  filter(ROI.No == "ROI 1") %>%
  group_by(ID) %>%
  dplyr::summarise(ROI1_max = max(Flux.Med)) %>%
  as.data.frame()

ROI2.baseline <- ldi_data %>%
  filter(ROI.No == "ROI 2") %>%
  group_by(ID) %>%
  dplyr::summarise(ROI2_baseline = Flux.Med[which.min(Frame.No)]) %>%
  as.data.frame()

ROI2.max <- ldi_data %>%
  filter(ROI.No == "ROI 2") %>%
  group_by(ID) %>%
  dplyr::summarise(ROI2_max = max(Flux.Med)) %>%
  as.data.frame()
summary <- Reduce(merge, list(ROI1.baseline, ROI1.max, ROI2.baseline, ROI2.max))
发布于 2020-04-08 15:15:31
我们可以针对每个 ID 和 ROI.No 的组合,获取 Flux.med 的第一个值和最大值。
library(dplyr)
# For every ID / ROI.No pair, report the Flux.med at the lowest Frame.No
# (first_flux) and the largest Flux.med (max_flux).
mydata %>%
  group_by(ID, ROI.No) %>%
  summarise(
    # order_by makes "first" mean "lowest Frame.No" even if rows are unsorted.
    first_flux = first(Flux.med, order_by = Frame.No),
    max_flux = max(Flux.med)
  ) %>%
  # summarise() only peels off the last grouping level (ROI.No); drop the
  # remaining ID grouping so later verbs are not silently grouped.
  ungroup()
# ID ROI.No first_flux max_flux
# <int> <int> <int> <int>
#1 1 1 78 89
#2 1 2 76 80
#3 27 1 60 80
#4 27 2 68 89
或者使用 aggregate:
# Base R alternative: one row per ID / ROI.No pair.
# The rows are sorted by Frame.No first so that x[1] is the value at the
# lowest frame rather than whichever row happens to come first in mydata.
# Because FUN returns two values, the Flux.med column of the result is a
# two-column matrix ("first", "max").
aggregate(
  Flux.med ~ ID + ROI.No,
  data = mydata[order(mydata$Frame.No), ],
  FUN = function(x) c(first = x[1], max = max(x))
)
数据
# Sample data for the mydata snippets on this page: a 10-row data.frame with
# integer columns ID, Frame.No, ROI.No and Flux.med (looks like dput() output).
mydata <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 27L, 27L, 27L,
27L), Frame.No = c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 4L, 4L), ROI.No = c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), Flux.med = c(78L, 76L, 80L,
80L, 89L, 80L, 60L, 68L, 80L, 89L)), class = "data.frame", row.names = c(NA,-10L))
发布于 2020-04-08 17:40:34
我们可以使用data.table
library(data.table)
# data.table version: one row per ID / ROI.No pair.
# setDT() converts df1 to a data.table by reference, so df1 itself is changed.
setDT(df1)[, .(
  # Value at the lowest Frame.No, independent of the row order in df1.
  first_flux = Flux.med[which.min(Frame.No)],
  max_flux = max(Flux.med)
), by = .(ID, ROI.No)]
数据
# Sample data for the data.table snippet: a 10-row data.frame with integer
# columns ID, Frame.No, ROI.No and Flux.med (looks like dput() output).
df1 <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 27L, 27L, 27L,
27L), Frame.No = c(1L, 1L, 2L, 2L, 3L, 3L, 1L, 1L, 4L, 4L), ROI.No = c(1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L), Flux.med = c(78L, 76L, 80L,
80L, 89L, 80L, 60L, 68L, 80L, 89L)), class = "data.frame",
row.names = c(NA,-10L))
https://stackoverflow.com/questions/61103765
复制相似问题