data <-
STUDY ID BASE CYCLE1 DIED PROG
1 1 100 30 No Yes
1 2 NA 20 Yes No
1 3 16 NA Yes Yes
1 4 15 10 Yes Yes我想对以下几点作一个总结:
答案:
对此逐项研究的汇总表将是很棒的(显示数字和百分比)。我在使用Rstudio。
发布于 2019-07-30 00:04:08
如果它是基于第一个filter
library(dplyr)
library(stringr)
data %>%
group_by(STUDY) %>%
filter(!is.na(BASE) & !is.na(CYCLE1)) %>%
summarise(ID = str_c(ID, collapse=", "),
n1 = n(),
n2 = sum(DIED== "Yes"),
n3 = sum(DIED == "Yes"|PROG == "Yes"))
# A tibble: 1 x 5
# STUDY ID n1 n2 n3
# <int> <chr> <int> <int> <int>
#1 1 1, 4 2 1 2如果我们也需要这个百分比
out <- data %>%
group_by(STUDY) %>%
mutate(i1 = !is.na(BASE) & !is.na(CYCLE1),
perc1 = 100 * mean(i1),
n1 = sum(i1),
i2 = DIED == "Yes" & i1,
perc2 = 100 * mean(i2),
n2 = sum(i2),
i3 = (DIED == "Yes"|PROG == "Yes") & i1,
perc3 = 100 * mean(i3),
n3 = sum(i3)) %>%
filter(i1) %>%
select(STUDY, ID, matches("perc"), matches("n")) %>%
mutate(ID = toString(ID)) %>%
slice(1)
# A tibble: 1 x 8
# Groups: STUDY [1]
# STUDY ID perc1 perc2 perc3 n1 n2 n3
# <int> <chr> <dbl> <dbl> <dbl> <int> <int> <int>
#1 1 1, 4 50 25 50 2 1 2它可以进一步修改以格式化输出。
library(tidyr) # 0.8.3.9000
out %>%
pivot_longer(cols = perc1:n3, names_to = c( "perc", "n"),
names_sep = "(?<=[a-z])(?=[0-9])") %>%
group_by(STUDY, ID, n) %>%
summarise(value = sprintf("%d (%d%%)", last(value), first(value))) %>%
select(-n)https://stackoverflow.com/questions/57262670
复制相似问题