我有一个包含许多因子变量的数据集。我想找一种简洁的方法，打印出每个变量中各个取值（因子水平）所占的比例。先谢谢大家。
# Build the example data: the cyl, vs and am columns of the built-in mtcars
# data set, each converted to a factor.
# Columns are selected by name rather than by position, and the row names are
# reset on the copy only, so the built-in `mtcars` is neither printed nor
# masked by a modified global copy.
df <- mtcars[, c("cyl", "vs", "am")]
rownames(df) <- NULL
df[] <- lapply(df, factor)
sapply(df, function(x) if("factor" %in% class(x)) {prop.table(table(x))})

预期输出：
0 1 4 6 8
cyl NA NA 0.34 0.21 0.43
vs 0.56 0.43 NA NA NA
am 0.59 0.40 NA NA NA

发布于 2022-05-12 08:17:23
您可以使用 dplyr::bind_rows，把每个变量的比例表按行合并：
library(dplyr)
# Proportion of each level, computed for every factor column of `df`.
# Filter() keeps only the factor columns and lapply() always returns a named
# list. sapply() would collapse the result into a matrix whenever all factors
# have the same number of levels, which is not the list of per-variable rows
# that bind_rows() is given here.
s <- lapply(Filter(is.factor, df), function(x) prop.table(table(x)))
# Stack one row per variable (NA where a level does not occur), keep the
# variable name in `col`, and order the level columns by name.
bind_rows(s, .id = "col") %>%
  relocate(col, order(colnames(.)))
## A tibble: 3 × 6
# col `0` `1` `4` `6` `8`
# <chr> <table> <table> <table> <table> <table>
#1 cyl NA NA 0.34375 0.21875 0.4375
#2 vs 0.56250 0.43750 NA NA NA
#3 am 0.59375 0.40625 NA NA NA

或者，把变量名作为行名：
# Variant that stores the variable names as row names instead of a column:
# stack the per-variable proportions, sort the level columns by name, convert
# to a plain data frame and label the rows with the names of `s`.
s %>%
  bind_rows() %>%
  select(order(names(.))) %>%
  as.data.frame() %>%
  `row.names<-`(names(s))
# 0 1 4 6 8
#cyl NA NA 0.34375 0.21875 0.4375
#vs 0.56250 0.43750 NA NA NA
#am 0.59375 0.40625 NA NA NA

发布于 2022-05-12 08:20:28
一种基于 purrr::map_dfr 的紧凑解法：
library(tidyverse)
# Compute the level proportions of every column and row-bind them, keeping
# the column name in `id`; then turn `id` into row names and sort the level
# columns by name.
df %>%
  map_dfr(function(.x) prop.table(table(.x)), .id = "id") %>%
  column_to_rownames(var = "id") %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA

另一种基于 purrr::map 和 bind_rows 的解法（不那么紧凑）：
library(tidyverse)
# Same result in explicit steps: compute the proportions per column, stack
# them row-wise, convert to a plain data frame, label the rows with the
# column names of `df`, and sort the level columns by name.
df %>%
  map(function(.x) prop.table(table(.x))) %>%
  bind_rows() %>%
  as.data.frame() %>%
  `row.names<-`(names(df)) %>%
  select(sort(names(.)))
#> 0 1 4 6 8
#> cyl NA NA 0.34375 0.21875 0.4375
#> vs 0.56250 0.43750 NA NA NA
#> am 0.59375 0.40625 NA NA NA

https://stackoverflow.com/questions/72212166
复制相似问题