首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何使用broom和dplyr拟合多列上的分布

如何使用broom和dplyr拟合多列上的分布
EN

Stack Overflow用户
提问于 2017-05-08 22:13:31
回答 2查看 786关注 0票数 1

我有以下数据:

代码语言:javascript
复制
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
set.seed(1)
df <- data_frame(
  genes = paste("Gene_",letters[0:10],sep=""),
  X = rnorm(10, 0, 1),
  Y = rnorm(10, 0, 2),
  Z = rnorm(10, 0, 4))

df
#> # A tibble: 10 × 4
#>     genes          X           Y          Z
#>     <chr>      <dbl>       <dbl>      <dbl>
#> 1  Gene_a -0.6264538  3.02356234  3.6759095
#> 2  Gene_b  0.1836433  0.77968647  3.1285452
#> 3  Gene_c -0.8356286 -1.24248116  0.2982599
#> 4  Gene_d  1.5952808 -4.42939977 -7.9574068
#> 5  Gene_e  0.3295078  2.24986184  2.4793030
#> 6  Gene_f -0.8204684 -0.08986722 -0.2245150
#> 7  Gene_g  0.4874291 -0.03238053 -0.6231820
#> 8  Gene_h  0.7383247  1.88767242 -5.8830095
#> 9  Gene_i  0.5757814  1.64244239 -1.9126002
#> 10 Gene_j -0.3053884  1.18780264  1.6717662

我可以这样计算X列:

代码语言:javascript
复制
fit_X <- MASS::fitdistr(df$X,"normal")
broom::tidy(fit_X)
#>   term  estimate std.error
#> 1 mean 0.1322028 0.2341758
#> 2   sd 0.7405289 0.1655873
broom::glance(fit_X)
#>    n    logLik      AIC      BIC
#> 1 10 -11.18548 26.37096 26.97613

如何对所有列执行此操作(第一个列除外- genes),以便最终获得:

代码语言:javascript
复制
    mean.estimate sd.estimate mean.stderror sd.stderror n loglik    AIC      BIC
X      0.1322028  0.7405289   0.2341758    0.1655873   10 -11.18548 26.37096 26.97613
Y      0.4976899  2.0292617   0.6417089    0.4537      10 -21.26611 46.53221 47.13738
Z     -0.534693   3.626276    1.1467291    0.8108599   10 -27.07145 58.14289 58.74806
EN

回答 2

Stack Overflow用户

发布于 2017-05-09 00:00:23

您可以使用tidyr::gatherpurrr::mapbroom::glance,如下所示:

代码语言:javascript
复制
df_new <- gather(df,var, val,-genes) %>% 
        group_by(var) %>% 
        summarise(vec = val %>% list) %>% 
        mutate(mod = map(vec, ~MASS::fitdistr(.x, "normal") )) %>% 
        mutate(mod_est_mean = map(mod, ("estimate")) %>% map_dbl("mean")) %>% 
        mutate(mod_est_sd = map(mod, ("estimate")) %>% map_dbl("sd")) %>% 
        mutate(mod_std_mean = map(mod, ("sd")) %>% map_dbl("mean")) %>% 
        mutate(mod_std_error = map(mod, ("sd")) %>% map_dbl("sd")) %>% 
        mutate(mod_glance = map(mod, glance)) %>% 
        select(-vec, -mod) %>% 
        unnest()

此代码块的细目如下:

获取列中每个组的向量

代码语言:javascript
复制
gather(df,var, val,-genes) %>% 
        group_by(var) %>% 
        summarise(vec = val %>% list)

# A tibble: 3 × 2
    var        vec
  <chr>     <list>
1     X <dbl [10]>
2     Y <dbl [10]>
3     Z <dbl [10]>

在新列中添加模型

代码语言:javascript
复制
df_new <- gather(df,var, val,-genes) %>% 
        group_by(var) %>% 
        summarise(vec = val %>% list) %>% 
        mutate(mod = map(vec, ~MASS::fitdistr(.x, "normal") )) 
# A tibble: 3 × 3
    var        vec            mod
  <chr>     <list>         <list>
1     X <dbl [10]> <S3: fitdistr>
2     Y <dbl [10]> <S3: fitdistr>
3     Z <dbl [10]> <S3: fitdistr>

为预估添加列

代码语言:javascript
复制
df_new <- gather(df,var, val,-genes) %>% 
        group_by(var) %>% 
        summarise(vec = val %>% list) %>% 
        mutate(mod = map(vec, ~MASS::fitdistr(.x, "normal") )) %>% 
        mutate(mod_est_mean = map(mod, ("estimate")) %>% map_dbl("mean")) %>% 
        mutate(mod_est_sd = map(mod, ("estimate")) %>% map_dbl("sd")) %>% 
        mutate(mod_std_mean = map(mod, ("sd")) %>% map_dbl("mean")) %>% 
        mutate(mod_std_error = map(mod, ("sd")) %>% map_dbl("sd"))

# A tibble: 3 × 7
    var        vec            mod mod_est_mean mod_est_sd mod_std_mean mod_std_error
  <chr>     <list>         <list>        <dbl>      <dbl>        <dbl>         <dbl>
1     X <dbl [10]> <S3: fitdistr>    0.1322028  0.7405289    0.2341758     0.1655873
2     Y <dbl [10]> <S3: fitdistr>    0.4976899  2.0292617    0.6417089     0.4537567
3     Z <dbl [10]> <S3: fitdistr>   -0.5346930  3.6262759    1.1467291     0.8108599

浏览模型并取消嵌套

代码语言:javascript
复制
gather(df,var, val,-genes) %>% 
        group_by(var) %>% 
        summarise(vec = val %>% list) %>% 
        mutate(mod = map(vec, ~MASS::fitdistr(.x, "normal") )) %>% 
        mutate(mod_est_mean = map(mod, ("estimate")) %>% map_dbl("mean")) %>% 
        mutate(mod_est_sd = map(mod, ("estimate")) %>% map_dbl("sd")) %>% 
        mutate(mod_std_mean = map(mod, ("sd")) %>% map_dbl("mean")) %>% 
        mutate(mod_std_error = map(mod, ("sd")) %>% map_dbl("sd")) %>% 
        mutate(mod_glance = map(mod, glance)) %>% 
        select(-vec, -mod) %>% 
        unnest()

# A tibble: 3 × 9
    var mod_est_mean mod_est_sd mod_std_mean mod_std_error     n    logLik      AIC      BIC
  <chr>        <dbl>      <dbl>        <dbl>         <dbl> <int>     <dbl>    <dbl>    <dbl>
1     X    0.1322028  0.7405289    0.2341758     0.1655873    10 -11.18548 26.37096 26.97613
2     Y    0.4976899  2.0292617    0.6417089     0.4537567    10 -21.26611 46.53221 47.13738
3     Z   -0.5346930  3.6262759    1.1467291     0.8108599    10 -27.07145 58.14289 58.74806
票数 1
EN

Stack Overflow用户

发布于 2017-05-08 22:17:51

尝试:

代码语言:javascript
复制
library(reshape2)
df <- melt(df)

df2 <- df %>% group_by(variable) %>% do(broom::tidy(MASS::fitdistr(.$value, "normal")))
df2 <- merge(df2 %>% dcast(variable ~ term, value.var = "estimate"), 
             df2 %>% dcast(variable ~ term, value.var = "std.error"), 
             by="variable", suffixes = c("_estimates", "_stderror")
)
df2 <- merge(df2, df %>% group_by(variable) %>% do(broom::glance(MASS::fitdistr(.$value, "normal"))))
df2
  variable mean_estimates sd_estimates mean_stderror sd_stderror  n    logLik      AIC      BIC
1        X      0.1322028    0.7405289     0.2341758   0.1655873 10 -11.18548 26.37096 26.97613
2        Y      0.4976899    2.0292617     0.6417089   0.4537567 10 -21.26611 46.53221 47.13738
3        Z     -0.5346930    3.6262759     1.1467291   0.8108599 10 -27.07145 58.14289 58.74806
票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/43849952

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档