首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >Barplot表示有统计学差异

Barplot表示有统计学差异
EN

Stack Overflow用户
提问于 2020-01-11 11:31:52
回答 1查看 2.8K关注 0票数 1

我需要针对相应的表型绘制一个重要的SNP代码(分类)的条形图,类似于这些图:

我在R中尝试了很多方法,并得到了一些结果,但我在字段中得到了我最喜欢的结果。以下是守则和结果:

### DATA

代码语言:javascript
复制
SNP_code <- as.factor(c("GG","GA","AA","GA","GA","GG","GG","GG","GG","GA","GA","AA","GA","GA","GA","GG","GG","GG","GG","AA","GG","GG","GG","GG","AA","GG","GG","GA","GG","AA","GA","GG","GG","GG","GG","GG","GG","AA","GG","GA","GG","GG","GA","GG","GG","GA","GG","GG","GA","GA","GG","GA","GG","GA","GA","GA","GA","GA","GA","GG","GG","GG","AA","GA","GA","GA","GA","GG","GA","GG","GG","GG","GA","GA","GA","GG","GG","GA","GG","AA","GG","GG","GG","AA"))

EBV <- c(0.06663,-0.03031,-0.122,-0.02021,-0.1157,-0.08131,-0.02034,-0.06324,0.06699,-0.062,0.02736,-0.1201,-0.04846,-0.06934,-0.06023,-0.009244,-0.05648,-0.01908,0.06728,-0.06517,0.08534,0.07618,-0.0814,0.06113,-0.0795,0.1055,0.08305,0.1209,-0.05314,-0.09431,0.05185,0.1347,0.1591,0.08777,0.08326,0.1612,0.09528,-0.1002,0.1561,-0.09327,0.09474,0.1356,0.06384,0.1585,0.03235,0.1081,0.1462,-0.04082,-0.05042,0.01793,-0.1157,-0.1165,-0.009399,-0.02311,-0.108,-0.1143,0.07219,0.01376,-0.05059,-0.052,0.08494,-0.0388,-0.06346,0.07789,0.02961,-0.1126,0.1102,0.133,-0.09317,-0.1181,0.1584,0.122,0.1019,-0.04074,-0.01178,0.09523,-0.03266,-0.01258,-0.0231,-0.08259,0.05823,-0.02894,-0.008242,0.07981)

LS <- c(2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,2,1,2,1,1,2,1,2,2,2,1,1,1,2,2,2,2,2,1,1,2,1,2,2,2,1,2,2,2,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,2,2,1,1,2,1,2,1,1,2,1,1,1,1,1,1,1,2)

IDs <- c(1033,1081,1106,1107,1120,1194,1199,1326,1334,1340,1345,1358,1398,1404,1405,1421,1457,1459,1464,1509,1529,1542,1550,2025,2030,2095,2099,2128,2141,2153,2167,2224,2232,2238,2244,2266,2271,2280,2283,2323,2326,2337,2369,2390,2391,2396,851012,851016,851021,851055,851063,851084,851105,851109,851146,851169,851176,851198,851205,851246,851266,851292,851332,851345,851488,851489,851509,851528,851531,851547,851562,851573,851574,851578,851584,851588,851592,851622,851651,851670,851672,851684,851690,861086)

sig_snp <- data.frame(IDs, SNP_code, EBV, LS)

###方差分析与均值比较

代码语言:javascript
复制
library(dplyr)
### for LS
group_by(sig_snp, SNP_code) %>%
  summarise(
    count = n(),
    mean = mean(LS, na.rm = TRUE),
    sd = sd(LS, na.rm = TRUE))

### for EBV
group_by(sig_snp, SNP_code) %>%
  summarise(
    count = n(),
    mean = mean(EBV, na.rm = TRUE),
    sd = sd(EBV, na.rm = TRUE))

# Compute the analysis of variance
Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
summary(Anova.fit)
# Tukey multiple pairwise-comparisons
TukeyHSD(Anova.fit) 
# or
library(multcomp)
summary(glht(Anova.fit, linfct = mcp(SNP_code = "Tukey")))

用于EBV的###框图(实际上,我需要用于LS和EBV的Barplot )

代码语言:javascript
复制
library(ggplot2)
library(ggpval)

plot <- ggplot(sig_snp, aes(SNP_code, EBV)) +
geom_boxplot(fill=c("red","blue", "green"), color="black", width=.7); plot
add_pval(plot, pairs = list(c(1, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(2, 3)), test='wilcox.test')
add_pval(plot, pairs = list(c(1, 2)), test='wilcox.test')

"add_pval“只使用"wilcox.test”和"t.test",但我喜欢Tukey。任何帮助都是非常感谢的。

EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2020-01-12 02:20:54

我在下面发布的代码肯定还有改进的余地,但至少给出了一个工作流示例,您可以使用该工作流获取“最喜欢”的参数:

A部分:条形图

( 1)对sig_snp进行了重组,得到了每一个SNP在EBV或LS函数中的均值。

代码语言:javascript
复制
library(tidyverse)
DF1 <- sig_snp %>% 
  pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
  group_by(SNP_code, Variable) %>%
  summarise(Mean = mean(Values),
            SEM = sd(Values) / sqrt(n()),
            Nb = n()) %>%
  rowwise() %>%
  mutate(Labels = as.character(SNP_code)) %>%
  mutate(Labels = paste(unlist(strsplit(Labels,"")),collapse = "/")) %>%
  mutate(Labels = paste0(Labels,"\nn = ",Nb))

# A tibble: 6 x 6
  SNP_code Variable    Mean    SEM    Nb Labels       
  <fct>    <chr>      <dbl>  <dbl> <int> <chr>        
1 AA       EBV      -0.0719 0.0202     9 "A/A\nn = 9" 
2 AA       LS        1.11   0.111      9 "A/A\nn = 9" 
3 GA       EBV      -0.0141 0.0134    31 "G/A\nn = 31"
4 GA       LS        1.23   0.0763    31 "G/A\nn = 31"
5 GG       EBV       0.0422 0.0126    44 "G/G\nn = 44"
6 GG       LS        1.48   0.0762    44 "G/G\nn = 44"

labels列稍后将用于x轴的标记.

2)然后,我们将通过以下方法计算出总平均值(这将有助于绘制“平均值”栏):

代码语言:javascript
复制
library(tidyverse)
DF2 <- sig_snp  %>% 
  pivot_longer(., cols = c(EBV,LS), names_to = "Variable", values_to = "Values") %>%
  group_by(Variable) %>%
  summarise(Mean = mean(Values),
            SEM = sd(Values) / sqrt(n()),
            Nb = n()) %>%
  mutate(SNP_code = "All") %>%
  select(SNP_code, Variable, Mean, SEM, Nb) %>%
  rowwise() %>%
  mutate(Labels = paste0("Mean\nn = ",Nb))

# A tibble: 2 x 6
  SNP_code Variable    Mean     SEM    Nb Labels        
  <chr>    <chr>      <dbl>   <dbl> <int> <chr>         
1 All      EBV      0.00918 0.00944    84 "Mean\nn = 84"
2 All      LS       1.35    0.0522     84 "Mean\nn = 84"

3)我们结合了DF1和DF2,并重新组织了SNP_code级别,以获得正确的绘图顺序:

代码语言:javascript
复制
library(tidyverse)
DF <- bind_rows(DF1, DF2)
DF$Labels = factor(DF$Labels,levels= c("Mean\nn = 84",
                                       "A/A\nn = 9",
                                       "G/A\nn = 31",
                                       "G/G\nn = 44" ))

4)现在,我们可以绘制它:

代码语言:javascript
复制
library(ggplot2)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
  geom_bar(stat = "identity", show.legend = FALSE)+
  geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
  facet_wrap(.~Variable, scales = "free")+
  scale_x_discrete(name = "",labels = levels(DF$Labels))

B部分:在条形图上添加统计量

为了添加统计信息,可以使用geom_signif函数从ggsignif包中添加来自外部输出的统计信息。

1)首先为EBV上的Tukey测试的输出创建数据:

代码语言:javascript
复制
Anova.fit <- aov(EBV ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_EBV <- stat %>% as.data.frame() %>% 
  mutate(Variable = "EBV") %>% 
  mutate(Group = rownames(stat)) %>%
  rowwise() %>%
  mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
  mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
  mutate(labels = round(`p adj`,4)) 

Stat_EBV$y_pos <- c(0.06,0.08,0.1)

2) LS的Tukey测试也是一样的。

代码语言:javascript
复制
Anova.fit <- aov(LS ~ SNP_code, data = sig_snp)
t <- TukeyHSD(Anova.fit)
stat <- t$SNP_code
Stat_LS <- stat %>% as.data.frame() %>% 
  mutate(Variable = "LS") %>% 
  mutate(Group = rownames(stat)) %>%
  rowwise() %>%
  mutate(Group1 = unlist(strsplit(Group,"-"))[1]) %>%
  mutate(Group2 = unlist(strsplit(Group,"-"))[2]) %>%
  mutate(labels = round(`p adj`,4)) 
Stat_LS$y_pos = c(1.7,1.9,2.1)

3)这两种统计数据的绑定:

代码语言:javascript
复制
library(tidyverse)
STAT <- bind_rows(Stat_EBV,Stat_LS)

# A tibble: 6 x 10
    diff      lwr   upr  `p adj` Variable Group Group1 Group2 labels y_pos
   <dbl>    <dbl> <dbl>    <dbl> <chr>    <chr> <chr>  <chr>   <dbl> <dbl>
1 0.0578 -0.0130  0.129 0.132    EBV      GA-AA GA     AA     0.132   0.06
2 0.114   0.0457  0.183 0.000431 EBV      GG-AA GG     AA     0.0004  0.08
3 0.0563  0.0125  0.100 0.00821  EBV      GG-GA GG     GA     0.0082  0.1 
4 0.115  -0.303   0.532 0.790    LS       GA-AA GA     AA     0.790   1.7 
5 0.366  -0.0373  0.770 0.0832   LS       GG-AA GG     AA     0.0832  1.9 
6 0.251  -0.00716 0.510 0.0585   LS       GG-GA GG     GA     0.0585  2.1

4)获取条形图并添加统计结果:

代码语言:javascript
复制
library(ggplot2)
library(ggsignif)
ggplot(DF, aes(x = SNP_code, y = Mean, fill = SNP_code))+
  geom_bar(stat = "identity", show.legend = FALSE)+
  geom_errorbar(aes(ymin = Mean-SEM, ymax = Mean+SEM), width = 0.2)+
  geom_signif(inherit.aes = FALSE, data = STAT, 
              aes(xmin=Group1, xmax=Group2, annotations=labels, y_position=y_pos),
              manual = TRUE)+
  facet_wrap(.~Variable, scales = "free")+
  scale_x_discrete(name = "",labels = levels(DF$Labels))

我希望这看起来像你期望的那样。

票数 2
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/59693988

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档