首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >如何在R中打印一个函数的多个二项式测试结果

如何在R中打印一个函数的多个二项式测试结果
EN

Stack Overflow用户
提问于 2021-03-29 05:50:01
回答 1查看 46关注 0票数 0

我已经在R中做了一个函数来做二项式测试,我已经成功地运行了它,而没有使用循环(for i语句)。然而,我一直试图通过申请i语句来简化它,但只完成了我想要的一半。我的功能是:

代码语言:javascript
复制
binom_test_2b = function(spot_id, cluster_id, marker_gene){
# Finding the cluster and marker gene in scRNA-seq dataset
cluster_id  = sample(as.data.frame(Idents(glioblastoma))[,1], 1)
marker = FindMarkers(glioblastoma, ident.1 = cluster_id)
marker = cbind(gene = rownames(marker), marker)
rownames(marker) = 1:nrow(marker) 

# Finding the cluster and the spot in spatial dataset
cluster = Idents(glio_spatial)
df_cluster = as.data.frame(cluster)
df_cluster = cbind(spot_id = rownames(df_cluster), df_cluster)
rownames(df_cluster) = 1:nrow(df_cluster)
df_cluster = df_cluster[df_cluster$cluster == sample(df_cluster[,ncol(df_cluster)],1),]
spot_id = sample(df_cluster[,1],1)
spatial_count = as.data.frame(glio_spatial@assays$Spatial@counts)
spatial_count = cbind(gene = rownames(spatial_count), spatial_count)
rownames(spatial_count) = 1:nrow(spatial_count) 
spatial_count = spatial_count[c("gene", spot_id)]
spatial_count = rename(spatial_count, "count" = spot_id)

#Finding the percentage of the marker gene expressed in the spot
# List of genes with not zero count at the chosen spot
gene_spot  = spatial_count[spatial_count$count != 0,]

#Finding gene intersection between scRNA-seq cluster and spot
intersection = inner_join(gene_spot, marker)

#Finding marker_gene sample
marker_gene = sample(head(intersection[order(intersection[2], decreasing = TRUE),],5)[,1],5)

for (i in 1:length(marker_gene)){
    ratio_spot = gene_spot[which(gene_spot$gene == marker_gene[i]), ][,2]/sum(gene_spot[,ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[marker_gene[i],]
    selected_cells_one_gene = all_cells_one_gene[cluster_id!=Idents(glioblastoma)]
    gene_count_out_cluster = glioblastoma@assays$RNA@counts[,cluster_id!=Idents(glioblastoma)]
    ratio_out_cluster = sum(selected_cells_one_gene)/sum(gene_count_out_cluster)
    result_1 = sprintf("The gene %s is a marker gene for scRNA-seq cluster %s and is expressed in the spot %s.", marker_gene, cluster_id, spot_id)
    result_2 = binom.test(as.integer(ratio_spot*sum(gene_spot[,ncol(gene_spot)])), as.integer(sum(gene_spot[,ncol(gene_spot)])), as.integer(ratio_out_cluster))
}
return(list(head(intersection[order(intersection[2], decreasing = TRUE),],5)[,1:2], result_1, result_2))
}

这个结果是:

我希望该函数将打印五个给定标记基因的二项式测试结果(提供标记基因的精确二项式测试5次,而不是只显示一个确切的二项式测试)。有没有人能建议我怎么做才能得到五个给定标记基因的二项式结果?

我一直在尝试做一些修改,比如result_2i,ratio_spoti,ratio_out_clusteri,但都不起作用。

更新#2涉及到@Sirius的答案。我对它进行了如下修改:

代码语言:javascript
复制
binom_test_2c = function(spot_id, cluster_id, marker_gene){
# Finding the cluster and marker gene in scRNA-seq dataset
cluster_id  = sample(as.data.frame(Idents(glioblastoma))[,1], 1)
marker = FindMarkers(glioblastoma, ident.1 = cluster_id)
marker = cbind(gene = rownames(marker), marker)
rownames(marker) = 1:nrow(marker) 

# Finding the cluster and the spot in spatial dataset
cluster = Idents(glio_spatial)
df_cluster = as.data.frame(cluster)
df_cluster = cbind(spot_id = rownames(df_cluster), df_cluster)
rownames(df_cluster) = 1:nrow(df_cluster)
df_cluster = df_cluster[df_cluster$cluster == sample(df_cluster[,ncol(df_cluster)],1),]
spot_id = sample(df_cluster[,1],1)
spatial_count = as.data.frame(glio_spatial@assays$Spatial@counts)
spatial_count = cbind(gene = rownames(spatial_count), spatial_count)
rownames(spatial_count) = 1:nrow(spatial_count) 
spatial_count = spatial_count[c("gene", spot_id)]
spatial_count = rename(spatial_count, "count" = spot_id)

#Finding the percentage of the marker gene expressed in the spot
# List of genes with not zero count at the chosen spot
gene_spot  = spatial_count[spatial_count$count != 0,]

#Finding gene intersection between scRNA-seq cluster and spot
intersection = inner_join(gene_spot, marker)

#Finding marker_gene sample
marker_gene = sample(head(intersection[order(intersection[2], decreasing = TRUE),],5)[,1],5)

l <- lapply( marker_gene, function(gene) {
    ratio_spot = gene_spot[which(gene_spot$gene == gene), ][,2]/sum(gene_spot[,ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[gene,]
    selected_cells_one_gene = all_cells_one_gene[cluster_id!=Idents(glioblastoma)]
    gene_count_out_cluster = glioblastoma@assays$RNA@counts[,cluster_id!=Idents(glioblastoma)]
    ratio_out_cluster = sum(selected_cells_one_gene)/sum(gene_count_out_cluster)
    result_1 = sprintf("The gene %s is a marker gene for scRNA-seq cluster %s and is expressed in the spot %s.", marker_gene, cluster_id, spot_id)
    result_2 = binom.test(as.integer(ratio_spot*sum(gene_spot[,ncol(gene_spot)])), as.integer(sum(gene_spot[,ncol(gene_spot)])), as.integer(ratio_out_cluster))
    list( result_1=result_1, result_2=result_2)
}
result_1 <- sapply(l, function(el) el$result_1 )
result_2 <- sapply(l, function(el) el$result_2 )

return(
list(
    head(intersection[order(intersection[2], decreasing = TRUE),],5)[,1:2],
    result_1,
    result_2
))
}

更新#3提到了@Sirius的答案。我得到的结果如下:我正在尝试给binom exact result dataframe列一个基于标记基因的名称,以使其易于阅读。

EN

回答 1

Stack Overflow用户

发布于 2021-03-29 06:02:12

这可能是更有意义的一种方式:

代码语言:javascript
复制
## change your for block rest of the function to this:

l <- lapply( marker_gene, function(gene) {
    ratio_spot = gene_spot[which(gene_spot$gene == gene), ][,2]/sum(gene_spot[,ncol(gene_spot)])
    #Finding the percentage of the marker gene expressed outside the cluster
    all_cells_one_gene = glioblastoma@assays$RNA@counts[gene,]
    selected_cells_one_gene = all_cells_one_gene[cluster_id!=Idents(glioblastoma)]
    gene_count_out_cluster = glioblastoma@assays$RNA@counts[,cluster_id!=Idents(glioblastoma)]
    ratio_out_cluster = sum(selected_cells_one_gene)/sum(gene_count_out_cluster)
    result_1 = sprintf("The gene %s is a marker gene for scRNA-seq cluster %s and is expressed in the spot %s.", marker_gene, cluster_id, spot_id)
    result_2 = binom.test(as.integer(ratio_spot*sum(gene_spot[,ncol(gene_spot)])), as.integer(sum(gene_spot[,ncol(gene_spot)])), as.integer(ratio_out_cluster))
    list( result_1=result_1, result_2=result_2 )
}) # <----- I forgot this closing ')'

## fetch the result_1 strings
result_1 <- sapply( l, function(el) el$result_1 )
result_2 <- sapply( l, function(el) el$result_2 )

return(
    list(
        head(intersection[order(intersection[2], decreasing = TRUE),],5)[,1:2],
        result_1,
        result_2
    )
)

使用lapply从每个for循环中获取内容,并将其放入列表中,稍后再解包。还有其他方法,但如果您不知道如何从for循环的每次迭代中拾取内容,这是一个很好的开始。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/66846538

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档