我在chr16_bam文件夹中有一个bam文件列表,还有一个注释文件sgseq_sam.txt。
library(SGSeq)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)
library(org.Hs.eg.db)
library(tidyverse)
library(AnnotationDbi)
library(dplyr)
library(stringr)
# Rename the "file_bam" column values to the full path where the BAMs are stored
setwd("C:/Users/User/Downloads/")
bamPath = "C:/Users/User/Downloads/chr16_bam"
samFile <- read.delim("C:/Users/User/Downloads/sgseq_sam.txt", header=T)
samFile <- samFile %>%
mutate(file_bam = paste0(bamPath, file_bam))
# Save the TxDb.Hsapiens.UCSC.hg19.knownGene object into a variable.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txdb <- keepSeqlevels(txdb, "chr16")
seqlevelsStyle(txdb) <- "NCBI"
# Read in the gene list from Supplementary Table 1
gene.list <- read.table("Table_1_Differential Expression Analysis Revealing CLCA1 to Be a Prognostic and Diagnostic Biomarker for Colorectal Cancer.xls", header=T)
# Convert the gene symbols to Entrez IDs and remove the genes that don't map to an Entrez ID.
entrez = mapIds(org.Hs.eg.db, keys=gene.list$Name, column = "ENTREZID", keytype="SYMBOL")
gene.list <- gene.list %>%
mutate(entrez) %>%
filter(!is.na(entrez))
# Convert the TxDb.Hsapiens.UCSC.hg19.knownGene object
txf_ucsc <- convertToTxFeatures(txdb)
# Cast this object to a dataframe and save as another variable name
txf_df <- as.data.frame(txf_ucsc)
# Subset by the Entrez IDs
txf_df <- txf_df %>% filter(geneName %in% gene.list$entrez)
# Find the number of common transcripts
unique <- unique(txf_df$geneName)
gene.list <- gene.list[gene.list$entrez %in% unique,]
nrow(gene.list)
# Cast the seqnames column to factor
txf_df$seqnames <- as.factor(as.character(txf_df$seqnames))
# Remove the "chr" prefix from the seqnames column
txf_df %>%
rename_all(~stringr::str_replace(.,"^chr",""))
# Recast this dataframe back to a GRanges object
txf_grange <- makeGRangesFromDataFrame(txf_df, keep.extra.columns=T)现在,我想为每个基因创建一个循环,在迭代时,子集Grange对象在txf_grange中仅由该基因,使用reduce函数将基因的范围折叠成一个单一的向量,运行analyzeFeatures,然后运行annotate函数,最后是plotFeatures。
for (i in txf_grange$geneName) {
if (i=="343") {
next
}
else{
# For each of the 15 genes, subset the Granges objects by only the gene
grange.subset <- txf_grange[i == toString(i)]
# Collapse the ranges of the genes into a single vector
grange.subset <- unlist(IRanges::reduce((split(grange.subset, grange.subset$geneName))))
# Run analyzeFeatures
for (j in 1:dim(samFile)[[1]]) {
si <- samFile
for (k in si) {
for (l in list.files(path="C:/Users/User/Downloads/chr16_bam", pattern=".bam$", all.files=F, full.names=F)) {
sgfc_pred <- analyzeFeatures(k, which=grange.subset, features=txf_ucsc, predict=T)
# Annotate predicted features
sgfc_pred <- annotate(sgfc_pred, txf_ucsc)
# Plot features
pdf(paste("plot", l, ".pdf", sep=""))
plotFeatures(sgfc_pred, geneID=1)
dev.off()
}
}
}
}
}数据
> dput(head(txf_grange))
new("GRanges", seqnames = new("Rle", values = structure(1L, .Label = "16", class = "factor"),
lengths = 6L, elementMetadata = NULL, metadata = list()),
ranges = new("IRanges", start = c(12058964L, 12059311L, 12059311L,
12060052L, 12060198L, 12060198L), width = c(348L, 742L, 2117L,
147L, 680L, 1230L), NAMES = NULL, elementType = "ANY", elementMetadata = NULL,
metadata = list()), strand = new("Rle", values = structure(1L, .Label = c("+",
"-", "*"), class = "factor"), lengths = 6L, elementMetadata = NULL,
metadata = list()), seqinfo = new("Seqinfo", seqnames = "16",
seqlengths = NA_integer_, is_circular = NA, genome = NA_character_),
elementMetadata = new("DFrame", rownames = NULL, nrows = 6L,
listData = list(type = structure(c(3L, 1L, 1L, 2L, 1L,
1L), .Label = c("J", "I", "F", "L", "U"), class = "factor"),
txName = structure(list(c("uc002dbv.3", "uc010buy.3",
"uc010buz.3"), c("uc002dbv.3", "uc010buy.3"), "uc010buz.3",
c("uc002dbv.3", "uc010buy.3"), "uc010buy.3",
"uc002dbv.3"), class = "AsIs"), geneName = structure(list(
"608", "608", "608", "608", "608", "608"), class = "AsIs")),
elementType = "ANY", elementMetadata = NULL, metadata = list()),
elementType = "ANY", metadata = list())发布于 2022-02-18 21:22:14
变更线:
analyzeFeatures(samFile, grange.subset)此外,您也不需要那么多循环来运行这个问题。这个问题需要14幅图,你可以用你的循环数来绘制更多的图。
https://stackoverflow.com/questions/71169383
复制相似问题