超级增强子与靶基因之间的注释距离(即峰到基因转录起始位点 TSS 的最大允许距离)没有统一标准,需要根据研究目的和领域内的常见做法来选择。常见的取值有 50 kb、100 kb 和 500 kb 等。
# Common distance settings ----
library(ChIPseeker)
library(TxDb.Hsapiens.UCSC.hg38.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene

# NOTE: `peak_data` must already hold the peaks to annotate (for example the
# value returned by readPeakFile()); it is not created in this section.
#
# annotatePeak() links every peak to its nearest gene no matter how far away
# that gene is. `tssRegion` only decides which peaks receive a "Promoter"
# label, so widening it to +/-500 kb does not change the assigned genes; it
# just mislabels distal regions as promoters. The promoter window is therefore
# kept at a conventional +/-3 kb, the annotation is computed once, and the
# enhancer-to-gene distance limit is applied to `distanceToTSS` afterwards.
peak_anno <- annotatePeak(peak_data,
                          tssRegion = c(-3000, 3000),
                          TxDb = txdb,
                          annoDb = "org.Hs.eg.db",
                          verbose = FALSE)
peak_anno_df <- as.data.frame(peak_anno)
tss_distance <- abs(peak_anno_df$distanceToTSS)

# which() drops rows whose distance is NA instead of propagating NA rows.

# Method 1: +/-50 kb (conservative)
peak_anno_50kb <- peak_anno_df[which(tss_distance <= 50000), ]

# Method 2: +/-100 kb (commonly used)
peak_anno_100kb <- peak_anno_df[which(tss_distance <= 100000), ]

# Method 3: +/-500 kb (permissive)
peak_anno_500kb <- peak_anno_df[which(tss_distance <= 500000), ]
# Full workflow: annotate super-enhancers at several distance cut-offs ----
# Reads every *.bed file in the working directory, annotates its regions once
# per distance setting, writes one tab-separated table per sample/distance
# pair, and keeps the annotation objects in `all_annotations`
# (sample name -> distance label -> annotatePeak() result).
# Relies on ChIPseeker being attached and on `txdb` being defined beforehand.
setwd("您的文件夹路径")
bed_files <- list.files(pattern = "\\.bed$")
print(paste("找到以下BED文件:", paste(bed_files, collapse = ", ")))
if (length(bed_files) == 0L) {
  # Make the "nothing to do" case explicit instead of silently skipping the loop.
  warning("当前目录下没有找到BED文件", call. = FALSE)
}

# Distance cut-offs in bp. Integer literals keep cat() from printing
# 100000 / 500000 as 1e+05 / 5e+05 in the progress log.
distance_settings <- c("50kb" = 50000L, "100kb" = 100000L, "500kb" = 500000L)

# `tssRegion` only defines which peaks are labelled "Promoter"; it does not
# limit how far away the assigned gene may be. It is therefore fixed at a
# conventional +/-3 kb, while the distance cut-off drives `flankDistance`
# and the `within_distance` flag below.
promoter_window <- c(-3000, 3000)

# Annotate every sample at every distance cut-off.
all_annotations <- list()
for (bed_file in bed_files) {
  sample_name <- sub("\\.bed$", "", bed_file)
  peak_data <- readPeakFile(bed_file)
  cat("正在处理:", sample_name, "\n")

  sample_annotations <- list()
  for (dist_name in names(distance_settings)) {
    # `[[` extracts the bare value; `[` would carry the element name along.
    distance <- distance_settings[[dist_name]]
    cat(" 距离设置:", dist_name, "(", distance, "bp)\n")

    peak_anno <- annotatePeak(peak_data,
                              tssRegion = promoter_window,
                              TxDb = txdb,
                              annoDb = "org.Hs.eg.db",
                              addFlankGeneInfo = TRUE,
                              flankDistance = distance,
                              verbose = FALSE)

    # Flag peaks whose nearest TSS lies inside the current cut-off so the
    # exported table can be filtered by distance.
    anno_df <- as.data.frame(peak_anno)
    anno_df$within_distance <- !is.na(anno_df$distanceToTSS) &
      abs(anno_df$distanceToTSS) <= distance

    # Save the annotation table for this sample/distance pair.
    output_file <- paste0(sample_name, "_", dist_name, "_annotated.txt")
    write.table(anno_df, file = output_file, sep = "\t",
                quote = FALSE, row.names = FALSE)

    sample_annotations[[dist_name]] <- peak_anno
  }
  all_annotations[[sample_name]] <- sample_annotations
}