我有一些基因组位置,想基于 Ensembl、使用 biomaRt R 软件包对这些位置进行注释(查找 Ensembl 基因 ID、外显子、内含子等)。
我的部分数据
chr start stop strand
chr10 100572320 100572373 -
chr10 100572649 100572658 +
发布于 2016-02-23 18:08:42
准备查询biomaRt的数据
样本数据
# Sample region; `chr` carries a "chr" prefix that is stripped below.
data <- data.frame(chr = "chr17", start = 63973115, end = 64437414)

# Build the "chromosome:start:end" string that is later passed as the value of
# the chromosomal_region filter.
# - sub("^chr", ...) removes only a leading "chr" prefix, not every occurrence.
# - format(..., scientific = FALSE) keeps round coordinates such as 100000000
#   from being rendered as "1e+08" by the implicit as.character() in paste().
data$query <- paste(
  sub("^chr", "", data$chr),
  format(data$start, scientific = FALSE, trim = TRUE),
  format(data$end, scientific = FALSE, trim = TRUE),
  sep = ":"
)
#> data
# chr start end query
#1 chr17 63973115 64437414 17:63973115:64437414
然后使用 biomaRt:
library(biomaRt)

# Select the dataset of interest; the human dataset identifier is used here.
# Once a mart is set, listDatasets(mart) shows every available dataset.
# The host is given as the full https://www URL rather than the bare domain.
mart <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "https://www.ensembl.org"
)

# listAttributes(mart) lists all the information that can be extracted.
# Each element of data$query is a "chromosome:start:end" region string.
out <- getBM(
  attributes = c(
    "ensembl_gene_id", "external_gene_name", "gene_biotype",
    "ensembl_transcript_id", "ensembl_exon_id"
  ),
  filters = "chromosomal_region",
  values = data$query,
  mart = mart
)
这将给出在给定基因组位置上存在的基因、转录本和外显子的集合。biomaRt 提供了更多的信息,所以不要忘记使用 listAttributes() 来查找所有信息。
https://stackoverflow.com/questions/35584151
复制相似问题