1 P1 P1.pre <NA> <NA> 2 P1.ut.AAACCTGGTGGTACAG-1 P1 P1.pre CD4 CD4_C8- Treg 3 P1.ut.AAACCTGTCACGGTTA-1 P1 P1.pre CD4 CD4_C8-Treg 4 P1.ut.AAACCTGTCCGCTGTT-1 1 P1 P1.pre <NA> <NA> 6 P1.ut.AAACCTGTCTTGAGAC-1 P1 P1.pre CD4 CD4_C8- 1.2 创建seurat对象 依然使用CreateSeuratObject 函数,此处count 为读取的矩阵文件。 1,CreateSeuratObject中的meta.data参数 CreateSeuratObject函数除了简单的过滤条件外 ,还有一个重要的meta.data参数,可以输入提供的meta信息。
使用CreateSeuratObject生成Seurat对象,后续分析都是在该对象上进行操作。 rownames(x = raw.data), value = FALSE) raw.data <- raw.data[-ercc.index,] dim(raw.data) 有了表达矩阵,直接使用 CreateSeuratObject (counts = Read10X(folder), project = folder ) }) # 此时的sceList是一个包含8个10X对象的集合,下一步需要将其合并 4]], sceList[[5]],sceList[[6]], sceList[[7]],sceList[[8] ]), add.cell.ids = folders, project = "mouse8") table(sce_big$orig.ident
seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features /data/GSE130xxx/xxxx.txt.gz"), row.names = 1, header = TRUE, sep = "\t") # 使用CreateSeuratObject()函数创建 project = "GSE130xxx") h5ad格式: 下载测试文件: https://www.dropbox.com/s/ngs3p8n2i8y33hj dl=0 # 下载测试文件 # https://www.dropbox.com/s/ngs3p8n2i8y33hj/pbmc3k.h5ad? 参考链接:https://www.jianshu.com/p/5b26d7bc37b7 参考链接:https://mp.weixin.qq.com/s/M15kWdH8eDONfakNhY-enA
H21 <- CreateSeuratObject(counts = H21, project = "H21", min.cells = 3, min.features = 100) H21 H23 <- CreateSeuratObject(counts = H23, project = "H23", min.cells = 3, min.features = 100) H23 H24 <- CreateSeuratObject = H32, project = "H32", min.cells = 3, min.features = 100) H32 H33 <- CreateSeuratObject(counts = H33 = 3, min.features = 100) H36 H38 <- CreateSeuratObject(counts = H38, project = "H38", min.cells = 3, 3]] H24 <- ob.list[[4]] H32 <- ob.list[[5]] H33 <- ob.list[[6]] H34 <- ob.list[[7]] H35 <- ob.list[[8]
pbmc500_assay <- CreateChromatinAssay(pbmc500.counts, fragments = frags.500) pbmc500 <- CreateSeuratObject meta.data=md.500) pbmc1k_assay <- CreateChromatinAssay(pbmc1k.counts, fragments = frags.1k) pbmc1k <- CreateSeuratObject meta.data=md.1k) pbmc5k_assay <- CreateChromatinAssay(pbmc5k.counts, fragments = frags.5k) pbmc5k <- CreateSeuratObject meta.data=md.5k) pbmc10k_assay <- CreateChromatinAssay(pbmc10k.counts, fragments = frags.10k) pbmc10k <- CreateSeuratObject assay <- CreateChromatinAssay(counts = counts.500, sep = c(":", "-"), min.features = 500) pbmc500 <- CreateSeuratObject
2.2M Mar 8 2019 GSM3660655_SC94IPFUP_barcodes.tsv.gz 259K Mar 8 2019 GSM3660655_SC94IPFUP_genes.tsv.gz 259K Mar 8 2019 GSM3660656_SC95IPFLOW_genes.tsv.gz 31M Mar 8 2019 GSM3660656_SC95IPFLOW_matrix.mtx.gz 示例代码是: rm(list=ls()) options(stringsAsFactors = F) library(Seurat) sce1 <- CreateSeuratObject(Read10X tableOfCounts_rowLabels.tsv', header = T)[,2] head(cl) head(rl) rownames(mtx) <- rl colnames(mtx) <- cl sce=CreateSeuratObject 也就是说 readMM 函数即可,然后配合CreateSeuratObject来构建对象! 降维聚类分群和生物学注释都走起!
Read10X(data.dir = paste(dataset_loc, ids[1],"filtered_feature_bc_matrix", sep="/")) seurat_obj <- CreateSeuratObject Read10X_h5(file.path(dataset_loc, ids[1], "filtered_feature_bc_matrix.h5"), use.names = T) seurat_obj <- CreateSeuratObject row names as the gene IDs rownames(counts) <- gene_ids colnames(counts) <- cell_ids seurat_obj <- CreateSeuratObject 1L),sep="_") d10x }) seurat_merge <- do.call("cbind", d10x.data) # for "dgCMatrix" seurat_data <- CreateSeuratObject Read10X(data.dir = paste(dataset_loc, file,"filtered_feature_bc_matrix", sep="/")) seurat_obj <- CreateSeuratObject
<-CreateSeuratObject(counts = C149, project = "C149",min.cells = 3, min.features = 200) C152<-CreateSeuratObject ','FCN1','CD1C','TPSB2','CD14','MARCO','CXCR2', 'CLEC9A','IL3RA', 'CD3D','CD8A anyway,我们仿照作者进行分群,在Epithelial中TPPP3-Ciliated(13,25),KRT18(15),Myeloid中CD68-Macrophages(0、1、2、3、4、5、7、8、 准备完数据后,准备画图: new_order = c('0','1','2','3','4','5','7','8','9','11','12','16','17','20','21','23','28 ','#60c3f0','#8ccdf1','#cae5f7','#92519c','#b878b0','#d7b1d2','#e7262a','#e94746','#eb666d','#ee838f'
dgCMatrix" # attr(,"package") # [1] "Matrix" # 构建 Seurat 对象 # 初步过滤一般不需要修改参数,除非数据实在太难看 Seurat_object <- CreateSeuratObject ScRNAdata <- Read10X_h5(filename = "GSM3489182_Donor_01_raw_gene_bc_matrices_h5.h5") Seurat_object <- CreateSeuratObject read.table( "data/GSM2829942/GSM2829942_HE6W_LA.TPM.txt", row.names = 1, header = T) Seurat_object <- CreateSeuratObject CreateSeuratObject( counts, project = "CreateSeuratObject", assay = "RNA", names.field = 1,
这两天分析一个单细胞数据发现一个奇怪的问题,就是创建 seurat 对象的时候,我明明设置了参数 CreateSeuratObject 函数的project 参数,但是最后merge 不同的样本后发现 > gsub(".txt","", pro) [1] "IRI1d_1" CreateSeuratObject 这个函数做了什么? 赶紧查看 CreateSeuratObject 的帮助文档: Create a Seurat object Description Create a Seurat object from raw data = NULL, project = "CreateSeuratObject", ... ) Arguments counts Either a matrix-like object with # 创建Seurat对象 sce <- CreateSeuratObject(counts = counts, min.cells=3, project = gsub(".txt","", pro),
接下来分别读取 library(Seurat) sce.10x <- Read10X(data.dir = '~/four-PBMC-mtx/SRR7722939/') sce1 <- CreateSeuratObject project = "SRR7722939") sce.10x <- Read10X(data.dir = '~/four-PBMC-mtx/SRR7722940/') sce2 <- CreateSeuratObject SRR7722940" "SRR7722941" "SRR7722942" library(Seurat) sceList = lapply(folders,function(folder){ CreateSeuratObject genes.use, rownames(sce4@scale.data)) head(genes.use) ## [1] "hg38_S100A9" "hg38_PPBP" "hg38_S100A8" 26 272 6 261 7 0 1 361 1 8
如果是单个样品,直接读取进来然后创建seurat对象即可:初试Seurat的V5版本 主要区别在于,V4版本中一般是循环读取样品,使用CreateSeuratObject创建seurat对象,然后使用merge 那我们可以先把多个样品合并成为了一个超级大的表达量矩阵,并使其行名为基因名,列名为barcodes信息,后面直接针对它来使用CreateSeuratObject函数去构建Seurat对象,就是完美的下游分析的输入数据啦 GSE212975/',samples) names(dir) <- samples #读取数据创建Seurat对象 counts <- Read10X(data.dir = dir) sce.all = CreateSeuratObject sceList[[i]])<-paste0(samples[i],"_",col) } #数据整合后创建seurat对象 merge <- do.call(cbind,sceList) sce =CreateSeuratObject F,data.table = F ) head(rl) #整合矩阵信息 colnames(mtx)=cl$V1 rownames(mtx)=rl$V1 #创建seurat对象 sce.all=CreateSeuratObject
features.tsv 基因名与基因ID ‘ matrix.mtx 表达矩阵 mtx 是稀疏矩阵格式存储的矩阵数据 详细参见:03b-关于10x输出表达矩阵mtx格式[8] 读取数据 两种常用的函数 ctrl_raw_feature_bc_matrix") # Turn count matrix into a Seurat object (output is a Seurat object) ctrl <- CreateSeuratObject (counts = ctrl_counts, min.features = 100) 这里我们使用CreateSeuratObject 将矩阵转换为 //satijalab.org/seurat/v3.0/immune_alignment.html [6] this link: https://www.dropbox.com/s/we1gmyb9c8jej2u dl=1 [7] Kang et al, 2017: https://www.nature.com/articles/nbt.4042 [8] 03b-关于10x输出表达矩阵mtx格式: 03b-关于10x
,tsv/txt,h5ad格式10x格式的读取展开代码语言:TXTAI代码解释library(Seurat)ct=Read10X(data.dir="GSE145154_RAW/")seu.obj<-CreateSeuratObject install.packages("hdf5r")ct<-Read10X_h5("GSE200874_RAW/GSM6045826_wt_filtered_gene_bc_matrices_h5_2.h5")seu.obj<-CreateSeuratObject row.names=1#是将第一列设置为行名的意思ct<-read.csv("GSE130148_raw_counts.csv.gz",row.names=1)class(ct)seu.obj<-CreateSeuratObject
/data/pbmc4k/filtered_gene_bc_matrices/GRCh38/") pbmc4k <- CreateSeuratObject(counts = pbmc4k.data, project /data/pbmc8k/filtered_gene_bc_matrices/GRCh38/") pbmc8k <- CreateSeuratObject(counts = pbmc8k.data, project = "PBMC8K") pbmc8k ## An object of class Seurat ## 33694 features across 8381 samples within 1 assay pbmc.combined <- merge(pbmc4k, y = pbmc8k, add.cell.ids = c("4K", "8K"), project = "PBMC12K") pbmc.combined -1" "8K_TTTGTCATCATGTCCC-1" "8K_TTTGTCATCCGATATG-1" ## [4] "8K_TTTGTCATCGTCTGAA-1" "8K_TTTGTCATCTCGAGTA
往期专题 单细胞初级8讲和高级分析8讲 单细胞分析十八般武艺1:harmony LIGER简介 LIGER能够跨个体、物种和方法(基因表达、表观遗传或空间数据)识别共有的细胞类型,以及数据集特有的特征 scRNAlist <- list() for(i in 1:length(dir)){ counts <- Read10X(data.dir = dir[i]) scRNAlist[[i]] <- CreateSeuratObject height=3.6) ggsave("clustered_liger.png", plot=plot2, width=8, height=3.6) ? (scATAC, min.cells=3, min.features = 200) scRNA1 <- CreateSeuratObject(scRNA1, min.cells=3, min.features = 200) scRNA2 <- CreateSeuratObject(scRNA2, min.cells=3, min.features = 200) #合并后的数据生成两个副本 scRNA <-
library("Seurat") scrna_data_ctrl <- Read10X("data/GSE96583/ctrl/") ctrl <- CreateSeuratObject( counts , min.cells = 3, min.features = 200) scrna_data_stim <- Read10X("data/GSE96583/stim/") stim <- CreateSeuratObject sample_list){ filedir = str_c("data/GSE96583/",sample) scrna_data <- Read10X(filedir) Seurat_object <- CreateSeuratObject str_c("data/GSE96583/",sample) # 数据的读取 scrna_data <- Read10X(filedir) # 对象的构建 Seurat_object <- CreateSeuratObject
./5p_pbmc10k_filt.h5",use.names = T)$`Gene Expression` srat_3p <- CreateSeuratObject(matrix_3p,project = "pbmc10k_3p") srat_5p <- CreateSeuratObject(matrix_5p,project = "pbmc10k_5p") srat_3p srat_5p -- ) ncluster <- length(unique(pbmc_liger[[]]$seurat_clusters)) mycol <- colorRampPalette(brewer.pal(8,
GSM5678434_HNP210915_matrix.mtx.gz" # [7] "GSE185965_RAW/GSM5678435_HNP210929_barcodes.tsv.gz" # [8] GSM5678434_HNP210915_matrix.mtx.gz" # [7] "01_data/GSM5678435/GSM5678435_HNP210929_barcodes.tsv.gz" # [8] library(data.table) sceList = lapply(samples,function(pro){ # pro=samples[1] print(pro) sce=CreateSeuratObject 数据样本读取 pro = "train" list.files("input/") # [1] "GSM8128607_P1_B_filtered_feature_bc_matrix.h5" sce=CreateSeuratObject https://mp.weixin.qq.com/s/URu-4l97g18zDmHTrBhBEg 2、单细胞天地: https://mp.weixin.qq.com/s/mpEQU_aKcaq3cbzh8JsSBw
批量读入多个10x技术的单细胞转录组样品 我们先获得样本的路径,然后构建seurat对象(一共8个样本)。最后的部分即为读入后每个样本中的细胞数目。 filtered_feature_bc_matrix',tmp)] tmp=tmp[-1] tmp basename(tmp) library(Seurat) ct = Read10X(tmp) colnames(ct) sce.all=CreateSeuratObject min.cells = 5, min.features = 300) table(sce.all$orig.ident) 可以看到, 是如下所示的8个样品 [4] "outputs//HRD4" [5] "outputs//non-HRD1" [6] "outputs//non-HRD2" [7] "outputs//non-HRD3" [8] : lapply(tmp, function(x){ print(x) print(dim(CreateSeuratObject(counts = Read10X(x) ,