
对于空转的数据分析,一直没有时间整理笔记,今天抽空写一篇,就从读取数据的开始吧。




rm(list = ls())
library(Seurat) # 单细胞分析主包
library(ggplot2) # 绘图
library(patchwork) # 多图拼接
library(dplyr) # 数据处理
library(Rfast2) # 快速数据处理
library(hdf5r) # 读写h5文件
library(viridis)
library(glmGamPoi)
library(SingleR)
library(celldex)
library(RColorBrewer)
# 用户自定义参数
project_id <- "Sample2" # 项目/样本名称
output_dir <- "../SpatialAnalysisResults" # 输出文件夹
raw_h5_file <- "../data_fixed/filtered_feature_bc_matrix.h5" # 原始表达矩阵文件
#设置工作目录和输出目录
project_id <- "Sample2"
output_dir <- "../SpatialAnalysisResults"
# h5 文件和 spatial 文件路径(相对于工作目录 code/)
h5_file <- "../data_fixed/h5/filtered_feature_bc_matrix.h5"
spatial_dir <- "../data_fixed/spatial"
if (!dir.exists(output_dir)) { dir.create(output_dir) }
# 使用 Load10X_Spatial 读取整个 10x Visium 空间数据
sp_obj <- Load10X_Spatial(
data.dir = "../data_fixed", # data_fixed 目录
filename = "h5/filtered_feature_bc_matrix.h5", # h5 文件相对于 data.dir 的路径
assay = "Spatial",
slice = project_id
)# 获取分组数量
group_count <- length(unique(Idents(sp_obj)))
my_colors <- viridis::viridis(group_count, option = "C") # 你可以换成"A"/"B"/"D"等
# 绘制nCount_Spatial的小提琴图
vln_plot <- VlnPlot(
sp_obj,
features = "nCount_Spatial",
pt.size = 0, # 不显示点
cols = my_colors # 自动分配颜色
) +
geom_boxplot(width = 0.08, outlier.shape = NA, fill = "white", color = "black", alpha = 0.3) + # 窄箱线
theme_minimal(base_size = 16) +
theme(
panel.grid = element_blank(),
axis.text = element_text(color = "black", size = 14),
axis.title = element_text(size = 16, face = "bold"),
plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),
legend.position = "none"
) +
labs(
title = "nCount_Spatial Distribution",
x = "Group",
y = "nCount_Spatial"
)
# 绘制nCount_Spatial的空间分布图
spatial_plot <- SpatialFeaturePlot(sp_obj, features = "nCount_Spatial") +
theme(legend.position = "right") # 空间分布
# 图片保存
pdf(file = file.path(output_dir, "FeatureDistribution_violin.pdf"), width = 5, height = 6)
print(vln_plot)
dev.off()
pdf(file = file.path(output_dir, "FeatureDistribution_spatial.pdf"), width = 9, height = 6)
print(spatial_plot)
dev.off()
sp_obj <- SCTransform(sp_obj, assay = "Spatial", verbose = FALSE) # 标准化
sp_obj <- RunPCA(sp_obj, assay = "SCT", verbose = FALSE) # PCA
sp_obj <- FindNeighbors(sp_obj, reduction = "pca", dims = 1:30) # 邻居查找
#sp_obj <- FindClusters(sp_obj, verbose = FALSE) # 聚类,这里直接注释掉
sp_obj <- RunUMAP(sp_obj, reduction = "pca", dims = 1:30) # UMAP降维
> head(sp_obj@meta.data)
orig.ident nCount_Spatial nFeature_Spatial nCount_SCT nFeature_SCT
AACACCTACTATCGAA-1 SeuratProject 90225 11403 15803 6494
AACACGTGCATCGCAC-1 SeuratProject 2638 1898 12580 5813
AACACTTGGCAAGGAA-1 SeuratProject 63980 10160 15590 6199
AACAGGAAGAGCATAG-1 SeuratProject 2956 2310 12369 5787
AACAGGATTCATAGTT-1 SeuratProject 2274 1539 13026 6160
AACAGGCCAACGATTA-1 SeuratProject 2161 1655 13554 6184
> load("ano_2.Rdata")
> head(ano_2)
Barcode Cluster CellType
1 AACACCTACTATCGAA-1 11 Neuroendocrine_carcinoma_cell
5 AACACGTGCATCGCAC-1 4 Fibroblasts
8 AACACTTGGCAAGGAA-1 1 Neuroendocrine_carcinoma_cell
12 AACAGGAAGAGCATAG-1 7 Fibroblasts
14 AACAGGATTCATAGTT-1 4 Fibroblasts
17 AACAGGCCAACGATTA-1 3 Necrotic_cell
> table(rownames(sp_obj@meta.data) %in% ano_2$Barcode)
FALSE TRUE
101 4891
> # FALSE TRUE
> # 101 4891
# 找出可以匹配的 spot
matched_barcodes <- intersect(rownames(sp_obj@meta.data), ano_2$Barcode)
# 过滤 Seurat 对象,只保留匹配的 spot
sp_obj <- subset(sp_obj, cells = matched_barcodes)
# 挂载 cluster 和 CellType 注释
matched_idx <- match(matched_barcodes, ano_2$Barcode)
sp_obj$Cluster <- ano_2$Cluster[matched_idx]
sp_obj$CellType <- ano_2$CellType[matched_idx]
# 检查
table(is.na(sp_obj$Cluster)) # 应该全部为 FALSE
table(is.na(sp_obj$CellType)) # 应该全部为 FALSE
metatada <- sp_obj@meta.data
metatada[1:2,]
table(metatada$CellType)
#一个 CellType 来自哪些 cluster
cluster_main_celltype <- table(metatada$Cluster, metatada$CellType)
write.csv(cluster_main_celltype,
file = "../SpatialAnalysisResults/1.数据概览/Sample#2/cluster_main_celltype.csv")# 可视化
#umap
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/umap_celltype.pdf"), width = 8.5, height = 6)
DimPlot(sp_obj, reduction = "umap", group.by = "CellType", label = TRUE,label.size = 4)
dev.off()
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/umap_cluster.pdf"), width = 7.5, height = 6)
DimPlot(sp_obj, reduction = "umap", group.by = "Cluster", label = TRUE)
dev.off()
#空间聚类CellType
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_celltype_image.pdf"), width = 8.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "CellType",
pt.size.factor = 4.5,
stroke = 0
) + theme(legend.position = "right")
dev.off()
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_celltype.pdf"), width = 8.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "CellType",
pt.size.factor = 4.5,
stroke = 0,
image.alpha = 0#组织切片完全透明
) +
theme(legend.position = "right")
dev.off()
#空间聚类Cluster
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_cluster_image.pdf"), width = 7.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "Cluster",
pt.size.factor = 4.5,
stroke = 0
) +
theme(legend.position = "right")
dev.off()
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_cluster.pdf"), width = 7.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "Cluster",
pt.size.factor = 4.5,
stroke = 0,
image.alpha = 0#组织切片完全透明
) +
theme(legend.position = "right")
dev.off()
#只显示Adenocarcinoma_cell(腺癌细胞)与Adipocytes(脂肪细胞)
{
sp_obj$CellType_highlight <- sp_obj$CellType
sp_obj$CellType_highlight[
!sp_obj$CellType %in% c("Adenocarcinoma_cell", "Adipocytes")
] <- "Other"
cols <- c(
"Adenocarcinoma_cell" = "#4575B4",
"Adipocytes" = "#FDAE61",
"Other" = "grey85"
)
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_celltype_highlight.pdf"), width = 8.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "CellType_highlight",
pt.size.factor = 4.5,
stroke = 0,
image.alpha = 0,
cols = cols
) +
theme(
legend.position = "right",
legend.title = element_blank()
)
dev.off()
pdf(file = file.path("../SpatialAnalysisResults/1.数据概览/Sample#2/Spatial_celltype_highlight_image.pdf"), width = 8.5, height = 6)
SpatialDimPlot(
sp_obj,
group.by = "CellType_highlight",
pt.size.factor = 4.5,
stroke = 0,
cols = cols
) +
theme(
legend.position = "right",
legend.title = element_blank()
)
dev.off()
}
sample2_sp_obj <- sp_obj
save(sample2_sp_obj,file = "../Rdata/step1_sample2_sp_obj.Rdata")
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。
原创声明:本文系作者授权腾讯云开发者社区发表,未经许可,不得转载。
如有侵权,请联系 cloudcommunity@tencent.com 删除。