骨肉瘤单细胞实操

骨肉瘤单细胞实操 ##系统报错改为英文 Sys.setenv(LANGUAGE en) ##禁止转化为因子 options(stringsAsFactors FALSE) ##清空环境 rm(listls()) library(Seurat) library(tidyverse) library(dplyr) library(ggplot2) library(patchwork) library(cowplot) library(SingleR) library(celldex) library(clustree) library(scDblFinder) loaded_pkgs - sessionInfo()$otherPkgs data.frame( Package names(loaded_pkgs), Version sapply(loaded_pkgs, function(x) x$Version), row.names NULL ) .libPaths() setwd(/mnt/DATA/home/jiangxingyu109/scRNAseq/GSExxxxx-rawdata/) ##储存GEO上下载的数据到目标目录下10x需要的3个文件 ####数据读取#### dir - c(BC2/ , BC3/ , BC5/ , BC6/, BC10/, BC11/, BC16/, BC17/, BC20/, BC21/ ,BC22/) names(dir) - c(BC2, BC3, BC5 ,BC6 , BC10, BC11 ,BC16, BC17, BC20 ,BC21 ,BC22) scRNAlist - list() for (i in seq_along(dir)) { counts - Read10X(data.dir dir[i]) sample_name - basename(dir[i]) scRNAlist[[i]] - CreateSeuratObject( counts counts, project sample_name, min.cells 3, min.features 300 ) scRNAlist[[i]] - RenameCells( scRNAlist[[i]], add.cell.id sample_name ) } setwd(/mnt/DATA/home/jiangxingyu109/scRNAseq/) save(scRNAlist, file scRNAlist260522new.Rdata) ##26年5月更新版本原始数据汇总没有进行质量控制这一版是用新版的seurat做的 ##首先加载scRNAlist2605.Rdata ####质量控制#### # 2) 逐样本 QC 指标含 HB for (i in 1:length(scRNAlist)) { sc - scRNAlist[[i]] sc[[percent.mt]] - PercentageFeatureSet(sc, pattern ^MT-) HB_genes - c(HBA1,HBA2,HBB,HBD,HBE1,HBG1,HBG2,HBM,HBQ1,HBZ) HB_m - match(HB_genes, rownames(sc)) HB_genes - rownames(sc)[HB_m] HB_genes - HB_genes[!is.na(HB_genes)] sc[[percent.HB]] - PercentageFeatureSet(sc, features HB_genes) scRNAlist[[i]] - sc rm(sc) } ##先NormalizeData for (i in seq_along(scRNAlist)) { scRNAlist[[i]] - NormalizeData(scRNAlist[[i]], verbose FALSE) } ##查看的小提琴图 violin_before - list() for(i in 1:length(scRNAlist)){ violin_before[[i]] - VlnPlot(scRNAlist[[i]], features c(nFeature_RNA, nCount_RNA, percent.mt, percent.HB), pt.size 0.01, ncol 4) plot_annotation(title paste(Sample, i, - Before QC), theme theme(plot.title element_text(hjust 0.5, face bold))) } violin_before violin_before[[1]] violin_before[[2]] violin_before[[3]] violin_before[[4]] violin_before[[5]] violin_before[[6]] violin_before[[7]] violin_before[[8]] violin_before[[9]] violin_before[[10]] violin_before[[11]] #### 保存 质量控制 前小提琴图为 SCI TIFF #### dir.create(QC_violin_before_TIFF, showWarnings FALSE) names(scRNAlist) - c(BC2, BC3, BC5, BC6, BC10, BC11, BC16, BC17, BC20, BC21, BC22) violin_before - list() for(i in 1:length(scRNAlist)){ sample_name - names(scRNAlist)[i] p - VlnPlot( scRNAlist[[i]], features c(nFeature_RNA, nCount_RNA, percent.mt, percent.HB), pt.size 0.01, ncol 4 ) plot_annotation( title paste0(sample_name, - Before QC), theme theme( plot.title element_text( hjust 0.5, face bold, size 16 ) ) ) theme_classic(base_size 14) theme( axis.title element_text(size 13, face bold), axis.text element_text(size 11, color black), plot.title element_text(size 13, face bold, hjust 0.5), legend.position none ) violin_before[[sample_name]] - p tiff( filename paste0(QC_violin_before_TIFF/, sample_name, _QC_before.tiff), width 12, height 4, units in, res 600, compression lzw ) print(p) dev.off() } #### 进行样本过滤#### ##逐样本过滤保持你的阈值 scRNAlist - lapply(scRNAlist, function(x) { subset(x, subset nFeature_RNA 300 percent.mt 10) }) ####去除双细胞#### set.seed(123) for(i in 1:length(scRNAlist)){ # 转换为SingleCellExperiment sce - as.SingleCellExperiment(scRNAlist[[i]]) # 运行scDblFinder sce - scDblFinder(sce) # 查看该样本的双细胞统计 cat(样本, i, 双细胞统计:\n) print(table(sce$scDblFinder.class)) # 添加回Seurat对象 scRNAlist[[i]]$doublet_class - sce$scDblFinder.class scRNAlist[[i]]$doublet_score - sce$scDblFinder.score # 记录过滤信息 before - ncol(scRNAlist[[i]]) scRNAlist[[i]] - subset(scRNAlist[[i]], doublet_class singlet) after - ncol(scRNAlist[[i]]) cat(paste(样本, i, 过滤:, before, →, after, 细胞, 去除, before - after, 个双细胞\n\n)) } ##这一步时间较长会迭代很多次 ##合并样本保留 orig.ident dir - c(BC2/ , BC3/ , BC5/ , BC6/, BC10/, BC11/, BC16/, BC17/, BC20/, BC21/ ,BC22/) names(dir) - c(BC2, BC3, BC5 ,BC6 , BC10, BC11 ,BC16, BC17, BC20 ,BC21 ,BC22) scRNAlist_merge - merge(x scRNAlist[[1]], y scRNAlist[-1], add.cell.ids names(dir)) #### 合并后 QC 小提琴图 #### p_qc_merge - VlnPlot( object scRNAlist_merge, features c(nFeature_RNA, nCount_RNA, percent.mt, percent.HB), group.by orig.ident, pt.size 0.01, ncol 4 ) theme_classic(base_size 14) theme( axis.title element_text(size 13, face bold), axis.text.x element_text(size 10, angle 45, hjust 1, color black), axis.text.y element_text(size 11, color black), plot.title element_text(size 13, face bold, hjust 0.5), legend.position none ) p_qc_merge tiff( filename Merged_samples_QC_violin.tiff, width 14, height 6, units in, res 600, compression lzw ) print(p_qc_merge) dev.off() save(scRNAlist_merge, file scRNAlist_merge2605new.Rdata) ##处理过程详细记录是新版 ##这里为止只保存两个处理数据数据经过NormalizeData、nFeature_RNA 300、percent.mt 过滤、scDblFinder 去双细胞 ##新版的S4对象处理跟以前不相同以后的包肯定越来越新下方用的scRNAlist_merge2605new.Rdata读取并命名的scRNA1 ##系统报错改为英文 Sys.setenv(LANGUAGE en) ##禁止转化为因子 options(stringsAsFactors FALSE) ##清空环境 rm(listls()) ##防呆设计 scRNA1 scRNAlist_merge meta - scRNA1meta.data qc_ncount_summary - do.call(rbind, lapply(split(meta, meta$orig.ident), function(df) { x - df$nCount_RNA data.frame( sample unique(df$orig.ident), min min(x, na.rm TRUE), q25 as.numeric(quantile(x, 0.25, na.rm TRUE)), median median(x, na.rm TRUE), q75 as.numeric(quantile(x, 0.75, na.rm TRUE)), q95 as.numeric(quantile(x, 0.95, na.rm TRUE)), q99 as.numeric(quantile(x, 0.99, na.rm TRUE)), q995 as.numeric(quantile(x, 0.995, na.rm TRUE)), max max(x, na.rm TRUE) ) })) qc_ncount_summary ##这里注意有细胞RNA数拖尾后续差异分析要注意就是ncount 99.5%和最大值差距很大这一部分细胞需要去除 ##harmony整合 scRNA_harmony scRNA1 scRNA_harmony - NormalizeData(scRNA_harmony) scRNA_harmony - FindVariableFeatures(scRNA_harmony) scRNA_harmony - ScaleData(scRNA_harmony) scRNA_harmony - RunPCA(scRNA_harmony) ##检查PCA选择dims值 ElbowPlot(scRNA_harmony, ndims 50) ##还是选择30 dims30 set.seed(123) scRNA_harmony - RunHarmony(scRNA_harmony, group.by.vars orig.ident, dims 1:30) scRNA_harmony - FindNeighbors(scRNA_harmony, reduction harmony, dims 1:30) scRNA_harmony - FindClusters(scRNA_harmony, resolution 0.15) ##我记得是0.15刚好是原文中的组数后续进一步区分 scRNA_harmony - RunUMAP(scRNA_harmony, dims 1:30, reduction harmony) ##应该是13或14个 plot1 DimPlot(scRNA_harmony, reduction umap, group.by c(orig.ident)) plot2 DimPlot(scRNA_harmony, reduction umap, group.by c( seurat_clusters)) plotc plot1 plot2 plotc ##tsne scRNA_harmony - RunTSNE(scRNA_harmony, reduction harmony, dims 1:30) plot3 DimPlot(scRNA_harmony, reduction tsne, group.by c(orig.ident)) plot4 DimPlot(scRNA_harmony, reduction tsne, group.by c( seurat_clusters)) plotd plot3 plot4 plotd这是第一部分截至到实现文章中的分组数