7 8 9 10 11 12 13 14 15 16 17 18#> Levels: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18design <- model.matrix (~0 +group + pairinfo)design2 <- model.matrix(~0 +group)design3 <- model.matrix(~0 +pairinfo)"group"的 after after after after after #> [33] after after after after #> Levels: before afterdesign <- model.matrix
1255_g_at 3.899327 4.403982 4.092681 4.221116 4.060228 # 差异分析 # full model > mod = model.matrix (~as.factor(cancer), data=pheno) # null model > mod0 = model.matrix(~1,data=pheno) > pValuesComBat = bladderbatch) > data(bladderdata) > pheno = pData(bladderEset) > edata = exprs(bladderEset) > mod = model.matrix (~as.factor(cancer), data=pheno) > mod0 = model.matrix(~1,data=pheno) > n.sv = num.sv(edata,mod,method
方法二——model.matrix函数: R语言内置包stats中有一个model.matrix函数(无需单独加载即可使用),它可以完成分类变量的哑变量处理,语法非常简单。 dumy <- model.matrix( ~ Species -1, data = iris) iris_data <- cbind(iris,dumy[,-1]) ? 回顾一下今天分享的哑变量处理知识点: R语言: 方法一——:dummy包的dummy函数 方法二——:model.matrix函数 方法三——:caret包中的dummyVars函数 Python:
f <- factor(c("liver20", "liver20", "liver10", "liver10")) design <- model.matrix(~ 0 + f) design ? , pair = c(1, 2, 3, 1, 2, 3), batch = c(1, 2, 1, 2, 1, 2)) des <- model.matrix 0, 0, 3, 3), stringsAsFactors = FALSE) df$f <- factor(paste(df$condition, df$time, sep=".")) des <- model.matrix 对于model.matrix中的formula,下表可以帮助大家理解。
2.2 模型介绍 模型介绍 固定因子:Herd 随机因子:Sire 观测值:Yield 2.3 固定因子矩阵X和随机因子Z 固定因子矩阵X X = model.matrix(~Herd-1,data 随机因子矩阵Z Z = model.matrix(~Sire-1,data=dat) Z ?
1,1,1,2,2,3,4,4,4)) y <- c(240,190,170,180,200,140,170,100,130) REML_data <- data.frame(herd,sire,y) X <- model.matrix (y~herd-1) Z <- model.matrix(y~sire-1) A <- matrix(c(1,0.25,0,0,0.25,1,0,0,0,0,1,0,0,0,0,1),nr=4) 3.1 1,1,1,2,2,3,4,4,4)) y <- c(240,190,170,180,200,140,170,100,130) REML_data <- data.frame(herd,sire,y) X <- model.matrix (y~herd-1) Z <- model.matrix(y~sire-1) XX <- crossprod(X) XZ <- crossprod(X,Z) ZX <- t(XZ) ZZ <- crossprod
direction="both") summary(model.step) # 6 Lasso ## 6.1 将数据转化为矩阵 tmp.y <- data$Status_death tmp.x <- model.matrix summary(model.step) # 8 Lasso ### 8.1 将数据转化为矩阵 tmp.y <- Surv(data$Time_death,data$Status_death) tmp.x <- model.matrix stepAIC(model.full, direction="both") summary(model.step) # 3.2 Lasso ### 数据转化 tmp.y <- data$X tmp.x <- model.matrix
summary(model.step) # 1.2 Lasso # Make data frame into matrix tmp.y <- data$Status_death tmp.x <- model.matrix 2.2 Lasso # Make data frame into matrix tmp.y <- Surv(data$Time_death,data$Status_death) tmp.x <- model.matrix ="both") summary(model.step) # 3.2 Lasso # Make data frame into matrix tmp.y <- data$X tmp.x <- model.matrix
1.0000000 18 5 3 -1.0000000 19 5 5 2.0000000 教科书的结果, 两者一样 3, 构建模型 $$ y = Xb + Zu + e $$ 构建固定因子矩阵 这里使用函数model.matrix 构建矩阵, 比较方便 for(i in 1:4) dat[,i] <- as.factor(dat[,i]) X <- model.matrix(~Chang-1,dat) X Chang1 Chang2 Ainv = makeAinv(pped)$AinvAinv makeAinv(pped)$listAinv for(i in 1:4) dat[,i] <- as.factor(dat[,i])X <- model.matrix
(qcData)[i] == rownames(qcMetadata)[i])) { stop(paste0(i, " Wrong")) } } # covar_mat <- model.matrix ){ qcMetadata <- pData(x) qcData <- t(exprs(x)) # Set up design matrix covDesignNorm <- model.matrix normalize.method="none") # List biological and normalization variables in model matrices bio.var <- model.matrix data=qcMetadata) colnames(bio.var) <- gsub('([[:punct:]])|\\s+','',colnames(bio.var)) adj.var <- model.matrix (~ dex, colData(dds))mod0 <- model.matrix(~ 1, colData(dds))# calculating the variablesn.sv <- num.sv
1.0000000 18 5 3 -1.0000000 19 5 5 2.0000000 教科书的结果, 两者一样 3, 构建模型 y = Xb + Zu + e 构建固定因子矩阵 这里使用函数model.matrix 构建矩阵, 比较方便 for(i in 1:4) dat[,i] <- as.factor(dat[,i]) X <- model.matrix(~Chang-1,dat) X Chang1 Chang2 makeAinv(pped)$Ainv Ainv makeAinv(pped)$listAinv for(i in 1:4) dat[,i] <- as.factor(dat[,i]) X <- model.matrix
design_non_paried <- model.matrix(~ 0 + treatment) colnames(design_non_paried) <- c("Control","anti-BTLA #p.value = 0.05 ) 7配对处理 7.1 整理分组矩阵 design_paried <- model.matrix
factor(paste(pData(phenoData)[,1], pData(phenoData)[,2], sep = "_")) design <- model.matrix varMetadata=vMtData) maqc <- read.xysfiles(xys.files, phenoData=pd) class(maqc) eset <- rma(maqc) design <- model.matrix ", "", pd.s$title) f <- factor(f) design <- model.matrix(~-1+f) colnames(design) <- sub("^f", "", colnames rem, ] dim(BSData.norm) rna <- factor(pData(BSData.norm)[,"SampleFac"]) design <- model.matrix(~-1+rna
step1output.Rdata")5、加载数据rm(list = ls())load(file = "step1output.Rdata")6、芯片数据使用limma差异分析-差异基因火山图design = model.matrix library(dplyr)library(limma)design = model.matrix(~Group)fit = lmFit(exp,design)fit = eBayes(fit)deg miRNA分析流程学习(二)推文中的高通量测序数据集,并采用limma包的芯片流程进行差异分析,核心代码如下library(limma)library(dplyr)# limma-arraydesign = model.matrix Inf)# limma-RNA-seq#dge <- edgeR::DGEList(counts=exp)#dge <- edgeR::calcNormFactors(dge)#design <- model.matrix
1.0000000 18 5 3 -1.0000000 19 5 5 2.0000000 教科书的结果, 两者一样 3, 构建模型 y = Xb + Zu + e 构建固定因子矩阵 这里使用函数model.matrix 构建矩阵, 比较方便 for(i in1:4) dat[,i] <- as.factor(dat[,i]) X <- model.matrix(~Chang-1,dat) X Chang1 Chang2 = makeAinv(pped)$Ainv Ainv makeAinv(pped)$listAinv for(iin 1:4) dat[,i] <- as.factor(dat[,i]) X<- model.matrix
exp,group=Group) dge$samples$lib.size <- colSums(dge$counts) dge <- calcNormFactors(dge) design <- model.matrix limma---- library(limma) dge <- edgeR::DGEList(counts=exp) dge <- edgeR::calcNormFactors(dge) design <- model.matrix (~Group) v <- voom(dge,design, normalize="quantile") design <- model.matrix(~Group) fit <- lmFit(v,
factor(paste(pData(phenoData)[,1], pData(phenoData)[,2], sep = "_")) design <- model.matrix varMetadata=vMtData) maqc <- read.xysfiles(xys.files, phenoData=pd) class(maqc) eset <- rma(maqc) design <- model.matrix ", "", pd.s$title) f <- factor(f) design <- model.matrix(~-1+f) colnames(design) <- sub("^f", "", colnames rem, ] dim(BSData.norm) rna <- factor(pData(BSData.norm)[,"SampleFac"]) design <- model.matrix(~-1+rna
keep.lib.sizes=FALSE] d$samples$lib.size <- colSums(d$counts) d <- calcNormFactors(d) d$samples dge=d design <- model.matrix 做差异分析 load(file = 'symbol_matrix.Rdata') symbol_matrix[1:4,1:4] exprSet = symbol_matrix design <- model.matrix
这时候的默认顺序是 normal在前,uc在后,这时候你的设计矩阵design是这样的: library(limma) # 用不用factor()都不影响,必定是 靠后的 vs 靠前的 design <- model.matrix uc" "uc" "uc" "uc" ## [15] "uc" "uc" "uc" 这时候的design也会跟着变化: design <- model.matrix 如果你在构建设计矩阵时喜欢用factor()函数,比如design <- model.matrix(~ factor(group_list)),也是一样的道理,顺序靠后的 vs 顺序靠前的。 group_list <- c(rep('znormal',7),rep('uc',10)) # 定义因子顺序,让znormal在前,uc在后 design <- model.matrix(~ factor "uc" "uc" "uc" "uc" "uc" "uc" ## [17] "uc" # 用不用factor()都无所谓 design <- model.matrix
counts(dds, normalized = TRUE) idx <- rowMeans(dat) > 1 dat <- dat[idx, ] # 根据关键生物表型构建设计矩阵 mod <- model.matrix (as.formula(paste0("~ ", design)), colData(dds)) # 构建对照设计矩阵 mod0 <- model.matrix(~ 1, colData(dds)) # # 获取标准化后的表达矩阵 dat <- normexpr$rlog # 根据关键生物表型构建设计矩阵 mod <- model.matrix(as.formula(paste0("~ ", design )), colData(dds)) # 构建对照设计矩阵 mod0 <- model.matrix(~ 1, colData(dds)) # 指定混杂因素的数目为 2,也可以让 sva 自己预测 svseq2