我有一份多重响应(multiple response)表格的数据。想了解什么是多重响应问题,请参见:
https://www.harvestyourdata.com/fileadmin/images/question-type-screenshots/Grid-multi-select.jpg
所以我试图为这类问题做一个总结。
无论如何,从数据的角度来看,这类数据通常以宽格式存储,其中每一行*列组合为一个变量,该变量编码为0/1 (如果调查参与者不选中该框,则为0,否则为1)。
# Sample survey data in wide format, one row per respondent.
# gender and sector hold numeric codes; col1..col5 are 0/1 indicators, one
# per response option (per the description above: 1 = box ticked, 0 = not).
data<-data.frame(
gender = c(1,2,1,2,1,2,1,2,2,2,2,1,1,2,2,2,2,1,1,1,1,1,2,1,2,1,2,2,2,1,2,1,2,1,2,1,2,2,2),
sector = c(3,3,1,2,5,4,4,4,4,3,3,4,3,4,2,1,4,2,3,4,4,4,3,1,2,1,5,5,4,3,1,4,5,2,3,4,5,1,4),
col1=c(1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1),
col2=c(1,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,0,0),
col3=c(1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1),
col4=c(1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
col5=c(1,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0)
)
the labeling below was done already
# Convert the numeric codes into labelled factors.
data$gender <- factor(data$gender, levels = 1:2, labels = c("Male", "female"))
data$sector <- factor(
  data$sector,
  levels = 1:5,
  labels = c("TX", "CA", "NY", "LA", "WA")
)
# Each response column keeps only level 1 (labelled with the option name);
# every other value becomes NA.
data[paste0("col", 1:5)] <- Map(
  function(codes, option) factor(codes, levels = 1, labels = option),
  data[paste0("col", 1:5)],
  c("Sales", "OPS", "Management", "HR", "Ware-housing")
)
library(expss)
library(tidyverse)
library(rlang)
library(flextable)
# Inputs for the summary: a working copy of the data, the response columns,
# the first/last column of the multiple-response range, and the grouping column.
data1 <- data
var_list <- paste0("col", 1:5)
first_col_param <- var_list[1]
second_col_param <- var_list[length(var_list)]
grouping_var <- "sector"
#' Column-percentage table of a multiple-response set by a grouping variable
#'
#' Builds an expss table whose rows are the dichotomy columns from
#' `first_col_param` to `second_col_param` (combined with `mdset()`), and whose
#' columns are a total plus the levels of `grouping_var`. The result is then
#' turned into a display-ready data frame:
#' * the "#Total cases" row is renamed "N" and moved to the top;
#' * missing and zero cells are shown as "--";
#' * percentages are rounded to one decimal and suffixed with " %";
#' * any column whose base (last row of the pivot) is 3, 4 or 5 is replaced
#'   entirely by "--".
#'
#' Requires expss and dplyr/magrittr (tidyverse) to be attached.
#'
#' @param data Data frame holding the grouping column and the response columns.
#' @param var_list Character vector of response column names to keep.
#' @param first_col_param,second_col_param Names (strings) of the first and
#'   last column of the range handed to `mdset()` through `%to%`.
#' @param grouping_var Name (string) of the column used for the table columns.
#' @param total_var If `TRUE`, keep the "#Total" column; otherwise drop it.
#' @return A data frame whose first column (named " ") holds the row labels.
tab_multi_cross <- function(data, var_list, first_col_param, second_col_param,
                            grouping_var, total_var) {
  # Scalar test instead of ifelse(): an NA flag now simply drops the total
  # column rather than failing later in `if`.
  keep_total <- isTRUE(as.logical(total_var))

  # Index by column name directly; no need to parse the name into a symbol.
  responses <- as.data.frame(data[var_list])
  groups <- data[grouping_var]
  var_lab(groups[[grouping_var]]) <- ""
  subset_data <- cbind(groups, responses)

  # Build the call from symbols rather than pasting and parsing a string, so
  # the column names are never interpreted as arbitrary code.
  tab1 <- eval(bquote(
    tab_cells(
      subset_data,
      mdset(.(as.name(first_col_param)) %to% .(as.name(second_col_param)))
    )
  )) %>%
    tab_cols(total(), subset_data[1]) %>%
    tab_stat_cpct() %>%
    tab_pivot()

  tab1 <- as.data.frame(tab1)
  tab1[which(tab1[, 1] == "#Total cases"), 1] <- "N"
  # Base rename: setnames() belongs to data.table, which is not attached here.
  names(tab1)[names(tab1) == "row_labels"] <- " "

  tab1[is.na(tab1)] <- 0
  tab1 <- tab1 %>%
    mutate(across(where(is.numeric), ~ round(.x, digits = 1)))
  tab1[tab1 == 0] <- "--"

  # Suppress columns whose base (last row) is 3, 4 or 5.
  # NOTE(review): bases of 1 or 2 are not suppressed — confirm this is intended.
  small_base <- vapply(
    tab1,
    function(column) isTRUE(column[length(column)] %in% c(3, 4, 5)),
    logical(1)
  )
  mask_indices <- which(small_base)
  if (length(mask_indices) > 0) {
    tab1[, mask_indices] <- "--"
  }

  # Format every remaining percentage cell (all rows but the last, all columns
  # but the label and the suppressed ones). Done column by column so the
  # result does not depend on sapply() simplification.
  pct_rows <- seq_len(max(nrow(tab1) - 1L, 0L))
  pct_cols <- setdiff(seq_along(tab1), c(1, mask_indices))
  for (j in pct_cols) {
    cells <- tab1[[j]][pct_rows]
    has_value <- cells != "--"
    cells[has_value] <- paste(
      format(as.numeric(cells[has_value]), nsmall = 1),
      "%"
    )
    tab1[[j]][pct_rows] <- cells
  }

  # Move the N row to the top (leave the order alone if there is none).
  n_row <- match("N", tab1[, 1])
  tab2 <- if (is.na(n_row)) tab1 else rbind(tab1[n_row, ], tab1[-n_row, ])

  if (!keep_total) {
    tab2 <- tab2[, !grepl("^#Tot", names(tab2)), drop = FALSE]
  }
  tab2
}
由于其他函数的要求,数据已经有标签了,但是数据给出了实际的列名,我需要标签名。我们有什么解决办法吗?我可以在下面的摘要中得到这个标签的名字吗?
输出摘要是
但它应该是
因此,我对这些变量重新应用了标签;但我有 300 多个这样的汇总表,逐一重新应用标签会花费太长时间。有没有不需要重新应用标签就能得到所需输出的解决方案?
# Re-apply the option names as expss variable labels on the existing `data`
# frame (the snippet previously passed an undefined object `dat`).
data <- apply_labels(data,
  col1 = "Sales",
  col2 = "OPS",
  col3 = "Management",
  col4 = "HR",
  col5 = "Ware-housing"
)
发布于 2020-08-10 00:08:17
如果使用 mdset,则不需要将变量转换为因子(factor),只需要给它们设置变量标签:
library(expss)
library(tidyverse)
library(rlang)
library(flextable)
# Attach variable labels to the 0/1 response columns; as stated above, the
# mdset() approach needs these labels rather than factors.
# NOTE(review): col6 is not among the columns of the sample data frame shown
# in the question — confirm the real data has it before running.
data <- apply_labels(data,
  col1 = "Sales",
  col2 = "OPS",
  col3 = "Management",
  col4 = "HR",
  col5 = "Ware-housing",
  col6 = "Admin"
)
data1 <- data
var_list <- c("col1", "col2", "col3", "col4", "col5", "col6")
first_col_param <- "col1"
second_col_param <- "col6"
grouping_var <- "sector"

# `data` is narrowed to the response columns; the grouping column is taken
# from the full copy kept in `data1`. Columns are indexed by name directly
# instead of parsing the name into a symbol first.
data <- as.data.frame(data[var_list])
data2 <- data1[grouping_var]
var_lab(data2[[grouping_var]]) <- ""
subset_data <- cbind(data2, data)

# Build the tab_cells() call from symbols rather than pasting and parsing a
# string, so column names are never interpreted as arbitrary code.
tab1 <- eval(bquote(
  tab_cells(
    subset_data,
    mdset(.(as.name(first_col_param)) %to% .(as.name(second_col_param)))
  )
)) %>%
  tab_cols(subset_data[1]) %>%
  tab_stat_cpct() %>%
  tab_pivot()
tab1 <- as.data.frame(tab1)
# Base rename: setnames() belongs to data.table, which is not attached here.
names(tab1)[names(tab1) == "row_labels"] <- " "
tab1[is.na(tab1)] <- 0
更新:使用因子(factor)的解决方案。
如果您必须继续使用因子(factor),则应该使用 mrset 而不是 mdset。
https://stackoverflow.com/questions/63323761
复制相似问题