首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >在EXPSS摘要中保存标签

在EXPSS摘要中保存标签
EN

Stack Overflow用户
提问于 2020-08-09 07:41:34
回答 1查看 66关注 0票数 0

我有一份多选题(multiple response)表格的数据。要了解什么是多选题,请参见:

https://www.harvestyourdata.com/fileadmin/images/question-type-screenshots/Grid-multi-select.jpg

所以我试图为这类问题做一个总结。

无论如何,从数据的角度来看,这类数据通常以宽格式存储,其中每一行*列组合为一个变量,该变量编码为0/1 (如果调查参与者不选中该框,则为0,否则为1)。

代码语言:r
复制
# Sample survey data in wide format: one row per respondent.
# `gender` and `sector` are coded categories; `col1`..`col5` are 0/1
# indicators, one per answer option of the multiple-response question
# (1 = box ticked, 0 = not ticked).
data <- data.frame(
  # respondent attributes
  gender = c(1,2,1,2,1,2,1,2,2,2,2,1,1,2,2,2,2,1,1,1,1,1,2,1,2,1,2,2,2,1,2,1,2,1,2,1,2,2,2),
  sector = c(3,3,1,2,5,4,4,4,4,3,3,4,3,4,2,1,4,2,3,4,4,4,3,1,2,1,5,5,4,3,1,4,5,2,3,4,5,1,4),
  # multiple-response indicators
  col1 = c(1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,1,1,0,1,1,0,0,0,0,1,0,0,0,0,1,0,1),
  col2 = c(1,1,1,1,1,0,0,0,0,1,1,1,1,1,0,0,0,1,1,1,0,1,1,1,1,1,0,0,0,1,1,1,1,1,1,1,0,0,0),
  col3 = c(1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,0,1,0,0,1,1,1,1,1,0,0,1),
  col4 = c(1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  col5 = c(1,0,1,1,1,0,1,0,0,1,0,1,1,1,0,0,0,1,1,1,0,1,0,1,1,1,0,0,0,1,1,0,0,1,1,1,0,0,0)
)

The labeling below has already been done:

# Turn the coded respondent attributes into labelled factors.
data$gender <- factor(
  data$gender,
  levels = c(1, 2),
  labels = c("Male", "female")
)
data$sector <- factor(
  data$sector,
  levels = c(1, 2, 3, 4, 5),
  labels = c("TX", "CA", "NY", "LA", "WA")
)

# Each 0/1 indicator becomes a one-level factor: 1 maps to the option label,
# and 0 (not listed in `levels`) becomes NA.
data[c("col1", "col2", "col3", "col4", "col5")] <- Map(
  function(values, option_label) {
    factor(values, levels = 1, labels = option_label)
  },
  data[c("col1", "col2", "col3", "col4", "col5")],
  c("Sales", "OPS", "Management", "HR", "Ware-housing")
)

library(expss)
library(tidyverse)
library(rlang)
library(flextable)

# Keep a copy of the full data set and describe the table to build:
# the multiple-response columns, the first/last column of that range,
# and the variable the table is split by.
data1 <- data
var_list <- paste0("col", 1:5)
first_col_param <- var_list[1]
second_col_param <- var_list[length(var_list)]
grouping_var <- "sector"

#' Cross-tabulate a multiple-response question against a grouping variable
#'
#' Builds an expss column-percent table for the multiple-dichotomy set spanning
#' `first_col_param %to% second_col_param`, with a total column plus one column
#' per level of `grouping_var`, and post-processes it for display.
#'
#' Post-processing steps, in order:
#' * the "#Total cases" row label is renamed to "N";
#' * missing cells become 0 and numeric cells are rounded to one decimal;
#' * cells equal to 0 are shown as "--";
#' * every column whose last-row value is 3, 4 or 5 is blanked out with "--"
#'   (the last row is presumably the case-count row, i.e. small bases are
#'   suppressed -- confirm against the table layout);
#' * remaining cells outside the last row get a " %" suffix;
#' * the "N" row is moved to the top.
#'
#' @param data Data frame holding the response columns and `grouping_var`.
#' @param var_list Character vector of response column names to keep.
#' @param first_col_param,second_col_param Names (strings) of the first and
#'   last column of the response range. They are pasted into an expression that
#'   is evaluated, so they must be trusted column names, never user input.
#' @param grouping_var Name (string) of the column used for the table columns.
#' @param total_var `TRUE` to keep the "#Total" column, otherwise it is dropped.
#' @return A data frame with the row labels in the first column (named " ").
tab_multi_cross <- function(data, var_list, first_col_param, second_col_param,
                            grouping_var, total_var) {
  # Scalar-safe flag: NA or zero-length input counts as "no total column".
  keep_total <- isTRUE(total_var == TRUE)

  # `grouping_var` stays a string: all_of() and `[[` expect a character name,
  # not a parsed symbol.
  item_data <- as.data.frame(data[var_list])
  group_data <- data %>% select(all_of(grouping_var))
  var_lab(group_data[[grouping_var]]) <- ""
  subset_data <- cbind(group_data, item_data)

  # NOTE(review): the column range is spliced into code as text because
  # `%to%` takes bare column names.
  cells_call <- rlang::parse_expr(paste0(
    "tab_cells(subset_data,mdset(",
    first_col_param, " %to% ", second_col_param, "))"
  ))
  tab1 <- eval(cells_call, envir = environment()) %>%
    tab_cols(total(), subset_data[1]) %>%
    tab_stat_cpct() %>%
    tab_pivot()

  tab1 <- as.data.frame(tab1)
  tab1[which(tab1[, 1] == "#Total cases"), 1] <- "N"
  # Base R rename, so the function does not need data.table::setnames().
  names(tab1)[names(tab1) == "row_labels"] <- " "
  tab1[is.na(tab1)] <- 0
  tab1 <- tab1 %>%
    mutate(
      across(
        .cols = where(is.numeric),
        .fns = ~ round(.x, digits = 1)
      )
    )
  tab1[tab1 == 0] <- "--"

  # Columns whose last-row value is 3, 4 or 5 are suppressed entirely.
  small_base <- vapply(
    tab1,
    function(column) column[length(column)] %in% c(3, 4, 5),
    logical(1)
  )
  mask_indices <- which(small_base)
  if (length(mask_indices) > 0) {
    tab1[, mask_indices] <- "--"
  }

  # Append " %" to real values only. Formatting just the non-"--" entries
  # avoids the "NAs introduced by coercion" warning from as.numeric("--").
  add_percent_sign <- function(column) {
    is_value <- !is.na(column) & column != "--"
    if (any(is_value)) {
      column[is_value] <- paste(
        format(as.numeric(column[is_value]), nsmall = 1),
        "%"
      )
    }
    column
  }
  pct_rows <- -nrow(tab1)
  pct_cols <- -c(1, mask_indices)
  tab1[pct_rows, pct_cols] <- lapply(
    tab1[pct_rows, pct_cols, drop = FALSE],
    add_percent_sign
  )

  # Move the "N" row to the top; leave the order alone if there is none.
  n_row <- match("N", tab1[, 1])
  tab2 <- if (is.na(n_row)) {
    tab1
  } else {
    rbind(tab1[n_row, ], tab1[-n_row, ])
  }

  if (!keep_total) {
    # drop = FALSE keeps a data frame even if a single column remains.
    tab2 <- tab2[, !grepl("^#Tot", names(tab2)), drop = FALSE]
  }
  tab2
}

由于其他函数的要求,数据已经设置了标签,但汇总表中显示的是实际的列名,而我需要的是标签名。有什么解决办法吗?我能否在下面的汇总表中得到这些标签名?

输出摘要是

在这里输入图像描述

但它应该是

在这里输入图像描述

因此,我对这些变量再次应用了标签,但我有300多个这样的汇总表,逐一重新应用标签会花费太长时间。有没有不需要重新应用标签就能得到所需输出的解决方案?

代码语言:r
复制
# Attach variable labels to the multiple-response columns.
# The labels are applied to `data` itself (the original snippet referenced an
# undefined object `dat`).
data <- apply_labels(data,
                      col1 = "Sales",
                      col2 = "OPS",
                      col3 = "Management",
                      col4 = "HR",
                      col5 = "Ware-housing"
)
EN

回答 1

Stack Overflow用户

回答已采纳

发布于 2020-08-10 00:08:17

如果使用mdset,则不需要将变量转换为因子(factor)。您只需要为它们设置变量标签:

代码语言:r
复制
library(expss)
library(tidyverse)
library(rlang)
library(flextable)

# Label the indicator columns instead of converting them to factors; the
# table below then shows these labels. Only col1..col5 are used, matching the
# columns of the sample data (a `col6` does not exist there, so selecting it
# would fail).
# NOTE(review): presumably this must run on the original 0/1 columns, not on
# the factor-converted ones -- confirm.
data <- apply_labels(data,
                     col1 = "Sales",
                     col2 = "OPS",
                     col3 = "Management",
                     col4 = "HR",
                     col5 = "Ware-housing"
)

data1 <- data
var_list <- c("col1", "col2", "col3", "col4", "col5")
first_col_param <- "col1"
second_col_param <- "col5"
# Kept as a string: all_of() and `[[` expect a character column name.
grouping_var <- "sector"

# `data` now holds only the response columns; `data1` keeps the full data set.
data <- data[var_list] %>% as.data.frame()
data2 <- data1 %>% select(all_of(grouping_var))
var_lab(data2[[grouping_var]]) <- ""
subset_data <- cbind(data2, data)

# The column range is spliced in as text because `%to%` takes bare names.
tab1 <- eval(parse_expr(paste0("tab_cells(subset_data,mdset(",
                               first_col_param, " %to% ", second_col_param, "))"))) %>%
  tab_cols(subset_data[1]) %>%
  tab_stat_cpct() %>%
  tab_pivot()
tab1 <- as.data.frame(tab1)
# Base R rename, so data.table::setnames() is not required.
names(tab1)[names(tab1) == "row_labels"] <- " "
tab1[is.na(tab1)] <- 0

更新——使用因子(factor)的解决方案。

如果您需要坚持使用因子(factor),则应该使用mrset,而不是mdset。

票数 0
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/63323761

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档