我想用一个变量作为 `lm()` 的 `subset` 参数,这样就可以把这段建模代码封装进一个函数里:
# Build the formula string "<response_name> ~ ." (`.` stands for every other
# column of the data passed to lm()).
formula <- paste0(response_name,
" ~ .")
# Fit with a row filter when `subset_filter` is non-empty, otherwise fit on all
# rows of `train_dataset`.
if (subset_filter != ""){
# The incoming filter is overwritten with a hard-coded condition here.
subset_filter <- "G3 < 10"
model <-
lm(as.formula(formula),
# NOTE(review): `subset` receives the filter as a character string rather than
# as an evaluated logical/index vector -- presumably the problem being asked
# about; confirm.
subset = subset_filter,
data = train_dataset)
} else {
model <-
lm(as.formula(formula),
data = train_dataset)
}我的数据集是-
# Read the student math performance CSV from GitHub and store it as a tibble.
student_performance <-
read_csv("https://raw.githubusercontent.com/UBC-MDS/ellognea-smwatts-student-performance/master/data/student-math-perf.csv") %>%
as_tibble()我的响应变量是G3,我用下面的代码拆分集合
#' Split a dataset into a training or a test set
#'
#' Partitions the rows on the response column with `createDataPartition()`
#' (p = 0.8) under a fixed seed and returns one side of the split.
#' Note that calling this function resets the global RNG seed to 1.
#'
#' @param dataset A data frame or tibble containing the response column.
#' @param response_name Name of the response column, as a string.
#' @param output_set_type `"train"` returns the training rows; any other value
#'   returns the remaining (test) rows.
#' @return A tibble with the requested rows of `dataset`.
split_sets <- function(dataset,
                       response_name,
                       output_set_type) {
  # Fixed seed so repeated calls produce the same partition.
  set.seed(1)
  # With `list = FALSE` the indices come back as a one-column matrix; flatten
  # them to a plain vector so row indexing needs no suppressWarnings().
  train_rows <- as.vector(
    createDataPartition(dataset[[response_name]], p = 0.8, list = FALSE)
  )
  # Plain if/else: the condition is a scalar, so ifelse() is not appropriate.
  selected <- if (output_set_type == "train") {
    dataset[train_rows, ]
  } else {
    dataset[-train_rows, ]
  }
  as_tibble(selected)
}
# Goal: be able to pass a value into the subset filter argument this way and
# then use it.
发布于 2020-10-26 07:05:21
如果需要以字符串形式传入筛选条件,可以先用 `parse()` 解析该字符串,再用 `eval()` 在数据集中求值:
library(caret)
library(readr)
#' Fit a linear model of one column on all others, optionally on a row subset
#'
#' @param data Data frame holding the response and predictor columns.
#' @param response_name Name of the response column, as a string.
#' @param subset_filter Row filter written as R code in a single string, e.g.
#'   `"G3 < 10"`, evaluated with the columns of `data` in scope. `""` (the
#'   default), `NULL` or `NA` fits on all rows.
#' @return The fitted `lm` object, with its `call` element replaced by the
#'   model formula.
create_model <- function(data, response_name, subset_filter = "") {
  if (length(subset_filter) > 1L) {
    stop("`subset_filter` must be a single string.", call. = FALSE)
  }
  model_formula <- as.formula(paste0(response_name, " ~ ."))

  filter_given <- length(subset_filter) == 1L &&
    !is.na(subset_filter) &&
    nzchar(subset_filter)

  lm_args <- list(formula = model_formula, data = data)
  if (filter_given) {
    # SECURITY: this runs arbitrary R code taken from the string; only pass
    # filters from a trusted source.
    keep_rows <- eval(
      parse(text = subset_filter, keep.source = FALSE),
      envir = data
    )
    # A whitespace-only filter parses to nothing and evaluates to NULL, which
    # means "no subset".
    if (!is.null(keep_rows)) {
      if (!is.logical(keep_rows) && !is.numeric(keep_rows)) {
        stop(
          "`subset_filter` must evaluate to a logical or numeric row selector.",
          call. = FALSE
        )
      }
      lm_args$subset <- keep_rows
    }
  }

  # do.call() hands lm() the already-evaluated row selector by value, so lm()'s
  # non-standard evaluation of `subset` cannot be hijacked by a column of
  # `data` that shares a name with a local variable.
  model <- do.call(lm, lm_args)
  # Show the formula instead of the call when the model is printed.
  model$call <- model_formula
  model
}
# Apply the function to the data:
# Fit G3 on all other columns using only the rows of `train_dat` where G3 < 10;
# the printed result follows.
create_model(
  data = train_dat,
  response_name = "G3",
  subset_filter = "G3 < 10"
)
#Call:
#G3 ~ .
#Coefficients:
# (Intercept) schoolMS sexM age addressU famsizeLE3
# -4.42602 1.18145 0.15315 -0.16790 1.11708 -0.08173
# PstatusT Medu Fedu Mjobhealth Mjobother Mjobservices
# 1.32870 1.00518 -0.62716 -1.98356 -1.31388 -0.94443
# Mjobteacher Fjobhealth Fjobother Fjobservices Fjobteacher reasonhome
# -1.28718 0.03242 0.02968 0.32962 -1.53201 -2.10665
# reasonother reasonreputation guardianmother guardianother traveltime studytime
# -0.51770 0.22395 -0.29893 1.85975 -0.39072 -1.56920
# failures schoolsupyes famsupyes paidyes activitiesyes nurseryyes
# -0.17344 2.35607 0.35207 0.29857 -0.91373 0.09838
# higheryes internetyes romanticyes famrel freetime goout
# 1.06065 -0.58727 0.09469 0.69217 0.25081 0.14379
# Dalc Walc health absences G1 G2
# -1.39164 0.60450 0.86492 0.12033 0.11660 0.78624
在这里,`train_dat` 是通过下面的代码得到的:
#' Split a dataset into a training or a test set
#'
#' Partitions the rows on the response column with `createDataPartition()`
#' (p = 0.8) under a fixed seed and returns one side of the split.
#' Note that calling this function resets the global RNG seed to 1.
#'
#' @param dataset A data frame or tibble containing the response column.
#' @param response_name Name of the response column, as a string.
#' @param output_set_type `"train"` returns the training rows; any other value
#'   returns the remaining (test) rows.
#' @return A tibble with the requested rows of `dataset`.
split_sets <- function(dataset,
                       response_name,
                       output_set_type) {
  # Fixed seed so repeated calls produce the same partition.
  set.seed(1)
  # With `list = FALSE` the indices come back as a one-column matrix; flatten
  # them to a plain vector so row indexing needs no suppressWarnings().
  train_rows <- as.vector(
    createDataPartition(dataset[[response_name]], p = 0.8, list = FALSE)
  )
  selected <- if (output_set_type == "train") {
    dataset[train_rows, ]
  } else {
    dataset[-train_rows, ]
  }
  as_tibble(selected)
}
train_dat <- split_sets(student_performance, "G3", "train")
原文链接:https://stackoverflow.com/questions/64529604
复制相似问题