我想找一个在 caret 中使用 bartMachine 的好例子,但我似乎无法用 caret 正确地训练 bartMachine 模型。谁能告诉我主要的错误到底出在哪里?或者,有没有简单且可重现的 BART 建模代码?
下面是我使用HouseVotes84和cars数据集的一些虚拟数据对bartMachine进行建模的代码片段:
library(mlbench)
library(caret)

data("HouseVotes84")

# HouseVotes84 is the classification dataset; `cars` is the regression dataset.
#
# Every predictor column (all columns except the first, `Class`) is recoded
# from factor to its numeric level codes. This is done in base R because no
# attached package in this script provides `%>%` or `mutate_if()`.
dummy_data_classif <- HouseVotes84[, -1, drop = FALSE]
is_factor_col <- vapply(dummy_data_classif, is.factor, logical(1))
dummy_data_classif[is_factor_col] <- lapply(
  dummy_data_classif[is_factor_col],
  as.numeric
)

# Put the untouched response back as the first column.
dummy_data_classif <- data.frame(
  Class = HouseVotes84[, 1],
  dummy_data_classif
)

# Missing values are encoded as 0, so the model never sees an NA.
dummy_data_classif[is.na(dummy_data_classif)] <- 0

# NOTE(review): the regression call further down uses `Price ~ .`, so this
# presumably resolves to the `cars` data shipped with caret (not
# datasets::cars, which has no Price column) -- confirm.
data("cars")
dummy_data_regr <- cars
#' Fit a caret model under a chosen resampling plan and report the outcome.
#'
#' Runs `caret::train()` under a timeout. Errors (including a timeout or an
#' unknown `resample_plan`) are reported with `message()` and turned into a
#' `NULL` result. Warnings are reported with `message()` and then muffled, so
#' training carries on and the fitted model is still returned.
#'
#' @param dummy_data Data frame passed to `caret::train()` as `data`.
#' @param formula Model formula, or a string that `as.formula()` can convert.
#' @param resample_plan Which `trainControl()` setup to use:
#'   1 = "repeatedcv" (10 folds, 5 repeats), 2 = "cv" (5 folds),
#'   3 = "adaptive_cv" (10 folds, 5 repeats, Bradley-Terry "BT" adaptive
#'   settings), 4 = "boot" (5 resamples), 5 = "boot_all" (5 resamples).
#' @param test_method Passed to `caret::train()` as `method`.
#' @param time_limit Timeout in seconds for the `train()` call. Previously
#'   this argument was ignored and a fixed 300 seconds was used.
#' @param grid_param Optional tuning grid. When it has length zero, `train()`
#'   builds its own default grid.
#' @param parallel_mode Passed to `trainControl()` as `allowParallel`.
#' @return The fitted `train` object, or `NULL` if training failed.
caret_method_tester <- function(dummy_data, formula, resample_plan=1,
                                test_method, time_limit=30,
                                grid_param=c(), parallel_mode=FALSE){
  # Kept from the original: callers may rely on both packages being attached.
  library(caret)
  library(R.utils)

  formula <- as.formula(formula)

  # train() treats tuneGrid = NULL exactly like an omitted tuneGrid, so one
  # call covers both the "grid supplied" and "no grid" cases.
  tune_grid <- if (length(grid_param) > 0) grid_param else NULL

  tryCatch(
    # Calling handlers run *without* unwinding the stack, which lets a warning
    # be logged and muffled while train() keeps running. The old exiting
    # warning handler aborted training and then referenced a `model` object
    # that had never been assigned.
    withCallingHandlers(
      {
        # Built inside tryCatch() so an unknown plan is reported like any
        # other failure instead of passing trControl = NULL on to train().
        resampling <- switch(
          as.character(resample_plan),
          "1" = trainControl(method = "repeatedcv",
                             number = 10,
                             repeats = 5,
                             allowParallel = parallel_mode),
          "2" = trainControl(method = "cv",
                             number = 5,
                             allowParallel = parallel_mode),
          "3" = trainControl(method = "adaptive_cv",
                             number = 10, repeats = 5,
                             allowParallel = parallel_mode,
                             adaptive = list(min = 3, alpha = 0.05,
                                             method = "BT", complete = FALSE)),
          "4" = trainControl(method = "boot",
                             number = 5,
                             allowParallel = parallel_mode),
          "5" = trainControl(method = "boot_all",
                             number = 5,
                             allowParallel = parallel_mode),
          stop("Unknown resample_plan: ", resample_plan,
               " (expected an integer from 1 to 5)", call. = FALSE)
        )

        # A timeout is raised as an error and handled by the error handler.
        R.utils::withTimeout(
          caret::train(formula,
                       data = dummy_data,
                       method = test_method,
                       trControl = resampling,
                       tuneGrid = tune_grid),
          timeout = time_limit
        )
      },
      warning = function(cond) {
        message("Warning Triggered!")
        message("Here's the original warning message:")
        message(conditionMessage(cond))
        invokeRestart("muffleWarning")
      }
    ),
    error = function(cond) {
      message("Test Model Failed")
      message("Here's the original error message:")
      message(conditionMessage(cond))
      NULL
    }
  )
}
# Regression run: bartMachine on the regression data with resampling plan 2.
bart_reg <- caret_method_tester(
  dummy_data = dummy_data_regr,
  formula = "Price ~ .",
  resample_plan = 2,
  test_method = "bartMachine",
  time_limit = 30
)
Test Model Failed
Here's the original error message:
argument is of length zero
# Classification run: bartMachine on the vote data with resampling plan 2.
bart_classif <- caret_method_tester(
  dummy_data = dummy_data_classif,
  formula = "Class ~ .",
  resample_plan = 2,
  test_method = "bartMachine",
  time_limit = 30
)
Test Model Failed
Here's the original error message:
incorrect number of dimensions
我使用 tryCatch 来报告代码的运行情况,这样在代码失败、发出警告或成功时都能一目了然。
据我所知,数据集中也没有任何 NA 值。
发布于 2020-07-19 18:57:29
如果你将代码减少到必要的部分会更好,基本上bartMachine的训练函数不起作用。我们可以用这个例子来说明这一点,并且我们得到了相同的错误消息:
# Minimal reproduction: no tuneGrid is supplied, so caret has to build the
# default grid for "bartMachine" itself.
mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = "bartMachine",
  trControl = trainControl(method = "cv")
)
Error in if (grepl("adaptive", trControl$method) & nrow(tuneGrid) == 1) { :
argument is of length zero
这个错误源于 caret 自身代码中的一个 bug:如果不提供调优网格(tuneGrid),用于创建默认网格的函数不会返回 data.frame(如下所示,函数末尾没有返回 out):
getModelInfo()$bartMachine$grid
function(x, y, len = NULL, search = "grid") {
if(search == "grid") {
out <- expand.grid(num_trees = 50,
k = (1:len)+ 1,
alpha = seq(.9, .99, length = len),
beta = seq(1, 3, length = len),
nu = (1:len)+ 1)
} else {
out <- data.frame(num_trees = sample(10:100, replace = TRUE, size = len),
k = runif(len, min = 0, max = 5),
alpha = runif(len, min = .9, max = 1),
beta = runif(len, min = 0, max = 4),
nu = runif(len, min = 0, max = 5))
}
if(is.factor(y)) {
out$k <- NA
out$nu <- NA
}
}
您可以自行提供一个调优网格:
# Workaround 1: pass an explicit one-row tuning grid so caret never calls the
# model's default grid function.
mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = "bartMachine",
  trControl = trainControl(method = "boot"),
  tuneGrid = data.frame(num_trees = 50, k = 3, alpha = 0.1, beta = 0.1, nu = 4)
)
mdl
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Bootstrapped (25 reps)
Summary of sample sizes: 32, 32, 32, 32, 32, 32, ...
Resampling results:
RMSE Rsquared MAE
2.826126 0.8344417 2.292464
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 0.1
Tuning parameter 'nu' was
held constant at a value of 4
或者修复上面的函数并创建一个新的自定义方法;更多内容可参阅 caret 文档中"Using Your Own Model in train"一章:
# Workaround 2: copy caret's built-in "bartMachine" model definition and
# replace only its grid generator with a version that returns the grid.
# The lookup is exact (regex = FALSE plus `[[`), so no partial matching
# against other model names can occur.
newBartMachine <- getModelInfo("bartMachine", regex = FALSE)[["bartMachine"]]

#' Build the tuning grid for bartMachine.
#'
#' @param x Predictors (unused here; part of caret's grid-function signature).
#' @param y Outcome; when it is a factor, `k` and `nu` are set to NA.
#' @param len Number of candidate values per parameter (grid search) or number
#'   of random rows (random search).
#' @param search "grid" for a full factorial grid; anything else draws random
#'   candidates.
#' @return A data.frame with columns num_trees, k, alpha, beta and nu.
newBartMachine$grid <- function(x, y, len = NULL, search = "grid") {
  if (search == "grid") {
    out <- expand.grid(num_trees = 50,
                       k = seq_len(len) + 1,
                       alpha = seq(.9, .99, length.out = len),
                       beta = seq(1, 3, length.out = len),
                       nu = seq_len(len) + 1)
  } else {
    out <- data.frame(num_trees = sample(10:100, replace = TRUE, size = len),
                      k = runif(len, min = 0, max = 5),
                      alpha = runif(len, min = .9, max = 1),
                      beta = runif(len, min = 0, max = 4),
                      nu = runif(len, min = 0, max = 5))
  }
  if (is.factor(y)) {
    out$k <- NA
    out$nu <- NA
  }
  # The built-in version ends on the `if` above and so never returns the
  # grid; returning it here is the actual fix.
  out
}
# Train with the patched model definition; tuneLength = 1 asks the grid
# function for a single candidate per parameter.
mdl <- train(
  mpg ~ .,
  data = mtcars,
  method = newBartMachine,
  trControl = trainControl(method = "cv"),
  tuneLength = 1
)
Bayesian Additive Regression Trees
32 samples
10 predictors
No pre-processing
Resampling: Cross-Validated (10 fold)
Summary of sample sizes: 28, 28, 28, 29, 30, 30, ...
Resampling results:
RMSE Rsquared MAE
2.338429 0.9581958 2.057181
Tuning parameter 'num_trees' was held constant at a value of 50
'beta' was held constant at a value of 1
Tuning parameter 'nu' was
held constant at a value of 2
https://stackoverflow.com/questions/62946906
复制相似问题