我的代码如下:
library(mlr3verse)
library(mlr3pipelines)
library(mlr3filters)
library(paradox)
# Feature-filter step: score features with ranger impurity importance and
# keep the top 70% of them (filter.frac = 0.7).
filter_importance <- po(
  "filter",
  filter = FilterImportance$new(
    learner = lrn("classif.ranger", importance = "impurity")
  ),
  param_vals = list(filter.frac = 0.7)
)

# Final classifier: a ranger random forest that predicts probabilities.
learner_classif <- lrn(
  "classif.ranger",
  predict_type = "prob",
  importance = "impurity",
  num.trees = 500
)
polrn_classif <- PipeOpLearner$new(learner_classif)

# create learner graph: filter -> classifier, wrapped as a single learner
glrn_classif <- filter_importance %>>% polrn_classif
glrn_classif <- GraphLearner$new(glrn_classif)
glrn_classif$predict_type <- "prob"

# task
task <- tsk("german_credit")

# set search_space
# Built with ps()/p_int()/p_dbl() instead of ParamInt$new()/ParamDbl$new():
# the R6 Param* constructors were removed in paradox 1.0, while ps() yields
# the same ParamSet. Parameter ids carry the "classif.ranger." prefix because
# they address the learner inside the graph.
ps_classif <- ps(
  classif.ranger.num.trees = p_int(lower = 300, upper = 500),
  classif.ranger.sample.fraction = p_dbl(lower = 0.7, upper = 0.8)
)

# auto tuning: random search, 3 evaluations, inner 3-fold CV, optimising AUC
at <- AutoTuner$new(
  learner = glrn_classif,
  resampling = rsmp("cv", folds = 3),
  measure = msr("classif.auc"),
  search_space = ps_classif,
  terminator = trm("evals", n_evals = 3),
  tuner = tnr("random_search")
)
# sampling
rr = resample(task, at, rsmp("cv", folds = 2))
当我通过重采样和训练好的学习器at得到rr对象之后,我想请教一下这些步骤内部究竟做了什么?
例如:
当我得到预测结果时,这些预测来自哪些数据(test_index)?
filter_importance步骤中选择了哪些变量?在这一步中,每个变量的得分是多少?
非常感谢!
发布于 2021-06-08 07:06:03
为了能够在重采样之后查看各个模型,最好在调用resample时设置store_models = TRUE。
用你的例子
library(mlr3verse)
set.seed(1)
rr <- resample(task,
at,
rsmp("cv", folds = 2),
store_models = TRUE)
完成重采样之后,可以按如下方式访问结果对象的内部结构:
要获取每个折叠中的行id(row_id),请执行以下操作:
rr$resampling$instance
#output
row_id fold
1: 5 1
2: 8 1
3: 9 1
4: 12 1
5: 13 1
---
996: 989 2
997: 993 2
998: 994 2
999: 995 2
1000: 996 2
有了这些行id和调优后的AutoTuner,我们就可以手动生成预测。
生成测试索引列表
rsample <- split(rr$resampling$instance$row_id,
rr$resampling$instance$fold)
遍历各个折叠及其对应的调优后的AutoTuner并进行预测:
lapply(1:2, function(i){
x <- rsample[[i]] #get the test row ids
task_test <- task$clone() #clone the task so we don't change the original task
task_test$filter(x) #filter on the test row ids
preds <- rr$learners[[i]]$predict(task_test) #use the trained autotuner and above filtered task
preds
}) -> preds_manual
检查这些预测是否与重采样的输出一致:
all.equal(preds_manual,
rr$predictions())
#output
TRUE
获取有关调优的信息:
# Trained AutoTuners, one per outer resampling iteration. Uses the public
# `rr$learners` accessor (as the manual-prediction snippet above already does)
# rather than reaching into the `rr$data` storage object. The trained models
# are only available when resample() was called with store_models = TRUE.
zz <- rr$learners
# Tuning result (selected hyperparameters and score) of each AutoTuner.
lapply(zz, function(x) x$tuning_result)
#output
[[1]]
classif.ranger.num.trees classif.ranger.sample.fraction learner_param_vals
1: 342 0.7931022 <list[7]>
x_domain classif.auc
1: <list[2]> 0.7981283
[[2]]
classif.ranger.num.trees classif.ranger.sample.fraction learner_param_vals
1: 407 0.7964164 <list[7]>
x_domain classif.auc
1: <list[2]> 0.7706533
插槽zz[[1]]$learner$state$model$importance包含有关filter_importance步骤的信息。
具体来说
lapply(zz, function(x) x$learner$state$model$importance$scores)
#output
[[1]]
amount status age
27.491369 25.776145 22.021369
duration purpose credit_history
18.732521 16.251643 14.884843
employment_duration savings property
11.225678 10.796583 9.078619
personal_status_sex present_residence installment_rate
8.914802 7.875384 7.491573
job number_credits other_installment_plans
6.293323 5.662485 5.345666
housing telephone other_debtors
4.869471 3.742213 3.548856
people_liable foreign_worker
2.632163 1.054919
[[2]]
amount duration age
26.764389 22.139400 20.749865
status purpose employment_duration
20.524764 11.793789 10.962301
credit_history installment_rate savings
10.416572 9.597835 9.491894
property present_residence job
9.403157 7.877391 6.760945
personal_status_sex housing other_installment_plans
6.699065 5.811131 5.710761
telephone other_debtors number_credits
4.716322 4.318972 3.974793
people_liable foreign_worker
3.196563 0.846520
包含各特征的得分及排序。而
lapply(zz, function(x) x$learner$state$model$importance$outtasklayout)
#output
[[1]]
id type
1: age integer
2: amount integer
3: credit_history factor
4: duration integer
5: employment_duration factor
6: installment_rate ordered
7: job factor
8: number_credits ordered
9: personal_status_sex factor
10: present_residence ordered
11: property factor
12: purpose factor
13: savings factor
14: status factor
[[2]]
id type
1: age integer
2: amount integer
3: credit_history factor
4: duration integer
5: employment_duration factor
6: housing factor
7: installment_rate ordered
8: job factor
9: personal_status_sex factor
10: present_residence ordered
11: property factor
12: purpose factor
13: savings factor
14: status factor
包含筛选步骤后保留的特征。
https://stackoverflow.com/questions/67869401
复制相似问题