我在 R 中运行了下面的 GLM 模型,但 summary 的输出结果没有意义:所有 P 值都相同,系数也非常大。显然有什么地方不对劲,但我不知道具体是什么问题。
mod.glm <- glm(factor(CloseWon) ~ days_to_close + factor(state) + number_of_times_contacted + number_of_sales_activities + factor(original_source) + average_pageviews + marketing_emails_delivered + sends_since_last_engagement, data = HSBI_train, family = binomial('logit'))
summary(mod)
下面是输出结果截图的链接,谢谢。
[1]: https://i.stack.imgur.com/A5jsQ.jpg
按要求提交:
dput(HSBI)
structure(list(days_to_close = c(143L, 0L, 264L, 0L, 138L, 0L,
117L, 48L, 258L, 0L, 59L, 125L, 204L, 260L, 0L, 210L, 0L, 119L,
0L, 286L, 29L, 0L, 56L, 0L, 0L, 92L, 92L, 94L, 38L, 223L, 284L,
0L, 289L, 278L, 128L, 52L, 137L, 0L, 256L, 0L, 119L, 175L, 225L,
118L, 161L, 129L, 94L, 0L, 33L, 0L, 0L, 26L, 0L, 0L, 0L, 0L,
163L, 112L, 0L, 0L, 0L, 6L, 0L, 0L, 8L, 0L, 0L, 251L, 0L, 0L,
100L, 0L, 118L, 126L, 65L, 0L, 116L, 120L, 0L, 115L, 20L, 40L,
0L, 77L, 0L, 0L, 0L, 0L, 0L, 0L, 184L, 0L, 268L, 0L, 49L, 128L,
0L, 129L, 240L, 0L, 164L, 0L, 73L, 0L, 0L, 200L, 0L, 22L, 0L,
0L, 0L, 0L, 0L, 268L, 20L, 0L, 0L, 31L, 99L, 0L, 0L, 0L, 0L,
263L, 0L, 0L, 265L, 0L, 280L, 174L, 267L, 0L, 0L, 260L, 0L, 0L,
0L, 219L, 0L, 0L, 292L, 0L, 259L, 0L, 0L, 114L, 127L, 127L, 0L,
41L, 0L, 251L, 281L, 0L, 226L, 277L, 268L, 0L, 219L, 0L, 97L,
0L, 0L, 0L, 218L, 0L, 98L, 64L, 0L, 0L, 101L, 0L, 0L, 0L, 0L,
0L, 165L, 0L, 0L, 0L, 0L, 76L, 48L, 233L, 0L, 0L, 0L, 0L, 0L,
0L, 107L, 189L, 0L, 94L, 19L, 223L, 128L, 0L, 0L, 0L, 106L, 246L,
0L, 0L, 118L, 168L, 160L, 0L, 225L, 231L, 222L, 0L, 0L, 122L,
0L, 37L, 236L, 246L, 0L, 16L, 0L, 70L, 0L, 123L, 264L, 0L, 0L,
0L, 0L, 264L, 0L, 41L, 296L, 124L, 198L, 0L, 0L, 0L, 58L, 156L,
166L, 274L, 0L, 88L, 2L, 0L, 124L, 0L, 80L, 41L, 278L, 0L, 0L,
252L, 0L, 80L, 0L, 0L), state = c("", "fl", "fl", "sj", "ga",
"nc", "ga", "in", "ga", "ca", "va", "ca", "va", "tn", "co", "tx",
"fl", "tn", "ca", "tn", "in", "ga", "il", "nj", "ca", "ga", "ga",
"", "", "ga", "ga", "fl", "ga", "nc", "ga", "tx", "", "ga", "ga",
"dc", "ny", "tn", "fl", "va", "ga", "al", "ca", "nv", "ca", "ga",
"sc", "va", "ga", "oh", "ga", "fl", "la", "tn", "ny", "fl", "ca",
"ca", "wa", "ny", "il", "ga", "ca", "fl", "fl", "al", "al", "fl",
"al", "tn", "sc", "", "fl", "ga", "az", "fl", "ga", "", "ca",
"ga", "ga", "oh", "ga", "al", "ga", "", "tx", "sc", "ga", "ny",
"nc", "tn", "co", "oh", "al", "tx", "", "co", "ne", "ny", "fl",
"oh", "ga", "ia", "va", "fl", "sc", "ca", "tn", "ga", "co", "ok",
"ga", "al", "tx", "", "fl", "md", "ga", "al", "tn", "wa", "al",
"oh", "ga", "ga", "", "pa", "oh", "al", "ny", "az", "tn", "oh",
"ga", "tx", "ga", "tx", "tn", "va", "fl", "ga", "ga", "fl", "ny",
"fl", "az", "ga", "ga", "tn", "ga", "", "ga", "pa", "fl", "tn",
"al", "ga", "al", "ga", "ms", "vt", "ca", "fl", "fl", "ky", "oh",
"wa", "fl", "wa", "ga", "az", "il", "al", "nc", "al", "nj", "tx",
"tx", "fl", "la", "ga", "nc", "", "ga", "al", "tx", "oh", "fl",
"tn", "tn", "fl", "ga", "ca", "", "ca", "fl", "ga", "fl", "dc",
"md", "fl", "tx", "oh", "tx", "", "al", "tx", "fl", "tn", "tx",
"ny", "ny", "tn", "az", "ga", "al", "tx", "sc", "tn", "tn", "ca",
"al", "tn", "fl", "al", "tn", "ga", "ga", "ga", "fl", "pa", "tx",
"co", "sc", "tx", "tx", "ga", "ny", "ma", "ny", "fl", "fl", "ca",
"tn", "nv", "ga", "ga", "tx", "", "or", "", "fl", "il"), number_of_times_contacted = c(7L,
5L, 7L, 2L, 40L, 4L, 6L, 6L, 5L, 1L, 3L, 5L, 8L, 8L, 1L, 10L,
4L, 9L, 3L, 10L, 6L, 4L, 7L, 7L, 2L, 9L, 1L, 5L, 3L, 9L, 11L,
4L, 8L, 10L, 10L, 6L, 10L, 3L, 10L, 12L, 5L, 7L, 8L, 5L, 31L,
10L, 6L, 1L, 5L, 20L, 15L, 7L, 3L, 3L, 6L, 6L, 6L, 7L, 2L, 3L,
1L, 2L, 1L, 19L, 2L, 3L, 1L, 10L, 5L, 3L, 7L, 2L, 8L, 9L, 3L,
3L, 8L, 12L, 1L, 7L, 2L, 2L, 4L, 50L, 6L, 4L, 2L, 3L, 1L, 9L,
7L, 3L, 14L, 1L, 3L, 5L, 7L, 4L, 8L, 4L, 6L, 2L, 4L, 7L, 5L,
7L, 7L, 6L, 5L, 6L, 4L, 1L, 5L, 8L, 2L, 1L, 6L, 3L, 4L, 4L, 4L,
1L, 6L, 8L, 4L, 3L, 8L, 11L, 10L, 6L, 8L, 5L, 8L, 6L, 4L, 2L,
10L, 8L, 6L, 8L, 8L, 8L, 8L, 5L, 6L, 4L, 5L, 9L, 12L, 4L, 1L,
11L, 12L, 7L, 9L, 14L, 8L, 3L, 7L, 17L, 8L, 4L, 2L, 33L, 9L,
1L, 4L, 8L, 6L, NA, 7L, 3L, 9L, 2L, 5L, 8L, 6L, 6L, 4L, 10L,
3L, 4L, 3L, 12L, 24L, 9L, 3L, 11L, 3L, 19L, 3L, 7L, 4L, 9L, 6L,
7L, 10L, 1L, 1L, 1L, 4L, 10L, 1L, 3L, 5L, 7L, 6L, 2L, 7L, 9L,
8L, 10L, 6L, 8L, 2L, 3L, 8L, 9L, 1L, 3L, 13L, 10L, 5L, 9L, 8L,
1L, 6L, 12L, 4L, 11L, 10L, 4L, 10L, 10L, 7L, 6L, 3L, 4L, 3L,
6L, 6L, 10L, 4L, 9L, 2L, 21L, 9L, 1L, 4L, 3L, 21L, 8L, 5L, 10L,
3L, 8L, 8L, 7L), number_of_sales_activities = c(8L, 5L, 7L, 2L,
61L, 4L, 7L, 6L, 5L, 1L, 3L, 5L, 8L, 8L, 1L, 10L, 4L, 9L, 3L,
10L, 6L, 4L, 7L, 9L, 2L, 9L, 2L, 5L, 3L, 9L, 11L, 4L, 8L, 10L,
10L, 6L, 14L, 3L, 10L, 14L, 5L, 8L, 8L, 5L, 43L, 10L, 7L, 1L,
5L, 22L, 21L, 7L, 3L, 3L, 7L, 6L, 6L, 7L, 2L, 3L, 1L, 2L, 1L,
28L, 2L, 3L, 1L, 10L, 5L, 4L, 7L, 2L, 8L, 9L, 3L, 3L, 8L, 12L,
1L, 7L, 2L, 2L, 4L, 98L, 6L, 6L, 2L, 3L, 1L, 9L, 7L, 4L, 17L,
2L, 3L, 5L, 7L, 4L, 8L, 4L, 7L, 2L, 5L, 8L, 5L, 7L, 8L, 6L, 5L,
6L, 4L, 1L, 5L, 8L, 2L, 1L, 6L, 3L, 4L, 4L, 4L, 1L, 6L, 9L, 4L,
3L, 8L, 16L, 10L, 6L, 8L, 5L, 9L, 6L, 4L, 2L, 10L, 8L, 7L, 8L,
8L, 8L, 8L, 5L, 6L, 4L, 5L, 9L, 15L, 4L, 1L, 11L, 12L, 9L, 10L,
21L, 8L, 4L, 7L, 21L, 8L, 4L, 2L, 61L, 9L, 1L, 4L, 8L, 7L, NA,
7L, 3L, 9L, 2L, 5L, 9L, 6L, 6L, 4L, 12L, 3L, 5L, 3L, 12L, 35L,
17L, 3L, 12L, 3L, 28L, 3L, 8L, 4L, 9L, 6L, 7L, 10L, 1L, 1L, 1L,
4L, 10L, 1L, 3L, 5L, 7L, 6L, 2L, 7L, 10L, 8L, 10L, 6L, 8L, 3L,
3L, 8L, 9L, 1L, 3L, 20L, 10L, 7L, 9L, 8L, 1L, 6L, 12L, 4L, 12L,
10L, 5L, 10L, 11L, 7L, 7L, 3L, 4L, 3L, 6L, 6L, 11L, 4L, 9L, 2L,
26L, 9L, 1L, 4L, 3L, 39L, 8L, 6L, 10L, 3L, 8L, 8L, 7L), original_source = c("Direct Traffic",
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Organic Search",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Organic Search",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Organic Social", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Paid Social", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Paid Social", "Direct Traffic",
"Paid Social", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Paid Social",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Paid Social", "Paid Social", "Direct Traffic", "Direct Traffic",
"Paid Social", "Paid Social", "Direct Traffic", "Direct Traffic",
"Organic Search", "Direct Traffic", "Direct Traffic", "Direct Traffic",
"Direct Traffic"), average_pageviews = c(1L, 1L, 2L, 2L, 1L,
2L, 1L, 2L, 7L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 2L, 2L,
4L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 5L, 1L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 2L,
3L, 3L, 2L, 1L, 1L, 4L, 3L, 2L, 2L, 4L, 1L, 2L, 3L, 2L, 1L, 2L,
5L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 5L, 6L, 1L, 1L, 4L,
2L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 5L, 2L, 2L, 2L, 6L,
3L, 2L, 2L, 2L, 2L, 1L, 2L, 5L, 1L, 2L, 3L, 2L, 2L, 3L, 1L, 3L,
2L, 2L, 3L, 3L, 2L, 1L, 1L, 2L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 4L, 2L, 3L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L, 2L,
3L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 3L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 4L,
4L, 1L, 2L, 2L, 0L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 3L, 2L, 2L, 2L,
4L, 1L, 3L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L), marketing_emails_delivered = c(15L,
6L, 13L, 7L, 65L, 10L, 11L, 11L, 22L, 3L, 8L, 12L, 17L, 15L,
2L, 18L, 9L, 11L, 9L, 20L, 10L, 12L, 11L, 14L, 8L, 11L, 11L,
6L, 8L, 19L, 21L, 3L, 20L, 22L, 15L, 11L, 14L, 8L, 21L, 21L,
11L, 14L, 15L, 11L, 47L, 16L, 10L, 9L, 8L, 22L, 14L, 10L, 8L,
2L, 13L, 15L, 16L, 16L, 2L, 7L, 2L, 9L, 9L, 21L, 9L, 8L, 3L,
21L, 15L, 2L, 11L, 2L, 11L, 16L, 8L, 3L, 16L, 17L, 2L, 16L, 8L,
8L, 9L, 44L, 19L, 9L, 9L, 9L, 3L, 12L, 7L, 9L, 22L, 7L, 8L, 12L,
12L, 12L, 21L, 5L, 16L, 2L, 3L, 18L, 15L, 15L, 21L, 10L, 9L,
10L, 12L, 3L, 13L, 22L, 9L, 6L, 17L, 7L, 10L, 3L, 3L, 9L, 13L,
22L, 14L, 9L, 22L, 8L, 23L, 13L, 20L, 9L, 10L, 22L, 9L, 9L, 18L,
19L, 13L, 19L, 20L, 18L, 13L, 7L, 10L, 11L, 12L, 16L, 19L, 8L,
3L, 13L, 24L, 15L, 28L, 24L, 22L, 4L, 19L, 31L, 15L, 9L, 2L,
44L, 11L, 2L, 10L, 13L, 18L, 3L, 15L, 9L, 19L, 8L, 6L, 9L, 16L,
10L, 5L, 19L, 2L, 9L, 8L, 20L, 14L, 8L, 9L, 21L, 7L, 21L, 11L,
14L, 19L, 15L, 10L, 19L, 16L, 1L, 9L, 3L, 10L, 21L, 3L, 3L, 11L,
15L, 16L, 2L, 11L, 21L, 11L, 22L, 16L, 16L, 9L, 8L, 21L, 21L,
3L, 1L, 20L, 13L, 9L, 16L, 13L, 6L, 10L, 15L, 3L, 20L, 11L, 8L,
21L, 16L, 14L, 8L, 9L, 15L, 8L, 16L, 9L, 23L, 9L, 15L, 2L, 22L,
16L, 3L, 9L, 8L, 31L, 19L, 18L, 21L, 9L, 14L, 19L, 10L), CloseWon = c(1,
0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1,
1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0,
1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0,
0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1,
0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0,
1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1,
0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1,
1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,
1, 0, 1, 0, 0)), class = "data.frame", row.names = c(NA, -258L
))
发布于 2022-02-11 22:29:53
您的自变量 days_to_close 有问题。请注意,即使在下面这个只含该变量的简单回归中,也会出现“拟合概率在数值上为 0 或 1”的警告。
> mod.glm <- glm(factor(CloseWon) ~ days_to_close, data=data,
+ family = binomial('logit'))
Warning messages:
1: glm.fit: algorithm did not converge
2: glm.fit: fitted probabilities numerically 0 or 1 occurred
要了解为什么会这样,请注意
> table(data$days_to_close > 0, data$CloseWon)
0 1
FALSE 124 2
TRUE 0 132
因此,您可以看到,无论其他自变量取什么值,只要 days_to_close 大于零,CloseWon 就总是为 1(而当 days_to_close 等于 0 时,CloseWon 几乎总是为 0)。粗略地说,这意味着您要优化的函数只有在系数趋于 +∞ 时才达到最优。优化算法会朝这个方向一直迭代下去,最终把其他回归系数也推到非常大的值。
关于如何解决完全分离(perfect separation)的问题,已有大量的文献。例如,您可以从这里(here)入手。
https://stackoverflow.com/questions/71071638
复制相似问题