我尝试使用 arules 包中的 discretize 函数对变量进行离散化,但输出的标签非常难看。谁能建议如何将这些标签改为“低”、“中”、“高”,或者简单的 1、2、3?
library(arules)
#> Warning: package 'arules' was built under R version 3.6.3
#> Loading required package: Matrix
#>
#> Attaching package: 'arules'
#> The following objects are masked from 'package:base':
#>
#> abbreviate, write
discretize(iris[,1], breaks = 3)
#> [1] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4)
#> [8] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [15] [5.4,6.3) [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3)
#> [22] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [29] [4.3,5.4) [4.3,5.4) [4.3,5.4) [5.4,6.3) [4.3,5.4) [5.4,6.3) [4.3,5.4)
#> [36] [4.3,5.4) [5.4,6.3) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [43] [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4) [4.3,5.4)
#> [50] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [5.4,6.3)
#> [57] [6.3,7.9] [4.3,5.4) [6.3,7.9] [4.3,5.4) [4.3,5.4) [5.4,6.3) [5.4,6.3)
#> [64] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [71] [5.4,6.3) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [78] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [85] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [92] [5.4,6.3) [5.4,6.3) [4.3,5.4) [5.4,6.3) [5.4,6.3) [5.4,6.3) [5.4,6.3)
#> [99] [4.3,5.4) [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [106] [6.3,7.9] [4.3,5.4) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [113] [6.3,7.9] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [120] [5.4,6.3) [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [127] [5.4,6.3) [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [134] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9]
#> [141] [6.3,7.9] [6.3,7.9] [5.4,6.3) [6.3,7.9] [6.3,7.9] [6.3,7.9] [6.3,7.9]
#> [148] [6.3,7.9] [5.4,6.3) [5.4,6.3)
#> attr(,"discretized:breaks")
#> [1] 4.3 5.4 6.3 7.9
#> attr(,"discretized:method")
#> [1] frequency
#> Levels: [4.3,5.4) [5.4,6.3) [6.3,7.9]
table(discretize(iris[,1], breaks = 3))
#>
#> [4.3,5.4) [5.4,6.3) [6.3,7.9]
#> 46 53 51
发布于 2020-10-23 20:26:18
如果我没理解错您的目标,您可以用 base R 自带的 cut 函数实现同样的效果。例如:
cut(iris$Sepal.Length, breaks = c(4.3, 5.4, 6.3, 7.9), labels = c('lo', 'med', 'hi'))
如果要用分组后的结果替换原来的数值:
cuts <- cut(iris$Sepal.Length, breaks = c(4.3, 5.4, 6.3, 7.9), labels = c('lo', 'med', 'hi'))
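iris$Sepal.Length <- cuts
只需将标签替换为您自己的标签即可。注意:cut 默认使用左开右闭区间 (a, b],且不包含最小断点,因此最小值 4.3 会变成 NA;若要与 discretize 的 [a, b) 区间保持一致,请在调用 cut 时加上 right = FALSE, include.lowest = TRUE。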
发布于 2020-10-23 20:43:27
对于其中一列,您可以这样做:
discretize(iris[,1], breaks = 3,labels=c(letters[1:3]))
对于 data.frame,您可以通过 default= 参数传入这些设置:
discretizeDF(iris, default = list(method = "interval", breaks = 3,labels=1:3))
这些用法都可以在帮助页面(help page)提供的示例中找到。
https://stackoverflow.com/questions/64499722
复制相似问题