我一直在处理一个数据集,其中有一列是州(State),共有3,000条观测。为了训练神经网络,我想把这些州编码成若干区域指示列,包括太平洋、中部、东部、AK、HI和山区。
下面的代码可以工作,但我觉得一定有更简单的方法。
已加载的软件包:
library(tidyverse)
library(readr)
library(FNN)
library(rpart)
library(C50)
library(nnet)
library(FME)
我一直在使用的for循环:
# Flag eastern states row by row: state.cat.east is set to 1 when the row's
# State equals one of the abbreviations listed in the condition, and to 0
# otherwise. The loop runs once per element of churn$Churn. (presumably one
# per row of churn -- confirm).
# Caveats: 1:length(x) evaluates to c(1, 0) when x is empty, and if() raises
# an error when State[i] is NA because every comparison is then NA.
for (i in 1:length(churn$Churn.)) {
if(churn$State[i]== "CT" | churn$State[i]== "DE"| churn$State[i]== "FL" | churn$State[i]== "GA" | churn$State[i]== "IN" | churn$State[i]== "ME" | churn$State[i]== "MD" | churn$State[i]== "MA" |churn$State[i]== "MI" |churn$State[i]== "NH" |churn$State[i]== "NJ" | churn$State[i]== "NY" |churn$State[i]== "NC" | churn$State[i]== "OH" |churn$State[i]== "PA" |churn$State[i]== "RI" |churn$State[i]== "SC" | churn$State[i]== "VT" | churn$State[i]== "VA" |churn$State[i]== "DC" | churn$State[i]== "WV" ) {
churn$state.cat.east[i]<-1
} else {
churn$state.cat.east[i]<-0
}
}
# Flag central states row by row: state.cat.central is set to 1 when the
# row's State equals one of the abbreviations listed in the condition, and to
# 0 otherwise. Same structure as the eastern-region loop, with a different
# list of states.
# Caveats: 1:length(x) evaluates to c(1, 0) when x is empty, and if() raises
# an error when State[i] is NA because every comparison is then NA.
for (i in 1:length(churn$Churn.)) {
if(churn$State[i]== "AL" | churn$State[i]== "AR" | churn$State[i]== "IL" | churn$State[i]== "IA" | churn$State[i]== "KS" | churn$State[i]== "KY" | churn$State[i]== "LA" | churn$State[i]== "MN" | churn$State[i]== "MS" | churn$State[i]== "MO" | churn$State[i]== "NE" | churn$State[i]== "ND" | churn$State[i]== "OK" | churn$State[i]== "SD" | churn$State[i]== "TN" | churn$State[i]== "TX" | churn$State[i]== "WI" ) {
churn$state.cat.central[i]<-1
} else {
churn$state.cat.central[i]<-0
}
}
这是我在这里的第一篇帖子,希望我能得到我所需要的一切,也谢谢你的帮助!
发布于 2018-02-23 07:06:19
您可以用两行代码做到这一点,使用 ifelse 和 %in% 操作符:
# First statement: abbreviations of the states assigned to the eastern region.
east <- c(
  "CT", "DE", "FL", "GA", "IN", "ME", "MD",
  "MA", "MI", "NH", "NJ", "NY", "NC", "OH",
  "PA", "RI", "SC", "VT", "VA", "DC", "WV"
)
churn$state.cat.east <- ifelse(churn$State %in% east,1,0)
对中部(central)的州重复相同的操作:
# Second statement: abbreviations of the states assigned to the central region.
central <- c(
  "AL", "AR", "IL", "IA", "KS", "KY",
  "LA", "MN", "MS", "MO", "NE", "ND",
  "OK", "SD", "TN", "TX", "WI"
)
churn$state.cat.central <- ifelse(churn$State %in% central,1,0)
希望这能帮上忙
戈塔维亚诺尼
发布于 2018-02-23 07:38:19
另一种选择是使用R中可用的内置数据。
# Sample data: a small data frame with one state abbreviation per row.
churn <- data.frame(
  state = c("CA", "NY", "TX", "CA", "TX"),
  stringsAsFactors = FALSE
)

# Map each state to its division using the built-in `state` datasets, in
# which state.abb and state.division are parallel vectors. match() is
# vectorised and yields NA for abbreviations that are not in state.abb (for
# example "DC"), so the new column is always a factor with one entry per row
# instead of degrading to a list when a lookup finds nothing.
data(state)
churn$state_division <- state.division[match(churn$state, state.abb)]
#dummy code the new column created using above mapping
library(dummies)
churn <- dummy.data.frame(churn, names="state_division", sep = "-")
发布于 2018-02-23 07:16:27
我们可以在不使用ifelse的情况下完成这一任务,因为as.integer会把%in%返回的逻辑值强制转换为0/1。
# as.integer() converts the logical result of %in% to 1/0, so no ifelse() is
# needed. The `...` inside c() is a placeholder for the remaining state
# abbreviations: this line is illustrative and must be completed before it
# can run.
churn$state.cat.east <- with(churn,as.integer( State %in% c("CT", "DE", "FL", ...)))
churn$state.cat.central <- with(churn,as.integer( State %in% c("AL" , "AR", ...)))
注:...指代其余的州
如果我们需要为所有地区创建指示列:
library(purrr)
library(dplyr)
# Build one 0/1 indicator vector per region found in the built-in
# state.region data. The pipeline starts from the distinct region labels,
# converted to a character vector that is named by its own values so each
# result carries its region name.
state.region %>%
unique %>%
as.character %>%
set_names(.) %>%
# For each region (.x): look up every row's region through its State
# abbreviation (state.region named by state.abb) and flag the rows whose
# region equals .x. %in% never returns NA, so unmatched states become 0.
map_df(~ as.integer(setNames(state.region, state.abb)[churn$State] %in% .x) ) %>%
bind_cols(churn, .)
数据:
# Fix the random seed so the sampled example data is reproducible.
set.seed(24)
churn <- data.frame(State = sample(state.abb, 100, replace = TRUE), stringsAsFactors = FALSE)
https://stackoverflow.com/questions/48942285
复制相似问题