我有超过100个csv文件包含这样的数据...
> dput(head(hobo.temp))
structure(list(Serial = c("Plot Title: 20461693", "#", "1",
"2", "3", "4"), Date = c("", "Date Time, GMT-05:00", "02/14/20 10:14:50 AM",
"02/14/20 10:14:57 AM", "02/14/20 11:14:50 AM", "02/14/20 12:14:50 PM"
), Temp = c("", "Temp, °C (LGR S/N: 20461693, SEN S/N: 20461693)",
"18.866", "", "20.817", "20.913"), X1 = c("", "Coupler Detached (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "Logged", "", ""), X2 = c("", "Coupler Attached (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", ""), X3 = c("", "Host Connected (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", ""), X4 = c("", "End Of File (LGR S/N: 20461693, SEN S/N: 20461693)",
"", "", "", "")), row.names = c(NA, 6L), class = "data.frame")它很脏,所以我写了代码来清理它。
hobo.temp <- read.csv("20461693_suw_main_01_19_2021.csv",
colClasses = c(rep("character", 3), rep("NULL", 4)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
return(hobo.temp)但是当我尝试将它转换为函数并使用下面的代码迭代它时。
filenames <- list.files(path = ".", pattern='^.*\\.csv$')
hobo.read <- function(fnam) {
hobo.temp <- read.csv(fnam, colClasses = c(rep("character", 3), rep("NULL", 4)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
return(hobo.temp)
}
my.df <- do.call("rbind", lapply(filenames, hobo.read))我得到了这个错误
Error in read.table(file = file, header = header, sep = sep, quote = quote, :
more columns than column names 我不擅长写函数,所以我提前道歉。
发布于 2021-01-26 12:15:24
我意识到有几个文件有8列。我认为这可能是这种情况,并试图用我原来的问题代码colClasses = c(rep("character", 3), rep("NULL", 4))来解释它。当我将`rep("NULL",5)中的4切换为5时,它正确地使第8列无效。我修改了我原来的问题代码,使之更具可读性(也许是这样)。这是我的第一个真正的函数,它嵌套在boot中。这是草率的,但我对此感到非常自豪。
#reads filenames from
filenames <- list.files(path = ".", pattern='^.*\\.csv$')
#first function imports data
hobo.read <- function(x) {
#in... rep("NULL", 5)... 5 has to be larger than the number for columns in the csv with the largest number of columns
df1 <- read.csv(x, colClasses = c(rep("character", 3), rep("NULL", 5)),
col.names = c("Serial", "Date", "Temp", 1, 2, 3, 4),
header = FALSE, fill = TRUE, stringsAsFactors = FALSE)
# line applies the action function below
df2 <- hobo.fix(df1)
}
#function of actions to apply within 1st function
hobo.fix <- function(hobo.temp) {
hobo.temp[,1] <- hobo.temp[1,1]
hobo.temp <- hobo.temp[-c(1:4),]
hobo.temp$Serial <- gsub("Plot Title: ", "", hobo.temp$Serial, fixed = TRUE)
hobo.temp$Temp <- as.numeric(hobo.temp$Temp)
hobo.temp$Date = as.POSIXct(hobo.temp$Date, format="%m/%d/%y %H:%M")
hobo.temp <- na.omit(hobo.temp)
hobo.temp <- dplyr::arrange(hobo.temp, Date)
row.names(hobo.temp) <- NULL
return(hobo.temp)
}
hobo <- do.call("rbind", lapply(filenames, hobo.read))https://stackoverflow.com/questions/65891712
复制相似问题