我有一个如下所示的数据框 'test1':
# Example data from the question: ten rows holding a `day` timestamp string,
# `Rain`, and five SWC_* columns that are NA from the fourth row onwards.
test1 <- structure(
  list(
    day = c(
      "01/01/2019 00:00:00", "02/01/2019 00:00:00", "03/01/2019 00:00:00",
      "04/01/2019 00:00:00", "05/01/2019 00:00:00", "06/01/2019 00:00:00",
      "07/01/2019 00:00:00", "08/01/2019 00:00:00", "09/01/2019 00:00:00",
      "10/01/2019 00:00:00"
    ),
    Rain = c(0, 0.2, 0, 0.4, 0, 0, 0, 0, 0, 0),
    SWC_11 = c(51, 51.5, 51.3, NA, NA, NA, NA, NA, NA, NA),
    SWC_12 = c(60, 60.3, 60.3, NA, NA, NA, NA, NA, NA, NA),
    SWC_13 = c(63, 63.4, 63.3, NA, NA, NA, NA, NA, NA, NA),
    SWC_14 = c(60, 60.8, 60.6, NA, NA, NA, NA, NA, NA, NA),
    SWC_21 = c(64, 64.4, 64.1, NA, NA, NA, NA, NA, NA, NA)
  ),
  row.names = c(NA, -10L),
  class = "data.frame"
)
现在,我想将“day”列拆分为5分钟列,而其他列则计算平均数据。我试过了
# Parse the day strings as day/month/year. The format string contains only
# date fields, so the "00:00:00" part of each value is not parsed.
test1$day <- as.POSIXct(test1$day, format="%d/%m/%Y ")
# This breaks the day column into 5-minute bins, but it does not aggregate the
# other columns, and 'fill_1' is a list rather than a data frame, so the next
# step was tried.
# NOTE(review): `format` does not look like an argument cut.POSIXt uses - confirm.
fill_1<-split(test1, cut.POSIXt(test1$day, format="%Y-%m-%d %H:%M:%S",breaks = "5 min"))
fill_2<-as.data.frame(split(test1, cut.POSIXt(test1$day, format="%Y-%m-%d %H:%M:%S",breaks = "5 min"))) # here it doesn't work
# Goal: convert fill_1 into a data frame and compute the mean of the other
# columns. The error message is as follows:
Error in (function (..., row.names = NULL, check.rows = FALSE, check.names = TRUE, :
arguments imply differing number of rows: 1, 0
发布于 2020-09-20 03:56:05
格式不是"%Y-%m-%d %H:%M:%S"。它可以是%m/%d/%Y或%d/%m/%Y (从显示的数据中看不清楚),后跟时间部分。
# Split test1 into one data frame per non-empty 5-minute bin.
# `day` is parsed day-first ("%d/%m/%Y" plus a time of day), matching the
# format the question itself uses for this column; a month-first format would
# read the ten values as the first day of ten different months.
# droplevels() discards the bins that contain no rows, so split() only returns
# groups that actually hold data.
lst1 <- split(
  test1,
  droplevels(
    cut(as.POSIXct(test1$day, format = "%d/%m/%Y %T"), breaks = "5 min")
  )
)
发布于 2020-09-20 05:43:09
您的意思是要将日期时间放入长度为5分钟的bin中,然后计算每个bin中每个变量的平均值?如果是这样的话,您应该使用lubridate包中的floor_date。
library(tidyverse)
library(lubridate)
# Simulated example data: 500 evenly spaced timestamps from
# 2019-01-01 00:00:00 to 2019-01-03 00:00:00, plus five columns of uniform
# random values between 30 and 60.
raw <- tibble(
  datetime = seq(
    from = ymd_hms("2019-01-01 00:00:00"),
    to = ymd_hms("2019-01-03 0:00:00"),
    length.out = 500
  ),
  SWC_11 = runif(n = 500, min = 30, max = 60),
  SWC_12 = runif(n = 500, min = 30, max = 60),
  SWC_13 = runif(n = 500, min = 30, max = 60),
  SWC_14 = runif(n = 500, min = 30, max = 60),
  SWC_21 = runif(n = 500, min = 30, max = 60)
)
# Round each timestamp down to the start of its 5-minute interval, then
# average every other column within each interval.
# na.rm = TRUE keeps a bin's mean defined when some of its readings are NA
# (the SWC_* columns in the question are mostly NA); a bin whose readings are
# all NA yields NaN.
raw %>%
  mutate(datetime = floor_date(datetime, unit = "5 min")) %>%
  group_by(datetime) %>%
  summarise(
    across(everything(), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  arrange(datetime)
如果您想知道哪些bin没有值,那么您可以使用complete和full_seq来填充它们。period的单位似乎是秒,所以我使用5*60。
# Same 5-minute binning and per-bin means as a plain group-and-summarise, but
# complete() additionally inserts a row for every 5-minute step (5 * 60
# seconds) between the first and last bin, so bins without data show up as NA
# rows.
# na.rm = TRUE keeps a bin's mean defined when some of its readings are NA;
# a bin whose readings are all NA yields NaN.
raw %>%
  mutate(datetime = floor_date(datetime, unit = "5 min")) %>%
  group_by(datetime) %>%
  summarise(
    across(everything(), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  complete(datetime = full_seq(datetime, period = 5 * 60)) %>%
  arrange(datetime)
发布于 2020-09-27 10:25:55
Base R解决方案:
# Coerce day to POSIXct vector: test2 => data.frame
test2 <- transform(test1, day = as.POSIXct(day, format = "%d/%m/%Y %T"))
# Store the date range: dtrange => POSIXct vector
dtrange <- range(test2$day)
# Expand test2 onto a regular 5-minute grid spanning that range. all.y = TRUE
# keeps every grid timestamp, leaving NA where test2 has no matching row:
# fill_1 => data.frame
fill_1 <- merge(
  test2,
  data.frame(day = seq.POSIXt(dtrange[1], dtrange[2], by = "5 min")),
  all.y = TRUE
)
# Store a vector of the numeric column indices: num_vecs => integer vector
num_vecs <- which(vapply(fill_1, is.numeric, logical(1)))
# Replace every numeric column with its own mean (NAs ignored). Assigning a
# list supplies one value per column, recycled down the rows. Assigning the
# bare colMeans() vector to fill_1[, num_vecs] would instead be recycled
# across all cells in column-major order, mixing the columns' means together:
# fill_1 => data.frame
fill_1[num_vecs] <- lapply(fill_1[num_vecs], mean, na.rm = TRUE)
数据
# Data: test1 => data.frame
# Ten records: a `day` timestamp string, `Rain`, and five SWC_* columns that
# only hold readings in the first three rows.
test1 <- data.frame(
  day = c(
    "01/01/2019 00:00:00", "02/01/2019 00:00:00", "03/01/2019 00:00:00",
    "04/01/2019 00:00:00", "05/01/2019 00:00:00", "06/01/2019 00:00:00",
    "07/01/2019 00:00:00", "08/01/2019 00:00:00", "09/01/2019 00:00:00",
    "10/01/2019 00:00:00"
  ),
  Rain = c(0, 0.2, 0, 0.4, 0, 0, 0, 0, 0, 0),
  SWC_11 = c(51, 51.5, 51.3, NA, NA, NA, NA, NA, NA, NA),
  SWC_12 = c(60, 60.3, 60.3, NA, NA, NA, NA, NA, NA, NA),
  SWC_13 = c(63, 63.4, 63.3, NA, NA, NA, NA, NA, NA, NA),
  SWC_14 = c(60, 60.8, 60.6, NA, NA, NA, NA, NA, NA, NA),
  SWC_21 = c(64, 64.4, 64.1, NA, NA, NA, NA, NA, NA, NA),
  stringsAsFactors = FALSE
)
https://stackoverflow.com/questions/63972855
复制相似问题