文章/答案/技术大牛

发布

社区首页 >问答首页 >R数据中1加法的均值函数

问R数据中1加法的均值函数
EN

Stack Overflow用户

提问于 2018-03-04 07:56:58

回答 1查看 63关注 0票数 0

我有如下数据：

# Sample data for the question, parsed from whitespace-separated text.
# Date1 contains a space, so every value is wrapped in single quotes; each
# opening quote needs its closing quote, otherwise read.table() reads
# everything up to the next quote character as one field.
# NOTE(review): row A-18 carries the time 08:02:84 (seconds > 59). It is kept
# as posted, but it is not a valid clock time -- confirm the intended value.
my_data <- read.table(
  text = "ID     Date1                     T1     Date2     Val1
                      A-1    '2018-01-10 15:05:24'       A    2018-01-15  10
                      A-2    '2018-01-05 14:15:22'       B    2018-01-14  12
                      A-3    '2018-01-04 13:20:21'       A    2018-01-13  15
                      A-4    '2018-01-01 18:35:45'       B    2018-01-12  22
                      A-5    '2017-12-28 19:45:10'       A    2018-01-11  18
                      A-6    '2017-12-10 08:03:29'       A    2018-01-10  21
                      A-7    '2017-12-06 20:55:55'       A    2018-01-09  28
                      A-8    '2018-01-10 10:02:12'       A    2018-01-15  10
                      A-9    '2018-01-05 17:15:14'       B    2018-01-14  12
                      A-10   '2018-01-04 18:35:58'       A    2018-01-13  15
                      A-11   '2018-01-01 21:09:25'       B    2018-01-12  22
                      A-12   '2017-12-28 02:12:22'       A    2018-01-11  18
                      A-13   '2017-12-10 03:45:44'       A    2018-01-10  21
                      A-14   '2017-12-06 07:15:25'       A    2018-01-09  28
                      A-18   '2017-10-07 08:02:84'       B    2017-11-05  20
                      A-21   '2017-10-01 06:04:04'       A    2017-10-20  15
                      A-51   '2017-09-20 08:07:06'       A    2017-09-28  10
                      A-35   '2017-09-14 08:02:45'       A    2017-09-25  20
                      A-30   '2017-08-10 15:03:08'       A    2017-08-30  25",
  header = TRUE,
  stringsAsFactors = FALSE
)

我使用下面的代码来生成按月汇总的输出：

# Grid of (Date1, T1) pairs: one timestamp per month between the earliest and
# latest Date1 in my_data, crossed with both T1 levels. It is later joined to
# the data so that months without observations still appear.
date_range <- local({
  # Parse once instead of once per endpoint.
  parsed_date1 <- ymd_hms(my_data$Date1)
  # na.rm = TRUE: a timestamp that fails to parse becomes NA, and without it
  # both endpoints would be NA and the sequence could not be built.
  span <- range(parsed_date1, na.rm = TRUE)
  expand.grid(
    Date1 = seq(span[1], span[2], by = "1 month"),
    T1 = c("A", "B"),
    stringsAsFactors = FALSE
  )
})

# Build one summary table with a row per month. The first argument to merge()
# summarises the rows with T1 == "A", the second does the same for T1 == "B",
# and the two summaries are combined on Month.
table_2 <- merge( 
  my_data %>% 
    mutate(Date2 = ymd(Date2),
           Date1 = ymd_hms(Date1)) %>% 
    full_join(date_range, by = c("Date1", "T1")) %>% # join date ranges to table
    arrange(Date1) %>% # sort by date
    mutate(Month = paste(month(Date1, label = TRUE), year(Date1), sep = "-"),
           row_number = row_number(), # create row_numbers to keep up order
           # Rows that come only from date_range have no Val1 after the
           # full_join. Turning that NA into 0 leaves sum() unchanged, but
           # mean(Val1) below then counts those rows as observations.
           Val1 = coalesce(Val1, 0L)) %>% # replace NA with 0 in Val1
    filter(T1 == "A") %>%
    group_by(Month) %>% 
    # The count below only includes rows that have a Date2.
    summarise("# of A" = sum(!is.na(Date2)),
              "sum of A" = sum(Val1, na.rm = TRUE),
              # The mean divides by every row in the group, zero-filled rows
              # included, so it need not equal `sum of A` / `# of A`.
              "Mean of A" = mean(Val1, na.rm = TRUE), # compute mean
              # NOTE(review): difftime() is called without `units`, so the
              # unit is picked automatically -- confirm it is the same for
              # every month.
              "Avg Time of A" = round(mean(difftime(Date2, Date1),
                                           na.rm = TRUE), # compute avg time
                                      2),
              row_number = min(row_number)) %>% # get min row number
    arrange(row_number) %>% # sort by row number (to sort months)
    # Month-over-month growth: the row-wise sum of (x, -lag(x)) with
    # na.rm = TRUE is x - lag(x) with a missing lag treated as 0; it is then
    # divided by lag(x).
    # NOTE(review): the count growth stays a fraction while the sum growth is
    # multiplied by 100 -- confirm whether both were meant to be percentages.
    mutate("MOM Growth # of A" = round(apply(cbind(`# of A`, lag(- `# of A`)), 
                                             1, sum, na.rm = TRUE) / lag(`# of A`), 2),
           "MOM Growth sum of A" = round(apply(cbind(`sum of A`, lag(- `sum of A`)), 
                                               1, sum, na.rm = TRUE) / lag(`sum of A`) * 100, 2)) %>% 
    mutate("MOM Growth # of A" = if_else(is.infinite(`MOM Growth # of A`), 100, `MOM Growth # of A`), # replace Inf with 100
           "MOM Growth sum of A" = if_else(is.infinite(`MOM Growth sum of A`), 100, `MOM Growth sum of A`)) %>% 
    select(Month, `# of A`, `MOM Growth # of A`,
           `sum of A`, `MOM Growth sum of A`,
           `Mean of A`, `Avg Time of A`), 
  # Same pipeline as above, restricted to T1 == "B" and with "B" column names.
  my_data %>% 
    mutate(Date2 = ymd(Date2),
           Date1 = ymd_hms(Date1)) %>% 
    full_join(date_range, by = c("Date1", "T1")) %>% 
    arrange(Date1) %>% 
    mutate(Month = paste(month(Date1, label = TRUE), year(Date1), sep = "-"),
           row_number = row_number(),
           Val1 = coalesce(Val1, 0L)) %>% 
    filter(T1 == "B") %>%
    group_by(Month) %>% 
    summarise("# of B" = sum(!is.na(Date2)),
              "sum of B" = sum(Val1, na.rm = TRUE),
              # As for A: zero-filled rows are part of the denominator here.
              "Mean of B" = mean(Val1, na.rm = TRUE),
              "Avg Time of B" = round(mean(difftime(Date2, Date1),
                                           na.rm = TRUE),
                                      2),
              row_number = min(row_number)) %>%
    arrange(row_number) %>% 
    mutate("MOM Growth # of B" = round(apply(cbind(`# of B`, lag(- `# of B`)), 
                                             1, sum, na.rm = TRUE) / lag(`# of B`), 2),
           "MOM Growth sum of B" = round(apply(cbind(`sum of B`, lag(- `sum of B`)), 
                                               1, sum, na.rm = TRUE) / lag(`sum of B`) * 100, 2)) %>% 
    # Replace infinite growth values with 100, as in the A half.
    mutate("MOM Growth # of B" = if_else(is.infinite(`MOM Growth # of B`), 100, `MOM Growth # of B`),
           "MOM Growth sum of B" = if_else(is.infinite(`MOM Growth sum of B`), 100, `MOM Growth sum of B`)) %>% 
    select(Month, `# of B`, `MOM Growth # of B`,
           `sum of B`, `MOM Growth sum of B`,
           `Mean of B`, `Avg Time of B`), 
  by = "Month", 
  all = TRUE,
  sort = FALSE) # do not sort on the key column (Month), to keep month order

在计算完整月份的 Mean of A 和 Mean of B 时，得到的平均值是错误的。例如，如果 # of A 是 10，而 sum of A 是 100，那么平均值应该是 10，但结果却是 9.09。我不清楚原因，但对于所有完整的月份，计算平均值时似乎都会自动给 # of A 加上 1（即 100 / 11 ≈ 9.09）。

dataframe

ggplot2

dplyr

tidyr

回答 1

Stack Overflow用户

回答已采纳

发布于 2018-03-04 09:31:34

您定义的 # of A 并不是 mean 函数实际使用的分母。

因为 Val1 中的缺失值被替换为 0 之后，mean 会把这些被填充的行（原本是 NA）也计入分母，也就是除以该组的全部行数。因此，如果删除 Val1 = coalesce(Val1, 0L)，我相信您会得到想要的结果。

例如，试试这个：

# A zero is a real observation, so it is part of the denominator.
x <- c(3, 4, 6, 8, 9, 0)
mean(x, na.rm = TRUE) # 30 / 6 = 5

然后这个：

# An NA is dropped by na.rm = TRUE, so it is not part of the denominator.
x <- c(3, 4, 6, 8, 9, NA)
mean(x, na.rm = TRUE) # 30 / 5 = 6

编辑：

如果坚持用零替换NAs，也可以这样做来计算平均值：

 # Per-month summary for T1 == "A" that keeps the zero-filled Val1 but derives
 # the mean from the sum and the count instead of calling mean(Val1).
 my_data %>%
   # Parse the two date columns.
   mutate(Date2 = ymd(Date2)) %>%
   mutate(Date1 = ymd_hms(Date1)) %>%
   # Add the monthly grid rows, then order everything chronologically.
   full_join(date_range, by = c("Date1", "T1")) %>%
   arrange(Date1) %>%
   mutate(
     Month = paste(month(Date1, label = TRUE), year(Date1), sep = "-"),
     row_number = row_number(), # remembers the chronological position
     Val1 = coalesce(Val1, 0L) # missing Val1 becomes 0
   ) %>%
   filter(T1 == "A") %>%
   group_by(Month) %>%
   summarise(
     "# of A" = sum(!is.na(Date2)),
     "sum of A" = sum(Val1, na.rm = TRUE),
     "Avg Time of A" = round(
       mean(difftime(Date2, Date1), na.rm = TRUE), # average elapsed time
       2
     ),
     row_number = min(row_number)
   ) %>%
   # Divide by the number of rows that have a Date2, not by the group size.
   mutate("Mean of A" = get('sum of A') / get('# of A'))

注意:我还建议使用没有空格的列名(例如sum_of_A)，以便更容易地访问(我不得不使用get('sum of A')访问mutate中的列)

票数 2

页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持

原文链接：

https://stackoverflow.com/questions/49093248

复制

相似问题

问R数据中1加法的均值函数
EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问R数据中1加法的均值函数EN

回答 1

Stack Overflow用户

社区

活动

圈层

关于

腾讯云开发者

热门产品

热门推荐

更多推荐

问R数据中1加法的均值函数
EN