首页
学习
活动
专区
圈层
工具
发布
社区首页 >问答首页 >将多个DPLYR命令转换为一个DPLYR命令

将多个DPLYR命令转换为一个DPLYR命令
EN

Stack Overflow用户
提问于 2022-03-05 04:09:43
回答 2查看 177关注 0票数 0

我正在使用R编程语言。

我有以下数据集("my_data"):

代码语言:javascript
复制
# Example dataset for the question: 50 rows holding an id (idd), a two-level
# label (group_1: "A"/"B"), a numeric value (v1) and a counter cycling through
# 1..10 (group_2). The dput() literal is bound to `my_data` so that the code
# below, which refers to `my_data`, can actually run.
my_data <- structure(list(idd = 1:50, group_1 = c("B", "B", "A", "B", "B", 
"A", "A", "A", "B", "A", "A", "B", "B", "B", "A", "A", "A", "A", 
"B", "B", "A", "B", "A", "B", "A", "B", "B", "A", "B", "B", "B", 
"A", "B", "A", "B", "B", "A", "A", "A", "A", "A", "B", "B", "B", 
"A", "B", "B", "B", "B", "B"), v1 = c(15.7296737049317, -4.33377704672207, 
-0.551850185265, 2.66888122578048, 12.109072642513, 0.0107927293899017, 
20.7785032320562, -1.98974382507874, 12.1663703518471, 11.4308702978893, 
-0.657500910529805, 5.71376589298221, 3.43820523228653, 19.5939432685761, 
25.5605263610222, -0.407964337882465, 19.3057240854025, 9.24554068987809, 
-9.6719534905096, 2.44096357354807, 14.6114916050676, 11.4510663104787, 
-14.4231132108142, 15.8031868545157, 16.5505199848675, 6.95491162740581, 
2.92431767382703, 29.7157201447823, 9.10001319352251, 9.85982748068076, 
-1.23456937110154, -3.44130123376206, -5.23155771062088, 5.78031789617826, 
23.6092446408098, 27.5379484533487, 25.6836473435279, 22.9675556994775, 
7.62403748556388, -2.24150135680706, 6.72187319859928, -14.1245027627225, 
6.8620712655661, 26.5987870464572, 11.3095310060752, 20.9588868268958, 
14.8934095694391, 2.21089704551347, 27.4355935292935, 9.21612714668934
), group_2 = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 
8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L)), row.names = c(NA, -50L), class = "data.frame")

# Print the first 12 rows, matching the listing shown below; plain
# head(my_data) would stop after 6 rows.
head(my_data, 12)

   idd group_1          v1 group_2
1    1       B 15.72967370       1
2    2       B -4.33377705       2
3    3       A -0.55185019       3
4    4       B  2.66888123       4
5    5       B 12.10907264       5
6    6       A  0.01079273       6
7    7       A 20.77850323       7
8    8       A -1.98974383       8
9    9       B 12.16637035       9
10  10       A 11.43087030      10
11  11       A -0.65750091       1
12  12       B  5.71376589       2

对于此数据集,我希望在“dplyr”中执行以下步骤:

  • 对于每 10 行组成的一个分组,分别计算 group_1 = "A" 和 group_1 = "B" 的 v1 之和;
  • 为每个这样的分组创建一个新变量("v2"):如果 sum(group_1 = A) > sum(group_1 = B),取 "A";如果 sum(group_1 = A) < sum(group_1 = B),取 "B";如果 sum(group_1 = A) = sum(group_1 = B),取 "0"。

我知道如何在R中手动完成:

代码语言:javascript
复制
library(dplyr)

# Helper: total v1 per group_1 level within one slice, returned as a plain
# data.frame with columns group_1 and sum.
sum_by_label <- function(slice) {
  totals <- summarize(group_by(slice, group_1), sum = sum(v1))
  data.frame(totals)
}

# Helper: add the v2 column to a totals table by comparing the total in row 1
# with the total in row 2. The comparison is positional; rows 1 and 2 are
# presumably the "A" and "B" totals (verify the row order if group_1 ever has
# other levels).
attach_winner <- function(totals) {
  first_total <- totals[1, 2]
  second_total <- totals[2, 2]
  totals$v2 <- ifelse(
    first_total > second_total,
    "A",
    ifelse(first_total < second_total, "B", 0)
  )
  totals
}

# STEP 1: my_data has 50 rows; take five consecutive slices of 10 rows each.

rows_1 <- my_data[1:10, ]
rows_2 <- my_data[11:20, ]
rows_3 <- my_data[21:30, ]
rows_4 <- my_data[31:40, ]
rows_5 <- my_data[41:50, ]

# STEP 2: work out the value of "v2" for every slice.

dplyr_row_1 <- attach_winner(sum_by_label(rows_1))
dplyr_row_2 <- attach_winner(sum_by_label(rows_2))
dplyr_row_3 <- attach_winner(sum_by_label(rows_3))
dplyr_row_4 <- attach_winner(sum_by_label(rows_4))
dplyr_row_5 <- attach_winner(sum_by_label(rows_5))

# STEP 3: copy "v2" onto each slice. The totals table has one row per group_1
# level, so its v2 vector is shorter than the slice and gets recycled.

rows_1$v2 <- dplyr_row_1$v2
rows_2$v2 <- dplyr_row_2$v2
rows_3$v2 <- dplyr_row_3$v2
rows_4$v2 <- dplyr_row_4$v2
rows_5$v2 <- dplyr_row_5$v2

# STEP 4: stack the five labelled slices back into one data frame.

final_file <- rbind(rows_1, rows_2, rows_3, rows_4, rows_5)

因此,最终文件如下所示:

代码语言:javascript
复制
  idd group_1          v1 group_2 v2
1    1       B 15.72967370       1  B
2    2       B -4.33377705       2  B
3    3       A -0.55185019       3  B
4    4       B  2.66888123       4  B
5    5       B 12.10907264       5  B
6    6       A  0.01079273       6  B
7    7       A 20.77850323       7  B
8    8       A -1.98974383       8  B
9    9       B 12.16637035       9  B
10  10       A 11.43087030      10  B
11  11       A -0.65750091       1  A

我的问题:能否告诉我如何用一条 "dplyr" 命令完成步骤 1 到步骤 4?

谢谢!

EN

回答 2

Stack Overflow用户

回答已采纳

发布于 2022-03-05 07:23:52

这是另一种方法。

代码语言:javascript
复制
library(tidyverse)

# Label every block of 10 consecutive rows of my_data (the dataset from the
# question) with the group_1 level whose v1 total is larger: "A" or "B", or
# "0" when the two totals are not strictly ordered (e.g. a tie).
my_data %>%
  # Block id from the row position: rows 1-10 -> 1, rows 11-20 -> 2, ...
  # Unlike rep(1:(n() / 10), each = 10), this also works when the number of
  # rows is not a multiple of 10 (the last block is simply shorter).
  mutate(group_index = (row_number() - 1) %/% 10 + 1) %>%
  group_by(group_index) %>%
  mutate(
    v2 = case_when(
      sum(v1[group_1 == "A"]) > sum(v1[group_1 == "B"]) ~ "A",
      sum(v1[group_1 == "A"]) < sum(v1[group_1 == "B"]) ~ "B",
      TRUE ~ "0"
    )
  ) %>%
  # Drop the grouping and the helper column so the result has exactly the
  # columns idd, group_1, v1, group_2 and v2, and later verbs are not
  # silently applied per block.
  ungroup() %>%
  select(-group_index)
票数 1
EN

Stack Overflow用户

发布于 2022-03-05 06:53:28

首先,我会创建一个 group_index 列:

  • 每 10 行分为一组。

  • 然后按相关列(group_index 和 group_1)进行 group_by 并计算 v1 的总和 sum。

  • 移除 group_1 这一层分组,因为我们需要比较 A 和 B 的值:如果 sum 的唯一值个数等于 1,说明两者相同,此时在 v2 列中填入 "0";如果不相同,则输出 sum 最大的那个 group_1 类别。

  • 最后,删除 sum 列(以及辅助列 group_index),并按 idd 排序。

该方法同样适用于 group_1 中类别多于两个的情况。

例如,前20行如下所示。

代码语言:javascript
复制
library(tidyverse)

# For every block of 10 consecutive rows of my_data (the dataset from the
# question), set v2 to the group_1 level with the largest v1 total, or to "0"
# when all totals in the block are equal. Handles any number of group_1
# levels.
my_data %>%
  # Block id from the row position (rows 1-10 -> 1, rows 11-20 -> 2, ...);
  # does not require the row count to be a multiple of 10.
  mutate(group_index = (row_number() - 1) %/% 10 + 1) %>%
  # Per block and per group_1 level: total of v1, repeated on every row.
  group_by(group_index, group_1) %>%
  mutate(sum = sum(v1)) %>%
  # Regroup by block only, so the totals of all levels can be compared.
  group_by(group_index) %>%
  mutate(
    # Plain if/else because the condition is a single value per block. The
    # tie marker is the string "0", not the number 0, so v2 is character in
    # every block; mixing numeric and character results across groups is
    # expected to make mutate() fail.
    v2 = if (length(unique(sum)) == 1) "0" else group_1[which.max(sum)]
  ) %>%
  ungroup() %>%
  select(-c(sum, group_index))

# A tibble: 20 x 5
     idd group_1      v1 group_2 v2   
   <int> <chr>     <dbl>   <int> <chr>
 1     1 B       15.7          1 B    
 2     2 B       -4.33         2 B    
 3     3 A       -0.552        3 B    
 4     4 B        2.67         4 B    
 5     5 B       12.1          5 B    
 6     6 A        0.0108       6 B    
 7     7 A       20.8          7 B    
 8     8 A       -1.99         8 B    
 9     9 B       12.2          9 B    
10    10 A       11.4         10 B    
11    11 A       -0.658        1 A    
12    12 B        5.71         2 A    
13    13 B        3.44         3 A    
14    14 B       19.6          4 A    
15    15 A       25.6          5 A    
16    16 A       -0.408        6 A    
17    17 A       19.3          7 A    
18    18 A        9.25         8 A    
19    19 B       -9.67         9 A    
20    20 B        2.44        10 A    
票数 1
EN
页面原文内容由Stack Overflow提供。腾讯云小微IT领域专用引擎提供翻译支持
原文链接:

https://stackoverflow.com/questions/71359421

复制
相关文章

相似问题

领券
问题归档专栏文章快讯文章归档关键词归档开发者手册归档开发者手册 Section 归档