我希望减少代码行数来对不同的变量进行分组,并在不从基础中删除变量的情况下进行添加。
这就是我所做的(它可以工作,但是代码有点长)
test1
names(test1)
#[1] "id" "DPA_DESPAR"
#[3] "DPA_VALOR" "DPA_ANIO"
#[5] "DPA_CANTON" "DPA_DESCAN"
#[7] "DPA_PROVIN" "DPA_DESPRO"
#...
#[19] "exp_omega" "total_empl"
#...
#[23] "geometry"
ss1= test1 %>%
group_by(DPA_CANTON) %>%
summarise(tot1= sum(total_empl))
ss2 = test1 %>%
group_by(id) %>%
summarise(tot2 = sum(total_empl))
test1 = left_join(test1, ss1, by = 'DPA_CANTON') %>%
left_join(., ss2, by = 'id')
test1[1:10,21:22]
#tot1 tot2
#1 37781 23049
#2 37781 1456
#3 37781 1198
#4 37781 2253
#5 37781 800
#6 37781 1705
#7 37781 615
#8 37781 4663
#9 37781 2042
#10 6593 5022这不起作用
我使用了this
aggregate(test1$total_empl, by=list(Category=test1[c(1,5)]), FUN=sum)
#Error in aggregate.data.frame(as.data.frame(x), ...) :
# arguments must have same length数据
test1 = structure(list(id= c("020150", "020151",
"020153", "020155", "020156", "020157", "020158", "020159", "020160",
"020250", "020251", "020350", "020351", "020353", "020354", "020450",
"020550", "020551", "020552", "020553"), DPA_CANTON = c("0201",
"0201", "0201", "0201", "0201", "0201", "0201", "0201", "0201",
"0202", "0202", "0203", "0203", "0203", "0203", "0204", "0205",
"0205", "0205", "0205"), total_empl= c(23049, 1456,
1198, 2253, 800, 1705, 615, 4663, 2042, 5022, 1571, 2481, 1077,
973, 441, 4689, 4884, 875, 1046, 341)), row.names = c(NA, 20L
), class = "data.frame")发布于 2020-09-27 03:42:11
按照传统,您可以使用mutate而不是summarize。mutate允许您在计算新列的同时保留所有原始数据。
test1 <- test1 %>%
group_by(DPA_CANTON) %>%
mutate(tot1 = sum(total_empl)) %>%
group_by(id) %>%
mutate(tot2 = sum(total_empl))发布于 2020-09-27 03:37:23
你可以坚持使用base
transform(
test1,
tot1 = ave(total_empl, DPA_CANTON, FUN = sum),
tot2 = ave(total_empl, id, FUN = sum)
)输出:
id DPA_CANTON total_empl tot1 tot2
1 020150 0201 23049 37781 23049
2 020151 0201 1456 37781 1456
3 020153 0201 1198 37781 1198
4 020155 0201 2253 37781 2253
5 020156 0201 800 37781 800
6 020157 0201 1705 37781 1705
7 020158 0201 615 37781 615
8 020159 0201 4663 37781 4663
9 020160 0201 2042 37781 2042
10 020250 0202 5022 6593 5022
11 020251 0202 1571 6593 1571
12 020350 0203 2481 4972 2481
13 020351 0203 1077 4972 1077
14 020353 0203 973 4972 973
15 020354 0203 441 4972 441
16 020450 0204 4689 4689 4689
17 020550 0205 4884 7146 4884
18 020551 0205 875 7146 875
19 020552 0205 1046 7146 1046
20 020553 0205 341 7146 341发布于 2020-09-27 04:36:03
如果您打开使用data.table,则会产生相同的结果:
library(data.table)
setDT(test1)
test1[, tot1 := sum(total_empl), by = DPA_CANTON][, tot2 := sum(total_empl), by = id]
test1[1:10, c("tot1", "tot2")]https://stackoverflow.com/questions/64081649
复制相似问题