我有按ID值组织的数据。每个ID值最多有1-6行数据。我想要计算每个ID值的总体积和平均体积,然后将它们强制放到一行中。
以下是数据:
> dput(head(COMPLETED_DATASET_allsizes, n = 150))
structure(list(PVC = c("6,056,589", "6,056,589", "6,056,589",
"6,013,925", "6,013,925", "6,013,925", "6,034,050", "6,034,050",
"6,034,050", "6070852", "6070852", "6070852", "6070862", "6014535",
"6014326", "6013176", "6013176", "6013176", "AF6", "AF6", "AF6",
"6019049", "6019049", "6008710", "6008710", "6013198", "601321_",
"AF600", "AF600", "AF600", "6020617", "6020617", "6020617", "6008806",
"6008806", "6021296", "6021296", "6021296", "6014829", "6014829",
"6013907", "6013907", "6013907", "601404_", "601404_", "6013766",
"6013766", "6013766", "6034421", "6034421", "6034421", "20F",
"ZHD", "ZHD", "X11", "X11", "28R", "28R", "37V", "UVX", "37V",
"HY3", "HY3", "6,014,837", "ZWJ", "ZWJ", "ZWJ", "BX4", "BX4",
"BX4", "BHD", "BHD", "BHD", "ACX", "ACX", "ACX", "XXS", "XXS",
"XXS", "6,005,684", "6,005,684", "6,005,684", "BHX", "BHX", "BHX",
"SP3", "SP3", "SP3", "B1A", "B1A", "B1A", "B5F", "B5F", "B5F",
"BJN", "BJN", "BJN", "AW5", "AW5", "AW5", "HNU", "HNU", "HNU",
"BSP", "BSP", "BSP", "FW2", "FW2", "ANB", "ANB", "ANB", "SJM",
"SJM", "FSR", "FSR", "FSR", "UYF", "UYF", "2HZ", "2HZ", "F1X",
"F1X", "FFR", "FFR", "FFR", "HX1", "HX1", "SUL", "SUL", "SF0",
"SF0", "SF0", "SHJ", "SHJ", "SHJ", "HRJ", "HRJ", "AP8", "AP8",
"J4H", "J4H", "XLV", "XLV", "XLV", "H94", "H94", "S2Y", "S2Y",
"S2Y", "UU9"), Egg_order = c("1", "2", "3", "1", "2", "3", "1",
"2", "3", "9", "9", "9", "9", "9", "9", "1", "2", "3", "1", "2",
"3", "1", "2", "9", "9", "1", "1", "9", "9", "9", "1", "2", "3",
"9", "9", "9", "9", "9", "1", "2", "9", "9", "9", "1", "2", "9",
"9", "9", "9", "9", "9", "1", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "9", "9", "1", "2", "3", "3", "2",
"1", "9", "9", "3", "9", "3", "9", "1", "2", "3", "9", "9", "9",
"9", "9", "3", "3", "9", "9", "9", "9", "9", "9", "9", "3", "1",
"2", "3", "3", "9", "9", "9", "9", "9", "2", "1", "9", "9", "9",
"1", "2", "9", "9", "3", "9", "9", "1", "2", "2", "1", "9", "9",
"9", "9", "9", "1", "2", "9", "9", "3", "2", "3", "1", "9", "9",
"1", "2", "2", "1", "1", "2", "3", "9", "9", "9", "9", "3", "3"
), Volume = c(62.00468911, 63.19034545, 64.16343369, 62.77290704,
62.64955122, 56.63460056, 49.49860059, 57.88686491, 55.4345925,
57.89358963, 58.12847589, 51.49938302, 58.49927302, 57.34171155,
54.93179064, 64.56208923, 62.27948266, 56.38218371, 60.06696521,
62.21206032, 62.90496759, 59.13580929, 57.13564917, 59.70944308,
55.19527717, 64.84888743, 67.34476853, 65.0132184, 59.66593229,
62.01691353, 55.27413286, 54.68206904, 54.3717803, 57.36582528,
60.15748095, 55.06099115, 59.07830469, 56.82857152, 62.76590186,
55.36801363, 71.70516332, 65.4342034, 59.83429871, 59.18484744,
57.34744503, 56.11336458, 65.73279116, 57.95890035, 55.09798577,
58.94565679, 59.97265877, 58.93118052, 67.62983812, 62.45908065,
61.78370028, 60.67968894, 61.49240694, 60.85435534, 57.82379732,
52.64023837, 53.7619782, 65.19952241, 62.84622159, 66.40863935,
57.62326609, 53.89348305, 47.2993185, 61.48633419, 66.30863861,
58.42476707, 53.26130145, 63.20292711, 62.49620272, 61.32741787,
62.72989825, 51.11998856, 62.26934629, 58.56945328, 60.3546269,
55.01982539, 62.3489285, 53.77593518, 56.49758306, 63.53920939,
64.94851437, 62.25274976, 64.39375777, 54.22321067, 51.28730416,
58.31060124, 55.25169993, 58.25732223, 57.18221296, 56.44838126,
53.77395184, 57.43319075, 52.95671938, 61.9705976, 57.10131146,
54.57651784, 59.66268577, 62.44376038, 59.27393481, 55.46520431,
54.98693888, 64.79370574, 55.48500646, 54.60260849, 64.98274004,
63.29243525, 61.18560498, 26.13699345, 23.31596147, 64.26879934,
65.30612915, 57.39204739, 57.32271224, 53.93674759, 60.36058271,
55.62742343, 57.91773772, 61.03729104, 54.03165992, 60.84743965,
59.86017015, 59.90175385, 56.49922666, 63.50802185, 65.16800489,
64.94391206, 67.2387067, 59.05588275, 57.91108456, 54.43705009,
59.39818285, 63.9228549, 56.78342653, 54.58236138, 60.47677836,
60.82957858, 60.51827256, 58.34245187, 57.28814624, 54.81098523,
54.26852328, 52.43400015, 57.57770424, 60.09658763, 56.22640525,
59.19068945), Clutch = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L,
1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L,
3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 3L), Year = c(1996L,
1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1996L, 1997L,
1997L, 1997L, 1998L, 1998L, 1998L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L, 1999L,
1999L, 1999L, 1999L, 1999L, 1999L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L), egg_status = c("complete",
"complete", "complete", "complete", "complete", "complete", "complete",
"complete", "complete", "none", "none", "none", "none", "none",
"none", "complete", "complete", "complete", "complete", "complete",
"complete", "complete", "complete", "none", "none", "complete",
"complete", "none", "none", "none", "complete", "complete", "complete",
"none", "none", "none", "none", "none", "complete", "complete",
"none", "none", "none", "complete", "complete", "none", "none",
"none", "none", "none", "none", "complete", "none", "none", "none",
"none", "none", "none", "none", "none", "none", "none", "none",
"none", "none", "none", "none", "complete", "complete", "complete",
"complete", "complete", "complete", "estimated", "estimated",
"estimated", "estimated", "estimated", "estimated", "complete",
"complete", "complete", "none", "none", "none", "estimated",
"estimated", "estimated", "estimated", "estimated", "estimated",
"none", "none", "none", "estimated", "estimated", "estimated",
"complete", "complete", "complete", "estimated", "estimated",
"estimated", "none", "none", "none", "complete", "complete",
"none", "none", "none", "complete", "complete", "estimated",
"estimated", "estimated", "none", "none", "complete", "complete",
"complete", "complete", "none", "none", "none", "none", "none",
"complete", "complete", "estimated", "estimated", "estimated",
"complete", "complete", "complete", "none", "none", "complete",
"complete", "complete", "complete", "complete", "complete", "complete",
"none", "none", "estimated", "estimated", "estimated", "complete"
), estimated_Egg_order = c("1", "2", "3", "1", "2", "3", "1",
"2", "3", "9", "9", "9", "9", "9", "9", "1", "2", "3", "1", "2",
"3", "1", "2", "9", "9", "1", "1", "9", "9", "9", "1", "2", "3",
"9", "9", "9", "9", "9", "1", "2", "9", "9", "9", "1", "2", "9",
"9", "9", "9", "9", "9", "1", "9", "9", "9", "9", "9", "9", "9",
"9", "9", "9", "9", "9", "9", "9", "9", "1", "2", "3", "3", "2",
"1", "1", "2", "3", "1", "3", "2", "1", "2", "3", "9", "9", "9",
"1", "2", "3", "3", "1", "2", "9", "9", "9", "1", "2", "3", "1",
"2", "3", "3", "1", "2", "9", "9", "9", "2", "1", "9", "9", "9",
"1", "2", "1", "2", "3", "9", "9", "1", "2", "2", "1", "9", "9",
"9", "9", "9", "1", "2", "1", "2", "3", "2", "3", "1", "9", "9",
"1", "2", "2", "1", "1", "2", "3", "9", "9", "1", "2", "3", "3"
), Edat = c(7L, 7L, 7L, 9L, 9L, 9L, 4L, 4L, 4L, 6L, 6L, 6L, 7L,
2L, 1L, 13L, 13L, 13L, 11L, 11L, 11L, 12L, 12L, 14L, 14L, 13L,
13L, 11L, 11L, 11L, 12L, 12L, 12L, 14L, 14L, 9L, 9L, 9L, 13L,
13L, 12L, 12L, 12L, 13L, 13L, 12L, 12L, 12L, 9L, 9L, 9L, 4L,
5L, 5L, 6L, 6L, 4L, 4L, 3L, 7L, 3L, 9L, 9L, 15L, 5L, 5L, 5L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 5L, 5L, 5L, 16L,
16L, 16L, 13L, 13L, 13L, 8L, 8L, 8L, 13L, 13L, 13L, 9L, 9L, 9L,
13L, 13L, 13L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 13L,
13L, 13L, 8L, 8L, 10L, 10L, 10L, 7L, 7L, 4L, 4L, 13L, 13L, 9L,
9L, 9L, 12L, 12L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 11L,
11L, 11L, 11L, 6L, 6L, 6L, 10L, 10L, 8L, 8L, 8L, 7L), ID = c(378L,
378L, 378L, 362L, 362L, 362L, 370L, 370L, 370L, 383L, 383L, 383L,
434L, 426L, 416L, 511L, 511L, 511L, 499L, 499L, 499L, 459L, 459L,
458L, 458L, 521L, 532L, 501L, 501L, 501L, 470L, 470L, 470L, 500L,
500L, 481L, 481L, 481L, 576L, 576L, 554L, 554L, 554L, 565L, 565L,
543L, 543L, 543L, 492L, 492L, 492L, 695L, 722L, 722L, 706L, 706L,
800L, 800L, 734L, 700L, 734L, 802L, 802L, 737L, 727L, 727L, 727L,
771L, 771L, 771L, 763L, 763L, 763L, 742L, 742L, 742L, 715L, 715L,
715L, 735L, 735L, 735L, 764L, 764L, 764L, 815L, 815L, 815L, 754L,
754L, 754L, 759L, 759L, 759L, 765L, 765L, 765L, 752L, 752L, 752L,
791L, 791L, 791L, 769L, 769L, 769L, 785L, 785L, 748L, 748L, 748L,
814L, 814L, 781L, 781L, 781L, 702L, 702L, 732L, 732L, 774L, 774L,
777L, 777L, 777L, 799L, 799L, 817L, 817L, 810L, 810L, 810L, 813L,
813L, 813L, 795L, 795L, 749L, 749L, 803L, 803L, 712L, 712L, 712L,
787L, 787L, 807L, 807L, 807L, 697L)), row.names = c(NA, 150L), class = "data.frame")这是一张我希望它会是什么样子的图片。这张照片没有ID值,但是如果每行都是PVC值,则为图像。

谢谢你的帮助!
发布于 2022-08-02 13:15:57
一种dplyr解
library(tidyverse)
df %>%
group_by(PVC, Year, ID) %>%
summarise(totalV = sum(Volume),
averageV = mean(Volume, na.rm = TRUE),
Clutch = last(Clutch),
Edat = last(Edat), .groups = "drop")
# A tibble: 63 x 7
PVC Year ID totalV averageV Clutch Edat
<chr> <int> <int> <dbl> <dbl> <int> <int>
1 20F 2001 695 58.9 58.9 1 4
2 28R 2001 800 122. 61.2 2 4
3 2HZ 2001 732 116. 58.0 2 4
4 37V 2001 734 112. 55.8 2 3
5 6,005,684 2001 735 171. 57.0 3 16
6 6,013,925 1996 362 182. 60.7 3 9
7 6,014,837 2001 737 66.4 66.4 1 15
8 6,034,050 1996 370 163. 54.3 3 4
9 6,056,589 1996 378 189. 63.1 3 7
10 6008710 1999 458 115. 57.5 2 14
# ... with 53 more rows发布于 2022-08-02 13:12:13
我们可以用data.table在一行中这样做,按PVC分组,得到体积之和或平均值。我假设一年,离合器和埃达特,所有相同的PVC行有相同的值,所以我们可以使用最大值或分钟。
library(data.table)
dt <- data.table(COMPLETED_DATASET_allsizes)
dt1 <- dt[,.(totalV = sum(Volume), averageV = mean(Volume), Year = max(Year), Clutch = max(Clutch), Edat = max(Edat)), by = PVC]
> head(dt1)
PVC totalV averageV Year Clutch Edat
1: 6,056,589 189.35847 63.11949 1996 3 7
2: 6,013,925 182.05706 60.68569 1996 3 9
3: 6,034,050 162.82006 54.27335 1996 3 4
4: 6070852 167.52145 55.84048 1997 3 6
5: 6070862 58.49927 58.49927 1998 1 7
6: 6014535 57.34171 57.34171 1998 1 2发布于 2022-08-02 13:14:39
Dplyr溶液我按ID、PVC、Year、Clutch和Edat分组,因为它们都在您的新data.frame中。我假设ID是这个组合的标识符?
#group by and summarise
dt %>% group_by(ID, PVC, Year, Clutch, Edat) %>%
summarise(averageV = mean((Volume)),
totalV = sum(Volume)) %>% ungroup() |> head()
ID PVC Year Clutch Edat averageV totalV
<int> <chr> <int> <int> <int> <dbl> <dbl>
1 362 6,013,925 1996 3 9 60.7 182.
2 370 6,034,050 1996 3 4 54.3 163.
3 378 6,056,589 1996 3 7 63.1 189.
4 383 6070852 1997 3 6 55.8 168.
5 416 6014326 1998 1 1 54.9 54.9
6 426 6014535 1998 1 2 57.3 57.3https://stackoverflow.com/questions/73208049
复制相似问题