我的数据集示例:
tree=structure(list(vyd = c(108L, 108L, 108L, 108L, 108L, 108L, 108L,
108L, 108L, 108L, 108L, 108L, 108L), date = c("08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018"), row = c(3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L), col = c(25L, 26L, 27L, 28L, 25L, 26L, 27L, 28L,
29L, 30L, 25L, 26L, 27L), B1 = c(10987, 10987, 10987, 10987,
11077, 11077, 11077, 11077, 10802, 10802, 11077, 11077, 11077
), B2 = c(10368, 10336, 10400, 10472, 10272, 10312, 10368, 10408,
10296, 10208, 10192, 10216, 10344), B3 = c(9584, 9496, 9520,
9456, 9520, 9520, 9496, 9384, 9528, 9304, 9624, 9568, 9464),
B4 = c(10136, 9920, 9904, 9936, 10000, 9792, 9824, 9896,
9712, 9592, 9904, 9904, 9856), B5 = c(10463, 10463, 10472,
10472, 10471, 10471, 10359, 10359, 10162, 9978, 10471, 10471,
10359), B6 = c(10173, 10173, 9980, 9980, 10114, 10114, 10036,
10036, 9866, 9553, 10114, 10114, 10036), B7 = c(9886, 9886,
9733, 9733, 9851, 9851, 9703, 9703, 9504, 9266, 9851, 9851,
9703), B8 = c(10456, 10416, 10528, 10416, 10432, 10576, 10592,
10384, 10432, 10184, 10528, 10664, 10592), B8A = c(9814,
9814, 9592, 9592, 9796, 9796, 9598, 9598, 9283, 9017, 9796,
9796, 9598), B9 = c(13463, 13463, 13463, 13463, 13689, 13689,
13689, 13689, 13254, 13254, 13689, 13689, 13689), B10 = c(7416,
7416, 7323, 7323, 7373, 7373, 7271, 7271, 7072, 6961, 7373,
7373, 7271), B11 = c(6244, 6244, 6057, 6057, 6148, 6148,
6003, 6003, 5790, 5742, 6148, 6148, 6003), B12 = c(1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Y = c("5E3C2B+OC", "5E3C2B+OC",
"5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC",
"5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC", "5E3C2B+OC",
"5E3C2B+OC")), class = "data.frame", row.names = c(NA, -13L
))这里,Y变量具有复合值,例如5E3C2B+OC。
如何对数据进行重构,使每个复合值具有相同的独立数据集,并且复合值本身成为百分比?
例如,这里的5E,3C,2B (加号之后的所有内容,我们从来没有碰过) 5E=50%E ,3C=30%C,2B=20%。
因此,这个数据集应该重复三次,其中两个新列一起添加--字母组件及其百分比组件。嗯,例如,它看起来是这样的(为了清晰起见,略为缩短)。
vyd date row col B1 B2 B3 B4 B5 B6 B7 B8 B8A B9 B10 B11 B12 Y Letter perc
108 08.01.2018 3 25 10987.0 10368.0 9584.0 10136.0 10463.0 10173.0 9886.0 10456.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС E 50
108 08.01.2018 3 26 10987.0 10336.0 9496.0 9920.0 10463.0 10173.0 9886.0 10416.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС E 50
………………………………………………………………………………………………………………………………………………………………………………………………………………………………. ….. NNN
108 08.01.2018 3 25 10987.0 10368.0 9584.0 10136.0 10463.0 10173.0 9886.0 10456.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС C 30
108 08.01.2018 3 26 10987.0 10336.0 9496.0 9920.0 10463.0 10173.0 9886.0 10416.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС C 30
………………………………………………………………………………………………………………………………………………………………………………………………………………………………. ….. NNN
108 08.01.2018 3 25 10987.0 10368.0 9584.0 10136.0 10463.0 10173.0 9886.0 10456.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС B 20
108 08.01.2018 3 26 10987.0 10336.0 9496.0 9920.0 10463.0 10173.0 9886.0 10416.0 9814.0 13463.0 7416.0 6244.0 1.0 5Е3С2B+ОС B 20或通过dput():获得期望的结果
Desired_result=structure(list(vyd = c(108L, 108L, 108L, 108L, 108L, 108L, 108L,
108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L,
108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L,
108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L, 108L),
date = c("08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018",
"08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018", "08.01.2018"
), row = c(3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L), col = c(25L,
26L, 27L, 28L, 25L, 26L, 27L, 28L, 29L, 30L, 25L, 26L, 27L,
25L, 26L, 27L, 28L, 25L, 26L, 27L, 28L, 29L, 30L, 25L, 26L,
27L, 25L, 26L, 27L, 28L, 25L, 26L, 27L, 28L, 29L, 30L, 25L,
26L, 27L), B1 = c(10987, 10987, 10987, 10987, 11077, 11077,
11077, 11077, 10802, 10802, 11077, 11077, 11077, 10987, 10987,
10987, 10987, 11077, 11077, 11077, 11077, 10802, 10802, 11077,
11077, 11077, 10987, 10987, 10987, 10987, 11077, 11077, 11077,
11077, 10802, 10802, 11077, 11077, 11077), B2 = c(10368,
10336, 10400, 10472, 10272, 10312, 10368, 10408, 10296, 10208,
10192, 10216, 10344, 10368, 10336, 10400, 10472, 10272, 10312,
10368, 10408, 10296, 10208, 10192, 10216, 10344, 10368, 10336,
10400, 10472, 10272, 10312, 10368, 10408, 10296, 10208, 10192,
10216, 10344), B3 = c(9584, 9496, 9520, 9456, 9520, 9520,
9496, 9384, 9528, 9304, 9624, 9568, 9464, 9584, 9496, 9520,
9456, 9520, 9520, 9496, 9384, 9528, 9304, 9624, 9568, 9464,
9584, 9496, 9520, 9456, 9520, 9520, 9496, 9384, 9528, 9304,
9624, 9568, 9464), B4 = c(10136, 9920, 9904, 9936, 10000,
9792, 9824, 9896, 9712, 9592, 9904, 9904, 9856, 10136, 9920,
9904, 9936, 10000, 9792, 9824, 9896, 9712, 9592, 9904, 9904,
9856, 10136, 9920, 9904, 9936, 10000, 9792, 9824, 9896, 9712,
9592, 9904, 9904, 9856), B5 = c(10463, 10463, 10472, 10472,
10471, 10471, 10359, 10359, 10162, 9978, 10471, 10471, 10359,
10463, 10463, 10472, 10472, 10471, 10471, 10359, 10359, 10162,
9978, 10471, 10471, 10359, 10463, 10463, 10472, 10472, 10471,
10471, 10359, 10359, 10162, 9978, 10471, 10471, 10359), B6 = c(10173,
10173, 9980, 9980, 10114, 10114, 10036, 10036, 9866, 9553,
10114, 10114, 10036, 10173, 10173, 9980, 9980, 10114, 10114,
10036, 10036, 9866, 9553, 10114, 10114, 10036, 10173, 10173,
9980, 9980, 10114, 10114, 10036, 10036, 9866, 9553, 10114,
10114, 10036), B7 = c(9886, 9886, 9733, 9733, 9851, 9851,
9703, 9703, 9504, 9266, 9851, 9851, 9703, 9886, 9886, 9733,
9733, 9851, 9851, 9703, 9703, 9504, 9266, 9851, 9851, 9703,
9886, 9886, 9733, 9733, 9851, 9851, 9703, 9703, 9504, 9266,
9851, 9851, 9703), B8 = c(10456, 10416, 10528, 10416, 10432,
10576, 10592, 10384, 10432, 10184, 10528, 10664, 10592, 10456,
10416, 10528, 10416, 10432, 10576, 10592, 10384, 10432, 10184,
10528, 10664, 10592, 10456, 10416, 10528, 10416, 10432, 10576,
10592, 10384, 10432, 10184, 10528, 10664, 10592), B8A = c(9814,
9814, 9592, 9592, 9796, 9796, 9598, 9598, 9283, 9017, 9796,
9796, 9598, 9814, 9814, 9592, 9592, 9796, 9796, 9598, 9598,
9283, 9017, 9796, 9796, 9598, 9814, 9814, 9592, 9592, 9796,
9796, 9598, 9598, 9283, 9017, 9796, 9796, 9598), B9 = c(13463,
13463, 13463, 13463, 13689, 13689, 13689, 13689, 13254, 13254,
13689, 13689, 13689, 13463, 13463, 13463, 13463, 13689, 13689,
13689, 13689, 13254, 13254, 13689, 13689, 13689, 13463, 13463,
13463, 13463, 13689, 13689, 13689, 13689, 13254, 13254, 13689,
13689, 13689), B10 = c(7416, 7416, 7323, 7323, 7373, 7373,
7271, 7271, 7072, 6961, 7373, 7373, 7271, 7416, 7416, 7323,
7323, 7373, 7373, 7271, 7271, 7072, 6961, 7373, 7373, 7271,
7416, 7416, 7323, 7323, 7373, 7373, 7271, 7271, 7072, 6961,
7373, 7373, 7271), B11 = c(6244, 6244, 6057, 6057, 6148,
6148, 6003, 6003, 5790, 5742, 6148, 6148, 6003, 6244, 6244,
6057, 6057, 6148, 6148, 6003, 6003, 5790, 5742, 6148, 6148,
6003, 6244, 6244, 6057, 6057, 6148, 6148, 6003, 6003, 5790,
5742, 6148, 6148, 6003), B12 = c(1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Y = c("5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B", "5E3C2B",
"5E3C2B"), Letter = c("E", "E", "E", "E", "E", "E", "E",
"E", "E", "E", "E", "E", "E", "C", "C", "C", "C", "C", "C",
"C", "C", "C", "C", "C", "C", "C", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B", "B"), perc = c(50L, 50L,
50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L)), class = "data.frame", row.names = c(NA, -39L))如果有其他包含其他复合值的行,则对它们执行相同的操作。例如,如果行出现在4o6b中的Y中,那么两个字母O= 40%和B=60%的列将按照与我前面描述的相同的原则显示。(即用不同的字母复制数据集2次)
如何对数据进行这样的改革?
发布于 2022-02-03 14:21:03
你可以试试tidyverse。向右边添加三个新列
library(tidyverse)
tree %>%
separate(Y, into = c("E", "C", "B"), sep=c(1,3,5), remove = F) %>%
mutate(across(c(C, B), ~str_remove_all(., "[A-Z]"))) %>%
mutate(across(c(E, C, B), ~as.numeric(.)*10))
vyd date row col B1 B2 B3 B4 B5 B6 B7 B8 B8A B9 B10 B11 B12 Y E C B
1 108 08.01.2018 3 25 10987 10368 9584 10136 10463 10173 9886 10456 9814 13463 7416 6244 1 5E3C2B+OC 50 30 20
2 108 08.01.2018 3 26 10987 10336 9496 9920 10463 10173 9886 10416 9814 13463 7416 6244 1 5E3C2B+OC 50 30 20
3 108 08.01.2018 3 27 10987 10400 9520 9904 10472 9980 9733 10528 9592 13463 7323 6057 1 5E3C2B+OC 50 30 20
4 108 08.01.2018 3 28 10987 10472 9456 9936 10472 9980 9733 10416 9592 13463 7323 6057 1 5E3C2B+OC 50 30 20
5 108 08.01.2018 4 25 11077 10272 9520 10000 10471 10114 9851 10432 9796 13689 7373 6148 1 5E3C2B+OC 50 30 20
6 108 08.01.2018 4 26 11077 10312 9520 9792 10471 10114 9851 10576 9796 13689 7373 6148 1 5E3C2B+OC 50 30 20
7 108 08.01.2018 4 27 11077 10368 9496 9824 10359 10036 9703 10592 9598 13689 7271 6003 1 5E3C2B+OC 50 30 20
8 108 08.01.2018 4 28 11077 10408 9384 9896 10359 10036 9703 10384 9598 13689 7271 6003 1 5E3C2B+OC 50 30 20
9 108 08.01.2018 4 29 10802 10296 9528 9712 10162 9866 9504 10432 9283 13254 7072 5790 1 5E3C2B+OC 50 30 20
10 108 08.01.2018 4 30 10802 10208 9304 9592 9978 9553 9266 10184 9017 13254 6961 5742 1 5E3C2B+OC 50 30 20
11 108 08.01.2018 5 25 11077 10192 9624 9904 10471 10114 9851 10528 9796 13689 7373 6148 1 5E3C2B+OC 50 30 20
12 108 08.01.2018 5 26 11077 10216 9568 9904 10471 10114 9851 10664 9796 13689 7373 6148 1 5E3C2B+OC 50 30 20
13 108 08.01.2018 5 27 11077 10344 9464 9856 10359 10036 9703 10592 9598 13689 7271 6003 1 5E3C2B+OC 50 30 20或使用该方法复制三行
tree %>%
mutate(Y1=str_replace_all(Y, "[A-Z]", "_")) %>%
mutate(Y2=str_replace_all(Y, "[0-9]", "_") %>% str_remove(., "_")) %>%
mutate(across(c(Y1, Y2), ~str_sub(., 1,5))) %>%
separate_rows(Y1, Y2, sep="_") %>%
mutate(Y1 = as.numeric(Y1)*10)
vyd date row col B1 B2 B3 B4 B5 B6 B7 B8 B8A B9 B10 B11 B12 Y Y1 Y2
<int> <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
1 108 08.01.2018 3 25 10987 10368 9584 10136 10463 10173 9886 10456 9814 13463 7416 6244 1 5E3C2B+OC 50 E
2 108 08.01.2018 3 25 10987 10368 9584 10136 10463 10173 9886 10456 9814 13463 7416 6244 1 5E3C2B+OC 30 C
3 108 08.01.2018 3 25 10987 10368 9584 10136 10463 10173 9886 10456 9814 13463 7416 6244 1 5E3C2B+OC 20 B
4 108 08.01.2018 3 26 10987 10336 9496 9920 10463 10173 9886 10416 9814 13463 7416 6244 1 5E3C2B+OC 50 E
5 108 08.01.2018 3 26 10987 10336 9496 9920 10463 10173 9886 10416 9814 13463 7416 6244 1 5E3C2B+OC 30 C
6 108 08.01.2018 3 26 10987 10336 9496 9920 10463 10173 9886 10416 9814 13463 7416 6244 1 5E3C2B+OC 20 B
7 108 08.01.2018 3 27 10987 10400 9520 9904 10472 9980 9733 10528 9592 13463 7323 6057 1 5E3C2B+OC 50 E
8 108 08.01.2018 3 27 10987 10400 9520 9904 10472 9980 9733 10528 9592 13463 7323 6057 1 5E3C2B+OC 30 C
9 108 08.01.2018 3 27 10987 10400 9520 9904 10472 9980 9733 10528 9592 13463 7323 6057 1 5E3C2B+OC 20 B
10 108 08.01.2018 3 28 10987 10472 9456 9936 10472 9980 9733 10416 9592 13463 7323 6057 1 5E3C2B+OC 50 E
# ... with 29 more rowshttps://stackoverflow.com/questions/70972511
复制相似问题