我有一些污染监测站的数据。我正在尝试创建一个新的tibble,如果数据满足某些条件,它将被合并到列elevation的左侧。需要满足的条件是,water_bodies (water_body)等于,而高程(elevation)小于它将要合并的观测值。
试图用语言来解释这个问题听起来很愚蠢,因为英语不是我的第一语言,但是这里有一个我正在寻找的例子,我认为它能澄清一切。
以下是数据:
tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
id_emca water_body id_estmun dbo elevation
<chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865
4 DLBAJ155 RIO MULEGE_12 3003 20.3 215
5 DLBAJ155 RIO MULEGE_13 3003 14.0 215
6 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
8 DLBAJ159 RIO MULEGE_13 3010 17.9 477
9 DLBAJ159 RIO MULEGE_14 3010 36.0 477
10 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
11 DLCAM85 RIO CHICALOTE_18 4009 323. 573
12 DLCAM122 RIO MULEGE_12 4001 37.8 300新的tibble看起来应该是这样的:

发布于 2021-04-15 05:18:46
你可以用非马氏联接制作一个data.table
df <- tibble::tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
library(data.table)
setDT(df)
df[,elevation_join := elevation]
result <- df[df, on = .(water_body = water_body, elevation_join < elevation_join)]
result[,.(id_emca = i.id_emca,
water_body,
id_estmun = i.id_estmun,
dbo = i.dbo,
elevation = i.elevation,
id_emca_b = id_emca,
id_estmun_b = id_estmun,
dbo_b = dbo,
elevation_b = elevation)]
id_emca water_body id_estmun dbo elevation id_emca_b water_body_b id_estmun_b dbo_b elevation_b
1: DLAGU20 RIO CHICALOTE_16 1005 364.920 1865 <NA> <NA> NA NA NA
2: DLAGU20 RIO CHICALOTE_17 1005 485.910 1865 DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677
3: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677
4: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
5: DLAGU20 RIO CHICALOTE_18 1005 270.320 1865 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
6: DLBAJ155 RIO MULEGE_12 3003 20.300 215 <NA> <NA> NA NA NA
7: DLBAJ155 RIO MULEGE_13 3003 14.005 215 <NA> <NA> NA NA NA
8: DLBAJ157 RIO CHICALOTE_17 3007 315.340 1677 <NA> <NA> NA NA NA
9: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM80 RIO CHICALOTE_18 4012 157.660 1003
10: DLBAJ157 RIO CHICALOTE_18 3007 430.210 1677 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
11: DLBAJ159 RIO MULEGE_13 3010 17.890 477 DLBAJ155 RIO MULEGE_13 3003 14.005 215
12: DLBAJ159 RIO MULEGE_14 3010 35.980 477 <NA> <NA> NA NA NA
13: DLCAM80 RIO CHICALOTE_18 4012 157.660 1003 DLCAM85 RIO CHICALOTE_18 4009 323.020 573
14: DLCAM85 RIO CHICALOTE_18 4009 323.020 573 <NA> <NA> NA NA NA
15: DLCAM122 RIO MULEGE_12 4001 37.810 300 DLBAJ155 RIO MULEGE_12 3003 20.300 215发布于 2021-04-15 05:23:01
library(tidyverse)
df <- tribble(
~id_emca, ~water_body, ~id_estmun, ~dbo, ~elevation,
"DLAGU20", "RIO CHICALOTE_16", 1005, 364.92, 1865,
"DLAGU20", "RIO CHICALOTE_17", 1005, 485.91, 1865,
"DLAGU20", "RIO CHICALOTE_18", 1005, 270.32, 1865,
"DLBAJ155", "RIO MULEGE_12", 3003, 20.3, 215,
"DLBAJ155", "RIO MULEGE_13", 3003, 14.005, 215,
"DLBAJ157", "RIO CHICALOTE_17", 3007, 315.34, 1677,
"DLBAJ157", "RIO CHICALOTE_18", 3007, 430.21, 1677,
"DLBAJ159", "RIO MULEGE_13", 3010, 17.89, 477,
"DLBAJ159", "RIO MULEGE_14", 3010, 35.98, 477,
"DLCAM80", "RIO CHICALOTE_18", 4012, 157.66, 1003,
"DLCAM85", "RIO CHICALOTE_18", 4009, 323.02, 573,
"DLCAM122", "RIO MULEGE_12", 4001, 37.81, 300
)
temp <- df %>%
group_by(water_body) %>%
filter(n() > 1)
df %>%
arrange(water_body) %>%
left_join(temp,
by = "water_body",
keep = TRUE) %>%
filter(elevation.x > elevation.y | is.na(elevation.y))输出:
# A tibble: 11 x 10
id_emca.x water_body.x id_estmun.x dbo.x elevation.x id_emca.y water_body.y id_estmun.y dbo.y elevation.y
<chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <dbl> <dbl>
1 DLAGU20 RIO CHICALOTE_16 1005 365. 1865 NA NA NA NA NA
2 DLAGU20 RIO CHICALOTE_17 1005 486. 1865 DLBAJ157 RIO CHICALOTE_17 3007 315. 1677
3 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677
4 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
5 DLAGU20 RIO CHICALOTE_18 1005 270. 1865 DLCAM85 RIO CHICALOTE_18 4009 323. 573
6 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM80 RIO CHICALOTE_18 4012 158. 1003
7 DLBAJ157 RIO CHICALOTE_18 3007 430. 1677 DLCAM85 RIO CHICALOTE_18 4009 323. 573
8 DLCAM80 RIO CHICALOTE_18 4012 158. 1003 DLCAM85 RIO CHICALOTE_18 4009 323. 573
9 DLCAM122 RIO MULEGE_12 4001 37.8 300 DLBAJ155 RIO MULEGE_12 3003 20.3 215
10 DLBAJ159 RIO MULEGE_13 3010 17.9 477 DLBAJ155 RIO MULEGE_13 3003 14.0 215
11 DLBAJ159 RIO MULEGE_14 3010 36.0 477 NA NA NA NA NAhttps://stackoverflow.com/questions/67102277
复制相似问题