我有几个列表。例如:
# First sample input: a list keyed by animal name; each entry holds a `day`
# and/or a `location`, and `location` may itself be a list of several values.
b1 <- list(
  duck = list(
    day = "Monday",
    location = list("Cisco Park", "Elephant Park")
  ),
  eagle = list(day = "Saturday"),
  pigeon = list(location = "backyard")
)
b2 <- list(
duck = list(day = "Tuesday", location = "Valley Green"),
goose = list(location = "Old man Johnson's Farm")
)
我想以一种能把这些列表中每个元素的子元素聚合起来的方式来合并它们。这个问题只在一点上带有递归性质:在原始列表中,day 或 location 可以是向量(或列表)。不过嵌套深度也就到此为止了。
desired <-
list(
duck = list(
day = list("Monday", "Tuesday"),
location = list("Cisco Park", "Elephant Park", "Valley Green")
),
eagle = list(day = "Saturday"),
pigeon = list(location = "backyard"),
goose = list(location = "Old man Johnson's Farm")
)
我写了一个可行的lapply()解决方案,但它很长而且非常慢。接下来,我尝试了“Combine/merge lists by elements names”这个问题中的方法:
# Collect the input lists and the union of their top-level names.
l <-
list(b1, b2)
keys <- unique(unlist(lapply(l, names)))
# For every key, concatenate the matching entry of each input list with c(),
# then label the results with `keys`. c() only appends, so inner names that
# occur in more than one input (e.g. `day`) end up repeated, not merged.
merged <-
setNames(do.call(mapply, c(FUN = c, lapply(l, `[`, keys))), keys)
dput(merged)这很快,合并了两个列表,但创建了多个具有相同名称的元素:
list(duck = list(day = "Monday", location = list("Cisco Park",
"Elephant Park"), day = "Tuesday", location = "Valley Green"),
eagle = list(day = "Saturday"), pigeon = list(location = "backyard"),
goose = list(location = "Old man Johnson's Farm"))发布于 2020-05-22 22:52:22
我不知道这样的事情将如何高效地完成,但这里有一个选择:
# Merge b2 into b1. Entries found only in b2 are appended unchanged; for
# entries present in both lists the inner fields are combined one by one.
out <- c(b1, b2[setdiff(names(b2), names(b1))])
for (k in intersect(names(b2), names(b1))) {
  for (v in names(b2[[k]])) {
    if (v %in% names(b1[[k]])) {
      # as.list() on both sides flattens vectors and lists into one list of
      # single values before duplicates are dropped.
      out[[k]][[v]] <- unique(c(as.list(b1[[k]][[v]]), as.list(b2[[k]][[v]])))
    } else {
      # `[[` extracts the value itself; `[` would wrap it in an extra
      # one-element list and nest the field one level too deep.
      out[[k]][[v]] <- b2[[k]][[v]]
    }
  }
}
也许列表不是手头任务的最佳数据结构?我会尝试使用列表列的data.table。下面是一个原始的例子:
library(data.table)
# `bt` was used but never defined in this snippet. The code below needs one
# flat list with one element per animal entry (names may repeat), which is
# what concatenating b1 and b2 produces.
bt <- c(b1, b2)
# Stack all entries into one table; entries lacking a field get NA via `fill`.
bDT <- rbindlist(lapply(bt, function(x) lapply(x, identity)), fill = TRUE)
# Label the rows with their animal name. Each entry is expected to contribute
# max(lengths(x)) rows -- NOTE(review): relies on rbindlist() recycling shorter
# fields within an entry; confirm against the data.table version in use.
bDT[, animal := rep(names(bt), vapply(bt, function(x) max(lengths(x)), integer(1)))]
# Collapse to one row per animal, keeping the distinct values as list columns.
bDT <- bDT[, .(day = list(unique(day)), location = list(unique(location))), by = animal]
bDT[animal == "duck", location]
# [[1]]
# [[1]][[1]]
# [1] "Cisco Park"
#
# [[1]][[2]]
# [1] "Elephant Park"
#
# [[1]][[3]]
# [1] "Valley Green"发布于 2020-05-23 01:03:37
多有趣的问题啊!
这种使用嵌套lapply的方法能得到您想要的结果吗?我不确定它的效率有多高--我猜它应该不会太差(保留空值应该可以提高性能)。
# Merge a set of two-level named lists: for every first-level key, the values
# stored under each second-level key are concatenated across all input lists.
l <- list(b1, b2)
# Every first-level key that occurs in any input list.
l1.keys <- unique(unlist(lapply(l, names)))
# Every second-level key that occurs in ANY entry of ANY input list. Looking
# only at the first entry of each list would miss keys that appear later.
l2.keys <- unique(
  unlist(lapply(l, function(x) lapply(x, names)), use.names = FALSE)
)
l1 <- lapply(l1.keys, function(l1.key) {
  # Entries for this key, one per input list (NULL where the key is absent).
  entries <- lapply(l, "[[", l1.key)
  l2 <- lapply(l2.keys, function(l2.key) {
    # as.list(c(...)) yields an empty list, not NULL, when nothing matches.
    as.list(do.call("c", lapply(entries, "[[", l2.key)))
  })
  names(l2) <- l2.keys
  # Drop the second-level keys this entry does not have.
  l2[lengths(l2) != 0]
})
names(l1) <- l1.keys
l1输出:
$duck
$duck$day
$duck$day[[1]]
[1] "Monday"
$duck$day[[2]]
[1] "Tuesday"
$duck$location
$duck$location[[1]]
[1] "Cisco Park"
$duck$location[[2]]
[1] "Elephant Park"
$duck$location[[3]]
[1] "Valley Green"
$eagle
$eagle$day
$eagle$day[[1]]
[1] "Saturday"
$pigeon
$pigeon$location
$pigeon$location[[1]]
[1] "backyard"
$goose
$goose$location
$goose$location[[1]]
[1] "Old man Johnson's Farm"发布于 2020-05-23 01:44:02
这基本上是对列表做递归合并。我不确定它在完整数据集上有多快,但它应该也适用于任意嵌套深度。
library(purrr)
library(magrittr)
#' Recursively merge two nested lists by element name.
#'
#' Elements that share a name are merged: when both are lists the function
#' recurses, otherwise the two values are concatenated with `c()`. A
#' concatenation that yields an atomic vector of more than one value is
#' converted to a list. Lists whose elements are not all named cannot be
#' matched by key, so they are concatenated instead of being merged by name.
#'
#' @param x A (possibly nested) list, or `NULL`. Its names come first in the
#'   result.
#' @param y A (possibly nested) list, or `NULL`, merged into `x`.
#' @return A list carrying the names of `x` followed by the names that occur
#'   only in `y`; or `c(x, y)` when either input is not fully named.
mergeMyLists <- function(x, y) {
  # Accept NULL / atomic input at the top level, as `[[` lookups by a missing
  # name are only safe on lists.
  if (!is.list(x)) x <- as.list(x)
  if (!is.list(y)) y <- as.list(y)

  # TRUE when every element has a usable (non-empty, non-NA) name.
  has_all_names <- function(lst) {
    nms <- names(lst)
    length(lst) == 0L || (!is.null(nms) && !anyNA(nms) && all(nzchar(nms)))
  }

  # Unnamed or partially named lists are plain collections of values; looking
  # them up by name would silently drop their content, so append them.
  if (!has_all_names(x) || !has_all_names(y)) {
    return(c(x, y))
  }

  # Names of x first, then the names found only in y.
  keys <- c(names(x), setdiff(names(y), names(x)))

  merged <- lapply(keys, function(key) {
    left <- x[[key]]
    right <- y[[key]]
    if (is.list(left) && is.list(right)) {
      mergeMyLists(left, right)
    } else {
      combined <- c(left, right)
      # Input is sometimes a vector; return several values as a list.
      if (length(combined) > 1L && !is.list(combined)) {
        as.list(combined)
      } else {
        combined
      }
    }
  })
  setNames(merged, keys)
}
mergedList <- mergeMyLists(b1, b2)
你可以通过使输入数据更加一致来加速它(这样它就不需要做那么多检查)。例如,b1$duck$location是一个列表,而b2$duck$location是一个字符向量。如果合并函数知道这两者都是list,那么它就不需要检查并在必要时把输出转换为list来得到您想要的结构。
https://stackoverflow.com/questions/61957013
复制相似问题