我试图在R中建立一个模拟模型,在有限的资源范围内,模拟不同寄生虫产卵行为对寄生虫后代数量的影响。简单地说,这个模拟遍历了连续几代的寄生虫,并跟踪了每一类性状中的寄生虫数量(它们每个浆果产卵多少)。我想看看是否有任何特定的种群比其他种群更频繁地走向灭绝。
Question:可以加速这个模拟吗?我感兴趣的是在不同的参数值下使用它进行灵敏度分析,但是要达到合理的世代数(n >= 100)和每个参数集的模拟,需要相当长的时间才能运行。我对apply/map函数家族相当满意,但我对R没有足够的经验,不知道在这个脚本中是否有可以用向量化版本替换任何循环或非向量化函数的地方。
另一个注意事项:我目前只返回数据帧对象generation_tracker,但我最终也将返回berry_mat和oviposition_location对象。
我已经包括了下面的模拟代码。代码确实需要几个包才能运行,并且模拟有两个助手函数,它们在这些代码块的顶部提供。谢谢!
library(tidyverse)
library(magrittr)
library(extraDistr)
initBerry <- function(n_berries) {
berry_mat <- matrix(0, nrow = 4, ncol = n_berries)
return(berry_mat)
}
initWasps <- function(n_wasps,
n_eggs,
eggs_per_berry,
pr_double) {
wasps_df <- data.frame(matrix(NA, nrow = n_wasps, ncol = 5))
colnames(wasps_df) <- c("starting_eggs",
"tot_eggs",
"eggs_per_berry",
"pr_double",
"larvae_survive")
wasps_df$starting_eggs <- n_eggs
wasps_df$tot_eggs <- n_eggs
wasps_df$eggs_per_berry <- eggs_per_berry
wasps_df$pr_double <- pr_double
return(wasps_df)
}
simMultiGenerations <- function(n_berries,
n_wasps,
n_eggs,
pr_double,
double_rate,
n_gens,
n_sims)
{
# Initialize simulation ---------------------------------------------------
cl <- makeCluster(detectCores() - 1, type = "SOCK")
registerDoSNOW(cl)
output <- foreach(
j = 1:n_sims,
.export = c("initBerry",
"initWasps"),
.packages = c("magrittr",
"tidyverse",
"extraDistr")
) %dopar%
{
# NOTE: need to assign to something
berry_mat <- initBerry(n_berries = n_berries)
wasp_df <- initWasps(
n_wasps = n_wasps,
n_eggs = rtpois(n_wasps, n_eggs,a = 4, b = 20),
eggs_per_berry = sample(1:4, n_wasps, replace = T),
pr_double = pr_double
)
# Track time with t, as t increases, increase pr_double
t <- 0
# get a parameter for logistic equation for probability of laying
a <- (1 - pr_double) / pr_double
# nested list to store which wasps oviposit in which seeds
oviposition_location <- vector("list", n_berries)
oviposition_location %<>% map(function(.x) {
.x <- vector("list", 4)
})
generation_tracker <- data.frame(
generation = 1:n_gens,
eggs_per_berry1 = NA,
eggs_per_berry2 = NA,
eggs_per_berry3 = NA,
eggs_per_berry4 = NA
)
for (g in 1:n_gens) {
while (any(wasp_df$tot_eggs > 0)) {
# Repeat loop while any wasps have eggs, stop when no eggs remain
for (i in 1:nrow(wasp_df)) {
# Oviposition happens discretely, one wasp at a time
if (wasp_df$tot_eggs[i] == 0) {
# If the current wasp has no eggs, jump to next wasp
next
} else {
# Pick which berry to oviposit in. According to Etsuro, this is random
berry <-
sample(ncol(berry_mat),
size = 1,
replace = TRUE)
# Pick which seed to oviposit in.
if (wasp_df$tot_eggs[i] >= wasp_df$eggs_per_berry[i]) {
seed <- sample(
nrow(berry_mat),
size = wasp_df$eggs_per_berry[i],
replace = FALSE
)
} else {
seed <- sample(nrow(berry_mat),
size = 1,
replace = FALSE)
}
# If the seed is empty, oviposit and subtract an egg
for (j in 1:length(seed)) {
temp_seed <- seed[j]
if (berry_mat[temp_seed, berry] == 0) {
berry_mat[temp_seed, berry] <- berry_mat[temp_seed, berry] + 1
wasp_df$tot_eggs[i] <-
wasp_df$tot_eggs[i] - 1
oviposition_location[[berry]][[temp_seed]] <-
c(i, oviposition_location[[berry]][[temp_seed]])
} else {
# Wasp decides whether to oviposit in already oviposited in egg
lay_egg <-
rbinom(1, 1, prob = wasp_df$pr_double[i])
# If 0, wasp does not oviposit
# If 1, wasp oviposits
if (lay_egg == 1) {
berry_mat[temp_seed, berry] <- berry_mat[temp_seed, berry] + 1
wasp_df$tot_eggs[i] <-
wasp_df$tot_eggs[i] - 1
oviposition_location[[berry]][[temp_seed]] <-
c(i, oviposition_location[[berry]][[temp_seed]])
}
}
}
}
# increment time forward for the purpose of increasing oviposition rate
t <- t + 1
if (t %% nrow(wasp_df) == 0) {
# Every time all the wasps have a chance to oviposit, increase
# their likelihood to oviposit in an already occupied seed
wasp_df$pr_double <-
1 / (1 + a * exp(-double_rate * (t / nrow(wasp_df))))
}
}
}
# End while ---------------------------------------------------------------
oviposition_location <-
oviposition_location %>%
modify_depth(2, function(.x) {
if (is.null(.x)) {
.x <- 0
} else {
# Randomly select which parasitoid develops
.x <-
.x[sample(length(.x), 1)]
}
})
id <- unlist(oviposition_location)
wasp_larvae <- as.data.frame(table(id))
if (any(wasp_larvae$id == 0)) {
wasp_larvae <- wasp_larvae[-1, ]
}
wasp_larvae$id <- as.integer(as.character(wasp_larvae$id))
wasp_df$larvae_survive[wasp_larvae$id] <- wasp_larvae$Freq
wasp_df$larvae_survive <- ifelse(is.na(wasp_df$larvae_survive),
0,
wasp_df$larvae_survive)
wasp_df %<>% mutate(prop_surv = larvae_survive / starting_eggs)
wasp_df$eggs_per_berry <- factor(as.character(wasp_df$eggs_per_berry),
levels = c("1", "2", "3", "4"))
wasp_df %<>% complete(eggs_per_berry, fill = list(larvae_survive = 0))
n_offspring <- wasp_df %>%
group_by(eggs_per_berry) %>%
summarise(offspring = sum(larvae_survive))
generation_tracker[g, 2:5] <- t(n_offspring[, 2])
frequency_eggs_p_berry <- c(rep(1, n_offspring$offspring[1]),
rep(2, n_offspring$offspring[2]),
rep(3, n_offspring$offspring[3]),
rep(4, n_offspring$offspring[4]))
n_eggs <- rtpois(sum(n_offspring$offspring), n_eggs,a = 4, b = 20)
wasp_df <- data.frame(
n_wasps = 1:sum(n_offspring$offspring),
starting_eggs = n_eggs,
tot_eggs = n_eggs,
eggs_per_berry = sample(frequency_eggs_p_berry,
sum(n_offspring$offspring),
replace = FALSE),
pr_double = rep(pr_double, sum(n_offspring$offspring)),
larvae_survive = NA
)
oviposition_location <- vector("list", n_berries)
oviposition_location %<>% map(function(.x) {
.x <- vector("list", 4)
})
}
# End generations loop ----------------------------------------------------
generation_tracker
}
# End simulation loop (end foreach) ---------------------------------------
stopCluster(cl)
closeAllConnections()
return(output)
}
simMultiGenerations(n_berries = 100,
n_wasps = 20,
n_eggs = 12,
pr_double = 0.1,
double_rate = 0.1,
n_gens = 100,
n_sims = 10)发布于 2018-09-19 01:49:47
apply函数不会大大加快代码的速度,但它们将使代码更加可读性更强,然后您将能够发现效率低下的地方,但在这种情况下,您将更经常地需要purrr::accumulate,或者使用accumulate = TRUE的Reduce,因为给定的迭代使用的是上一次迭代的结果。使用空值启动的所有对象很可能都是某种累加调用的输出。your_list <- ovoposite(your_list)之类的操作。generation tracker应该是一个大的data.frame,它应该是一个从累积中得到的列表,您可以在它上使用bind_rows来获得一个data.frame。for (j in 1:length(seed)) {temp_seed <- seed[j]应该是for (temp_seed in seed)data.table,数据争用会更快,循环使用Rcpp会更快,但是您确实需要更早地构造代码。for (j in 1:length(seed)) {temp_seed <- seed[j]应该是for (temp_seed in seed)if next序列中,跳过else并保存一对括号。编辑:关于累积的更多信息
我不能直接从您的代码构建一个示例,因为它有点复杂,但希望这会有所帮助。
这是您生成的代码类型:
x <- c(10,20,30)
res <- numeric(3)
a <- 1
b <- 5
res[[1]] <- a * b + x[1]
for(i in 2:length(res)){
a = a+1 # using the previous value
b = b^2
res[[i]] <- a * b + x[i] - res[[i-1]]
}
res
# [1] 15 55 1850让我们简化一下,a和b可以更有效地定义为循环外的向量:
a <- 1:3
b <- accumulate(1:2, ~.^2,.init=5)
# or better b <- 5^(2^(0:2)), i needed to think deeper about the math but it's faster, and this intermediate step helped do it
library(purrr)
abx <- a * b + x
res <- accumulate(abx[-1], ~ .y - .x, .init = abx[1])
res
# [1] 15 55 1850这也会运行得更快,这并不是因为accumulate只是隐藏了一个常规循环,而是因为它迫使我更清楚地思考我的输入和输出是什么,并将可能的内容向量化。
我们可以通过给用于获取res的函数取一个名称来提高它的可读性,我们将这个函数与另一个文件放在R文件的顶部,然后记录它,这很容易,因为它们是很少的参数,而且它可以做一些简单的事情。:
iterate_on_result <- function(prev, abx_i) abx_i - prev在我们的代码中,我们将调用:
res <- accumulate(abx[-1], iterate_on_result, .init = abx[1])这就清楚了什么是输入和什么是修改的,并且简化了代码的其余部分。
https://codereview.stackexchange.com/questions/203941
复制相似问题