嗨,我有一个数据集,需要把其中的日期范围展开,使每个日期都对应一行。由于存在给药频率(schedule)变量,问题变得更加复杂:共有 19 种给药频率选项(见附图)。
对于任何 "nx a day"(每天 n 次)的选项,我只需要将剂量乘以每天的给药次数。
其他选项(例如每隔一周、每隔一天等)则需要在展开日期范围(start - stop)时一并考虑。

数据集结构
structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010004,
1010004, 1010016, 1010021, 1010021, 1010026, 1010032, 1010032,
1010032, 1010032, 1010055, 1010068, 1010107, 1020094, 2010116,
2010116, 2010125, 2010125, 3010026, 4010026, 4020144), drug = c("Acetaminophen",
"Acetaminophen", "Calcium Carbonate", "Cefalexin", "Cotrimoxazole",
"Dexamethasone", "Dextrose 5%/Sodium Chloride 0.9%/Potassium Chloride 20mmol/L",
"Lactulose", "Morphine", "Morphine", "Oxycodone Immediate Release",
"Calcitriol", "Vitamin D3", "Heparin Lock", "CMV Immune Globulin 5%",
"Heparin Lock", "Cysteamine", "CMV Immune Globulin 5%", "Hydromorphone",
"Leucovorin", "Lorazepam", "Morphine", "Hydromorphone", "Salbutamol",
"Lorazepam", "Warfarin", "Warfarin", "Heparin", "Lorazepam",
"Salbutamol", "Sirolimus", "Hydromorphone"), start = structure(c(1247875200,
1248048000, 1247702400, 1248652800, 1250121600, 1247875200, 1247788800,
1248220800, 1247961600, 1247961600, 1248134400, 1235001600, 1235001600,
1280102400, 1290988800, 1290211200, 1298332800, 1284854400, 1365811200,
1363651200, 1363651200, 1317513600, 1291939200, 1409875200, 1263513600,
1367452800, 1367366400, 1454803200, 1451088000, 1420070400, 1372809600,
1342051200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
stop = structure(c(1250035200, 1248048000, 1249948800, 1249689600,
1250121600, 1248134400, 1247875200, 1248307200, 1248048000,
1248048000, 1248998400, 1235001600, 1235001600, 1280188800,
1290988800, 1290816000, 1298332800, 1287360000, 1367452800,
1364083200, 1364169600, 1317686400, 1292371200, 1409875200,
1264809600, 1371945600, 1371772800, 1456099200, 1455840000,
1420070400, 1373155200, 1342051200), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), dose = c(1000, 1000, 200, 1000,
160, 8, 150, 10, 4, 15, 5, 0.25, 400, 2250, 2500, 250, 1,
2500, 0.25, 12, 2.2, 3, 6, 0.5, 0.25, 1, 2, 130, 1, 1, 0.5,
1), units = c("mg", "mg", "mg (ca++)", "mg", "mg (trimethoprim)",
"mg", "ml/hr", "ml", "mg", "mg", "mg", "mcg", "IU", "U",
"mg", "U", "drop(s)", "mg", "mg", "mg", "mg", "mg", "mg",
"ml", "mg", "mg", "mg", "U", "mg", "ml", "mg", "mg"), route = c("Oral",
"Oral", "Oral", "Oral", "Oral", "Intravenous", "Intravenous",
"Oral", "Intravenous", "Oral", "Oral", "Oral", "Oral", "Injection",
"Intravenous", "Injection", "Ophthalmic", "Intravenous",
"Intravenous", "Intravenous", "Intravenous", "Intravenous",
"Oral", "Inhalation", "Intravenous", "Oral", "Oral", "Intravenous",
"Intravenous", "Inhalation", "Oral", "Intravenous"), schedule = c("4x a day",
"4x a day", "3x a day", "3x a day", "2x a day", "1x a day",
"1x a day", "2x a day", "12x a day", "6x a day", "6x a day",
"every other day", "every other day", "Every 7 days", "every other week",
"Every 7 days", "24x a day", "every other week", "12x a day",
"8x a day", "24x a day", "48x a day", "8x a day", "48x a day",
"72x a day", "Every 3 days", "Every 3 days", "96x a day",
"96x a day", "72x a day", "every 4 days", "144x a day")), row.names = c(NA,
-32L), class = c("tbl_df", "tbl", "data.frame"))
所需的数据集结构(以 id 为 1010002 的第一条药物记录为例,已按日期展开并计算出每日剂量):
structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002,
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002
), drug = c("Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen",
"Acetaminophen", "Acetaminophen", "Acetaminophen"), start = structure(c(1247875200,
1247961600, 1248048000, 1248134400, 1248220800, 1248307200, 1248393600,
1248480000, 1248566400, 1248652800, 1248739200, 1248825600, 1248912000,
1248998400, 1249084800, 1249171200, 1249257600, 1249344000, 1249430400,
1249516800, 1249603200, 1249689600, 1249776000, 1249862400, 1249948800,
1250035200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
dailydose = c(4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000,
4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000,
4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000), units = c("mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg",
"mg", "mg", "mg", "mg", "mg"), route = c("Oral", "Oral",
"Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral",
"Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral",
"Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral"
)), row.names = c(NA, -26L), class = c("tbl_df", "tbl", "data.frame"
))

发布于 2020-02-01 08:03:59
我们可以解析 'schedule' 中的数字部分并与 'dose' 相乘来创建 'dailydose' 列,然后使用 map2 逐行遍历 'start'、'stop' 列,生成按天递增的日期序列(list 列),最后用 unnest 将其展开为每个日期一行。
library(dplyr)
library(tidyr)
library(readr)
library(purrr)
# Expand every prescription row of `df1` into one row per dosing day.
#
# Two families of `schedule` values are handled:
#   * "Nx a day"            -> given every day; dailydose = dose * N.
#   * "every N days",
#     "every other day",
#     "every other week"    -> given once every k days; only every k-th day
#                              between `start` and `stop` gets a row and
#                              dailydose is the single dose.
#
# With the sample data this yields 272 rows; a plain day-by-day expansion
# that ignores the interval schedules yields 378.
#
# NOTE(review): only the schedule labels present in the sample data were
# checked. A label that contains no number and no "other" leaves `freq` as
# NA, and seq() then stops with an "invalid string for 'by'" error -- confirm
# the remaining options of the 19 follow one of the two patterns above.
df1 %>%
  mutate(
    # "other" means "every 2nd", so turn it into a number before parsing;
    # this also avoids parse_number() failing on "every other day/week".
    freq = parse_number(sub("other", "2", schedule, ignore.case = TRUE)),
    per_day = grepl("x a day", schedule, ignore.case = TRUE),
    # Distance between consecutive dosing days, in days.
    step_days = case_when(
      per_day ~ 1,
      grepl("week", schedule, ignore.case = TRUE) ~ freq * 7,
      TRUE ~ freq
    ),
    # Only "Nx a day" schedules multiply the dose; interval schedules
    # deliver one dose on each dosing day.
    dailydose = if_else(per_day, dose * freq, dose),
    # One sequence of dosing dates per row, stepping by that row's interval.
    start = pmap(
      list(start, stop, step_days),
      function(from, to, step) seq(from, to, by = paste(step, "days"))
    )
  ) %>%
  select(id, drug, start, dailydose, units, route) %>%
  unnest(c(start))
# A tibble: 378 x 6
# id drug start dailydose units route
# <dbl> <chr> <dttm> <dbl> <chr> <chr>
# 1 1010002 Acetaminophen 2009-07-18 00:00:00 4000 mg Oral
# 2 1010002 Acetaminophen 2009-07-19 00:00:00 4000 mg Oral
# 3 1010002 Acetaminophen 2009-07-20 00:00:00 4000 mg Oral
# 4 1010002 Acetaminophen 2009-07-21 00:00:00 4000 mg Oral
# 5 1010002 Acetaminophen 2009-07-22 00:00:00 4000 mg Oral
# 6 1010002 Acetaminophen 2009-07-23 00:00:00 4000 mg Oral
# 7 1010002 Acetaminophen 2009-07-24 00:00:00 4000 mg Oral
# 8 1010002 Acetaminophen 2009-07-25 00:00:00 4000 mg Oral
# 9 1010002 Acetaminophen 2009-07-26 00:00:00 4000 mg Oral
#10 1010002 Acetaminophen 2009-07-27 00:00:00 4000 mg Oral
# … with 368 more rows

如果 readr 不可用,我们可以使用 str_extract 提取数值并转换为 numeric,即将 parse_number(schedule) 更改为 as.numeric(stringr::str_extract(schedule, '[0-9]+'))
https://stackoverflow.com/questions/60012760
复制相似问题