我试图使用R+ MonetDB作为一个大数据分析堆栈,并且在创建一个新列并将其填充到我的分析中的数据时遇到了困难。下面是一个玩具示例:
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB.R::MonetDB(), embedded="./test.db")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## insert data into the new column
v1 <- mtcars["mpg"] * pi
dbSendQuery(db, "INSERT INTO mtcars (v1) VALUES (?)", bind.data=v1)以及错误消息:
Error in .local(conn, statement, ...) :
Unable to execute statement 'INSERT INTO mtcars (v1) VALUES ('c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.23008278...'.
Server says 'ERROR: 49.6371639267187, 61.8893752757189, 47.1238898038469, 67.2300827868216)' to type dbl failed.
' [#conversion of string 'c(65.9734457253857, 65.9734457253857, 71.6283125018473, 67.2300827868216, 58.7477826221291, 56.8628270299753, 44.924774946334, 76.6548607475909, 71.6283125018473, 60.318578948924, 55.9203492338983, 51.5221195188726, 54.3495529071034, 47.7522083345649, 32.6725635973338, 32.6725635973338, 46.18141200777, 101.787601976309, 95.5044166691297, 106.499990956694, 67.5442420521806, 48.6946861306418, 47.7522083345649, 41.7831822927443, 60.318578948924, 85.7654794430014, 81.6814089933346, 95.5044166691297,
].
In addition: Warning message:
In if (is.na(value)) statement <- sub("?", "NULL", statement, fixed = TRUE) else if (valueClass %in% :
the condition has length > 1 and only the first element will be used从这个错误中,我推测bind.data可能不能与MonetDBLite一起使用?
问题:
如何将列添加到MonetDBLite表并使用来自R会话的数据填充它?
发布于 2018-08-27 12:44:14
首先,最后一条语句中的“插入”命令是不正确的。您将需要“更新”语句。
尽管如此,我提出了一个解决方案,您可以直接从R填充MonetDBLite表:
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
# I added a rownbr to the dataset so it will be easier later
mtcars$rownbr <- 1:nrow(mtcars)
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## insert data into the new column
v1 <- mtcars["mpg"] * pi
for (i in 1:nrow(mtcars)){
myquery <- paste0("UPDATE mtcars SET v1 = ",v1$mpg[i], "where rownbr =",i," ;")
dbSendQuery(db, myquery )
}发布于 2018-08-27 22:30:15
通过dbBind和MonetDBLite进行参数化的SQL查询似乎存在一个问题(参见https://github.com/hannesmuehleisen/MonetDBLite-R/issues/16)。下面的代码适用于SQLite:
library(RSQLite)
data(mtcars)
db <- dbConnect(SQLite(), ":memory:")
# load mtcars into the database
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)
## Add a new column
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
## do computation with R
mtcars$v1 <- mtcars$mpg * pi
mtcars$row_names <- rownames(mtcars)
update_query <- dbSendQuery(db, 'update mtcars set "v1"=$v1 WHERE row_names=$row_names')
dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
dbClearResult(update_query) # release the prepared statement
dbReadTable(db, "mtcars")
dbDisconnect(db)但是,对于MonetDBLite,它会为dbBind步骤生成一个错误(和一个警告):
> dbBind(update_query, mtcars[, c("v1", "row_names")]) # send the updated data
Error in vapply(params, function(x) { : values must be length 1,
but FUN(X[[1]]) result is length 32
In addition: Warning message:
In if (is.na(x)) "NULL" else if (is.numeric(x) || is.logical(x)) { :
the condition has length > 1 and only the first element will be used我想出的一个解决办法是使用glue包中的glue“手动”组合查询(不需要遍历行):
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbWriteTable(conn=db, value = mtcars, name = "mtcars", overwrite = TRUE, row.names=TRUE)
dbSendQuery(db, "ALTER TABLE mtcars ADD v1 DOUBLE;")
library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
update_query <- glue_data_sql(mtcars, "update mtcars set v1 = {v1} where row_names = {row_names};", .con=db)
lapply(update_query, dbSendQuery, conn=db)
# verify
dbReadTable(db, "mtcars")
dbDisconnect(db)另一个解决方法是在一个查询中执行insert (更接近@Zelazny7 7最初的尝试):
library(MonetDBLite)
library(DBI)
data(mtcars)
db <- dbConnect(MonetDB(), embedded="./test.db")
dbSendQuery(db, "CREATE TABLE mtcars (
row_names VARCHAR(32),
v1 DOUBLE);")
library(glue)
mtcars$row_names <- rownames(mtcars)
mtcars$v1 <- mtcars$mpg * pi
insert_values <- glue_data(mtcars, "('{row_names}', {v1})")
insert_values <- glue_collapse(insert_values, sep=", ", last="")
insert_query <- glue("INSERT INTO mtcars (row_names, v1) VALUES {insert_values}")
dbSendQuery(db, insert_query)
dbReadTable(db, "mtcars")
dbDisconnect(db)https://stackoverflow.com/questions/37708534
复制相似问题