Changeset: b58218b5fd9d for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=b58218b5fd9d Modified Files: clients/R/MonetDB.R/NAMESPACE clients/R/MonetDB.R/NEWS clients/R/MonetDB.R/R/dbapply.R clients/R/MonetDB.R/man/dbApply.Rd clients/R/Tests/dbapply.R Branch: default Log Message:
R Connector: renamed dbApply to mdbapply at the request of Hadley diffs (243 lines): diff --git a/clients/R/MonetDB.R/NAMESPACE b/clients/R/MonetDB.R/NAMESPACE --- a/clients/R/MonetDB.R/NAMESPACE +++ b/clients/R/MonetDB.R/NAMESPACE @@ -4,7 +4,7 @@ import(DBI,digest,methods,codetools) export(MonetDB,MonetR,MonetDBR,MonetDB.R) export(monet.read.csv,monetdb.read.csv) # this one is not in the DBI -exportMethods(dbSendUpdate,dbSendUpdateAsync,dbTransaction,dbApply) +exportMethods(dbSendUpdate,dbSendUpdateAsync,dbTransaction,mdbapply) # shorthands export(mc,mq) useDynLib(MonetDB.R) diff --git a/clients/R/MonetDB.R/NEWS b/clients/R/MonetDB.R/NEWS --- a/clients/R/MonetDB.R/NEWS +++ b/clients/R/MonetDB.R/NEWS @@ -3,7 +3,7 @@ 0.9.8 - Cleaned up SQL to R type mapping (we had this twice) - Now creating actual R integers if data fits - dbWriteTable now quotes table/column names if necessary, and outputs warnings if it did -- New dbApply function to automatically create embedded R functions in MonetDB +- New mdbapply function to automatically create and run embedded R functions in MonetDB - Fixes for dplyr backend 0.9.7 diff --git a/clients/R/MonetDB.R/R/dbapply.R b/clients/R/MonetDB.R/R/dbapply.R --- a/clients/R/MonetDB.R/R/dbapply.R +++ b/clients/R/MonetDB.R/R/dbapply.R @@ -1,32 +1,24 @@ -.encodeGlobals <- function(name) { - vars <- findGlobals(name,merge=F)$variables - if (length(vars) < 1) { - return(NA) +# TOOD: support running this on query results? +# TODO: support remote dbs, find out whether its local via canary file + +if (is.null(getGeneric("mdbapply"))) setGeneric("mdbapply", function(conn, table, fun, ...) + standardGeneric("mdbapply")) + +setMethod("mdbapply", signature(conn="MonetDBConnection"), def=function(conn, table, fun, ...) { + # make sure table exists + if (!dbExistsTable(conn, table)) { + stop("Table ", table, " does not exist.") } - if (getOption("monetdb.debug.query",FALSE)) - message("Variable(s) ",paste0(vars,collapse=", ")) - # TODO: optionally inline serialized context for remote dbs - res <- tempfile() - save(list=vars,file=res,envir=environment(name), compress=T) - return(res) -} - -# TODO: support arbitrary arguments that are passed to fun -# TOOD: support running this on query results? -if (is.null(getGeneric("dbApply"))) setGeneric("dbApply", function(conn, table, fun) - standardGeneric("dbApply")) - -setMethod("dbApply", signature(conn="MonetDBConnection"), def=function(conn, table, fun) { # generate unique function name - dbfunname <- "__r_dapply_autogen_" - while (dbGetQuery(conn,paste0("select count(*) from functions where name='",dbfunname,"'"))[[1]] > 0) - dbfunname <- paste0(dbfunname,sample(letters,1)) + dbfunname <- "mdbapply_autogen_" + while (dbGetQuery(conn, paste0("select count(*) from functions where name='", dbfunname, "'"))[[1]] > 0) + dbfunname <- paste0(dbfunname, sample(letters, 1)) # test R integration with dummy function dbBegin(conn) - dbSendQuery(conn,paste0("CREATE FUNCTION ",dbfunname,"() RETURNS TABLE(d INTEGER) LANGUAGE R {1L}")) - res <- dbGetQuery(conn,paste0("SELECT * FROM ",dbfunname,"()"))[[1]] + dbSendQuery(conn,paste0("CREATE FUNCTION ", dbfunname, "() RETURNS TABLE(d INTEGER) LANGUAGE R {1L}")) + res <- dbGetQuery(conn,paste0("SELECT * FROM ", dbfunname, "()"))[[1]] dbRollback(conn) # now generate the UDF @@ -41,32 +33,42 @@ setMethod("dbApply", signature(conn="Mon ',unique(sapply(strsplit(grep("^package:", search(), value=T),":"), function(x) x[[2]]))), function(pname) library(pname, character.only=T, quietly=T)))\n') } # serialize global variables into ascii string, and add the code to scan it again into the current env - sfilename <- .encodeGlobals(fun) - if (!is.na(sfilename)) { - dbrcode <- paste0(dbrcode,'# load serialized global variables\nload("',sfilename,'")\n') + vars <- findGlobals(fun,merge=F)$variables + dotdot <- list(...) + sfilename <- NA + if (length(vars) > 1 || length(dotdot) > 1) { + if (getOption("monetdb.debug.query",FALSE)) + message("Variable(s) ",paste0(vars,dotdot,collapse=", ")) + #vars$mdbapply_dotdot <- dotdot + sfilename <- tempfile() + save(list=vars,file=sfilename,envir=environment(fun), compress=T) + dbrcode <- paste0(dbrcode, '# load serialized global variables\nload("', sfilename, '")\n') } + rfilename <- tempfile() # get source of user function and append - dbrcode <- paste0(dbrcode,"# user-supplied function\n.userfun <- ",paste0(deparse(fun),collapse="\n"),"\n# calling user function\nsaveRDS(.userfun(.dbdata),file=\"",rfilename,"\")\nreturn(42L)\n") + dbrcode <- paste0(dbrcode, "# user-supplied function\nmdbapply_userfun <- ", paste0(deparse(fun), collapse="\n"), + "\n# calling user function\nsaveRDS(mdbapply_userfun(mdbapply_dbdata),file=\"", rfilename, "\")\nreturn(42L)\n") - # find out things about the table, then wrap the r function - res <- dbSendQuery(conn,paste0("SELECT * FROM ",table," LIMIT 1")) - dbnames <- res@env$info$names - dbtypes <- res@env$info$dbtypes - dbfun <- paste0("CREATE FUNCTION ",dbfunname,"(",paste0(dbnames," ", dbtypes, collapse=", "), - ") \nRETURNS TABLE(retval INTEGER) LANGUAGE R {\n# rename arguments\n.dbdata <- data.frame(", - paste0(dbnames, collapse=", "),")\n",dbrcode,"};\n") + # find out things about the table, then wrap the R function + query <- paste0("SELECT * FROM ", table, " AS t") + res <- monetdb_queryinfo(conn, query) + dbfun <- paste0("CREATE FUNCTION ", dbfunname,"(", paste0(dbQuoteIdentifier(conn, res$names)," ", res$dbtypes, collapse=", "), + ") \nRETURNS TABLE(retval INTEGER) LANGUAGE R {\n# rename arguments\nmdbapply_dbdata <- data.frame(", + paste0(res$names, collapse=", "),", stringsAsFactors=F)\n", dbrcode, "};\n") # call the function we just created - dbsel <- paste0("SELECT * FROM ", dbfunname, "( (SELECT * FROM ", table, " AS t) );\n") + dbsel <- paste0("SELECT * FROM ", dbfunname, "( (",query,") );\n") # ok, talk to DB (EZ) + res <- NA dbBegin(conn) - dbSendQuery(conn,dbfun) - dres <- dbGetQuery(conn,dbsel) - dbRollback(conn) - # TODO: check dres - # TODO: check if sfilename exists and is valid - res <- readRDS(rfilename) - on.exit(file.remove(na.omit(c(sfilename, rfilename)))) + tryCatch({ + dbSendQuery(conn, dbfun) + dbGetQuery(conn, dbsel) + res <- readRDS(rfilename) + }, finally={ + dbRollback(conn) + file.remove(stats::na.omit(c(sfilename, rfilename))) + }) res }) diff --git a/clients/R/MonetDB.R/man/dbApply.Rd b/clients/R/MonetDB.R/man/dbApply.Rd --- a/clients/R/MonetDB.R/man/dbApply.Rd +++ b/clients/R/MonetDB.R/man/dbApply.Rd @@ -1,6 +1,6 @@ -\name{dbApply} -\alias{dbApply} -\alias{dbApply,MonetDBConnection-method} +\name{mdbapply} +\alias{mdbapply} +\alias{mdbapply,MonetDBConnection-method} \title{ Apply a R function to a MonetDB table. @@ -10,13 +10,15 @@ } \usage{ - dbApply(conn, table, fun) + mdbapply(conn, table, fun, ...) } \arguments{ \item{conn}{A MonetDB.R database connection. Created using \code{\link[DBI]{dbConnect}} with the \code{\link[MonetDB.R]{MonetDB.R}} database driver.} \item{table}{A MonetDB database table. Can also be a view or temporary table.} \item{fun}{A R function to be run on the database table. The function gets passed a single \code{data.frame} argument which represents the database table. The function needs to return a single vector (for now).} + \item{...}{Other parameters to be passed to the function} + } \value{ Returns the result of the function applied to the database table. @@ -27,7 +29,7 @@ conn <- dbConnect(MonetDB.R(), "demo") data(mtcars) dbWriteTable(conn, "mtcars", mtcars) -mpgplus42 <- dbApply(conn, "mtcars", "double", function(d) { +mpgplus42 <- mdbapply(conn, "mtcars", "double", function(d) { d$mpg + 42 }) }} diff --git a/clients/R/Tests/dbapply.R b/clients/R/Tests/dbapply.R --- a/clients/R/Tests/dbapply.R +++ b/clients/R/Tests/dbapply.R @@ -14,6 +14,7 @@ if (length(args) > 1) options(monetdb.insert.splitsize=10) options(monetdb.profile=F) +options(monetdb.debug.query=T) tname <- "monetdbtest" @@ -33,27 +34,49 @@ data(mtcars) dbWriteTable(con,tname,mtcars, overwrite=T) stopifnot(identical(TRUE, dbExistsTable(con,tname))) -res <- dbApply(con, tname, function(d) { +res <- mdbapply(con, tname, function(d) { d$mpg }) stopifnot(identical(res, mtcars$mpg)) -res <- dbApply(con, tname, function(d) { +res <- mdbapply(con, tname, function(d) { min(d$mpg) }) stopifnot(identical(res, min(mtcars$mpg))) # model fitting / in-db application -fitted <- lm(mpg~.,data=mtcars) -predictions <- dbApply(con,tname,function(d) { - predict(fitted, newdata=d) +fitted <- lm(mpg~., data=mtcars) +predictions <- mdbapply(con, tname, function(d) { + predict(fitted, newdata=data.frame(d, stringsAsFactors=T)) }) stopifnot(identical(unname(predict(fitted, newdata=mtcars)), unname(predictions))) +# make sure we bubble up the error +haderror <- FALSE +tryCatch({ + res <- mdbapply(con,tname,function(d) { + stop("i am an error") + }) +}, error=function(e) { + haderror <<- TRUE +}) +stopifnot(haderror) + +# run simple test again to make sure the error did dbRollback() and we are consistent +res <- mdbapply(con, tname, function(d) { + d$mpg +}) +stopifnot(identical(res, mtcars$mpg)) + + +# additional parameters +res <- mdbapply(con,tname,function(d, n, m) { + n+m +}, 20, 22) + dbRemoveTable(con,tname) stopifnot(identical(FALSE, dbExistsTable(con,tname))) - print("SUCCESS") _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list