Changeset: 408ff01df3c3 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=408ff01df3c3 Added Files: tools/embedded/rpackage/tests/testthat/test_dplyr.R Modified Files: clients/R/MonetDB.R/NAMESPACE clients/R/MonetDB.R/R/dbi.R clients/R/MonetDB.R/R/dplyr.R monetdb5/extras/rapi/converters.c tools/embedded/embedded.c tools/embedded/rpackage/R/monetdb.R tools/embedded/rpackage/tests/testthat/test_dbi.R Branch: embedded Log Message:
More testing and fixes diffs (truncated from 365 to 300 lines): diff --git a/clients/R/MonetDB.R/NAMESPACE b/clients/R/MonetDB.R/NAMESPACE --- a/clients/R/MonetDB.R/NAMESPACE +++ b/clients/R/MonetDB.R/NAMESPACE @@ -22,7 +22,9 @@ export(src_translate_env.src_monetdb) export(src_desc.src_monetdb) export(tbl.src_monetdb) export(db_query_fields.MonetDBConnection) +export(db_query_fields.MonetDBEmbeddedConnection) export(db_query_rows.MonetDBConnection) +export(db_query_rows.MonetDBEmbeddedConnection) export(db_save_query.MonetDBConnection) export(db_insert_into.MonetDBConnection) export(db_create_index.MonetDBConnection) diff --git a/clients/R/MonetDB.R/R/dbi.R b/clients/R/MonetDB.R/R/dbi.R --- a/clients/R/MonetDB.R/R/dbi.R +++ b/clients/R/MonetDB.R/R/dbi.R @@ -299,7 +299,6 @@ setMethod("dbSendQuery", signature(conn= }) - # This one does all the work in this class setMethod("dbSendQuery", signature(conn="MonetDBEmbeddedConnection", statement="character"), def=function(conn, statement, ..., list=NULL) { @@ -311,11 +310,7 @@ setMethod("dbSendQuery", signature(conn= if (!is.null(list)) statement <- .bindParameters(statement, list) } env <- NULL - if (getOption("monetdb.debug.query", F)) message("QQ: '", statement, "'") - # make the progress bar wait for querylog.define - # if (getOption("monetdb.profile", T)) .profiler_arm() - - # the actual request + if (getOption("monetdb.debug.query", F)) message("QQ: '", statement, "'") resp <- monetdb_embedded_query(statement) @@ -403,7 +398,6 @@ setMethod("dbWriteTable", "MonetDBConnec if (overwrite && append) { stop("Setting both overwrite and append to TRUE makes no sense.") } - qname <- quoteIfNeeded(conn, name) if (dbExistsTable(conn, qname)) { if (overwrite) dbRemoveTable(conn, qname) @@ -422,8 +416,15 @@ setMethod("dbWriteTable", "MonetDBConnec if (csvdump) { warning("Ignoring csvdump setting in embedded mode") } + # convert Date cols to characters + # TODO: use type mapping to select correct converters + classes <- unlist(lapply(value, class)) + datecols <- names(classes[classes=="Date"]) + for (c in datecols) { + value[[c]] <- as.character(value[[c]]) + } insres <- monetdb_embedded_append(qname, value) - if (!is.logical(insres) && insres) { + if (!is.logical(insres)) { stop("Failed to insert data: ", insres) } } diff --git a/clients/R/MonetDB.R/R/dplyr.R b/clients/R/MonetDB.R/R/dplyr.R --- a/clients/R/MonetDB.R/R/dplyr.R +++ b/clients/R/MonetDB.R/R/dplyr.R @@ -53,10 +53,26 @@ db_query_fields.MonetDBConnection <- fun dbGetQuery(con, dplyr::build_sql("PREPARE SELECT * FROM ", sql))$column } +db_query_fields.MonetDBEmbeddedConnection <- function(con, sql, ...) { + # PREPARE does not work in embedded mode + names(dbGetQuery(con, dplyr::build_sql("SELECT * FROM ", sql, " WHERE 1=0"))) +} + db_query_rows.MonetDBConnection <- function(con, sql, ...) { monetdb_queryinfo(con,sql)$rows } +db_query_rows.MonetDBEmbeddedConnection <- function(con, sql, ...) { + # TODO: is there a better way of doing this? + if (!grepl("^\\w*SELECT.*", as.character(sql), perl=T, ignore.case=T)) { + sql <- dplyr::build_sql("SELECT * FROM ", sql) + } + dbGetQuery(con, dplyr::build_sql("CREATE TEMPORARY TABLE dqrv AS ", sql, " WITH DATA")) + ct <- dbGetQuery(con, dplyr::build_sql("SELECT COUNT(*) AS ct FROM dqrv"))$ct[[1]] + dbGetQuery(con, dplyr::build_sql("DROP TABLE dqrv")) + return(ct) +} + db_insert_into.MonetDBConnection <- function(con, table, values, ...) { dbWriteTable(con,dbQuoteIdentifier(con,table),values, append=T,transaction=F,csvdump=T) diff --git a/monetdb5/extras/rapi/converters.c b/monetdb5/extras/rapi/converters.c --- a/monetdb5/extras/rapi/converters.c +++ b/monetdb5/extras/rapi/converters.c @@ -117,7 +117,7 @@ static SEXP bat_to_sexp(BAT* b) { } static BAT* sexp_to_bat(SEXP s, int type) { - BAT* b; + BAT* b = NULL; BUN cnt = LENGTH(s); switch (type) { case TYPE_int: { @@ -143,7 +143,8 @@ static BAT* sexp_to_bat(SEXP s, int type break; } #endif - case TYPE_bte: { // only R logical types fit into bte BATs + case TYPE_bte: + case TYPE_bit: { // only R logical types fit into bit BATs if (!IS_LOGICAL(s)) { return NULL; } @@ -200,6 +201,7 @@ static BAT* sexp_to_bat(SEXP s, int type if (b != NULL) { BATsetcount(b, cnt); + BBPkeepref(b->batCacheid); } return b; } diff --git a/tools/embedded/embedded.c b/tools/embedded/embedded.c --- a/tools/embedded/embedded.c +++ b/tools/embedded/embedded.c @@ -171,7 +171,7 @@ char* monetdb_append(const char* schema, assert(table != NULL && data != NULL && col_ct > 0); // very black MAL magic below - mb.var = GDKmalloc(6 * sizeof(VarRecord*)); + mb.var = GDKmalloc(nvar * sizeof(VarRecord*)); stk = GDKmalloc(sizeof(MalStack) + nvar * sizeof(ValRecord)); pci = GDKmalloc(sizeof(InstrRecord) + nvar * sizeof(int)); assert(mb.var != NULL && stk != NULL && pci != NULL); // cough, cough @@ -326,13 +326,14 @@ SEXP monetdb_append_R(SEXP schemasexp, S } ad = GDKmalloc(col_ct * sizeof(append_data)); + assert(ad != NULL); for (i = 0; i < col_ct; i++) { SEXP ret_col = VECTOR_ELT(tabledatasexp, i); int bat_type = t_column_types[i]; b = sexp_to_bat(ret_col, bat_type); if (b == NULL) { - msg = GDKstrdup("eek"); // TODO: better error message here + msg = createException(MAL, "embedded", "Could not convert column %i %s to type %i ", i, t_column_names[i], bat_type); goto wrapup; } ad[i].colname = t_column_names[i]; @@ -340,6 +341,7 @@ SEXP monetdb_append_R(SEXP schemasexp, S } msg = monetdb_append(schema, name, ad, col_ct); + wrapup: if (t_column_names != NULL) { GDKfree(t_column_names); diff --git a/tools/embedded/rpackage/R/monetdb.R b/tools/embedded/rpackage/R/monetdb.R --- a/tools/embedded/rpackage/R/monetdb.R +++ b/tools/embedded/rpackage/R/monetdb.R @@ -50,6 +50,8 @@ monetdb_embedded_query <- function(query monetdb_embedded_append <- function(table, tdata, schema="sys") { table <- as.character(table) + table <- gsub("(^\"|\"$)", "", table) + if (length(table) != 1) { stop("Need a single table name as parameter.") } diff --git a/tools/embedded/rpackage/tests/testthat/test_dbi.R b/tools/embedded/rpackage/tests/testthat/test_dbi.R --- a/tools/embedded/rpackage/tests/testthat/test_dbi.R +++ b/tools/embedded/rpackage/tests/testthat/test_dbi.R @@ -171,13 +171,12 @@ test_that("evil table from survey works" dbRollback(con) }) - -# some DBI test cases borrowed from RSQLite +# below some DBI test cases 'borrowed' from RSQLite basicDf <- data.frame( - name = c("Alice", "Bob", "Carl", "NA", NA), - fldInt = as.integer(c(as.integer(1:4), NA)), - fldDbl = as.double(c(1.1, 2.2, 3.3, 4.4, NA)), - stringsAsFactors = FALSE + name = c("Alice", "Bob", "Carl", "NA", NA), + fldInt = as.integer(c(as.integer(1:4), NA)), + fldDbl = as.double(c(1.1, 2.2, 3.3, 4.4, NA)), + stringsAsFactors = FALSE ) test_that("round-trip leaves data.frame unchanged", { @@ -197,17 +196,14 @@ test_that("NAs work in first row", { test_that("row-by-row fetch is equivalent", { dbWriteTable(con, "t1", basicDf, row.names = FALSE) - rs <- dbSendQuery(con, "SELECT * FROM t1") on.exit(dbClearResult(rs)) for (i in 1:5) { row <- dbFetch(rs, 1L) expect_equal(row, basicDf[i, ], check.attributes = FALSE) } - row <- dbFetch(rs, 1L) expect_equal(nrow(row), 0L) - expect_true(dbHasCompleted(rs)) dbRemoveTable(con, "t1") }) @@ -221,16 +217,16 @@ test_that("row-by-row fetch is equivalen # }) test_that("correct number of columns, even if 0 rows", { - ans <- dbGetQuery(con, "select 1 as a, 2 as b where 1=1") - expect_equal(dim(ans), c(1L, 2L)) - ans <- dbGetQuery(con, "select 1 as a, 2 as b where 0=1") - expect_equal(dim(ans), c(0L, 2L)) + ans <- dbGetQuery(con, "select 1 as a, 2 as b where 1=1") + expect_equal(dim(ans), c(1L, 2L)) + ans <- dbGetQuery(con, "select 1 as a, 2 as b where 0=1") + expect_equal(dim(ans), c(0L, 2L)) }) test_that("accessing cleared result throws error", { - res <- dbSendQuery(con, "SELECT 1;") - dbClearResult(res) - expect_error(dbFetch(res)) + res <- dbSendQuery(con, "SELECT 1;") + dbClearResult(res) + expect_error(dbFetch(res)) }) test_that("fetch with no arguments gets all rows", { @@ -251,7 +247,6 @@ test_that("fetch progressively pulls in dbRemoveTable(con, "test") }) - test_that("dis/re-connect", { expect_true(dbIsValid(con)) dbDisconnect(con) diff --git a/tools/embedded/rpackage/tests/testthat/test_dplyr.R b/tools/embedded/rpackage/tests/testthat/test_dplyr.R new file mode 100644 --- /dev/null +++ b/tools/embedded/rpackage/tests/testthat/test_dplyr.R @@ -0,0 +1,118 @@ +library(testthat) +library(MonetDB.R) +library(dplyr) + +library(Lahman) +data(Batting) + +test_that("source creation / import", { + dps <<- src_monetdb(dbname="", embedded=tempdir()) + copy_lahman(dps) +}) + +test_that("basics", { + batting <<- tbl(dps, "Batting") + expect_equal(dim(batting), dim(Batting)) + expect_equal(colnames(batting), names(Batting)) + expect_equal(nrow(batting), nrow(Batting)) + expect_equal(dim(collect(batting)), dim(Batting)) + expect_equal(dim(filter(batting, yearID > 2005, G > 130)), c(1126, 24)) + expect_equal(dim(select(batting, playerID:lgID)), c(97889, 5)) + + print(dim(arrange(batting, playerID, desc(yearID)))) + print(dim(summarise(batting, G = mean(G), n = n()))) + print(dim(mutate(batting, rbi2 = if(!is.null(AB) & AB > 0) 1.0 * R / AB else 0))) +}) + + +stop() + + +# co* verbs + +# cc <- compute(batting) +# head(cc) + + +# note that all operations are lazy: they don't do anything until you +# request the data, either by `print()`ing it (which shows the first ten +# rows), by looking at the `head()`, or `collect()` the results locally. +nrow(head(collect(filter(batting, yearID > 2010)), n=15L)) + +# Group by operations ------------------------------------------------------- +# To perform operations by group, create a grouped object with group_by +players <- group_by(batting, playerID) +length(group_size(players)) > 1 +nrow(head(summarise(players, mean_g = mean(G), best_ab = max(AB)), n=16L)) + +# When you group by multiple level, each summarise peels off one level +per_year <- group_by(batting, playerID, yearID) +stints <- summarise(per_year, stints = max(stint)) +nrow(head(filter(stints, stints > 3), n=17L)) +nrow(head(summarise(stints, max(stints)), n=18L)) + _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list