Changeset: 0df14e109ce1 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0df14e109ce1
Modified Files:
	monetdb5/extras/rapi/converters.c
Branch: embedded
Log Message:
new low uniqueness string column conversion diffs (95 lines): diff --git a/monetdb5/extras/rapi/converters.c b/monetdb5/extras/rapi/converters.c --- a/monetdb5/extras/rapi/converters.c +++ b/monetdb5/extras/rapi/converters.c @@ -111,63 +111,33 @@ static SEXP bat_to_sexp(BAT* b) { BAT_TO_REALSXP(b, lng, varvalue, 0); break; case TYPE_str: { // there is only one string type, thus no macro here - // this was found to be always slower. - // TODO: find a way to keep BATgroup from memory-mapping the group BAT - /*if (GDK_ELIMDOUBLES(b->T->vheap) && BATcount(b) > 0) { - BAT *grp, *ext; - BUN p, q; - BATiter b_it, ext_it, grp_it; - SEXP *sptrs;//#include <time.h> - // we group on the passed string column to get unique strings, then only convert each string once - - gdk_return r = BATgroup(&grp, &ext, NULL, b, NULL, NULL, NULL); - if (r != GDK_SUCCEED || grp == NULL || ext == NULL) { + BUN p, q, j = 0; + BATiter li = bat_iterator(b); + varvalue = PROTECT(NEW_STRING(BATcount(b))); + if (varvalue == NULL) { + return NULL; + } + /* special case where we exploit the duplicate-eliminated string heap */ + if (GDK_ELIMDOUBLES(b->T->vheap)) { + void* sexp_ptrs = GDKzalloc(b->T->vheap->free * sizeof(void*)); + if (!sexp_ptrs) { return NULL; } - - sptrs = GDKmalloc(sizeof(SEXP) * BATcount(ext)); - varvalue = PROTECT(NEW_STRING(BATcount(b))); - - if (sptrs == NULL || varvalue == NULL) { - BBPunfix(grp->batCacheid); - BBPunfix(ext->batCacheid); - return NULL; - } - - b_it = bat_iterator(b); - ext_it = bat_iterator(ext); - grp_it = bat_iterator(grp); - - if (b->T->nonil) { - BATloop(ext, p, q) { - sptrs[p] = mkCharCE((const char *) BUNtail(b_it, *((oid*) BUNtail(ext_it, p))), CE_UTF8);; - } - } else { - BATloop(ext, p, q) { - const char* t = (const char *) BUNtail(b_it, *((oid*) BUNtail(ext_it, p))); + BATloop(b, p, q) { + const char *t = (const char *) BUNtail(li, p); + void** s = &sexp_ptrs[t - b->T->vheap->base]; + if (!*s) { if (strcmp(t, str_nil) == 0) { - sptrs[p] = NA_STRING; + 
*s = (void*) NA_STRING; } else { - sptrs[p] = mkCharCE(t, CE_UTF8); + *s = (void*) mkCharCE(t, CE_UTF8); } } + STRING_ELT(varvalue, j++) = (SEXP) *s; } - - BATloop(grp, p, q) { - STRING_ELT(varvalue, p) = sptrs[*((oid*) BUNtail(grp_it, p))]; - } - - GDKfree(sptrs); - BBPunfix(grp->batCacheid); - BBPunfix(ext->batCacheid); + GDKfree(sexp_ptrs); } - else {*/ - BUN p, q, j = 0; - BATiter li = bat_iterator(b); - varvalue = PROTECT(NEW_STRING(BATcount(b))); - if (varvalue == NULL) { - return NULL; - } + else { if (b->T->nonil) { BATloop(b, p, q) { STRING_ELT(varvalue, j++) = mkCharCE( @@ -184,7 +154,7 @@ static SEXP bat_to_sexp(BAT* b) { } } } - //} + } } break; } return varvalue; _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list