Changeset: 949a39673e5c for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=949a39673e5c Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdftypes.h sql/backends/monet5/sql.h sql/backends/monet5/sql_rdf.c sql/backends/monet5/sql_rdf.mal Branch: rdf Log Message:
Bulk operator for rdf_idtostr diffs (truncated from 398 to 300 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -11402,8 +11402,11 @@ RDFreorganize(int *ret, CStableStat *cst //if (1) printListOntology(); readParamsInput(); - - + + printf("Min positive integer-encoded oid is: "BUNFMT"\n", MIN_POSI_INT_OID); + printf("Max positive integer-encoded oid is: "BUNFMT"\n", MAX_POSI_INT_OID); + printf("Min negative integer-encoded oid is: "BUNFMT"\n", MIN_NEGA_INT_OID); + printf("Max negative integer-encoded oid is: "BUNFMT"\n", MAX_NEGA_INT_OID); if (RDFextractCSwithTypes(ret, sbatid, pbatid, obatid, mapbatid, ontbatid, freqThreshold, freqCSset,&subjCSMap, &maxCSoid, &maxNumPwithDup, &labels, &csRelMergeFreqSet) != MAL_SUCCEED){ throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs"); @@ -11710,9 +11713,11 @@ RDFreorganize(int *ret, CStableStat *cst cstablestat->resbat = BATcopy(sNewBat, sNewBat->htype, sNewBat->ttype, TRUE, TRANSIENT); cstablestat->repbat = BATcopy(pNewBat, pNewBat->htype, pNewBat->ttype, TRUE, TRANSIENT); cstablestat->reobat = BATcopy(oNewBat, oNewBat->htype, oNewBat->ttype, TRUE, TRANSIENT); - if (RDFtriplesubsort(&cstablestat->resbat, &cstablestat->repbat, &cstablestat->reobat) != MAL_SUCCEED){ - throw(RDF, "rdf.RDFreorganize", "Problem in sorting reorganized SPO"); - } + if (RDFtriplesubsort(&cstablestat->repbat, &cstablestat->resbat, &cstablestat->reobat) != MAL_SUCCEED){ + throw(RDF, "rdf.RDFreorganize", "Problem in sorting reorganized PSO"); + } + //Set the property for the BAT + cstablestat->repbat->tsorted = 1; printf("Done\n"); #endif diff --git a/monetdb5/extras/rdf/rdftypes.h b/monetdb5/extras/rdf/rdftypes.h --- a/monetdb5/extras/rdf/rdftypes.h +++ b/monetdb5/extras/rdf/rdftypes.h @@ -36,7 +36,6 @@ #define rdf_export extern #endif - typedef enum { URI, DATETIME, @@ -47,6 +46,13 @@ typedef enum { MULTIVALUES // For the multi-value property } ObjectType; +#define MIN_POSI_INT_OID ((BUN)INTEGER << (sizeof(BUN)*8 - 4)) +#define MAX_POSI_INT_OID (((BUN)((INTEGER << 1) + 1) << (sizeof(BUN)*8 - 5)) - 1) + +#define MIN_NEGA_INT_OID ((BUN)((INTEGER << 1) + 1) << (sizeof(BUN)*8 - 5)) +#define MAX_NEGA_INT_OID (((BUN)DOUBLE << (sizeof(BUN)*8 - 4)) - 1) + + rdf_export char* substring(char *string, int position, int length); diff --git a/sql/backends/monet5/sql.h b/sql/backends/monet5/sql.h --- a/sql/backends/monet5/sql.h +++ b/sql/backends/monet5/sql.h @@ -148,8 +148,11 @@ sql5_export str SQLrdfShred(Client cntxt sql5_export str SQLrdfreorganize(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str SQLrdfRetrieveSubschema(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str SQLrdfScan(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); -sql5_export str SQLrdfidtostr(str *ret, oid *id); +//sql5_export str SQLrdfidtostr(str *ret, oid *id); +sql5_export str SQLrdfidtostr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); +sql5_export str SQLrdfidtostr_bat(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str SQLrdfstrtoid(oid *ret, str *s); +//sql5_export str SQLrdfstrtoid(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str SQLoptimizersUpdate(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci); sql5_export str month_interval_str(int *ret, const str *s, const int *ek, const int *sk); sql5_export str second_interval_str(lng *res, const str *s, const int *ek, const int *sk); diff --git a/sql/backends/monet5/sql_rdf.c b/sql/backends/monet5/sql_rdf.c --- a/sql/backends/monet5/sql_rdf.c +++ b/sql/backends/monet5/sql_rdf.c @@ -874,17 +874,18 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr respotbl = mvc_create_table(m, sch, "triples", tt_table, 0, SQL_PERSIST, 0, 3); totalNoTablesCreated++; + mvc_create_column(m, respotbl, "p", &tpe); mvc_create_column(m, respotbl, "s", &tpe); - mvc_create_column(m, respotbl, "p", &tpe); mvc_create_column(m, respotbl, "o", &tpe); + + store_funcs.append_col(m->session->tr, + mvc_bind_column(m, respotbl,"p" ), + cstablestat->repbat, TYPE_bat); store_funcs.append_col(m->session->tr, mvc_bind_column(m, respotbl,"s" ), cstablestat->resbat, TYPE_bat); store_funcs.append_col(m->session->tr, - mvc_bind_column(m, respotbl,"p" ), - cstablestat->repbat, TYPE_bat); - store_funcs.append_col(m->session->tr, mvc_bind_column(m, respotbl,"o" ), cstablestat->reobat, TYPE_bat); printf("Done\n"); @@ -1140,58 +1141,183 @@ SQLrdfreorganize(Client cntxt, MalBlkPtr #endif /* HAVE_RAPTOR */ } -#if 0 +#if 1 str SQLrdfidtostr(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ str msg; mvc *m = NULL; - BAT *lmapBat = NULL, *rmapBat = NULL; + BAT *lmapBat = NULL, *rmapBat = NULL, *mBat = NULL; bat lmapBatId, rmapBatId; str bnamelBat = "map_to_tknz_left"; str bnamerBat = "map_to_tknz_right"; + char *schema = "rdf"; + sql_schema *sch; BUN pos; oid *origId; + ObjectType objType; oid *id = (oid *)getArgReference(stk,pci,1); - str *s; + str *ret = (str *) getArgReference(stk, pci, 0); rethrow("sql.rdfidtostr", msg, getSQLContext(cntxt, mb, &m, NULL)); + objType = getObjType(*id); + + if (objType == STRING){ + str tmpObjStr; + BATiter mapi; + if ((sch = mvc_bind_schema(m, schema)) == NULL) + throw(SQL, "sql.rdfShred", "3F000!schema missing"); + + mBat = mvc_bind(m, schema, "map0", "lexical",0); + mapi = bat_iterator(mBat); + + pos = (*id) - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the position of the string in the map bat */ + tmpObjStr = (str) BUNtail(mapi, BUNfirst(mBat) + pos); + + *ret = GDKstrdup(tmpObjStr); + + } + else if (objType == URI || objType == BLANKNODE){ + lmapBatId = BBPindex(bnamelBat); + rmapBatId = BBPindex(bnamerBat); + + if (lmapBatId == 0 || rmapBatId == 0){ + throw(SQL, "sql.SQLrdfidtostr", "The lmap/rmap Bats should be built already"); + } + + if ((lmapBat= BATdescriptor(lmapBatId)) == NULL) { + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + + if ((rmapBat= BATdescriptor(rmapBatId)) == NULL) { + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + + pos = BUNfnd(BATmirror(lmapBat),id); + if (pos == BUN_NONE) //this id is not converted to a new id + origId = id; + else + origId = (oid *) Tloc(rmapBat, pos); + + /*First convert the id to the original tokenizer odi */ + rethrow("sql.rdfidtostr", msg, takeOid(*origId, ret)); + } else { + throw(SQL, "sql.SQLrdfidtostr", "This Id cannot convert to str"); + } + + if (msg != MAL_SUCCEED){ + throw(SQL, "sql.SQLrdfidtostr", "Problem in retrieving str from oid"); + } + + return msg; +} + + +str +SQLrdfidtostr_bat(Client cntxt, MalBlkPtr mb, MalStkPtr stk, InstrPtr pci){ + str msg; + mvc *m = NULL; + BAT *lmapBat = NULL, *rmapBat = NULL, *mBat = NULL; + bat lmapBatId, rmapBatId; + str bnamelBat = "map_to_tknz_left"; + str bnamerBat = "map_to_tknz_right"; + char *schema = "rdf"; + sql_schema *sch; + BUN pos; + oid *origId; + ObjectType objType; + str tmpObjStr; + BATiter mapi; + BAT *srcBat = NULL, *desBat = NULL; + BATiter srci; + BUN p, q; + bat *srcbid, *desbid; + oid *id; + str s; + srcbid = (bat *)getArgReference(stk,pci,1); + desbid = (bat *) getArgReference(stk, pci, 0); + + rethrow("sql.rdfidtostr", msg, getSQLContext(cntxt, mb, &m, NULL)); + + if ((srcBat = BATdescriptor(*srcbid)) == NULL){ + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + srci = bat_iterator(srcBat); + + desBat = BATnew(TYPE_void, TYPE_str, BATcount(srcBat) + 1, TRANSIENT); + BATseqbase(desBat, 0); + + /* Init the BATs for looking up the URIs*/ lmapBatId = BBPindex(bnamelBat); rmapBatId = BBPindex(bnamerBat); if (lmapBatId == 0 || rmapBatId == 0){ - throw(SQL, "sql.SQLrdfidtostr", "The lmap/rmap Bats should be built already"); + throw(SQL, "sqlbat.SQLrdfidtostr_bat", "The lmap/rmap Bats should be built already"); } if ((lmapBat= BATdescriptor(lmapBatId)) == NULL) { - throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + throw(MAL, "sqlbat.SQLrdfidtostr_bat", RUNTIME_OBJECT_MISSING); } if ((rmapBat= BATdescriptor(rmapBatId)) == NULL) { - throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + throw(MAL, "sqlbat.SQLrdfidtostr_bat", RUNTIME_OBJECT_MISSING); } - pos = BUNfnd(BATmirror(lmapBat),id); - if (pos == BUN_NONE) //this id is not converted to a new id - origId = id; - else - origId = (oid *) Tloc(rmapBat, pos); - - VALset(getArgReference(stk, pci, 1), TYPE_oid, origId); + /* Init the map BAT for looking up the literal values*/ + if ((sch = mvc_bind_schema(m, schema)) == NULL) + throw(SQL, "sql.rdfShred", "3F000!schema missing"); - /*First convert the id to the original tokenizer odi */ - rethrow("sql.rdfidtostr", msg, TKNZRtakeOid(cntxt,mb,stk,pci)); - - s = (str *) getArgReference(stk, pci, 0); - - if (msg == MAL_SUCCEED){ - //throw(SQL, "sql.rdfidtostr", "String for "BUNFMT" is %s\n",*id, *s); - return sql_message("Literal value: %s\n", *s); + mBat = mvc_bind(m, schema, "map0", "lexical",0); + mapi = bat_iterator(mBat); + + + BATloop(srcBat, p, q){ + id = (oid *)BUNtloc(srci, p); + + objType = getObjType(*id); + + if (objType == STRING){ + + pos = (*id) - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the position of the string in the map bat */ + tmpObjStr = (str) BUNtail(mapi, BUNfirst(mBat) + pos); + + s = GDKstrdup(tmpObjStr); + } + else if (objType == URI || objType == BLANKNODE){ + + pos = BUNfnd(BATmirror(lmapBat),id); + if (pos == BUN_NONE) //this id is not converted to a new id + origId = id; + else + origId = (oid *) Tloc(rmapBat, pos); + + /*First convert the id to the original tokenizer odi */ + rethrow("sql.rdfidtostr", msg, takeOid(*origId, &s)); + } else { + throw(SQL, "sql.SQLrdfidtostr", "This Id cannot convert to str"); + } + + + if (msg != MAL_SUCCEED){ + throw(SQL, "sql.SQLrdfidtostr", "Problem in retrieving str from oid"); + } + + //Append to desBAT + desBat = BUNappend(desBat, s, TRUE); + } + *desbid = desBat->batCacheid; + BBPkeepref(*desbid); + + BBPunfix(lmapBat->batCacheid); + BBPunfix(rmapBat->batCacheid); + BBPunfix(mBat->batCacheid); + return msg; } -#endif _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list