Changeset: 548ec8e2e127 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=548ec8e2e127
Modified Files:
	monetdb5/extras/rdf/rdfontologyload.c
	monetdb5/extras/rdf/rdfontologyload.h
	monetdb5/extras/rdf/rdfschema.c
	monetdb5/extras/rdf/rdfschema.h
	monetdb5/extras/rdf/rdfschema.mal
Branch: rdf
Log Message:

Use the ontology labels if available for the table name.
TODO: Use the ontology property labels if available for the column names in the final schema.

diffs (truncated from 376 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfontologyload.c b/monetdb5/extras/rdf/rdfontologyload.c --- a/monetdb5/extras/rdf/rdfontologyload.c +++ b/monetdb5/extras/rdf/rdfontologyload.c @@ -261,7 +261,7 @@ int compareProp (const void * a, const v } static -str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount, oid **ontattributes, int ontattributesCount){ +str buildOntologyClassesInfo(oid **ontmetadat, int ontmetadataCount, oid **ontattributes, int ontattributesCount, str *tmpOntLabels){ int i; oid classOid; //The class Oid comes from @@ -281,7 +281,7 @@ str buildOntologyClassesInfo(oid **ontme int tmpNumProp = 0; oid* buffProps = NULL; int maxNumPropPerOntology = 1000; - + str *tmpLabelsShortlist = NULL; //Read all ontmetadata and store them in the ontmetaBat @@ -291,6 +291,11 @@ str buildOntologyClassesInfo(oid **ontme if (!(ontmetaBat->T->hash)){ throw(RDF, "buildOntologyClassesInfo", "Cannot allocate the hash for Bat"); } + + tmpLabelsShortlist = (str *) malloc(sizeof(str) * ontmetadataCount); + for (i = 0; i < ontmetadataCount; i++) tmpLabelsShortlist[i] = NULL; + + classIdx = 0; for (i = 0; i < ontmetadataCount; i++){ classOid = ontmetadat[0][i]; assert(classOid != BUN_NONE); @@ -299,8 +304,20 @@ str buildOntologyClassesInfo(oid **ontme if (tmpBun == BUN_NONE){ //If it is a new class if (BUNappend(ontmetaBat,&classOid, TRUE) == NULL) throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat"); + + if (tmpOntLabels[i] != NULL){ + tmpLabelsShortlist[classIdx] = GDKstrdup(tmpOntLabels[i]); + } + else + tmpLabelsShortlist[classIdx] = NULL; + + classIdx++; } + + } + //Also add super class to list of ontology classes + for (i = 0; i < ontmetadataCount; i++){ scOid = ontmetadat[1][i]; if (scOid != BUN_NONE){ //The superClass oid is there @@ -308,6 +325,9 @@ str 
buildOntologyClassesInfo(oid **ontme if (tmpBun == BUN_NONE){ //If it is a new class if (BUNappend(ontmetaBat, &scOid, TRUE) == NULL) throw(RDF, "buildOntologyClassesInfo", "Cannot insert to ontmetaBat"); + + tmpLabelsShortlist[classIdx] = NULL; + classIdx++; } } } @@ -332,6 +352,11 @@ str buildOntologyClassesInfo(oid **ontme tmpontclassSet[i].cOid = *tmpOid; + if (tmpLabelsShortlist[i] != NULL) + tmpontclassSet[i].label = GDKstrdup(tmpLabelsShortlist[i]); + else + tmpontclassSet[i].label = NULL; + //Init other info tmpontclassSet[i].scIdxes = (int *) malloc(sizeof(int) * NUMSC_PER_ONTCLASS); tmpontclassSet[i].numsc = 0; @@ -343,6 +368,13 @@ str buildOntologyClassesInfo(oid **ontme i++; } + //Free + for (i = 0; i < numClass; i++){ + if (tmpLabelsShortlist[i] != NULL) + GDKfree(tmpLabelsShortlist[i]); + } + free(tmpLabelsShortlist); + //Add sc for (i = 0; i < ontmetadataCount; i++){ @@ -425,14 +457,15 @@ str buildOntologyClassesInfo(oid **ontme } str -RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat *msuperid){ +RDFloadsqlontologies(int *ret, bat *auriid, bat *aattrid, bat *muriid, bat *msuperid, bat *mlabelid){ BUN p, q; - BAT *auri = NULL, *aattr = NULL, *muri = NULL, *msuper = NULL; - BATiter aurii, aattri, murii, msuperi; - BUN bun, bun2, bun3, bun4; + BAT *auri = NULL, *aattr = NULL, *muri = NULL, *msuper = NULL, *mlabel = NULL; + BATiter aurii, aattri, murii, msuperi, mlabeli; + BUN bun, bun2, bun3, bun4, bun5; BUN auriCount, muriCount; int i; str schema = "rdf"; + str *tmpOntLabels = NULL; //Set of ontology labels TKNZRopen (NULL, &schema); @@ -462,10 +495,19 @@ RDFloadsqlontologies(int *ret, bat *auri throw(MAL, "rdf.RDFloadsqlontologies", RUNTIME_OBJECT_MISSING); } + if ((mlabel = BATdescriptor(*mlabelid)) == NULL) { + BBPreleaseref(auri->batCacheid); + BBPreleaseref(aattr->batCacheid); + BBPreleaseref(muri->batCacheid); + BBPreleaseref(msuper->batCacheid); + throw(MAL, "rdf.RDFloadsqlontologies", RUNTIME_OBJECT_MISSING); + } + 
aurii = bat_iterator(auri); aattri = bat_iterator(aattr); murii = bat_iterator(muri); msuperi = bat_iterator(msuper); + mlabeli = bat_iterator(mlabel); // load ontattributes i = 0; @@ -517,18 +559,23 @@ RDFloadsqlontologies(int *ret, bat *auri i = 0; bun3 = BUNfirst(muri); bun4 = BUNfirst(msuper); - + bun5 = BUNfirst(mlabel); + muriCount = BATcount(muri); ontmetadata = (oid**) malloc(sizeof(oid *) * 2); if (!ontmetadata) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); - ontmetadata[0] = malloc(sizeof(str) * muriCount); // uri - ontmetadata[1] = malloc(sizeof(str) * muriCount); // superclass + ontmetadata[0] = malloc(sizeof(oid) * muriCount); // uri + ontmetadata[1] = malloc(sizeof(oid) * muriCount); // superclass if (!ontmetadata[0] || !ontmetadata[1]) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + tmpOntLabels = (str*)malloc(sizeof(str) * muriCount); //labels of ontology classes + if (!tmpOntLabels) fprintf(stderr, "ERROR: Couldn't malloc memory!\n"); + BATloop(muri, p, q){ str muristr = (str) BUNtail(murii, bun3 + i); str msuperstr = (str) BUNtail(msuperi, bun4 + i); + str mlabelstr = (str) BUNtail(mlabeli, bun5 + i); oid murioid, msuperoid; @@ -554,6 +601,14 @@ RDFloadsqlontologies(int *ret, bat *auri } else { ontmetadata[1][ontmetadataCount] = msuperoid; } + + if (strcmp(mlabelstr, "\x80") == 0) { + tmpOntLabels[ontmetadataCount] = NULL; + } else { + tmpOntLabels[ontmetadataCount] = GDKstrdup(mlabelstr); + } + + ontmetadataCount += 1; ++i; @@ -562,8 +617,14 @@ RDFloadsqlontologies(int *ret, bat *auri GDKfree(msuperstr2); } - buildOntologyClassesInfo(ontmetadata, ontmetadataCount, ontattributes, ontattributesCount); + buildOntologyClassesInfo(ontmetadata, ontmetadataCount, ontattributes, ontattributesCount, tmpOntLabels); + for (i = 0; i < ontmetadataCount; i++){ + if (tmpOntLabels[i] != NULL) + GDKfree(tmpOntLabels[i]); + } + free(tmpOntLabels); + BBPreclaim(auri); BBPreclaim(aattr); BBPreclaim(muri); diff --git 
a/monetdb5/extras/rdf/rdfontologyload.h b/monetdb5/extras/rdf/rdfontologyload.h --- a/monetdb5/extras/rdf/rdfontologyload.h +++ b/monetdb5/extras/rdf/rdfontologyload.h @@ -24,6 +24,7 @@ typedef struct OntClass { oid cOid; /*class Oid*/ + char* label; /*ontology label (if available) */ int* scIdxes; /*Idx of super classes*/ int numsc; /*Number of super classes*/ int numAllocation; @@ -36,7 +37,7 @@ rdf_export str RDFOntologyParser(int *ret, str *location, str *schema); rdf_export str -RDFloadsqlontologies(int *ret, bat *auri, bat *aattr, bat *muri, bat *msuper); +RDFloadsqlontologies(int *ret, bat *auri, bat *aattr, bat *muri, bat *msuper, bat *mlabel); /* rdf_export str diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -158,6 +158,15 @@ str printTKNZStringFromOid(oid id){ return MAL_SUCCEED; } + +static +char isOntologyName(oid valueOid, BUN *ontClassPos){ + *ontClassPos = BUN_NONE; + *ontClassPos = BUNfnd(BATmirror(ontmetaBat), &valueOid); + if (*ontClassPos == BUN_NONE) return 0; + else return 1; +} + //Get the string for static char getStringName(oid objOid, str *objStr, BATiter mapi, BAT *mapbat, char isTblName){ @@ -166,38 +175,57 @@ char getStringName(oid objOid, str *objS oid realObjOid; BUN bun; int i = 0; - - if (objType == URI || objType == BLANKNODE){ - realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4)); - takeOid(realObjOid, objStr); - } - else{ - str tmpObjStr; - str s; - int len; - realObjOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the real objOid from Map or Tokenizer */ - bun = BUNfirst(mapbat); - tmpObjStr = (str) BUNtail(mapi, bun + realObjOid); - - *objStr = GDKstrdup(tmpObjStr); + char hasOntologyLabel = 0; + + #if USE_ONTLABEL_FOR_NAME + if (isTblName){ + char isOntName = 0; + BUN tmpontClassPos = BUN_NONE; + + isOntName = isOntologyName(objOid, &tmpontClassPos); + + if (isOntName == 1){ + //Check if label is 
availabel + if (ontclassSet[tmpontClassPos].label != NULL){ //Use this label + *objStr = GDKstrdup(ontclassSet[tmpontClassPos].label); + hasOntologyLabel = 1; + } + } + } + #endif + + if (hasOntologyLabel == 0){ + if (objType == URI || objType == BLANKNODE){ + realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4)); + takeOid(realObjOid, objStr); + } + else{ + str tmpObjStr; + str s; + int len; + realObjOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the real objOid from Map or Tokenizer */ + bun = BUNfirst(mapbat); + tmpObjStr = (str) BUNtail(mapi, bun + realObjOid); - if (isTblName){ - s = *objStr; - len = strlen(s); - //Replace all non-alphabet character by ___ - for (i = 0; i < len; i++) - { - //printf("i = %d: %c \n",i, s[i]); - if (!isalpha(*s)){ - *s = '_'; - } - s++; - - } - } - - } - + *objStr = GDKstrdup(tmpObjStr); + + if (isTblName){ + s = *objStr; + len = strlen(s); + //Replace all non-alphabet character by ___
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list