Changeset: 054157433545 for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=054157433545 Modified Files: monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h sql/backends/monet5/sql_rdf.c Branch: rdf Log Message:
Create a function for generating a string from both URI-based oids and string-based oids. This function is used for table names, since a name's oid can appear in either the tokenizer or the mapbat. diffs (truncated from 521 to 300 lines): diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -120,6 +120,13 @@ static void initcsIdFreqIdxMap(int* inpu } +char +getObjType(oid objOid){ + char objType = (char) (objOid >> (sizeof(BUN)*8 - 4)) & 7 ; + + return objType; + +} str printTKNZStringFromOid(oid id){ int ret; @@ -140,6 +147,50 @@ str printTKNZStringFromOid(oid id){ return MAL_SUCCEED; } +//Get the string for +static +char getStringName(oid objOid, str *objStr, BATiter mapi, BAT *mapbat, char isTblName){ + + char objType = getObjType(objOid); + oid realObjOid; + BUN bun; + int i = 0; + + if (objType == URI || objType == BLANKNODE){ + realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4)); + takeOid(realObjOid, objStr); + } + else{ + str tmpObjStr; + str s; + int len; + realObjOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the real objOid from Map or Tokenizer */ + bun = BUNfirst(mapbat); + tmpObjStr = (str) BUNtail(mapi, bun + realObjOid); + + *objStr = GDKstrdup(tmpObjStr); + + if (isTblName){ + s = *objStr; + len = strlen(s); + //Replace all non-alphabet character by XXX + for (i = 0; i < len; i++) + { + //printf("i = %d: %c \n",i, s[i]); + if (!isalpha(*s)){ + *s = 'X'; + } + s++; + + } + } + + } + + + return objType; +} + char isCSTable(CS item){ if (item.parentFreqIdx != -1) return 0; @@ -620,13 +671,7 @@ void printSubCSInformation(SubCSSet *sub #endif /*NO_OUTPUTFILE*/ #endif /* NEEDSUBCS */ -char -getObjType(oid objOid){ - char objType = (char) (objOid >> (sizeof(BUN)*8 - 4)) & 7 ; - - return objType; - -} + @@ -1881,8 +1926,6 @@ str printMergedFreqCSSet(CSset *freqCSse str subStr; str objStr; oid objOid; - char objType; - BUN bun; #endif int ret; char* schema 
= "rdf"; @@ -1929,12 +1972,12 @@ str printMergedFreqCSSet(CSset *freqCSse if (i < freqCSset->numOrigFreqCS){ if (cs.subject != BUN_NONE){ takeOid(cs.subject, &subStr); - if (labels[i].name == BUN_NONE) { fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | Subject: %s | FreqParentIdx %d \n", cs.csId, i, "DUMMY", freq, subStr, cs.parentFreqIdx); } else { str labelStr; - takeOid(labels[i].name, &labelStr); + //takeOid(labels[i].name, &labelStr); + getStringName(labels[i].name, &labelStr, mapi, mapbat, 1); fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | Subject: %s | FreqParentIdx %d \n", cs.csId, i, labelStr, freq, subStr, cs.parentFreqIdx); GDKfree(labelStr); } @@ -1946,7 +1989,8 @@ str printMergedFreqCSSet(CSset *freqCSse fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | FreqParentIdx %d \n", cs.csId, i, "DUMMY", freq, cs.parentFreqIdx); } else { str labelStr; - takeOid(labels[i].name, &labelStr); + //takeOid(labels[i].name, &labelStr); + getStringName(labels[i].name, &labelStr, mapi, mapbat, 1); fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | FreqParentIdx %d \n", cs.csId, i, labelStr, freq, cs.parentFreqIdx); GDKfree(labelStr); } @@ -1958,7 +2002,8 @@ str printMergedFreqCSSet(CSset *freqCSse fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | Subject: <Not available> | FreqParentIdx %d \n", cs.csId, i, "DUMMY", freq, cs.parentFreqIdx); } else { str labelStr; - takeOid(labels[i].name, &labelStr); + //takeOid(labels[i].name, &labelStr); + getStringName(labels[i].name, &labelStr, mapi, mapbat, 1); fprintf(fout,"CS " BUNFMT " - FreqId %d - Name: %s (Freq: %d) | Subject: <Not available> | FreqParentIdx %d \n", cs.csId, i, labelStr, freq, cs.parentFreqIdx); GDKfree(labelStr); } @@ -1972,7 +2017,8 @@ str printMergedFreqCSSet(CSset *freqCSse else{ str labelStr = NULL; str labelShortStr = NULL; - takeOid(labels[tmpParentFreqId].name, &labelStr); + //takeOid(labels[tmpParentFreqId].name, &labelStr); + 
getStringName(labels[tmpParentFreqId].name, &labelStr, mapi, mapbat, 1); getPropNameShort(&labelShortStr,labelStr); fprintf(fout, "[%s] ",labelShortStr); GDKfree(labelShortStr); @@ -1999,24 +2045,9 @@ str printMergedFreqCSSet(CSset *freqCSse } if (cs.lstObj != NULL){ objOid = cs.lstObj[j]; - - objType = getObjType(objOid); - - if (objType == URI || objType == BLANKNODE){ - objOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4)); - takeOid(objOid, &objStr); - } - else{ - objOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL; /* Get the real objOid from Map or Tokenizer */ - bun = BUNfirst(mapbat); - objStr = (str) BUNtail(mapi, bun + objOid); - } - + getStringName(objOid, &objStr, mapi, mapbat, 0); fprintf(fout, " O: %s \n", objStr); - - if (objType == URI || objType == BLANKNODE){ - GDKfree(objStr); - } + GDKfree(objStr); } else{ fprintf(fout, " <No Object value> \n"); @@ -2140,7 +2171,7 @@ str printmergeCSSet(CSset *freqCSset, in #if NO_OUTPUTFILE == 0 static -str printsubsetFromCSset(CSset *freqCSset, BAT* subsetIdxBat, int num, int* mergeCSFreqCSMap, CSlabel *label, int sampleVersion){ +str printsubsetFromCSset(CSset *freqCSset, BAT* subsetIdxBat, BAT *mbat, int num, int* mergeCSFreqCSMap, CSlabel *label, int sampleVersion){ int i,j; FILE *fout; @@ -2155,8 +2186,9 @@ str printsubsetFromCSset(CSset *freqCSse CS cs; int tmpNumcand; str canStr; - - + BATiter mapi; + + mapi = bat_iterator(mbat); if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) { throw(RDF, "rdf.rdfschema", "could not open the tokenizer\n"); @@ -2184,7 +2216,9 @@ str printsubsetFromCSset(CSset *freqCSse #if USE_SHORT_NAMES str canStrShort = NULL; #endif - takeOid(label[freqIdx].candidates[j], &canStr); + //takeOid(label[freqIdx].candidates[j], &canStr); + getStringName(label[freqIdx].candidates[j], &canStr, mapi, mbat, 1); + #if USE_SHORT_NAMES getPropNameShort(&canStrShort, canStr); fprintf(fout," %s ::: ", canStrShort); @@ -3445,7 +3479,7 @@ void doMerge(CSset *freqCSset, int ruleN } static -str 
mergeMaxFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount){ +str mergeMaxFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, oid** ontmetadata, int ontmetadataCount,bat *mapbatid){ int i; #if !USE_MULTIWAY_MERGING @@ -3467,6 +3501,8 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, char* schema = "rdf"; int ret = 0; str tmpLabel; + BAT *mbat = NULL; + BATiter mapi; #if USE_SHORT_NAMES str canStrShort = NULL; @@ -3479,12 +3515,19 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, (void) cs1; (void) cs2; #endif + (void) mapbatid; + labelStat = initLabelStat(); buildLabelStat(labelStat, (*labels), freqCSset, TOPK); printf("Num FreqCSadded before using S1 = %d \n", freqCSset->numCSadded); #if OUTPUT_FREQID_PER_LABEL + if ((mbat = BATdescriptor(*mapbatid)) == NULL) { + throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING); + } + mapi = bat_iterator(mbat); + if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) { throw(RDF, "rdf.rdfschema", "could not open the tokenizer\n"); @@ -3627,7 +3670,9 @@ str mergeMaxFreqCSByS1(CSset *freqCSset, #endif /* USE_MULTIWAY_MERGING */ #if OUTPUT_FREQID_PER_LABEL - takeOid(*name, &tmpLabel); + //takeOid(*name, &tmpLabel); + getStringName(*name, &tmpLabel, mapi, mbat, 1); + #if USE_SHORT_NAMES getPropNameShort(&canStrShort, tmpLabel); fprintf(fout,"Label %d: %s \n", i, canStrShort); @@ -5659,7 +5704,7 @@ void getTblName(char *name, oid nameId){ } */ -void getTblName(str *name, oid nameId){ +void getTblName(str *name, oid nameId, BATiter mapi, BAT *mbat){ str canStr = NULL; str canStrShort = NULL; char *pch; @@ -5668,7 +5713,8 @@ void getTblName(str *name, oid nameId){ int i; if (nameId != BUN_NONE){ - takeOid(nameId, &canStr); + //takeOid(nameId, &canStr); + getStringName(nameId, &canStr, mapi, mbat, 1); getPropNameShort(&canStrShort, canStr); if (strstr (canStrShort,".") != NULL || @@ -5788,7 +5834,8 @@ str printSampleData(CSSample *csSample, #if USE_SHORT_NAMES str canStrShort = NULL; #endif 
- takeOid(sample.candidates[j], &canStr); + //takeOid(sample.candidates[j], &canStr); + getStringName(sample.candidates[j], &canStr, mapi, mbat, 1); #if USE_SHORT_NAMES getPropNameShort(&canStrShort, canStr); fprintf(fout,";%s", canStrShort); @@ -5805,7 +5852,8 @@ str printSampleData(CSSample *csSample, if (sample.name != BUN_NONE){ str canStrShort = NULL; - takeOid(sample.name, &canStr); + //takeOid(sample.name, &canStr); + getStringName(sample.name, &canStr, mapi, mbat, 1); getPropNameShort(&canStrShort, canStr); if (strstr (canStrShort,".") != NULL || @@ -5971,7 +6019,9 @@ str printSampleData(CSSample *csSample, if (sample.name != BUN_NONE){ str canStrShort = NULL; - takeOid(sample.name, &canStr); + //takeOid(sample.name, &canStr); + getStringName(sample.name, &canStr, mapi, mbat, 1); + getPropNameShort(&canStrShort, canStr); if (strstr (canStrShort,".") != NULL || @@ -6011,7 +6061,7 @@ str printSampleData(CSSample *csSample, #if NO_OUTPUTFILE == 0 static -str printFullSampleData(CSSampleExtend *csSampleEx, int num){ _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list