Changeset: 054157433545 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=054157433545
Modified Files:
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
        sql/backends/monet5/sql_rdf.c
Branch: rdf
Log Message:

Create a function that generates a string from either a URI-based oid or a
string-based oid.

This function is used for table names, since a name's oid can appear in
either the tokenizer or the mapbat.


diffs (truncated from 521 to 300 lines):

diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -120,6 +120,13 @@ static void initcsIdFreqIdxMap(int* inpu
 }
 
 
+/* Type tag of objOid: (objOid >> (bits in BUN - 4)) masked with 7. */
+char
+getObjType(oid objOid){
+       char topBits = (char) (objOid >> (sizeof(BUN)*8 - 4));
+
+       return topBits & 7;
+}
 
 str printTKNZStringFromOid(oid id){
        int ret; 
@@ -140,6 +147,50 @@ str printTKNZStringFromOid(oid id){
        return MAL_SUCCEED; 
 }
 
+/*
+ * Build the string for objOid in *objStr; returns getObjType(objOid).
+ * URI and BLANKNODE oids are resolved with takeOid(). Any other oid is
+ * looked up in mapbat and duplicated with GDKstrdup(), so *objStr is NULL
+ * when that duplication fails. The caller releases *objStr with GDKfree().
+ * If isTblName is set, every non-alphabetic character of a string taken
+ * from mapbat is replaced by 'X' (strings from takeOid() are untouched).
+ */
+static
+char getStringName(oid objOid, str *objStr, BATiter mapi, BAT *mapbat,
+                   char isTblName){
+       char    objType = getObjType(objOid);
+       oid     realObjOid;
+
+       if (objType == URI || objType == BLANKNODE){
+               /* Remove the type tag bits before the takeOid() lookup */
+               realObjOid = objOid - ((oid)objType << (sizeof(BUN)*8 - 4));
+               takeOid(realObjOid, objStr);
+       }
+       else{
+               str     tmpObjStr;
+               str     s;
+               BUN     bun = BUNfirst(mapbat);
+
+               /* Get the real objOid from Map or Tokenizer */
+               realObjOid = objOid - (objType*2 + 1) * RDF_MIN_LITERAL;
+               tmpObjStr = (str) BUNtail(mapi, bun + realObjOid);
+
+               *objStr = GDKstrdup(tmpObjStr);
+
+               /* Skip the rewrite when GDKstrdup() returned NULL */
+               if (isTblName && *objStr != NULL){
+                       for (s = *objStr; *s != '\0'; s++){
+                               /* <ctype.h> needs an unsigned char value:
+                                * a negative plain char is undefined */
+                               if (!isalpha((unsigned char) *s)){
+                                       *s = 'X';
+                               }
+                       }
+               }
+       }
+       return objType;
+}
+
 
 char isCSTable(CS item){
        if (item.parentFreqIdx != -1) return 0; 
@@ -620,13 +671,7 @@ void printSubCSInformation(SubCSSet *sub
 #endif /*NO_OUTPUTFILE*/
 #endif  /* NEEDSUBCS */
 
-char
-getObjType(oid objOid){
-       char objType = (char) (objOid >> (sizeof(BUN)*8 - 4))  &  7 ;
-
-       return objType; 
-
-}
+
 
 
 
@@ -1881,8 +1926,6 @@ str printMergedFreqCSSet(CSset *freqCSse
        str     subStr; 
        str     objStr; 
        oid     objOid; 
-       char    objType; 
-       BUN     bun; 
        #endif
        int     ret; 
        char*   schema = "rdf";
@@ -1929,12 +1972,12 @@ str printMergedFreqCSSet(CSset *freqCSse
                        if (i < freqCSset->numOrigFreqCS){
                                if (cs.subject != BUN_NONE){
                                        takeOid(cs.subject, &subStr);
-
                                        if (labels[i].name == BUN_NONE) {
                                                fprintf(fout,"CS " BUNFMT " - 
FreqId %d - Name: %s  (Freq: %d) | Subject: %s  | FreqParentIdx %d \n", 
cs.csId, i, "DUMMY", freq, subStr, cs.parentFreqIdx);
                                        } else {
                                                str labelStr;
-                                               takeOid(labels[i].name, 
&labelStr);
+                                               //takeOid(labels[i].name, 
&labelStr);
+                                               getStringName(labels[i].name, 
&labelStr, mapi, mapbat, 1);
                                                fprintf(fout,"CS " BUNFMT " - 
FreqId %d - Name: %s  (Freq: %d) | Subject: %s  | FreqParentIdx %d \n", 
cs.csId, i, labelStr, freq, subStr, cs.parentFreqIdx);
                                                GDKfree(labelStr); 
                                        }
@@ -1946,7 +1989,8 @@ str printMergedFreqCSSet(CSset *freqCSse
                                                fprintf(fout,"CS " BUNFMT " - 
FreqId %d - Name: %s  (Freq: %d) | FreqParentIdx %d \n", cs.csId, i, "DUMMY", 
freq, cs.parentFreqIdx);
                                        } else {
                                                str labelStr;
-                                               takeOid(labels[i].name, 
&labelStr);
+                                               //takeOid(labels[i].name, 
&labelStr);
+                                               getStringName(labels[i].name, 
&labelStr, mapi, mapbat, 1);
                                                fprintf(fout,"CS " BUNFMT " - 
FreqId %d - Name: %s  (Freq: %d) | FreqParentIdx %d \n", cs.csId, i, labelStr, 
freq, cs.parentFreqIdx);
                                                GDKfree(labelStr);
                                        }
@@ -1958,7 +2002,8 @@ str printMergedFreqCSSet(CSset *freqCSse
                                        fprintf(fout,"CS " BUNFMT " - FreqId %d 
- Name: %s  (Freq: %d) | Subject: <Not available>  | FreqParentIdx %d \n", 
cs.csId, i, "DUMMY", freq, cs.parentFreqIdx);
                                } else {
                                        str labelStr;
-                                       takeOid(labels[i].name, &labelStr);
+                                       //takeOid(labels[i].name, &labelStr);
+                                       getStringName(labels[i].name, 
&labelStr, mapi, mapbat, 1);      
                                        fprintf(fout,"CS " BUNFMT " - FreqId %d 
- Name: %s  (Freq: %d) | Subject: <Not available>  | FreqParentIdx %d \n", 
cs.csId, i, labelStr, freq, cs.parentFreqIdx);
                                        GDKfree(labelStr); 
                                }
@@ -1972,7 +2017,8 @@ str printMergedFreqCSSet(CSset *freqCSse
                                        else{
                                                str labelStr = NULL;
                                                str labelShortStr = NULL; 
-                                               
takeOid(labels[tmpParentFreqId].name, &labelStr);
+                                               
//takeOid(labels[tmpParentFreqId].name, &labelStr);
+                                               
getStringName(labels[tmpParentFreqId].name, &labelStr, mapi, mapbat, 1);
                                                
getPropNameShort(&labelShortStr,labelStr);
                                                fprintf(fout, "[%s]  
",labelShortStr);
                                                GDKfree(labelShortStr);
@@ -1999,24 +2045,9 @@ str printMergedFreqCSSet(CSset *freqCSse
                                }
                                if (cs.lstObj != NULL){
                                        objOid = cs.lstObj[j]; 
-
-                                       objType = getObjType(objOid); 
-
-                                       if (objType == URI || objType == 
BLANKNODE){
-                                               objOid = objOid - ((oid)objType 
<< (sizeof(BUN)*8 - 4));
-                                               takeOid(objOid, &objStr); 
-                                       }
-                                       else{
-                                               objOid = objOid - (objType*2 + 
1) *  RDF_MIN_LITERAL;   /* Get the real objOid from Map or Tokenizer */ 
-                                               bun = BUNfirst(mapbat);
-                                               objStr = (str) BUNtail(mapi, 
bun + objOid); 
-                                       }
-
+                                       getStringName(objOid, &objStr, mapi, 
mapbat, 0);
                                        fprintf(fout, "  O: %s \n", objStr);
-
-                                       if (objType == URI || objType == 
BLANKNODE){
-                                               GDKfree(objStr);
-                                       }
+                                       GDKfree(objStr);
                                }
                                else{
                                        fprintf(fout, " <No Object value>  \n");
@@ -2140,7 +2171,7 @@ str printmergeCSSet(CSset *freqCSset, in
 
 #if NO_OUTPUTFILE == 0 
 static 
-str printsubsetFromCSset(CSset *freqCSset, BAT* subsetIdxBat, int num, int* 
mergeCSFreqCSMap, CSlabel *label, int sampleVersion){
+str printsubsetFromCSset(CSset *freqCSset, BAT* subsetIdxBat, BAT *mbat, int 
num, int* mergeCSFreqCSMap, CSlabel *label, int sampleVersion){
 
        int     i,j; 
        FILE    *fout; 
@@ -2155,8 +2186,9 @@ str printsubsetFromCSset(CSset *freqCSse
        CS      cs; 
        int     tmpNumcand;
        str     canStr; 
-
-
+       BATiter mapi;
+
+       mapi = bat_iterator(mbat);
        if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
                throw(RDF, "rdf.rdfschema",
                                "could not open the tokenizer\n");
@@ -2184,7 +2216,9 @@ str printsubsetFromCSset(CSset *freqCSse
 #if USE_SHORT_NAMES
                                str canStrShort = NULL;
 #endif
-                               takeOid(label[freqIdx].candidates[j], &canStr); 
+                               //takeOid(label[freqIdx].candidates[j], 
&canStr); 
+                               getStringName(label[freqIdx].candidates[j], 
&canStr, mapi, mbat, 1);
+                               
 #if USE_SHORT_NAMES
                                getPropNameShort(&canStrShort, canStr);
                                fprintf(fout," %s  ::: ",  canStrShort);
@@ -3445,7 +3479,7 @@ void doMerge(CSset *freqCSset, int ruleN
 }
 
 static
-str mergeMaxFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, 
oid** ontmetadata, int ontmetadataCount){
+str mergeMaxFreqCSByS1(CSset *freqCSset, CSlabel** labels, oid *mergecsId, 
oid** ontmetadata, int ontmetadataCount,bat *mapbatid){
        int             i; 
 
        #if !USE_MULTIWAY_MERGING
@@ -3467,6 +3501,8 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
        char*           schema = "rdf";
        int             ret = 0;
        str             tmpLabel; 
+       BAT             *mbat = NULL; 
+       BATiter         mapi; 
        
        #if USE_SHORT_NAMES
        str canStrShort = NULL;
@@ -3479,12 +3515,19 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
        (void) cs1;
        (void) cs2;
        #endif
+       (void) mapbatid; 
+       
        labelStat = initLabelStat(); 
        buildLabelStat(labelStat, (*labels), freqCSset, TOPK);
        printf("Num FreqCSadded before using S1 = %d \n", 
freqCSset->numCSadded);
 
        #if OUTPUT_FREQID_PER_LABEL
 
+       if ((mbat = BATdescriptor(*mapbatid)) == NULL) {
+               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+       }
+       mapi = bat_iterator(mbat); 
+
        if (TKNZRopen (NULL, &schema) != MAL_SUCCEED) {
                throw(RDF, "rdf.rdfschema",
                                "could not open the tokenizer\n");
@@ -3627,7 +3670,9 @@ str mergeMaxFreqCSByS1(CSset *freqCSset,
                        #endif /* USE_MULTIWAY_MERGING */
 
                        #if OUTPUT_FREQID_PER_LABEL
-                       takeOid(*name, &tmpLabel); 
+                       //takeOid(*name, &tmpLabel); 
+                       getStringName(*name, &tmpLabel, mapi, mbat, 1);
+                       
                        #if USE_SHORT_NAMES
                        getPropNameShort(&canStrShort, tmpLabel);
                        fprintf(fout,"Label %d:  %s \n", i, canStrShort);
@@ -5659,7 +5704,7 @@ void getTblName(char *name, oid nameId){
 }
 */
 
-void getTblName(str *name, oid nameId){
+void getTblName(str *name, oid nameId, BATiter mapi, BAT *mbat){
        str canStr = NULL; 
        str canStrShort = NULL;
        char    *pch;
@@ -5668,7 +5713,8 @@ void getTblName(str *name, oid nameId){
        int     i; 
 
        if (nameId != BUN_NONE){
-               takeOid(nameId, &canStr);
+               //takeOid(nameId, &canStr);
+               getStringName(nameId, &canStr, mapi, mbat, 1);
                getPropNameShort(&canStrShort, canStr);
 
                if (strstr (canStrShort,".") != NULL || 
@@ -5788,7 +5834,8 @@ str printSampleData(CSSample *csSample, 
 #if USE_SHORT_NAMES
                                str canStrShort = NULL;
 #endif
-                               takeOid(sample.candidates[j], &canStr); 
+                               //takeOid(sample.candidates[j], &canStr); 
+                               getStringName(sample.candidates[j], &canStr, 
mapi, mbat, 1);                    
 #if USE_SHORT_NAMES
                                getPropNameShort(&canStrShort, canStr);
                                fprintf(fout,";%s",  canStrShort);
@@ -5805,7 +5852,8 @@ str printSampleData(CSSample *csSample, 
 
                if (sample.name != BUN_NONE){
                        str canStrShort = NULL;
-                       takeOid(sample.name, &canStr);
+                       //takeOid(sample.name, &canStr);
+                       getStringName(sample.name, &canStr, mapi, mbat, 1);
                        getPropNameShort(&canStrShort, canStr);
 
                        if (strstr (canStrShort,".") != NULL || 
@@ -5971,7 +6019,9 @@ str printSampleData(CSSample *csSample, 
 
                if (sample.name != BUN_NONE){
                        str canStrShort = NULL;
-                       takeOid(sample.name, &canStr);
+                       //takeOid(sample.name, &canStr);
+                       getStringName(sample.name, &canStr, mapi, mbat, 1); 
+
                        getPropNameShort(&canStrShort, canStr);
 
                        if (strstr (canStrShort,".") != NULL || 
@@ -6011,7 +6061,7 @@ str printSampleData(CSSample *csSample, 
 
 #if NO_OUTPUTFILE == 0
 static 
-str printFullSampleData(CSSampleExtend *csSampleEx, int num){
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to