Changeset: cf71a2dd1319 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cf71a2dd1319
Modified Files:
        monetdb5/extras/rdf/rdf_shredder.c
        monetdb5/extras/rdf/rdfschema.c
        monetdb5/extras/rdf/rdfschema.h
Branch: rdf
Log Message:

Fix a bug in rdf_shredder (the object type could be left unassigned in
getObjectType), which also caused problems when exploring the types of CS
properties.


diffs (221 lines):

diff --git a/monetdb5/extras/rdf/rdf_shredder.c 
b/monetdb5/extras/rdf/rdf_shredder.c
--- a/monetdb5/extras/rdf/rdf_shredder.c
+++ b/monetdb5/extras/rdf/rdf_shredder.c
@@ -158,7 +158,7 @@ rdf_BUNappend_unq_ForObj(parserData* pda
        
                /* Add the type here by changing 2 bits at position 62, 63 of 
oid */
                *bun |= (BUN)objType << (sizeof(BUN)*8 - 4);
-
+               
                //b = BUNappend(b, (ptr) (str)objStr, TRUE);
                b = BUNins(b, (ptr) bun, (ptr) (str)objStr, TRUE); 
 
@@ -235,7 +235,7 @@ char isInt(char *input){
 
 static ObjectType 
 getObjectType(unsigned char* objStr, BUN *realNumValue){
-       ObjectType obType; 
+       ObjectType obType = STRING; 
        unsigned char* endpart;
        char* valuepart; 
        const char* pos = NULL; 
@@ -275,6 +275,8 @@ getObjectType(unsigned char* objStr, BUN
                        //printf("%s: String \n", objStr); 
                }
        }
+       else
+               obType = STRING; 
 
        return obType; 
 }
@@ -362,7 +364,7 @@ tripleHandler(void* user_data, const rap
                        ObjectType objType = STRING;
                        objStr = raptor_term_to_string(triple->object);
                        objType = getObjectType(objStr, &realNumValue);
-
+                       
                        rdf_BUNappend_unq_ForObj(pdata, graph[MAP_LEX], 
(str)objStr, objType, &bun);    
                        rdf_BUNappend(pdata, graph[O_sort], &bun); 
 
diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c
--- a/monetdb5/extras/rdf/rdfschema.c
+++ b/monetdb5/extras/rdf/rdfschema.c
@@ -722,7 +722,21 @@ CSPropTypes* initCSPropTypes(CSset* freq
        return csPropTypes;
 }
 
-
+static 
+void printCSPropTypes(CSPropTypes* csPropTypes, int numMergedCS, CSset* 
freqCSset){
+       int i, j, k; 
+
+       for (i = 0; i < numMergedCS; i++){
+               printf("MergedCS %d (Freq: %d): \n", i, 
freqCSset->items[csPropTypes[i].freqCSId].support);
+               for(j = 0; j < csPropTypes[i].numProp; j++){
+                       printf("  P " BUNFMT " :  ", 
csPropTypes[i].lstPropTypes[j].prop);
+                       for (k = 0; k < csPropTypes[i].lstPropTypes[j].numType; 
k++){
+                               printf(" Type %d (%d)  | ", k, 
csPropTypes[i].lstPropTypes[j].lstFreq[k]);
+                       }
+                       printf("\n");
+               }
+       }
+}
 /*
  * Add types of properties 
  * Note that the property list is sorted by prop's oids
@@ -734,9 +748,11 @@ void addPropTypes(char *buffTypes, oid* 
        int i,j; 
        int tblId = csTblIdxMapping[csId];
        
+       //printf("Add %d prop from CS %d to table %d \n", numP, csId, tblId);
        j = 0;
        if (tblId != -1){
                for (i = 0; i < numP; i++){
+                       //printf("  P: " BUNFMT " Type: %d ", buffP[i], 
buffTypes[i]);
                        while (csPropTypes[tblId].lstPropTypes[j].prop != 
buffP[i]){
                                j++;
                        }       
@@ -745,6 +761,7 @@ void addPropTypes(char *buffTypes, oid* 
                        
                }
        }
+       //printf("\n");
 }
 
 static
@@ -4202,61 +4219,62 @@ RDFreorganize(int *ret, CStableStat *cst
                throw(RDF, "rdf.RDFreorganize", "Problem in extracting CSs");
        } 
 
+
+       
+       printf("Start re-organizing triple store for " BUNFMT " CSs \n", 
maxCSoid);
+
+       csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1)); 
+       initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
+
+       mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded); 
+       initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
+
+       mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded);  // A little bit redundant space
+       initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
+
+       //Mapping from CSId to TableIdx 
+       initCSTableIdxMapping(freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
+
+       // Init CStableStat
+       initCStables(cstablestat, freqCSset);
+
+       if ((sbat = BATdescriptor(*sbatid)) == NULL) {
+               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+       }
+
+       if ((obat = BATdescriptor(*obatid)) == NULL) {
+               BBPreleaseref(sbat->batCacheid);
+               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+       }
+
+       if ((pbat = BATdescriptor(*pbatid)) == NULL) {
+               BBPreleaseref(sbat->batCacheid);
+               BBPreleaseref(obat->batCacheid);
+               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
+       }
+
+       si = bat_iterator(sbat); 
+       pi = bat_iterator(pbat); 
+       oi = bat_iterator(obat); 
+
+       /* Get possible types of each property in a table (i.e., mergedCS) */
+       csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
+       RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap, 
csTblIdxMapping, csPropTypes, maxNumPwithDup);
+       printCSPropTypes(csPropTypes,cstablestat->numTables, freqCSset);
+
        if (*mode == EXPLOREONLY){
                printf("Only explore the schema information \n");
                freeCSset(freqCSset); 
                free(subjCSMap);
                free(subjdefaultMap);
+               free(csTblIdxMapping);
+               free(mfreqIdxTblIdxMapping);
+               free(mTblIdxFreqIdxMapping);
 
                return MAL_SUCCEED;
        }
-       
-       printf("Start re-organizing triple store for " BUNFMT " CSs \n", 
maxCSoid);
-
-       csTblIdxMapping = (int *) malloc (sizeof (int) * (maxCSoid + 1)); 
-       initIntArray(csTblIdxMapping, (maxCSoid + 1), -1);
-
-       mfreqIdxTblIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded); 
-       initIntArray(mfreqIdxTblIdxMapping , freqCSset->numCSadded, -1);
-
-       mTblIdxFreqIdxMapping = (int *) malloc (sizeof (int) * 
freqCSset->numCSadded);  // A little bit reduntdant space
-       initIntArray(mTblIdxFreqIdxMapping , freqCSset->numCSadded, -1);
-
-       //Mapping from from CSId to TableIdx 
-       initCSTableIdxMapping(freqCSset, csTblIdxMapping, 
mfreqIdxTblIdxMapping, mTblIdxFreqIdxMapping);
-
-       // Init CStableStat
-       initCStables(cstablestat, freqCSset);
-
-
-       lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables); 
-       initArray(lastSubjId, cstablestat->numTables, -1); 
-       
-       lastSubjIdEx = (oid *) malloc (sizeof(oid) * cstablestat->numTables); 
-       initArray(lastSubjIdEx, cstablestat->numTables, -1); 
-
-       if ((sbat = BATdescriptor(*sbatid)) == NULL) {
-               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
-       }
-
-       if ((obat = BATdescriptor(*obatid)) == NULL) {
-               BBPreleaseref(sbat->batCacheid);
-               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
-       }
-
-       if ((pbat = BATdescriptor(*pbatid)) == NULL) {
-               BBPreleaseref(sbat->batCacheid);
-               BBPreleaseref(obat->batCacheid);
-               throw(MAL, "rdf.RDFreorganize", RUNTIME_OBJECT_MISSING);
-       }
-
-       si = bat_iterator(sbat); 
-       pi = bat_iterator(pbat); 
-       oi = bat_iterator(obat); 
-
-       /* Get possible types of each property in a table (i.e., mergedCS) */
-       csPropTypes = initCSPropTypes(freqCSset, cstablestat->numTables);
-       RDFExtractCSPropTypes(ret, sbat, si, pi, oi, subjCSMap, 
csTblIdxMapping, csPropTypes, maxNumPwithDup);
+
+
 
 
        sNewBat = BATnew(TYPE_void, TYPE_oid, BATcount(sbat));
@@ -4281,6 +4299,11 @@ RDFreorganize(int *ret, CStableStat *cst
 
        BATseqbase(rmap, 0);
        
+       lastSubjId = (oid *) malloc (sizeof(oid) * cstablestat->numTables); 
+       initArray(lastSubjId, cstablestat->numTables, -1); 
+       
+       lastSubjIdEx = (oid *) malloc (sizeof(oid) * cstablestat->numTables); 
+       initArray(lastSubjIdEx, cstablestat->numTables, -1); 
 
        printf("Re-assigning Subject oids ... ");
        lastS = -1; 
diff --git a/monetdb5/extras/rdf/rdfschema.h b/monetdb5/extras/rdf/rdfschema.h
--- a/monetdb5/extras/rdf/rdfschema.h
+++ b/monetdb5/extras/rdf/rdfschema.h
@@ -88,8 +88,8 @@ typedef struct PropStat {
 #define FULL_PROP_STAT 1       // Only use for showing the statistic on all 
properties / all CSs. (Default should be 0)
 
 
-#define USE_LABEL_FINDING_MAXCS        1       // Use the labels received from 
labeling process for finding maxCS 
-#define USE_LABEL_FOR_MERGING  1       // Use the labels received from 
labeling process for finding mergeCS
+#define USE_LABEL_FINDING_MAXCS        0       // Use the labels received from 
labeling process for finding maxCS 
+#define USE_LABEL_FOR_MERGING  0       // Use the labels received from 
labeling process for finding mergeCS
 
 typedef struct CS
 {
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
http://mail.monetdb.org/mailman/listinfo/checkin-list

Reply via email to