Changeset: 50a2cd0713ec for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=50a2cd0713ec Modified Files: monetdb5/extras/rdf/hashmap/hashmap.c monetdb5/extras/rdf/hashmap/hashmap.h monetdb5/extras/rdf/rdf.h monetdb5/extras/rdf/rdfschema.c monetdb5/extras/rdf/rdfschema.h monetdb5/extras/rdf/rdfschema.mal sql/backends/monet5/sql.mx Branch: rdf Log Message:
Check the foreign key relationship from RDF triples. diffs (truncated from 402 to 300 lines): diff --git a/monetdb5/extras/rdf/hashmap/hashmap.c b/monetdb5/extras/rdf/hashmap/hashmap.c --- a/monetdb5/extras/rdf/hashmap/hashmap.c +++ b/monetdb5/extras/rdf/hashmap/hashmap.c @@ -358,6 +358,34 @@ int hashmap_statistic_CSbysupport(map_t } +/* Simply print all the elements in the hashmap */ +int hashmap_print(map_t in){ + + int i,j ; + + + /* Cast the hashmap */ + hashmap_map* m = (hashmap_map*) in; + + /* On empty hashmap, return immediately */ + if (hashmap_length(m) <= 0) + return MAP_MISSING; + + /* Linear probing */ + for(i = 0; i< m->table_size; i++) + if(m->data[i].in_use != 0) { + /* Print each item */ + printf("Key: "); + for (j = 0; j < m->data[i].num; j++){ + printf(" %d ", m->data[i].key[j]); + } + printf("\n"); + printf("Freq: %d \n", m->data[i].freq); + /* m->data[i].data */ + } + return MAP_OK; +} + /* * Collect the number of CSs cummulatively for support values ranging * from 1 to maxfreqthreshold diff --git a/monetdb5/extras/rdf/hashmap/hashmap.h b/monetdb5/extras/rdf/hashmap/hashmap.h --- a/monetdb5/extras/rdf/hashmap/hashmap.h +++ b/monetdb5/extras/rdf/hashmap/hashmap.h @@ -37,7 +37,7 @@ typedef any_t map_t; /* We need to keep keys and values */ typedef struct _hashmap_element{ int* key; - int num; + int num; /* Number of integers for the key */ int freq; char in_use; any_t data; @@ -139,6 +139,13 @@ extern int hashmap_remove(map_t in, int* */ extern int hashmap_get_one(map_t in, any_t *arg, int remove); +/* + * Simply print all the elements in the hashmap + * + * */ +extern int hashmap_print(map_t in); + + /* * Free the hashmap */ diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h --- a/monetdb5/extras/rdf/rdf.h +++ b/monetdb5/extras/rdf/rdf.h @@ -52,9 +52,9 @@ rdf_export str RDFleftfetchjoin_sorted(int *result, int* lid, int *rid); typedef enum { + URI, DATETIME, NUMERIC, - URI, STRING } ObjectType; diff --git a/monetdb5/extras/rdf/rdfschema.c b/monetdb5/extras/rdf/rdfschema.c --- a/monetdb5/extras/rdf/rdfschema.c +++ b/monetdb5/extras/rdf/rdfschema.c @@ -114,26 +114,26 @@ CS* creatCS(int subId, int numP, int* bu * If yes, add that frequent CS to the freqCSset. * * */ -static void putaCStoHash(map_t csmap, int* buff, int num, oid *csoid, char isStoreFreqCS, int freqThreshold, CSset **freqCSset){ +static void putaCStoHash(map_t csmap, int* key, int num, oid *csoid, char isStoreFreqCS, int freqThreshold, CSset **freqCSset){ oid *getCSoid; oid *putCSoid; int err; - int* cs; + int* csKey; int freq = 0; CS *freqCS; - cs = (int*) malloc(sizeof(int) * num); - if (cs==NULL){ + csKey = (int*) malloc(sizeof(int) * num); + if (csKey==NULL){ printf("Malloc failed. at %d", num); exit(-1); } - copyIntSet(cs, buff, num); - if (hashmap_get(csmap, cs, num,(void**)(&getCSoid),1, &freq) != MAP_OK){ + copyIntSet(csKey, key, num); + if (hashmap_get(csmap, csKey, num,(void**)(&getCSoid),1, &freq) != MAP_OK){ putCSoid = malloc(sizeof(oid)); *putCSoid = *csoid; - err = hashmap_put(csmap, cs, num, 1, putCSoid); + err = hashmap_put(csmap, csKey, num, 1, putCSoid); assert(err == MAP_OK); (*csoid)++; @@ -142,11 +142,11 @@ static void putaCStoHash(map_t csmap, in if (isStoreFreqCS == 1){ /* Store the frequent CS to the CSset*/ //printf("FreqCS: Support = %d, Threshold %d \n ", freq, freqThreshold); if (freq == freqThreshold){ - freqCS = creatCS(*getCSoid, num, buff); + freqCS = creatCS(*getCSoid, num, key); addCStoSet(*freqCSset, *freqCS); } } - free(cs); + free(csKey); } } @@ -234,7 +234,7 @@ void getMaximumFreqCSs(CSset *freqCSset) -static void putPtoHash(map_t pmap, int value, oid *poid, int support){ +static void putPtoHash(map_t pmap, int key, oid *poid, int support){ oid *getPoid; oid *putPoid; int err; @@ -242,7 +242,7 @@ static void putPtoHash(map_t pmap, int v pkey = (int*) malloc(sizeof(int)); - *pkey = value; + *pkey = key; if (hashmap_get_forP(pmap, pkey,(void**)(&getPoid)) != MAP_OK){ putPoid = malloc(sizeof(oid)); @@ -351,6 +351,7 @@ RDFextractCS(int *ret, bat *sbatid, bat int maxNumProp = 0; CSset *freqCSset; /* Set of frequent CSs */ + buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM); if ((sbat = BATdescriptor(*sbatid)) == NULL) { @@ -431,10 +432,179 @@ RDFextractCS(int *ret, bat *sbatid, bat return MAL_SUCCEED; } +/* + * Get the refer CS + * Input: oid of a URI object + * Return the id of the CS + * */ +static +str getReferCS(BAT *sbat, BAT *pbat, oid *obt){ + + /* For detecting foreign key relationships */ + BAT *tmpbat; /* Get the result of searching objectURI from sbat */ + BATiter ti; + oid *tbt; + BUN pt, qt; + oid *s_t, *p_t; + //int *tmpbuff; + + //tmpbuff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM); + + /* BATsubselect(inputbat, <dont know yet>, lowValue, Highvalue, isIncludeLowValue, isIncludeHigh, <anti> */ + printf("Checking for object " BUNFMT "\n", *obt); + tmpbat = BATsubselect(sbat, NULL, obt, obt, 1, 1, 0); + /* tmpbat tail contain head oid of sbat for matching elements */ + if (tmpbat != NULL){ + printf("Matching: " BUNFMT "\n", BATcount(tmpbat)); + BATprint(tmpbat); + + if (BATcount(tmpbat) > 0){ + ti = bat_iterator(tmpbat); + BATloop(tmpbat, pt, qt){ + tbt = (oid *) BUNtail(ti, pt); + s_t = (oid *) Tloc(sbat, *tbt); + p_t = (oid *) Tloc(pbat, *tbt); + printf("s_t: " BUNFMT "\n", (*s_t)); + printf("p_t: " BUNFMT "\n", (*p_t)); + /* Check which CS is referred */ + + } + } + } + else + throw(MAL, "rdf.RDFextractCSwithTypes", "Null Bat returned for BATsubselect"); + + + + /* temporarily use */ + if (tmpbat) + BBPunfix(tmpbat->batCacheid); + + return MAL_SUCCEED; +} + +/* Extract CS from SPO triples table */ +str +RDFextractCSwithTypes(int *ret, bat *sbatid, bat *pbatid, bat *obatid, int *freqThreshold){ + BUN p, q; + BAT *sbat = NULL, *pbat = NULL, *obat = NULL; + BATiter si, pi, oi; /*iterator for BAT of s,p,o columns in spo table */ + oid *sbt, *pbt, *obt; + oid curS; /* current Subject oid */ + oid curP; /* current Property oid */ + oid CSoid = 0; /* Characteristic set oid */ + int numP; /* Number of properties for current S */ + map_t csMap; + int* buff; + int INIT_PROPERTY_NUM = 5000; + int maxNumProp = 0; + CSset *freqCSset; /* Set of frequent CSs */ + oid objType; + + + + buff = (int *) malloc (sizeof(int) * INIT_PROPERTY_NUM); + + if ((sbat = BATdescriptor(*sbatid)) == NULL) { + throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING); + } + if (!(sbat->tsorted)){ + throw(MAL, "rdf.RDFextractCSwithTypes", "sbat is not sorted"); + } + + if ((pbat = BATdescriptor(*pbatid)) == NULL) { + throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING); + } + if ((obat = BATdescriptor(*obatid)) == NULL) { + throw(MAL, "rdf.RDFextractCSwithTypes", RUNTIME_OBJECT_MISSING); + } + + si = bat_iterator(sbat); + pi = bat_iterator(pbat); + oi = bat_iterator(obat); + + /* Init a hashmap */ + csMap = hashmap_new(); + freqCSset = initCSset(); + + numP = 0; + curP = 0; + + printf("freqThreshold = %d \n", *freqThreshold); + BATloop(sbat, p, q){ + sbt = (oid *) BUNtloc(si, p); + if (*sbt != curS){ + if (p != 0){ /* Not the first S */ + putaCStoHash(csMap, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset); + + if (numP > maxNumProp) + maxNumProp = numP; + } + curS = *sbt; + curP = 0; + numP = 0; + } + + pbt = (oid *) BUNtloc(pi, p); + + if (numP > INIT_PROPERTY_NUM){ + throw(MAL, "rdf.RDFextractCS", "# of properties is greater than INIT_PROPERTY_NUM"); + exit(-1); + } + + if (curP != *pbt){ /* Multi values property */ + buff[numP] = *pbt; + numP++; + curP = *pbt; + } + + obt = (oid *) BUNtloc(oi, p); + /* Check type of object */ + objType = ((*obt) >> (sizeof(BUN)*8 - 3)) & 3 ; /* Get two bits 63th, 62nd from object oid */ + + //printf("object type: " BUNFMT "\n", objType); + + /* Look at sbat*/ + if (objType == URI){ + + getReferCS(sbat, pbat, obt); + } + } + + /*put the last CS */ + putaCStoHash(csMap, buff, numP, &CSoid, 1, *freqThreshold, &freqCSset ); _______________________________________________ checkin-list mailing list checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list