Changeset: 490b960fc611 for MonetDB
Modified Files:
Branch: rdf
Log Message:

Merge join that takes exception data into account.
(Used for the microbenchmark.)

diffs (240 lines):

diff --git a/monetdb5/extras/rdf/rdf.h b/monetdb5/extras/rdf/rdf.h
--- a/monetdb5/extras/rdf/rdf.h
+++ b/monetdb5/extras/rdf/rdf.h
@@ -65,6 +65,12 @@ RDFtriplesubsort(BAT **sbat, BAT **pbat,
 rdf_export str
 RDFbisubsort(BAT **lbat, BAT **rbat); 
+rdf_export str
+RDFexception_join(bat *ret1, bat *ret2, bat *sdense, bat *o1, bat *s2, bat 
*o2, bat *scand); 
+rdf_export str
+RDFmerge_join(bat *ret1, bat *ret2, bat *s1id, bat *o1id, bat *scandid); 
 #define RDF_MIN_LITERAL (((oid) 1) << ((sizeof(oid)==8)?59:27))
 #define IS_DUPLICATE_FREE 0            /* 0: Duplications have not been 
removed, otherwise 1 */
diff --git a/monetdb5/extras/rdf/rdfalgebra.c b/monetdb5/extras/rdf/rdfalgebra.c
--- a/monetdb5/extras/rdf/rdfalgebra.c
+++ b/monetdb5/extras/rdf/rdfalgebra.c
@@ -109,6 +109,193 @@ RDFpartialjoin(bat *retid, bat *lid, bat
+ * This function joins the set of S candidates (scand) with a dense S BAT, 
+ * also considering the exception data (S2, O2). 
+ * Input: 
+ * - S1 BAT (dense bat), O1 BAT, S2 BAT, O2 BAT, S candidates BAT 
+ * */
+RDFexception_join(bat *ret1, bat *ret2, bat *sdenseid, bat *o1id, bat *s2id, 
bat *o2id, bat *scandid){
+       BAT *resS = NULL, *resO = NULL;         
+       BAT *sdense, *o1, *s2, *o2, *scand; 
+       oid *sdensept, *o1pt, *s2pt, *o2pt, *scandpt, *resSpt, *resOpt; 
+       BUN estimate  = 0; 
+       int cnt1 = 0, cnt2 = 0, cntcand = 0; 
+       int i = 0, j = 0, lasti = -1; 
+       oid tmpS = BUN_NONE; 
+       BUN min_sdense, max_sdense; 
+       int rescnt = 0;
+       if ((sdense = BATdescriptor(*sdenseid)) == NULL) {
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((o1 = BATdescriptor(*o1id)) == NULL) {
+               BBPunfix(sdense->batCacheid); 
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((s2 = BATdescriptor(*s2id)) == NULL) {
+               BBPunfix(sdense->batCacheid); 
+               BBPunfix(o1->batCacheid);       
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((o2 = BATdescriptor(*o2id)) == NULL) {
+               BBPunfix(sdense->batCacheid); 
+               BBPunfix(o1->batCacheid);       
+               BBPunfix(s2->batCacheid);       
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((scand = BATdescriptor(*scandid)) == NULL) { 
+               BBPunfix(sdense->batCacheid); 
+               BBPunfix(o1->batCacheid);       
+               BBPunfix(s2->batCacheid);       
+               BBPunfix(o2->batCacheid);       
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       sdensept = (oid *) Tloc(sdense, BUNfirst(sdense));
+       o1pt = (oid *) Tloc(o1, BUNfirst(o1)); 
+       s2pt = (oid *) Tloc(s2, BUNfirst(s2)); 
+       o2pt = (oid *) Tloc(o2, BUNfirst(o2)); 
+       scandpt = (oid *) Tloc(scand, BUNfirst(scand)); 
+       /*Estimate the total size of the output = the size of 
+         the candidate BAT and the exception BAT */
+       estimate = BATcount(scand) + BATcount(s2);  
+       resS = BATnew(TYPE_void, TYPE_oid, estimate, TRANSIENT);
+       resO = BATnew(TYPE_void, TYPE_oid, estimate, TRANSIENT);
+       resSpt = (oid *) Tloc(resS, BUNfirst(resS)); 
+       resOpt = (oid *) Tloc(resO, BUNfirst(resO));
+       cnt1 = (int) BATcount(sdense); 
+       cnt2 = (int) BATcount(s2); 
+       cntcand = (int) BATcount(scand); 
+       min_sdense = sdensept[0]; 
+       max_sdense = sdensept[cnt1-1]; 
+       i = 0; j  = 0, lasti = -1;
+       //printf("Number of cand = %d | Number of input = %d | Number of 
exception = %d\n", cntcand, cnt1, cnt2);       
+       while (i < cntcand && j < cnt2){
+               //fetch the result from dense
+               if (i != lasti && scandpt[i] >= min_sdense && scandpt[i] <= 
+                       resSpt[rescnt] = scandpt[i]; 
+                       resOpt[rescnt] = o1pt[scandpt[i] - min_sdense]; 
+                       rescnt++;
+                       lasti = i; 
+               }
+               if (scandpt[i] < s2pt[j]){
+                       i++;
+               } else if (scandpt[i] > s2pt[j]){
+                       j++;
+               } else { // (scandpt[i] == s2pt[j])
+                       //all same value of S in the exception
+                       tmpS = s2pt[j]; 
+                       while (j < cnt2 && s2pt[j] == tmpS){
+                               resSpt[rescnt] = scandpt[i]; 
+                               resOpt[rescnt] = o2pt[j]; 
+                               rescnt++;
+                               j++; 
+                       }
+                       i++; 
+               }       
+       }
+       //printf("Number of results %d\n", rescnt);     
+       BATsetcount(resS,rescnt);
+       BATsetcount(resO,rescnt);
+       *ret1 = resS->batCacheid;
+       *ret2 = resO->batCacheid; 
+       BBPkeepref(*ret1);
+       BBPkeepref(*ret2);
+       return MAL_SUCCEED; 
+ * This function performs a merge join between the set of S candidates 
+ * (scand) and the S1 BAT; no exception data is involved. 
+ * Input: 
+ * - S1 BAT, O1 BAT, S candidates BAT 
+ * */
+RDFmerge_join(bat *ret1, bat *ret2, bat *s1id, bat *o1id, bat *scandid){
+       BAT *resS = NULL, *resO = NULL;         
+       BAT *s1, *o1, *scand; 
+       oid *s1pt, *o1pt, *scandpt, *resSpt, *resOpt; 
+       BUN estimate  = 0; 
+       int cnt1 = 0, cntcand = 0; 
+       int i = 0, j = 0; 
+       oid tmpS = BUN_NONE; 
+       int rescnt = 0;
+       if ((s1 = BATdescriptor(*s1id)) == NULL) {
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((o1 = BATdescriptor(*o1id)) == NULL) {
+               BBPunfix(s1->batCacheid); 
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       if ((scand = BATdescriptor(*scandid)) == NULL) { 
+               BBPunfix(s1->batCacheid); 
+               BBPunfix(o1->batCacheid);       
+               throw(MAL, "rdf.RDFexception_join", RUNTIME_OBJECT_MISSING);
+       }
+       s1pt = (oid *) Tloc(s1, BUNfirst(s1));
+       o1pt = (oid *) Tloc(o1, BUNfirst(o1)); 
+       scandpt = (oid *) Tloc(scand, BUNfirst(scand)); 
+       /*Estimate the total size of the output = twice the size of 
+         the candidate BAT */
+       estimate = BATcount(scand) * 2;  
+       resS = BATnew(TYPE_void, TYPE_oid, estimate, TRANSIENT);
+       resO = BATnew(TYPE_void, TYPE_oid, estimate, TRANSIENT);
+       resSpt = (oid *) Tloc(resS, BUNfirst(resS)); 
+       resOpt = (oid *) Tloc(resO, BUNfirst(resO));
+       cnt1 = (int) BATcount(s1); 
+       cntcand = (int) BATcount(scand); 
+       //printf("Number of cand = %d | Number of input = %d\n", cntcand, 
+       i = 0; j  = 0;
+       while (i < cntcand && j < cnt1){
+               if (scandpt[i] < s1pt[j]){
+                       i++;
+               } else if (scandpt[i] > s1pt[j]){
+                       j++;
+               } else { // (scandpt[i] == s1pt[j])
+                       //all same value of S in the exception
+                       tmpS = s1pt[j]; 
+                       while (j < cnt1 && s1pt[j] == tmpS){
+                               resSpt[rescnt] = scandpt[i]; 
+                               resOpt[rescnt] = o1pt[j]; 
+                               rescnt++;
+                               j++; 
+                       }
+                       i++; 
+               }       
+       }
+       //printf("Number of results %d\n", rescnt);     
+       BATsetcount(resS,rescnt);
+       BATsetcount(resO,rescnt);
+       *ret1 = resS->batCacheid;
+       *ret2 = resO->batCacheid; 
+       BBPkeepref(*ret1);
+       BBPkeepref(*ret2);
+       return MAL_SUCCEED; 
 RDFpartialjoin(bat *retid, bat *lid, bat *rid, bat *inputid){
        BAT *left, *right, *result1, *result2, *result, *input;  
diff --git a/monetdb5/extras/rdf/rdfalgebra.mal 
--- a/monetdb5/extras/rdf/rdfalgebra.mal
+++ b/monetdb5/extras/rdf/rdfalgebra.mal
@@ -29,3 +29,11 @@ command partialjoin(lmap:bat[:oid], rmap
 address RDFpartialjoin
 comment "Join the input bat and the map bat (lmap, rmap). However, do not use 
nil for non-matching value but use the original value from input bat"
+command rdf_exception_join(s1:bat[:oid], o1:bat[:oid], s2:bat[:oid], 
o2:bat[:oid], scand:bat[:oid])(:bat[:oid],:bat[:oid])
+address RDFexception_join
+comment "Union join with the input consists of a dense S bat and an exception 
S. The output are two BATs S and O of satisfied S and corresponding O's"
+command rdf_merge_join(s1:bat[:oid], o1:bat[:oid], 
+address RDFmerge_join
+comment "Merge join between a set of candidate S's and . The output are two 
BATs S and O of statisfied S and corresponding O's"
diff --git a/sql/backends/monet5/sql_scenario.c 
--- a/sql/backends/monet5/sql_scenario.c
+++ b/sql/backends/monet5/sql_scenario.c
@@ -335,7 +335,7 @@ global_variables(mvc *sql, char *user, c
                rdf_opt_simply_crp = 0; 
-       printf("need_crp_opt = %s and rdf_opt_simply_crp = 
+       /* printf("need_crp_opt = %s and rdf_opt_simply_crp = 
%d\n",need_crp_opt,rdf_opt_simply_crp); */
        return 0;
