Changeset: 01e0593bc69e for MonetDB URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=01e0593bc69e Modified Files: MonetDB/src/gdk/gdk_relop.mx Branch: Jun2010 Log Message:
propagated new comments from Feb2010 branch: added comments that briefly explain the intentions of why and how to resort to sort-merge join for large out of memory joins diffs (108 lines): diff -r 63f8c7be221a -r 01e0593bc69e MonetDB/src/gdk/gdk_relop.mx --- a/MonetDB/src/gdk/gdk_relop.mx Sun May 09 10:48:35 2010 +0200 +++ b/MonetDB/src/gdk/gdk_relop.mx Sun May 09 13:55:04 2010 +0200 @@ -1293,23 +1293,30 @@ required (i.e., re-order allowed) */ BAT *ls, *rs, *j; + /* if not yet sorted on tail, sort left input on tail */ ls = BATtordered(l) & 1 ? l : BATmirror(BATsort(BATmirror(l))); ERRORcheck(ls == NULL, "BATjoin: BATmirror(BATsort(BATmirror(l))) failed"); + /* if not yet sorted on head, sort right input on head */ rs = BAThordered(r) & 1 ? r : BATsort(r); ERRORcheck(rs == NULL, "BATjoin: BATsort(r) failed"); if (swap && rsize > lsize) { + /* left-order-preserving not required: user smaller input as inner (right) */ + /* (not sure, though, whether this makes a difference with merge-join ...) */ ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmirror(BATmergejoin(BATmirror(BATsort(r)), BATsort(BATmirror(l)), " BUNFMT "));\n", estimate); j = BATmirror(batmergejoin(BATmirror(rs), BATmirror(ls), estimate, swap, NULL)); ERRORcheck(j == NULL, "BATjoin: BATmirror(batmergejoin(BATmirror(rs), BATmirror(ls), estimate, swap, NULL)) failed"); } else { + /* left-order-preserving required, or inner (right) input is smaller one */ ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmergejoin(BATmirror(BATsort(BATmirror(l))), BATsort(r), " BUNFMT "));\n", estimate); j = batmergejoin(ls, rs, estimate, swap, NULL); ERRORcheck(j == NULL, "BATjoin: batmergejoin(ls, rs, estimate, swap, NULL) failed"); } if (ls != l) { + /* release temp. tail-ordered copy of left input */ BBPunfix(ls->batCacheid); } if (rs != r) { + /* release temp. head-ordered copy of right input */ BBPunfix(rs->batCacheid); } return j; @@ -1324,20 +1331,28 @@ BAT *ls, *rs, *jj, *j; ALGODEBUG THRprintf(GDKout, "#BATjoin: BAT[s]sort(BATmergejoin(BATmirror(BAT[s]sort(BATmirror(l))), BATsort(r), " BUNFMT "));\n", estimate); + /* sort left input on tail, use stable sort to maintain original order of duplicate head values */ ls = BAThkey(l) ? BATmirror(BATsort(BATmirror(l))) : BATmirror(BATssort(BATmirror(l))); ERRORcheck(ls == NULL, "BATjoin: BATmirror(BAT[s]sort(BATmirror(l))) failed"); + /* if not yet sorted on head, sort right input on head */ rs = BAThordered(r) & 1 ? r : BATsort(r); ERRORcheck(rs == NULL, "BATjoin: BATsort(r) failed"); + /* perform merge join */ jj = batmergejoin(ls, rs, estimate, swap, NULL); ERRORcheck(jj == NULL, "BATjoin: batmergejoin(ls, rs, estimate, swap, NULL) failed"); + /* release temp. tail-ordered copy of left input */ BBPunfix(ls->batCacheid); ls = NULL; if (rs != r) { + /* release temp. head-ordered copy of right input */ BBPunfix(rs->batCacheid); rs = NULL; } + /* sort join result on head to restore physical left-input-order; + * use stable sort to maintain original order of duplicate head values */ j = BAThkey(l) ? BATsort(jj) : BATssort(jj); ERRORcheck(j == NULL, "BATjoin: BAT[s]sort(jj) failed"); + /* release temp. unordered join result */ BBPunfix(jj->batCacheid); jj = NULL; return j; @@ -1352,32 +1367,44 @@ BAT *lh, *lt, *ls, *rs, *jj, *js, *j; ALGODEBUG THRprintf(GDKout, "#BATjoin: BATmirror(batfetchjoin(BATmirror(BATsort(BATmergejoin(BATmirror(BATsort(BATmark(BATmirror(l),0))), BATsort(r), " BUNFMT "))), BATmirror(BATmark(l,0))));\n", estimate); + /* separate left head & tail using BATmark */ lh = BATmark(l, 0); ERRORcheck(lh == NULL, "BATjoin: BATmark(l,0) failed"); lt = BATmirror(BATmark(BATmirror(l), 0)); ERRORcheck(lt == NULL, "BATjoin: BATmirror(BATmark(BATmirror(l),0)) failed"); + /* sort left tail */ ls = BATmirror(BATsort(BATmirror(lt))); ERRORcheck(ls == NULL, "BATjoin: BATmirror(BATsort(BATmirror(lt))) failed"); + /* release temp. unsorted left tail */ BBPunfix(lt->batCacheid); lt = NULL; + /* if not yet sorted on head, sort right input on head */ rs = BAThordered(r) & 1 ? r : BATsort(r); ERRORcheck(rs == NULL, "BATjoin: BATsort(r) failed"); + /* perform merge join */ jj = batmergejoin(ls, rs, estimate, swap, NULL); ERRORcheck(jj == NULL, "BATjoin: batmergejoin(ls, rs, estimate, swap, NULL) failed"); + /* release temp. ordered copy of left tail */ BBPunfix(ls->batCacheid); ls = NULL; if (rs != r) { + /* release temp. head-ordered copy of right input */ BBPunfix(rs->batCacheid); rs = NULL; } + /* sort join result on head to restore physical left-input-order */ js = BATsort(jj); ERRORcheck(js == NULL, "BATjoin: BATsort(jj) failed"); + /* release temp. unordered join result */ BBPunfix(jj->batCacheid); jj = NULL; + /* restore original left head values */ j = BATmirror(batfetchjoin(BATmirror(js), BATmirror(lh), BATcount(js), swap, TRUE)); ERRORcheck(j == NULL, "BATjoin: BATmirror(batfetchjoin(BATmirror(js), BATmirror(lh), BATcount(js), swap, TRUE)) failed"); + /* release temp.sorted join result */ BBPunfix(js->batCacheid); js = NULL; + /* release temp. copy of left head */ BBPunfix(lh->batCacheid); lh = NULL; return j; _______________________________________________ Checkin-list mailing list Checkin-list@monetdb.org http://mail.monetdb.org/mailman/listinfo/checkin-list