Changeset: f466ade813cb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=f466ade813cb
Modified Files:
        gdk/gdk_search.c
        gdk/gdk_search.h
Branch: leftmart
Log Message:

ORDERfind[first|last|any] perform bin search on order idx and returns positions 
on the order idx. These positions are to be used to take a slice out of the 
order idx, sort it, and pass it as a cand list result of subselect.


diffs (250 lines):

diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
--- a/gdk/gdk_search.c
+++ b/gdk/gdk_search.c
@@ -655,11 +655,11 @@ HASHgonebad(BAT *b, const void *v)
  * treated in one macro. Main motivation: distrust of gradient
  * performance on odmg data and its high mult/div overhead.
  */
-#define SORTfndloop(TYPE, CMP, BUNtail)                                        
\
+#define SORTfndloop(TYPE, CMP, BUNtail, POS)                           \
        do {                                                            \
                while (lo < hi) {                                       \
                        cur = mid = (lo + hi) >> 1;                     \
-                       cmp = CMP(BUNtail(bi, cur), v, TYPE);           \
+                       cmp = CMP(BUNtail(bi, POS), v, TYPE);           \
                        if (cmp < 0) {                                  \
                                lo = ++mid;                             \
                                cur++;                                  \
@@ -678,16 +678,18 @@ enum find_which {
 };
 
 static BUN
-SORTfndwhich(BAT *b, const void *v, enum find_which which)
+SORTfndwhich(BAT *b, const void *v, enum find_which which, int use_orderidx)
 {
        BUN lo, hi, mid;
        int cmp;
        BUN cur;
-       BATiter bi;
+       BAT *o;
+       BATiter bi, bio;
        BUN diff, end;
        int tp;
 
-       if (b == NULL || (!b->tsorted && !b->trevsorted))
+       if (b == NULL || (!b->tsorted && !b->trevsorted && !use_orderidx)
+                     || (use_orderidx && !b->torderidx.flags))
                return BUN_NONE;
 
        lo = BUNfirst(b);
@@ -723,10 +725,22 @@ SORTfndwhich(BAT *b, const void *v, enum
        /* only use storage type if comparison functions are equal */
        tp = ATOMbasetype(b->ttype);
 
+       if (use_orderidx) {
+               o = BBPdescriptor(b->torderidx.o);
+               if (o == NULL) {
+                       GDKerror("#ORDERfindwhich: order idx not found\n");
+               }
+               lo = BUNfirst(o);
+               hi = BUNlast(o);
+               bio = bat_iterator(o);
+       }
+
        switch (which) {
        case FIND_FIRST:
                end = lo;
-               if (lo >= hi || (b->tsorted ? atom_GE(BUNtail(bi, lo), v, 
b->ttype) : atom_LE(BUNtail(bi, lo), v, b->ttype))) {
+               if (lo >= hi ||
+                   (use_orderidx && (atom_GE(BUNtail(bi,*(oid 
*)BUNtail(bio,lo)), v, b->ttype))) ||
+                   (!use_orderidx && (b->tsorted ? atom_GE(BUNtail(bi, lo), v, 
b->ttype) : atom_LE(BUNtail(bi, lo), v, b->ttype)))) {
                        /* shortcut: if BAT is empty or first (and
                         * hence all) tail value is >= v (if sorted)
                         * or <= v (if revsorted), we're done */
@@ -735,7 +749,9 @@ SORTfndwhich(BAT *b, const void *v, enum
                break;
        case FIND_LAST:
                end = hi;
-               if (lo >= hi || (b->tsorted ? atom_LE(BUNtail(bi, hi - 1), v, 
b->ttype) : atom_GE(BUNtail(bi, hi - 1), v, b->ttype))) {
+               if (lo >= hi ||
+                   (use_orderidx && (atom_LE(BUNtail(bi,*(oid 
*)BUNtail(bio,hi-1)), v, b->ttype))) ||
+                   (!use_orderidx && b->tsorted ? atom_LE(BUNtail(bi, hi - 1), 
v, b->ttype) : atom_GE(BUNtail(bi, hi - 1), v, b->ttype))) {
                        /* shortcut: if BAT is empty or first (and
                         * hence all) tail value is <= v (if sorted)
                         * or >= v (if revsorted), we're done */
@@ -751,68 +767,97 @@ SORTfndwhich(BAT *b, const void *v, enum
                break;
        }
 
-       if (b->tsorted) {
+       if (use_orderidx) {
                switch (tp) {
                case TYPE_bte:
-                       SORTfndloop(bte, simple_CMP, BUNtloc);
+                       SORTfndloop(bte, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
                case TYPE_sht:
-                       SORTfndloop(sht, simple_CMP, BUNtloc);
+                       SORTfndloop(sht, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
                case TYPE_int:
-                       SORTfndloop(int, simple_CMP, BUNtloc);
+                       SORTfndloop(int, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
                case TYPE_lng:
-                       SORTfndloop(lng, simple_CMP, BUNtloc);
+                       SORTfndloop(lng, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
 #ifdef HAVE_HGE
                case TYPE_hge:
-                       SORTfndloop(hge, simple_CMP, BUNtloc);
+                       SORTfndloop(hge, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
 #endif
                case TYPE_flt:
-                       SORTfndloop(flt, simple_CMP, BUNtloc);
+                       SORTfndloop(flt, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
                        break;
                case TYPE_dbl:
-                       SORTfndloop(dbl, simple_CMP, BUNtloc);
+                       SORTfndloop(dbl, simple_CMP, BUNtloc, *(oid 
*)BUNtail(bio,cur));
+                       break;
+               default:
+                       assert(0);
+                       break;
+               }
+       } else if (b->tsorted) {
+               switch (tp) {
+               case TYPE_bte:
+                       SORTfndloop(bte, simple_CMP, BUNtloc, cur);
+                       break;
+               case TYPE_sht:
+                       SORTfndloop(sht, simple_CMP, BUNtloc, cur);
+                       break;
+               case TYPE_int:
+                       SORTfndloop(int, simple_CMP, BUNtloc, cur);
+                       break;
+               case TYPE_lng:
+                       SORTfndloop(lng, simple_CMP, BUNtloc, cur);
+                       break;
+#ifdef HAVE_HGE
+               case TYPE_hge:
+                       SORTfndloop(hge, simple_CMP, BUNtloc, cur);
+                       break;
+#endif
+               case TYPE_flt:
+                       SORTfndloop(flt, simple_CMP, BUNtloc, cur);
+                       break;
+               case TYPE_dbl:
+                       SORTfndloop(dbl, simple_CMP, BUNtloc, cur);
                        break;
                default:
                        if (b->tvarsized)
-                               SORTfndloop(b->ttype, atom_CMP, BUNtvar);
+                               SORTfndloop(b->ttype, atom_CMP, BUNtvar, cur);
                        else
-                               SORTfndloop(b->ttype, atom_CMP, BUNtloc);
+                               SORTfndloop(b->ttype, atom_CMP, BUNtloc, cur);
                        break;
                }
        } else {
                switch (tp) {
                case TYPE_bte:
-                       SORTfndloop(bte, -simple_CMP, BUNtloc);
+                       SORTfndloop(bte, -simple_CMP, BUNtloc, cur);
                        break;
                case TYPE_sht:
-                       SORTfndloop(sht, -simple_CMP, BUNtloc);
+                       SORTfndloop(sht, -simple_CMP, BUNtloc, cur);
                        break;
                case TYPE_int:
-                       SORTfndloop(int, -simple_CMP, BUNtloc);
+                       SORTfndloop(int, -simple_CMP, BUNtloc, cur);
                        break;
                case TYPE_lng:
-                       SORTfndloop(lng, -simple_CMP, BUNtloc);
+                       SORTfndloop(lng, -simple_CMP, BUNtloc, cur);
                        break;
 #ifdef HAVE_HGE
                case TYPE_hge:
-                       SORTfndloop(hge, -simple_CMP, BUNtloc);
+                       SORTfndloop(hge, -simple_CMP, BUNtloc, cur);
                        break;
 #endif
                case TYPE_flt:
-                       SORTfndloop(flt, -simple_CMP, BUNtloc);
+                       SORTfndloop(flt, -simple_CMP, BUNtloc, cur);
                        break;
                case TYPE_dbl:
-                       SORTfndloop(dbl, -simple_CMP, BUNtloc);
+                       SORTfndloop(dbl, -simple_CMP, BUNtloc, cur);
                        break;
                default:
                        if (b->tvarsized)
-                               SORTfndloop(b->ttype, -atom_CMP, BUNtvar);
+                               SORTfndloop(b->ttype, -atom_CMP, BUNtvar, cur);
                        else
-                               SORTfndloop(b->ttype, -atom_CMP, BUNtloc);
+                               SORTfndloop(b->ttype, -atom_CMP, BUNtloc, cur);
                        break;
                }
        }
@@ -855,7 +900,14 @@ SORTfndwhich(BAT *b, const void *v, enum
 BUN
 SORTfnd(BAT *b, const void *v)
 {
-       return SORTfndwhich(b, v, FIND_ANY);
+       return SORTfndwhich(b, v, FIND_ANY, 0);
+}
+
+/* use orderidx, returns BUN on order index */
+BUN
+ORDERfnd(BAT *b, const void *v)
+{
+       return SORTfndwhich(b, v, FIND_ANY, 1);
 }
 
 /* Return the BUN of the first (lowest numbered) tail value that is
@@ -864,7 +916,14 @@ SORTfnd(BAT *b, const void *v)
 BUN
 SORTfndfirst(BAT *b, const void *v)
 {
-       return SORTfndwhich(b, v, FIND_FIRST);
+       return SORTfndwhich(b, v, FIND_FIRST, 0);
+}
+
+/* use orderidx, returns BUN on order index */
+BUN
+ORDERfndfirst(BAT *b, const void *v)
+{
+       return SORTfndwhich(b, v, FIND_FIRST, 1);
 }
 
 /* Return the BUN of the first (lowest numbered) tail value beyond v.
@@ -872,5 +931,12 @@ SORTfndfirst(BAT *b, const void *v)
 BUN
 SORTfndlast(BAT *b, const void *v)
 {
-       return SORTfndwhich(b, v, FIND_LAST);
+       return SORTfndwhich(b, v, FIND_LAST, 0);
 }
+
+/* use orderidx, returns BUN on order index */
+BUN
+ORDERfndlast(BAT *b, const void *v)
+{
+       return SORTfndwhich(b, v, FIND_LAST, 1);
+}
diff --git a/gdk/gdk_search.h b/gdk/gdk_search.h
--- a/gdk/gdk_search.h
+++ b/gdk/gdk_search.h
@@ -236,4 +236,9 @@ gdk_export BUN SORTfnd(BAT *b, const voi
 gdk_export BUN SORTfndfirst(BAT *b, const void *v);
 gdk_export BUN SORTfndlast(BAT *b, const void *v);
 
+gdk_export BUN ORDERfnd(BAT *b, const void *v);
+gdk_export BUN ORDERfndfirst(BAT *b, const void *v);
+gdk_export BUN ORDERfndlast(BAT *b, const void *v);
+
+
 #endif /* _GDK_SEARCH_H_ */
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to