Changeset: 014804abc3bd for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=014804abc3bd
Modified Files:
        gdk/gdk.h
        gdk/gdk_orderidx.c
        gdk/gdk_search.c
        gdk/gdk_select.c
Branch: leftmart
Log Message:

Use the most significant bit of oid's to mask the change of values in ordered 
idx.


diffs (truncated from 328 to 300 lines):

diff --git a/gdk/gdk.h b/gdk/gdk.h
--- a/gdk/gdk.h
+++ b/gdk/gdk.h
@@ -615,6 +615,8 @@ typedef size_t BUN;
 #else
 #define BUN_NONE ((BUN) LLONG_MAX)
 #endif
+#define BUN_MSK (~BUN_NONE)
+#define BUN_UNMSK BUN_NONE
 #define BUN_MAX (BUN_NONE - 1) /* maximum allowed size of a BAT */
 
 #define BUN2 2
diff --git a/gdk/gdk_orderidx.c b/gdk/gdk_orderidx.c
--- a/gdk/gdk_orderidx.c
+++ b/gdk/gdk_orderidx.c
@@ -210,22 +210,58 @@ BATorderidx(BAT *b, int stable)
        return GDK_SUCCEED;
 }
 
-#define BINARY_MERGE(TYPE)                                             \
+#define UNARY_MERGE(TYPE)                                              \
        do {                                                            \
                TYPE *v = (TYPE *) Tloc(b, BUNfirst(b));                \
-               while (p0 < q0 && p1 < q1) {                            \
-                       if (v[*p0 - b->hseqbase] <= v[*p1 - b->hseqbase]) { \
-                               *mv++ = *p0++;                          \
-                       } else {                                        \
-                               *mv++ = *p1++;                          \
+               while (p < q) {                                         \
+                       *mv = *p++;                                     \
+                       if (p < q && v[*mv - b->hseqbase] != v[*p - 
b->hseqbase]) { \
+                               *mv |= BUN_MSK;                         \
                        }                                               \
+                       mv++;                                           \
                }                                                       \
-               while (p0 < q0) {                                       \
-                       *mv++ = *p0++;                                  \
-               }                                                       \
-               while (p1 < q1) {                                       \
-                       *mv++ = *p1++;                                  \
-               }                                                       \
+               *(mv-1) |= BUN_MSK;                                     \
+       } while (0)
+
+#define BINARY_MERGE(TYPE)                                                     
                \
+       do {                                                                    
                \
+               TYPE *v = (TYPE *) Tloc(b, BUNfirst(b));                        
                \
+               while (p0 < q0 && p1 < q1) {                                    
                \
+                       if (v[*p0 - b->hseqbase] <= v[*p1 - b->hseqbase]) {     
                \
+                               *mv = *p0++;                                    
                \
+                               if (p0 < q0 && v[*mv - b->hseqbase] != v[*p0 - 
b->hseqbase]) {  \
+                                       *mv |= BUN_MSK;                         
                \
+                               }                                               
                \
+                               mv++;                                           
                \
+                       } else {                                                
                \
+                               *mv++ = *p1++;                                  
                \
+                               if (p1 < q1 && v[*mv - b->hseqbase] != v[*p1 - 
b->hseqbase]) {  \
+                                       *mv |= BUN_MSK;                         
                \
+                               }                                               
                \
+                               mv++;                                           
                \
+                       }                                                       
                \
+               }                                                               
                \
+               if (p0 < q0 && v[*(mv-1) - b->hseqbase] != v[*p0 - 
b->hseqbase]) {              \
+                       *(mv-1) |= BUN_MSK;                                     
                \
+               }                                                               
                \
+               while (p0 < q0) {                                               
                \
+                       *mv = *p0++;                                            
                \
+                       if (p0 < q0 && v[*mv - b->hseqbase] != v[*p0 - 
b->hseqbase]) {          \
+                               *mv |= BUN_MSK;                                 
                \
+                       }                                                       
                \
+                       mv++;                                                   
                \
+               }                                                               
                \
+               if (p1 < q1 && v[*(mv-1) - b->hseqbase] != v[*p1 - 
b->hseqbase]) {              \
+                       *(mv-1) |= BUN_MSK;                                     
                \
+               }                                                               
                \
+               while (p1 < q1) {                                               
                \
+                       *mv = *p1++;                                            
                \
+                       if (p1 < q1 && v[*mv - b->hseqbase] != v[*p1 - 
b->hseqbase]) {          \
+                               *mv |= BUN_MSK;                                 
                \
+                       }                                                       
                \
+                       mv++;                                                   
                \
+               }                                                               
                \
+               *(mv-1) |= BUN_MSK;                                             
                \
        } while(0)
 
 #define swap(X,Y,TMP)  (TMP)=(X);(X)=(Y);(Y)=(TMP)
@@ -258,37 +294,50 @@ BATorderidx(BAT *b, int stable)
                } while (cur != min);                                   \
        } while (0)
 
-#define NWAY_MERGE(TYPE)                                               \
-       do {                                                            \
-               TYPE *minhp, t;                                         \
-               TYPE *v = (TYPE *) Tloc(b, BUNfirst(b));                \
-               if ((minhp = (TYPE *) GDKmalloc(sizeof(TYPE)*n_ar)) == NULL) { \
-                       goto bailout;                                   \
-               }                                                       \
-               /* init min heap */                                     \
-               for (i = 0; i < n_ar; i++) {                            \
-                       minhp[i] = v[*p[i] - b->hseqbase];              \
-               }                                                       \
-               for (i = n_ar/2; i >=0 ; i--) {                         \
-                       HEAPIFY(i);                                     \
-               }                                                       \
-               /* merge */                                             \
-               while (n_ar > 1) {                                      \
-                       *mv++ = *(p[0])++;                              \
-                       if (p[0] < q[0]) {                              \
-                               minhp[0] = v[*p[0] - b->hseqbase];      \
-                       } else {                                        \
-                               swap(minhp[0], minhp[n_ar-1], t);       \
-                               swap(p[0], p[n_ar-1], t_oid);           \
-                               swap(q[0], q[n_ar-1], t_oid);           \
-                               n_ar--;                                 \
-                       }                                               \
-                       HEAPIFY(0);                                     \
-               }                                                       \
-               while (p[0] < q[0]) {                                   \
-                       *mv++ = *(p[0])++;                              \
-               }                                                       \
-               GDKfree(minhp);                                         \
+#define NWAY_MERGE(TYPE)                                                       
                \
+       do {                                                                    
                \
+               TYPE *minhp, t;                                                 
                \
+               TYPE *v = (TYPE *) Tloc(b, BUNfirst(b));                        
                \
+               if ((minhp = (TYPE *) GDKmalloc(sizeof(TYPE)*n_ar)) == NULL) {  
                \
+                       goto bailout;                                           
                \
+               }                                                               
                \
+               /* init min heap */                                             
                \
+               for (i = 0; i < n_ar; i++) {                                    
                \
+                       minhp[i] = v[*p[i] - b->hseqbase];                      
                \
+               }                                                               
                \
+               for (i = n_ar/2; i >=0 ; i--) {                                 
                \
+                       HEAPIFY(i);                                             
                \
+               }                                                               
                \
+               /* merge */                                                     
                \
+               while (n_ar > 1) {                                              
                \
+                       *mv = *(p[0])++;                                        
                \
+                       if (p[0] < q[0] && v[*mv - b->hseqbase] != v[*p[0] - 
b->hseqbase]) {    \
+                               *mv |= BUN_MSK;                                 
                \
+                       }                                                       
                \
+                       mv++;                                                   
                \
+                       if (p[0] < q[0]) {                                      
                \
+                               minhp[0] = v[*p[0] - b->hseqbase];              
                \
+                               HEAPIFY(0);                                     
                \
+                       } else {                                                
                \
+                               swap(minhp[0], minhp[n_ar-1], t);               
                \
+                               swap(p[0], p[n_ar-1], t_oid);                   
                \
+                               swap(q[0], q[n_ar-1], t_oid);                   
                \
+                               n_ar--;                                         
                \
+                               HEAPIFY(0);                                     
                \
+                               if (v[*(mv-1) - b->hseqbase] != v[*p[0] - 
b->hseqbase]) {       \
+                                       *(mv-1) |= BUN_MSK;                     
                \
+                               }                                               
                \
+                       }                                                       
                \
+               }                                                               
                \
+               while (p[0] < q[0]) {                                           
                \
+                       *mv = *(p[0])++;                                        
                \
+                       if (p[0] < q[0] && v[*mv - b->hseqbase] != v[*p[0] - 
b->hseqbase]) {    \
+                               *mv |= BUN_MSK;                                 
                \
+                       }                                                       
                \
+                       mv++;                                                   
                \
+               }                                                               
                \
+               *(mv-1) |= BUN_MSK;                                             
                \
+               GDKfree(minhp);                                                 
                \
        } while (0)
 
 gdk_return
@@ -328,10 +377,31 @@ GDKmergeidx(BAT *b, BAT**a, int n_ar)
        *mv++ = (oid) BATcount(b);
 
        if (n_ar == 1) {
+               const oid *restrict p, *q;
                /* One oid order bat, nothing to merge */
                assert(BATcount(a[0]) == BATcount(b));
                assert(VIEWtparent(a[0]) == -b->batCacheid && a[0]->torderidx);
-               memcpy(mv, (oid *) a[0]->torderidx->base + ORDERIDXOFF, 
BATcount(b) * SIZEOF_OID);
+               p = (const oid *) a[0]->torderidx->base + ORDERIDXOFF;
+               q = p + BATcount(a[0]);
+               switch (ATOMstorage(b->ttype)) {
+               case TYPE_bte: UNARY_MERGE(bte); break;
+               case TYPE_sht: UNARY_MERGE(sht); break;
+               case TYPE_int: UNARY_MERGE(int); break;
+               case TYPE_lng: UNARY_MERGE(lng); break;
+#ifdef HAVE_HGE
+               case TYPE_hge: UNARY_MERGE(hge); break;
+#endif
+               case TYPE_flt: UNARY_MERGE(flt); break;
+               case TYPE_dbl: UNARY_MERGE(dbl); break;
+               case TYPE_str:
+               default:
+                       /* TODO: support strings, date, timestamps etc. */
+                       assert(0);
+                       HEAPfree(m, 1);
+                       GDKfree(m);
+                       MT_lock_unset(&GDKhashLock(abs(b->batCacheid)));
+                       return GDK_FAIL;
+               }
        } else if (n_ar == 2) {
                /* sort merge with 1 comparison per BUN */
                const oid *restrict p0, *restrict p1, *q0, *q1;
diff --git a/gdk/gdk_search.c b/gdk/gdk_search.c
--- a/gdk/gdk_search.c
+++ b/gdk/gdk_search.c
@@ -141,7 +141,7 @@ SORTfndwhich(BAT *b, const void *v, enum
                end = lo;
                if (lo >= hi ||
                    (use_orderidx ?
-                    (atom_GE(BUNtail(bi, o[lo] - b->hseqbase + BUNfirst(b)), 
v, b->ttype)) :
+                    (atom_GE(BUNtail(bi, (o[lo]&BUN_UNMSK) - b->hseqbase + 
BUNfirst(b)), v, b->ttype)) :
                     (b->tsorted ? atom_GE(BUNtail(bi, lo), v, b->ttype) : 
atom_LE(BUNtail(bi, lo), v, b->ttype)))) {
                        /* shortcut: if BAT is empty or first (and
                         * hence all) tail value is >= v (if sorted)
@@ -153,7 +153,7 @@ SORTfndwhich(BAT *b, const void *v, enum
                end = hi;
                if (lo >= hi ||
                    (use_orderidx ?
-                    (atom_LE(BUNtail(bi, o[hi - 1] - b->hseqbase + 
BUNfirst(b)), v, b->ttype)) :
+                    (atom_LE(BUNtail(bi, (o[hi - 1]&BUN_UNMSK) - b->hseqbase + 
BUNfirst(b)), v, b->ttype)) :
                     (b->tsorted ? atom_LE(BUNtail(bi, hi - 1), v, b->ttype) : 
atom_GE(BUNtail(bi, hi - 1), v, b->ttype)))) {
                        /* shortcut: if BAT is empty or last (and
                         * hence all) tail value is <= v (if sorted)
@@ -238,27 +238,27 @@ SORTfndwhich(BAT *b, const void *v, enum
                assert(use_orderidx);
                switch (tp) {
                case TYPE_bte:
-                       SORTfndloop(bte, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(bte, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
                case TYPE_sht:
-                       SORTfndloop(sht, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(sht, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
                case TYPE_int:
-                       SORTfndloop(int, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(int, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
                case TYPE_lng:
-                       SORTfndloop(lng, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(lng, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
 #ifdef HAVE_HGE
                case TYPE_hge:
-                       SORTfndloop(hge, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(hge, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
 #endif
                case TYPE_flt:
-                       SORTfndloop(flt, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(flt, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
                case TYPE_dbl:
-                       SORTfndloop(dbl, simple_CMP, BUNtloc, o[cur] - 
b->hseqbase + BUNfirst(b));
+                       SORTfndloop(dbl, simple_CMP, BUNtloc, 
(o[cur]&BUN_UNMSK) - b->hseqbase + BUNfirst(b));
                        break;
                default:
                        assert(0);
@@ -270,20 +270,34 @@ SORTfndwhich(BAT *b, const void *v, enum
        case FIND_FIRST:
                if (cmp == 0 && b->tkey == 0) {
                        /* shift over multiple equals */
-                       for (diff = cur - end; diff; diff >>= 1) {
-                               while (cur >= end + diff &&
-                                      atom_EQ(BUNtail(bi, use_orderidx ? o[cur 
- diff] - b->hseqbase + BUNfirst(b) : cur - diff), v, b->ttype))
-                                       cur -= diff;
+                       if (use_orderidx) {
+                               while (cur >= end && !(o[cur]&BUN_MSK)) {
+                                       cur--;
+                               }
+                               cur++;
+                       } else {
+                               for (diff = cur - end; diff; diff >>= 1) {
+                                       while (cur >= end + diff &&
+                                              atom_EQ(BUNtail(bi, cur - diff), 
v, b->ttype))
+                                               cur -= diff;
+                               }
                        }
                }
                break;
        case FIND_LAST:
                if (cmp == 0 && b->tkey == 0) {
                        /* shift over multiple equals */
-                       for (diff = (end - cur) >> 1; diff; diff >>= 1) {
-                               while (cur + diff < end &&
-                                      atom_EQ(BUNtail(bi, use_orderidx ? o[cur 
+ diff] - b->hseqbase + BUNfirst(b) : cur + diff), v, b->ttype))
-                                       cur += diff;
+                       if (use_orderidx) {
+                               while (cur < end && !(o[cur]&BUN_MSK)) {
+                                       cur++;
+                               }
+                       } else {
+                               for (diff = (end - cur) >> 1; diff; diff >>= 1) 
{
+                                       while (cur + diff < end &&
+                                              atom_EQ(BUNtail(bi, cur + diff), 
v, b->ttype)) {
+                                               cur += diff;
+                                       }
+                               }
                        }
                }
                cur += (cmp == 0);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to