Changeset: 7f6e4ccba2c8 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7f6e4ccba2c8
Modified Files:
        gdk/gdk_unique.c
Branch: candidate-exceptions
Log Message:

Converted BATunique to candidate iterators.


diffs (284 lines):

diff --git a/gdk/gdk_unique.c b/gdk/gdk_unique.c
--- a/gdk/gdk_unique.c
+++ b/gdk/gdk_unique.c
@@ -25,99 +25,46 @@ BAT *
 BATunique(BAT *b, BAT *s)
 {
        BAT *bn;
-       BUN start, end, cnt;
-       const oid *cand = NULL, *candend = NULL;
+       BUN cnt;
        const void *v;
        const char *vals;
        const char *vars;
        int width;
        oid i, o;
-       unsigned short *seen = NULL;
+       uint16_t *seen = NULL;
        const char *nme;
        Hash *hs = NULL;
        BUN hb;
        BATiter bi;
        int (*cmp)(const void *, const void *);
        bat parent;
+       struct canditer ci;
 
        BATcheck(b, "BATunique", NULL);
-       if (b->tkey || BATcount(b) <= 1 || BATtdense(b)) {
+       cnt = canditer_init(&ci, b, s);
+
+       if (b->tkey || cnt <= 1 || BATtdense(b)) {
                /* trivial: already unique */
-               if (!b->tkey) {
-                       b->tkey = true;
-                       b->batDirtydesc = true;
-               }
-               if (s) {
-                       /* we can return a slice of the candidate list */
-                       oid lo = b->hseqbase;
-                       oid hi = lo + BATcount(b);
-                       ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT
-                                         ",s=" ALGOBATFMT "): trivial case: "
-                                         "already unique, slice candidates\n",
-                                         ALGOBATPAR(b), ALGOBATPAR(s));
-                       b = BATselect(s, NULL, &lo, &hi, true, false, false);
-                       if (b == NULL)
-                               return NULL;
-                       bn = BATproject(b, s);
-                       BBPunfix(b->batCacheid);
-                       bn = virtualize(bn);
-                       ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ","
-                                         "s=" ALGOBATFMT ")="
-                                         ALGOOPTBATFMT "\n",
-                                         ALGOBATPAR(b), ALGOBATPAR(s),
-                                         ALGOOPTBATPAR(bn));
-                       return bn;
-               }
-               /* we can return all values */
-               ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ",s=NULL):"
-                                 " trivial case: already unique, return all\n",
-                                 ALGOBATPAR(b));
-               return BATdense(0, b->hseqbase, BATcount(b));
-       }
-
-       CANDINIT(b, s, start, end, cnt, cand, candend);
-
-       if (start == end) {
-               /* trivial: empty result */
-               ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ",s="
-                                 ALGOOPTBATFMT "): trivial case: empty\n",
-                                 ALGOBATPAR(b), ALGOOPTBATPAR(s));
-               return BATdense(0, b->hseqbase, 0);
+               bn = canditer_slice(&ci, 0, ci.ncand);
+               ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT
+                                 ",s=" ALGOOPTBATFMT ")=" ALGOOPTBATFMT
+                                 ": trivial case: "
+                                 "already unique, slice candidates\n",
+                                 ALGOBATPAR(b), ALGOOPTBATPAR(s),
+                                 ALGOOPTBATPAR(bn));
+               return bn;
        }
 
        if ((BATordered(b) && BATordered_rev(b)) ||
            (b->ttype == TYPE_void && is_oid_nil(b->tseqbase))) {
                /* trivial: all values are the same */
-               ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ",s="
-                                 ALGOOPTBATFMT "): trivial case: all equal\n",
-                                 ALGOBATPAR(b), ALGOOPTBATPAR(s));
-               return BATdense(0, cand ? *cand : b->hseqbase, 1);
-       }
-
-       if (cand && BATcount(b) > 16 * BATcount(s)) {
-               BAT *nb, *r, *nr;
-
+               bn = BATdense(0, ci.seq, 1);
                ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ",s="
-                                 ALGOBATFMT "): recurse: few candidates\n",
-                                 ALGOBATPAR(b), ALGOBATPAR(s));
-               nb = BATproject(s, b);
-               if (nb == NULL)
-                       return NULL;
-               r = BATunique(nb, NULL);
-               if (r == NULL) {
-                       BBPunfix(nb->batCacheid);
-                       return NULL;
-               }
-               nr = BATproject(r, s);
-               BBPunfix(nb->batCacheid);
-               BBPunfix(r->batCacheid);
-               nr = virtualize(nr);
-               ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ","
-                                 "s=" ALGOBATFMT ")="
-                                 ALGOOPTBATFMT "\n",
-                                 ALGOBATPAR(b), ALGOBATPAR(s),
-                                 ALGOOPTBATPAR(nr));
-               return nr;
+                                 ALGOOPTBATFMT ")=" ALGOOPTBATFMT
+                                 ": trivial case: all equal\n",
+                                 ALGOBATPAR(b), ALGOOPTBATPAR(s),
+                                 ALGOOPTBATPAR(bn));
+               return bn;
        }
 
        assert(b->ttype != TYPE_void);
@@ -140,21 +87,10 @@ BATunique(BAT *b, BAT *s)
                ALGODEBUG fprintf(stderr, "#BATunique(b=" ALGOBATFMT ",s="
                                  ALGOOPTBATFMT "): (reverse) sorted\n",
                                  ALGOBATPAR(b), ALGOOPTBATPAR(s));
-               for (;;) {
-                       if (cand) {
-                               if (cand == candend)
-                                       break;
-                               i = *cand++ - b->hseqbase;
-                               if (i >= end)
-                                       break;
-                       } else {
-                               i = start++;
-                               if (i == end)
-                                       break;
-                       }
-                       v = VALUE(i);
+               for (i = 0; i < ci.ncand; i++) {
+                       o = canditer_next(&ci);
+                       v = VALUE(o - b->hseqbase);
                        if (prev == NULL || (*cmp)(v, prev) != 0) {
-                               o = i + b->hseqbase;
                                bunfastappTYPE(oid, bn, &o);
                        }
                        prev = v;
@@ -169,22 +105,11 @@ BATunique(BAT *b, BAT *s)
                seen = GDKzalloc((256 / 16) * sizeof(seen[0]));
                if (seen == NULL)
                        goto bunins_failed;
-               for (;;) {
-                       if (cand) {
-                               if (cand == candend)
-                                       break;
-                               i = *cand++ - b->hseqbase;
-                               if (i >= end)
-                                       break;
-                       } else {
-                               i = start++;
-                               if (i == end)
-                                       break;
-                       }
-                       val = ((const unsigned char *) vals)[i];
+               for (i = 0; i < ci.ncand; i++) {
+                       o = canditer_next(&ci);
+                       val = ((const unsigned char *) vals)[o - b->hseqbase];
                        if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
                                seen[val >> 4] |= 1U << (val & 0xF);
-                               o = i + b->hseqbase;
                                bunfastappTYPE(oid, bn, &o);
                                if (bn->batCount == 256) {
                                        /* there cannot be more than
@@ -205,22 +130,11 @@ BATunique(BAT *b, BAT *s)
                seen = GDKzalloc((65536 / 16) * sizeof(seen[0]));
                if (seen == NULL)
                        goto bunins_failed;
-               for (;;) {
-                       if (cand) {
-                               if (cand == candend)
-                                       break;
-                               i = *cand++ - b->hseqbase;
-                               if (i >= end)
-                                       break;
-                       } else {
-                               i = start++;
-                               if (i == end)
-                                       break;
-                       }
-                       val = ((const unsigned short *) vals)[i];
+               for (i = 0; i < ci.ncand; i++) {
+                       o = canditer_next(&ci);
+                       val = ((const unsigned short *) vals)[o - b->hseqbase];
                        if (!(seen[val >> 4] & (1U << (val & 0xF)))) {
                                seen[val >> 4] |= 1U << (val & 0xF);
-                               o = i + b->hseqbase;
                                bunfastappTYPE(oid, bn, &o);
                                if (bn->batCount == 65536) {
                                        /* there cannot be more than
@@ -255,35 +169,26 @@ BATunique(BAT *b, BAT *s)
                        lo = 0;
                }
                hs = b->thash;
-               for (;;) {
-                       if (cand) {
-                               if (cand == candend)
-                                       break;
-                               i = *cand++ - seq;
-                               if (i >= end)
-                                       break;
-                       } else {
-                               i = start++;
-                               if (i == end)
-                                       break;
-                       }
-                       v = VALUE(i);
-                       for (hb = HASHgetlink(hs, i + lo);
+               for (i = 0; i < ci.ncand; i++) {
+                       BUN p;
+
+                       o = canditer_next(&ci);
+                       p = o - seq;
+                       v = VALUE(p);
+                       for (hb = HASHgetlink(hs, p + lo);
                             hb != HASHnil(hs) && hb >= lo;
                             hb = HASHgetlink(hs, hb)) {
-                               assert(hb < i + lo);
-                               if (cmp(v, BUNtail(bi, hb)) == 0) {
-                                       o = hb - lo + seq;
-                                       if (cand == NULL ||
-                                           BATcandcontains(s, o)) {
-                                               /* we've seen this
-                                                * value before */
-                                               break;
-                                       }
+                               assert(hb < p + lo);
+                               if (cmp(v, BUNtail(bi, hb)) == 0 &&
+                                   canditer_search(&ci,
+                                                   hb - lo + seq,
+                                                   false) != BUN_NONE) {
+                                       /* we've seen this value
+                                        * before */
+                                       break;
                                }
                        }
                        if (hb == HASHnil(hs) || hb < lo) {
-                               o = i + seq;
                                bunfastappTYPE(oid, bn, &o);
                        }
                }
@@ -320,19 +225,9 @@ BATunique(BAT *b, BAT *s)
                        GDKerror("BATunique: cannot allocate hash table\n");
                        goto bunins_failed;
                }
-               for (;;) {
-                       if (cand) {
-                               if (cand == candend)
-                                       break;
-                               i = *cand++ - b->hseqbase;
-                               if (i >= end)
-                                       break;
-                       } else {
-                               i = start++;
-                               if (i == end)
-                                       break;
-                       }
-                       v = VALUE(i);
+               for (i = 0; i < ci.ncand; i++) {
+                       o = canditer_next(&ci);
+                       v = VALUE(o - b->hseqbase);
                        prb = HASHprobe(hs, v);
                        for (hb = HASHget(hs, prb);
                             hb != HASHnil(hs);
@@ -341,8 +236,7 @@ BATunique(BAT *b, BAT *s)
                                        break;
                        }
                        if (hb == HASHnil(hs)) {
-                               o = i + b->hseqbase;
-                               p = i;
+                               p = o - b->hseqbase;
                                bunfastappTYPE(oid, bn, &o);
                                /* enter into hash table */
                                HASHputlink(hs, p, HASHget(hs, prb));
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to