Changeset: 7a088315370a for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=7a088315370a
Modified Files:
        gdk/gdk_analytic.h
        gdk/gdk_analytic_func.c
        sql/backends/monet5/sql.c
        sql/backends/monet5/sql_rank.c
        sql/backends/monet5/sql_rank.h
Branch: window-tunning
Log Message:

Updated covariance and correlation


diffs (truncated from 1705 to 300 lines):

diff --git a/gdk/gdk_analytic.h b/gdk/gdk_analytic.h
--- a/gdk/gdk_analytic.h
+++ b/gdk/gdk_analytic.h
@@ -41,8 +41,8 @@ gdk_export gdk_return GDKanalytical_stdd
 gdk_export gdk_return GDKanalytical_stddev_pop(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type);
 gdk_export gdk_return GDKanalytical_variance_samp(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type);
 gdk_export gdk_return GDKanalytical_variance_pop(BAT *r, BAT *p, BAT *o, BAT *b, BAT *s, BAT *e, int tpe, int frame_type);
-gdk_export gdk_return GDKanalytical_covariance_pop(BAT *r, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe);
-gdk_export gdk_return GDKanalytical_covariance_samp(BAT *r, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe);
-gdk_export gdk_return GDKanalytical_correlation(BAT *r, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe);
+gdk_export gdk_return GDKanalytical_covariance_pop(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type);
+gdk_export gdk_return GDKanalytical_covariance_samp(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type);
+gdk_export gdk_return GDKanalytical_correlation(BAT *r, BAT *p, BAT *o, BAT *b1, BAT *b2, BAT *s, BAT *e, int tpe, int frame_type);
 
 #endif //_GDK_ANALYTIC_H_
diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c
--- a/gdk/gdk_analytic_func.c
+++ b/gdk/gdk_analytic_func.c
@@ -2623,6 +2623,7 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
 
 #define ANALYTICAL_STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP)  \
        do { \
+               TPE *bp = (TPE*)Tloc(b, 0); \
                for (; k < i;) { \
                        j = k; \
                        do {    \
@@ -2653,10 +2654,11 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
 
 #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP)  \
        do { \
+               TPE *bp = (TPE*)Tloc(b, 0); \
                l = i - 1; \
                for (j = l; ; j--) { \
                        TPE v = bp[j]; \
-                       if (!is_##TPE##_nil(bp[j]))     {       \
+                       if (!is_##TPE##_nil(v)) {       \
                                n++;                            \
                                delta = (dbl) v - mean;         \
                                mean += delta / n;              \
@@ -2686,6 +2688,7 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
 
 #define ANALYTICAL_STDEV_VARIANCE_ALL_ROWS(TPE, SAMPLE, OP)    \
        do { \
+               TPE *bp = (TPE*)Tloc(b, 0); \
                for (; j < i; j++) { \
                        TPE v = bp[j]; \
                        if (is_##TPE##_nil(v))          \
@@ -2712,7 +2715,6 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
 
 #define ANALYTICAL_STDEV_VARIANCE_CURRENT_ROW(TPE, SAMPLE, OP) \
        do {                                                            \
-               (void) bp;      \
                for (; k < i; k++) \
                        rb[k] = SAMPLE == 1 ? dbl_nil : 0;      \
                has_nils = is_dbl_nil(rb[k - 1]); \
@@ -2720,6 +2722,7 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
 
 #define ANALYTICAL_STDEV_VARIANCE_OTHERS(TPE, SAMPLE, OP)      \
        do {                                                            \
+               TPE *bp = (TPE*)Tloc(b, 0); \
                for (; k < i; k++) {                    \
                        TPE *bs = bp + start[k], *be = bp + end[k];             \
                        for (; bs < be; bs++) {                         \
@@ -2745,9 +2748,8 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
                }       \
        } while (0)
 
-#define ANALYTICAL_STDEV_VARIANCE_PARTITIONS(TPE, SAMPLE, OP, IMP)             \
+#define ANALYTICAL_STATISTICS_PARTITIONS(TPE, SAMPLE, OP, IMP)         \
        do {                                            \
-               TPE *bp = (TPE*)Tloc(b, 0); \
                if (p) {                                        \
                        for (; i < cnt; i++) {          \
                                if (np[i])                      \
@@ -2759,30 +2761,36 @@ GDKanalyticalavginteger(BAT *r, BAT *p, 
        } while (0)
 
 #ifdef HAVE_HGE
-#define ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP) \
+#define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP) \
        case TYPE_hge: \
-               ANALYTICAL_STDEV_VARIANCE_PARTITIONS(hge, SAMPLE, OP, ANALYTICAL_STDEV_VARIANCE_##IMP); \
+               ANALYTICAL_STATISTICS_PARTITIONS(hge, SAMPLE, OP, ANALYTICAL_##IMP); \
        break;
 #else
-#define ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP)
+#define ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP)
 #endif
 
-#define ANALYTICAL_STDEV_VARIANCE_BRANCHES(IMP, SAMPLE, OP)            \
+#define ANALYTICAL_STATISTICS_BRANCHES(IMP, SAMPLE, OP)                \
        do { \
                switch (tpe) {  \
                case TYPE_bte:  \
-                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(bte, SAMPLE, OP, ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       ANALYTICAL_STATISTICS_PARTITIONS(bte, SAMPLE, OP, ANALYTICAL_##IMP);    \
                        break;  \
                case TYPE_sht:  \
-                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(sht, SAMPLE, OP, ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       ANALYTICAL_STATISTICS_PARTITIONS(sht, SAMPLE, OP, ANALYTICAL_##IMP);    \
                        break;  \
                case TYPE_int:  \
-                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(int, SAMPLE, OP, ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       ANALYTICAL_STATISTICS_PARTITIONS(int, SAMPLE, OP, ANALYTICAL_##IMP);    \
                        break;  \
                case TYPE_lng:  \
-                       ANALYTICAL_STDEV_VARIANCE_PARTITIONS(lng, SAMPLE, OP, ANALYTICAL_STDEV_VARIANCE_##IMP); \
+                       ANALYTICAL_STATISTICS_PARTITIONS(lng, SAMPLE, OP, ANALYTICAL_##IMP);    \
+                       break;  \
+               case TYPE_flt:  \
+                       ANALYTICAL_STATISTICS_PARTITIONS(flt, SAMPLE, OP, ANALYTICAL_##IMP);    \
                        break;  \
-               ANALYTICAL_STDEV_VARIANCE_LIMIT(IMP, SAMPLE, OP)        \
+               case TYPE_dbl:  \
+                       ANALYTICAL_STATISTICS_PARTITIONS(dbl, SAMPLE, OP, ANALYTICAL_##IMP);    \
+                       break;  \
+               ANALYTICAL_STATISTICS_LIMIT(IMP, SAMPLE, OP)    \
                default:        \
                        goto nosupport; \
                }       \
@@ -2801,23 +2809,23 @@ GDKanalytical_##NAME(BAT *r, BAT *p, BAT
        \
        switch (frame_type) {   \
        case 3: /* unbounded until current row */       {       \
-               ANALYTICAL_STDEV_VARIANCE_BRANCHES(UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP);     \
+               ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_UNBOUNDED_TILL_CURRENT_ROW, SAMPLE, OP);  \
        } break;        \
        case 4: /* current row until unbounded */       {       \
-               ANALYTICAL_STDEV_VARIANCE_BRANCHES(CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP);     \
+               ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW_TILL_UNBOUNDED, SAMPLE, OP);  \
        } break;        \
        case 5: /* all rows */  {       \
-               ANALYTICAL_STDEV_VARIANCE_BRANCHES(ALL_ROWS, SAMPLE, OP);       \
+               ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_ALL_ROWS, SAMPLE, OP);    \
        } break;        \
        case 6: /* current row */ {     \
-               ANALYTICAL_STDEV_VARIANCE_BRANCHES(CURRENT_ROW, SAMPLE, OP);    \
+               ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_CURRENT_ROW, SAMPLE, OP); \
        } break;        \
        default: {      \
-               ANALYTICAL_STDEV_VARIANCE_BRANCHES(OTHERS, SAMPLE, OP); \
+               ANALYTICAL_STATISTICS_BRANCHES(STDEV_VARIANCE_OTHERS, SAMPLE, OP);      \
        }       \
        }       \
        \
-       BATsetcount(r, cnt); \
+       BATsetcount(r, (BUN) cnt); \
        r->tnonil = !has_nils;  \
        r->tnil = has_nils;     \
        return GDK_SUCCEED; \
@@ -2834,16 +2842,123 @@ GDK_ANALYTICAL_STDEV_VARIANCE(stddev_pop
 GDK_ANALYTICAL_STDEV_VARIANCE(variance_samp, 1, m2 / (n - 1), "variance")
 GDK_ANALYTICAL_STDEV_VARIANCE(variance_pop, 0, m2 / n, "variance")
 
-#define ANALYTICAL_COVARIANCE_CALC(TPE, SAMPLE, OP)    \
+#define ANALYTICAL_COVARIANCE_UNBOUNDED_TILL_CURRENT_ROW(TPE, SAMPLE, OP)      \
+       do { \
+               TPE *bp1 = (TPE*)Tloc(b1, 0), *bp2 = (TPE*)Tloc(b2, 0); \
+               for (; k < i;) { \
+                       j = k; \
+                       do {    \
+                               TPE v1 = bp1[k], v2 = bp2[k]; \
+                               if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) {       \
+                                       n++;                            \
+                                       delta1 = (dbl) v1 - mean1;              \
+                                       mean1 += delta1 / n;            \
+                                       delta2 = (dbl) v2 - mean2;              \
+                                       mean2 += delta2 / n;            \
+                                       m2 += delta1 * ((dbl) v2 - mean2);      \
+                               }       \
+                               k++; \
+                       } while (k < i && !op[k]);      \
+                       if (isinf(m2))  \
+                               goto overflow;          \
+                       if (n > SAMPLE) { \
+                               for (; j < k; j++) \
+                                       rb[j] = OP; \
+                       } else { \
+                               for (; j < k; j++) \
+                                       rb[j] = dbl_nil; \
+                               has_nils = true; \
+                       } \
+               } \
+               n = 0;  \
+               mean1 = 0;      \
+               mean2 = 0;      \
+               m2 = 0; \
+       } while (0)
+
+#define ANALYTICAL_COVARIANCE_CURRENT_ROW_TILL_UNBOUNDED(TPE, SAMPLE, OP)      \
+       do { \
+               TPE *bp1 = (TPE*)Tloc(b1, 0), *bp2 = (TPE*)Tloc(b2, 0); \
+               l = i - 1; \
+               for (j = l; ; j--) { \
+                       TPE v1 = bp1[j], v2 = bp2[j]; \
+                       if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) {       \
+                               n++;                            \
+                               delta1 = (dbl) v1 - mean1;              \
+                               mean1 += delta1 / n;            \
+                               delta2 = (dbl) v2 - mean2;              \
+                               mean2 += delta2 / n;            \
+                               m2 += delta1 * ((dbl) v2 - mean2);      \
+                       }       \
+                       if (op[j] || j == k) {  \
+                               if (isinf(m2))  \
+                                       goto overflow;          \
+                               if (n > SAMPLE) { \
+                                       for (; l >= j; l--) \
+                                               rb[l] = OP; \
+                               } else { \
+                                       for (; l >= j; l--) \
+                                               rb[l] = dbl_nil; \
+                                       has_nils = true; \
+                               } \
+                               if (j == k)     \
+                                       break;  \
+                               l = j - 1;      \
+                       }       \
+               }       \
+               n = 0;  \
+               mean1 = 0;      \
+               mean2 = 0;      \
+               m2 = 0; \
+               k = i; \
+       } while (0)
+
+#define ANALYTICAL_COVARIANCE_ALL_ROWS(TPE, SAMPLE, OP)        \
+       do { \
+               TPE *bp1 = (TPE*)Tloc(b1, 0), *bp2 = (TPE*)Tloc(b2, 0); \
+               for (; j < i; j++) { \
+                       TPE v1 = bp1[j], v2 = bp2[j]; \
+                       if (!is_##TPE##_nil(v1) && !is_##TPE##_nil(v2)) {       \
+                               n++;                            \
+                               delta1 = (dbl) v1 - mean1;              \
+                               mean1 += delta1 / n;            \
+                               delta2 = (dbl) v2 - mean2;              \
+                               mean2 += delta2 / n;            \
+                               m2 += delta1 * ((dbl) v2 - mean2);      \
+                       }       \
+               } \
+               if (isinf(m2))  \
+                       goto overflow;          \
+               if (n > SAMPLE) { \
+                       for (; k < i; k++) \
+                               rb[k] = OP; \
+               } else { \
+                       for (; k < i; k++) \
+                               rb[k] = dbl_nil; \
+                       has_nils = true; \
+               } \
+               n = 0;  \
+               mean1 = 0;      \
+               mean2 = 0;      \
+               m2 = 0; \
+       } while (0)
+
+#define ANALYTICAL_COVARIANCE_CURRENT_ROW(TPE, SAMPLE, OP)     \
        do {                                                            \
-               TPE *bp1 = (TPE*)Tloc(b1, 0), *bp2 = (TPE*)Tloc(b2, 0), *bs1, *be1, *bs2, v1, v2;       \
-               for (; i < cnt; i++, rb++) {            \
-                       bs1 = bp1 + start[i];                           \
-                       be1 = bp1 + end[i];                             \
-                       bs2 = bp2 + start[i];           \
+               for (; k < i; k++) \
+                       rb[k] = SAMPLE == 1 ? dbl_nil : 0;      \
+               has_nils = is_dbl_nil(rb[k - 1]); \
+       } while (0)
+
+#define ANALYTICAL_COVARIANCE_OTHERS(TPE, SAMPLE, OP)  \
+       do {                                                            \
+               TPE *bp1 = (TPE*)Tloc(b1, 0), *bp2 = (TPE*)Tloc(b2, 0); \
+               for (; k < i; k++) {            \
+                       TPE *bs1 = bp1 + start[k];                              \
+                       TPE *be1 = bp1 + end[k];                                \
+                       TPE *bs2 = bp2 + start[k];              \
                        for (; bs1 < be1; bs1++, bs2++) {       \
-                               v1 = *bs1;                              \
-                               v2 = *bs2;                              \
+                               TPE v1 = *bs1, v2 = *bs2;                               \
                                if (is_##TPE##_nil(v1) || is_##TPE##_nil(v2))   \
                                        continue;               \
                                n++;                            \
@@ -2853,13 +2968,13 @@ GDK_ANALYTICAL_STDEV_VARIANCE(variance_p
                                mean2 += delta2 / n;            \
                                m2 += delta1 * ((dbl) v2 - mean2);      \
                        }       \
-                       if (isinf(m2)) {        \
+                       if (isinf(m2))  \
                                goto overflow;          \
-                       } else if (n > SAMPLE) { \
-                               *rb = OP; \
+                       if (n > SAMPLE) { \
+                               rb[k] = OP; \
                        } else { \
-                               *rb = dbl_nil; \
-                               nils++; \
+                               rb[k] = dbl_nil; \
+                               has_nils = true; \
                        } \
                        n = 0;  \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to