Changeset: cd1b1714c5d0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cd1b1714c5d0
Modified Files:
        gdk/gdk_aggr.c
        gdk/gdk_analytic_func.c
        sql/test/analytics/Tests/analytics16.sql
        sql/test/analytics/Tests/analytics16.stable.err
        sql/test/analytics/Tests/analytics16.stable.out
Branch: Jun2020
Log Message:

Added more missing overflow checks on statistical analytic functions


diffs (282 lines):

diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c
--- a/gdk/gdk_aggr.c
+++ b/gdk/gdk_aggr.c
@@ -3255,6 +3255,8 @@ BATcalcvariance_sample(dbl *avgp, BAT *b
                        delta2 = (dbl) y - mean2;                       \
                        mean2 += delta2 / n;                            \
                        m2 += delta1 * ((dbl) y - mean2);               \
+                       if (isinf(m2))                  \
+                               goto overflow;          \
                }                                                       \
        } while (0)
 
@@ -3295,6 +3297,9 @@ calccovariance(const void *v1, const voi
        if (n <= (BUN) issample)
                return dbl_nil;
        return m2 / (n - issample);
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
+       return dbl_nil;
 }
 
 dbl
@@ -3340,6 +3345,8 @@ BATcalccovariance_sample(BAT *b1, BAT *b
                        up += delta1 * aux;                             \
                        down1 += delta1 * ((dbl) x - mean1);            \
                        down2 += delta2 * aux;                          \
+                       if (isinf(up) || isinf(down1) || isinf(down2))          \
+                               goto overflow;          \
                }                                                       \
        } while (0)
 
@@ -3390,6 +3397,9 @@ BATcalccorrelation(BAT *b1, BAT *b2)
        TRC_DEBUG(ALGO, "b1=" ALGOBATFMT ",b2=" ALGOBATFMT " (" LLFMT " usec)\n",
                  ALGOBATPAR(b1), ALGOBATPAR(b2), GDKusec() - t0);
        return aux;
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
+       return dbl_nil;
 }
 
 #define AGGR_STDEV(TYPE)                                               \
@@ -3423,6 +3433,8 @@ BATcalccorrelation(BAT *b1, BAT *b2)
                        } else if (cnts[i] == 1) {                      \
                                dbls[i] = issample ? dbl_nil : 0;       \
                                nils2++;                                \
+                       } else if (isinf(m2[i])) {                      \
+                               goto overflow;          \
                        } else if (variance) {                          \
                                dbls[i] = m2[i] / (cnts[i] - issample); \
                        } else {                                        \
@@ -3593,7 +3605,8 @@ dogroupstdev(BAT **avgb, BAT *b, BAT *g,
                  ALGOOPTBATPAR(bn), ALGOOPTBATPAR(an),
                  func, GDKusec() - t0);
        return bn;
-
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
   alloc_fail:
        if (an)
                BBPreclaim(an);
@@ -3675,6 +3688,8 @@ BATgroupvariance_population(BAT *b, BAT 
                        } else if (cnts[i] == 1) {                      \
                                dbls[i] = issample ? dbl_nil : 0;       \
                                nils2++;                                \
+                       } else if (isinf(m2[i])) {              \
+                               goto overflow;          \
                        } else {                                        \
                                dbls[i] = m2[i] / (cnts[i] - issample); \
                        }                                               \
@@ -3813,6 +3828,8 @@ dogroupcovariance(BAT *b1, BAT *b2, BAT 
                  ALGOOPTBATPAR(bn),
                  func, GDKusec() - t0);
        return bn;
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
   alloc_fail:
        BBPreclaim(bn);
        GDKfree(mean1);
@@ -3873,6 +3890,8 @@ BATgroupcovariance_population(BAT *b1, B
                        if (cnts[i] <= 1 || cnts[i] == BUN_NONE || down1[i] == 0 || down2[i] == 0) { \
                                dbls[i] = dbl_nil;                      \
                                nils++;                                 \
+                       } else if (isinf(up[i]) || isinf(down1[i]) || isinf(down2[i])) {        \
+                               goto overflow;          \
                        } else {                                        \
                                dbls[i] = (up[i] / cnts[i]) / (sqrt(down1[i] / cnts[i]) * sqrt(down2[i] / cnts[i])); \
                                assert(!is_dbl_nil(dbls[i]));           \
@@ -4013,6 +4032,8 @@ BATgroupcorrelation(BAT *b1, BAT *b2, BA
                  ALGOOPTBATPAR(bn),
                  GDKusec() - t0);
        return bn;
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
   alloc_fail:
        BBPreclaim(bn);
        GDKfree(mean1);
diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c
--- a/gdk/gdk_analytic_func.c
+++ b/gdk/gdk_analytic_func.c
@@ -1715,7 +1715,9 @@ GDKanalyticalavg(BAT *r, BAT *b, BAT *s,
                                mean += delta / n;              \
                                m2 += delta * ((dbl) v - mean); \
                        }                                               \
-                       if (n > SAMPLE) { \
+                       if (isinf(m2)) {        \
+                               goto overflow;          \
+                       } else if (n > SAMPLE) { \
                                *rb = OP; \
                        } else { \
                                *rb = dbl_nil; \
@@ -1776,6 +1778,9 @@ GDKanalytical_##NAME(BAT *r, BAT *b, BAT
        r->tnonil = nils == 0; \
        r->tnil = nils > 0; \
        return GDK_SUCCEED; \
+  overflow: \
+       GDKerror("22003!overflow in calculation.\n"); \
+       return GDK_FAIL; \
 }
 
 GDK_ANALYTICAL_STDEV_VARIANCE(stddev_samp, 1, sqrt(m2 / (n - 1)), "standard deviation")
@@ -1802,7 +1807,9 @@ GDK_ANALYTICAL_STDEV_VARIANCE(variance_p
                                mean2 += delta2 / n;            \
                                m2 += delta1 * ((dbl) v2 - mean2);      \
                        }       \
-                       if (n > SAMPLE) { \
+                       if (isinf(m2)) {        \
+                               goto overflow;          \
+                       } else if (n > SAMPLE) { \
                                *rb = OP; \
                        } else { \
                                *rb = dbl_nil; \
@@ -1864,6 +1871,9 @@ GDKanalytical_##NAME(BAT *r, BAT *b1, BA
        r->tnonil = nils == 0; \
        r->tnil = nils > 0; \
        return GDK_SUCCEED; \
+  overflow: \
+       GDKerror("22003!overflow in calculation.\n"); \
+       return GDK_FAIL; \
 }
 
 GDK_ANALYTICAL_COVARIANCE(covariance_samp, 1, m2 / (n - 1))
@@ -1891,7 +1901,9 @@ GDK_ANALYTICAL_COVARIANCE(covariance_pop
                                down1 += delta1 * ((dbl) v1 - mean1);   \
                                down2 += delta2 * aux;  \
                        }       \
-                       if (n != 0 && down1 != 0 && down2 != 0) { \
+                       if (isinf(up) || isinf(down1) || isinf(down2)) {        \
+                               goto overflow;  \
+                       } else if (n != 0 && down1 != 0 && down2 != 0) { \
                                *rb = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \
                                assert(!is_dbl_nil(*rb)); \
                        } else { \
@@ -1950,4 +1962,7 @@ GDKanalytical_correlation(BAT *r, BAT *b
        r->tnonil = nils == 0;
        r->tnil = nils > 0;
        return GDK_SUCCEED;
+  overflow:
+       GDKerror("22003!overflow in calculation.\n");
+       return GDK_FAIL;
 }
diff --git a/sql/test/analytics/Tests/analytics16.sql b/sql/test/analytics/Tests/analytics16.sql
--- a/sql/test/analytics/Tests/analytics16.sql
+++ b/sql/test/analytics/Tests/analytics16.sql
@@ -137,3 +137,26 @@ select corr(a1.aa, a1.bb) from analytics
 select corr(a1.aa, a1.bb) from analytics a1 group by bb having a1.bb > (select corr(MAX(a1.aa) + a2.aa, MIN(a1.aa) + a2.aa) from analytics a2);
 
 rollback;
+
+CREATE TABLE t0(c0 DOUBLE, c1 INT);
+INSERT INTO t0(c0,c1) VALUES(1E200, 1), (0, 1);
+
+SELECT VAR_POP(c0) FROM t0; --error, overflow
+SELECT STDDEV_POP(c0) FROM t0; --error, overflow
+SELECT COVAR_POP(c0,c0) FROM t0; --error, overflow
+SELECT CORR(c0,c0) FROM t0; --error, overflow
+
+SELECT VAR_POP(c0) FROM t0 GROUP BY c0;
+SELECT STDDEV_POP(c0) FROM t0 GROUP BY c0;
+SELECT CORR(c0,c0) FROM t0 GROUP BY c0;
+
+SELECT VAR_POP(c0) FROM t0 GROUP BY c1;--error, overflow
+SELECT STDDEV_POP(c0) FROM t0 GROUP BY c1; --error, overflow
+SELECT CORR(c0,c0) FROM t0 GROUP BY c1; --error, overflow
+
+SELECT VAR_SAMP(c0) OVER () FROM t0; --error, overflow
+SELECT STDDEV_SAMP(c0) OVER () FROM t0; --error, overflow
+SELECT COVAR_SAMP(c0,c0) OVER () FROM t0; --error, overflow
+SELECT CORR(c0,c0) OVER () FROM t0; --error, overflow
+
+DROP TABLE T0;
diff --git a/sql/test/analytics/Tests/analytics16.stable.err b/sql/test/analytics/Tests/analytics16.stable.err
--- a/sql/test/analytics/Tests/analytics16.stable.err
+++ b/sql/test/analytics/Tests/analytics16.stable.err
@@ -5,20 +5,39 @@ stderr of test 'analytics16` in director
 # 18:06:21 >  "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=34861" "--set" "mapi_usock=/var/tmp/mtest-290531/.s.monetdb.34861" "--forcemito" "--dbpath=/home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/MonetDB/mTests_sql_test_analytics" "--set" "embedded_c=true"
 # 18:06:21 >  
 
-# builtin opt  gdk_dbpath = /home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/monetdb5/dbfarm/demo
-# builtin opt  mapi_port = 50000
-# builtin opt  mapi_open = false
-# builtin opt  mapi_ipv6 = false
-# builtin opt  mapi_autosense = false
-# builtin opt  sql_optimizer = default_pipe
-# builtin opt  sql_debug = 0
-# builtin opt  raw_strings = false
-# cmdline opt  gdk_nr_threads = 0
-# cmdline opt  mapi_open = true
-# cmdline opt  mapi_port = 34861
-# cmdline opt  mapi_usock = /var/tmp/mtest-290531/.s.monetdb.34861
-# cmdline opt  gdk_dbpath = /home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/MonetDB/mTests_sql_test_analytics
-# cmdline opt  embedded_c = true
+MAPI  = (monetdb) /var/tmp/mtest-10191/.s.monetdb.31213
+QUERY = SELECT VAR_POP(c0) FROM t0; --error, overflow
+ERROR = !GDK reported error: calcvariance: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784
+QUERY = SELECT STDDEV_POP(c0) FROM t0; --error, overflow
+ERROR = !GDK reported error: calcvariance: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784
+QUERY = SELECT COVAR_POP(c0,c0) FROM t0; --error, overflow
+ERROR = !GDK reported error: calccovariance: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784
+QUERY = SELECT CORR(c0,c0) FROM t0; --error, overflow
+ERROR = !GDK reported error: BATcalccorrelation: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842
+QUERY = SELECT VAR_POP(c0) FROM t0 GROUP BY c1;--error, overflow
+ERROR = !GDK reported error: dogroupstdev: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842
+QUERY = SELECT STDDEV_POP(c0) FROM t0 GROUP BY c1; --error, overflow
+ERROR = !GDK reported error: dogroupstdev: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842
+QUERY = SELECT CORR(c0,c0) FROM t0 GROUP BY c1; --error, overflow
+ERROR = !GDK reported error: BATgroupcorrelation: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842
+QUERY = SELECT VAR_SAMP(c0) OVER () FROM t0; --error, overflow
+ERROR = !GDK reported error: GDKanalytical_variance_samp: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784
+QUERY = SELECT STDDEV_SAMP(c0) OVER () FROM t0; --error, overflow
+ERROR = !GDK reported error: GDKanalytical_stddev_samp: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784
+QUERY = SELECT COVAR_SAMP(c0,c0) OVER () FROM t0; --error, overflow
+ERROR = !GDK reported error: GDKanalytical_covariance_samp: 22003!overflow in calculation.
+MAPI  = (monetdb) /var/tmp/mtest-10434/.s.monetdb.38486
+QUERY = SELECT CORR(c0,c0) OVER () FROM t0; --error, overflow
+ERROR = !GDK reported error: GDKanalytical_correlation: 22003!overflow in calculation.
 
 # 18:06:22 >  
 # 18:06:22 >  "mclient" "-lsql" "-ftest" "-tnone" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-290531" "--port=34861"
diff --git a/sql/test/analytics/Tests/analytics16.stable.out b/sql/test/analytics/Tests/analytics16.stable.out
--- a/sql/test/analytics/Tests/analytics16.stable.out
+++ b/sql/test/analytics/Tests/analytics16.stable.out
@@ -553,6 +553,31 @@ stdout of test 'analytics16` in director
 [ NULL ]
 [ NULL ]
 #rollback;
+#CREATE TABLE t0(c0 DOUBLE);
+#INSERT INTO t0(c0) VALUES(1E200), (0);
+[ 2    ]
+#SELECT VAR_POP(c0) FROM t0 GROUP BY c0;
+% sys.%1 # table_name
+% %1 # name
+% double # type
+% 24 # length
+[ 0    ]
+[ 0    ]
+#SELECT STDDEV_POP(c0) FROM t0 GROUP BY c0;
+% sys.%1 # table_name
+% %1 # name
+% double # type
+% 24 # length
+[ 0    ]
+[ 0    ]
+#SELECT CORR(c0,c0) FROM t0 GROUP BY c0;
+% sys.%1 # table_name
+% %1 # name
+% double # type
+% 24 # length
+[ NULL ]
+[ NULL ]
+#DROP TABLE T0;
 
 # 18:06:22 >  
 # 18:06:22 >  "Done."
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to