Changeset: cd1b1714c5d0 for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=cd1b1714c5d0
Modified Files:
	gdk/gdk_aggr.c
	gdk/gdk_analytic_func.c
	sql/test/analytics/Tests/analytics16.sql
	sql/test/analytics/Tests/analytics16.stable.err
	sql/test/analytics/Tests/analytics16.stable.out
Branch: Jun2020
Log Message:
Added more missing overflow checks on statistical analytic functions diffs (282 lines): diff --git a/gdk/gdk_aggr.c b/gdk/gdk_aggr.c --- a/gdk/gdk_aggr.c +++ b/gdk/gdk_aggr.c @@ -3255,6 +3255,8 @@ BATcalcvariance_sample(dbl *avgp, BAT *b delta2 = (dbl) y - mean2; \ mean2 += delta2 / n; \ m2 += delta1 * ((dbl) y - mean2); \ + if (isinf(m2)) \ + goto overflow; \ } \ } while (0) @@ -3295,6 +3297,9 @@ calccovariance(const void *v1, const voi if (n <= (BUN) issample) return dbl_nil; return m2 / (n - issample); + overflow: + GDKerror("22003!overflow in calculation.\n"); + return dbl_nil; } dbl @@ -3340,6 +3345,8 @@ BATcalccovariance_sample(BAT *b1, BAT *b up += delta1 * aux; \ down1 += delta1 * ((dbl) x - mean1); \ down2 += delta2 * aux; \ + if (isinf(up) || isinf(down1) || isinf(down2)) \ + goto overflow; \ } \ } while (0) @@ -3390,6 +3397,9 @@ BATcalccorrelation(BAT *b1, BAT *b2) TRC_DEBUG(ALGO, "b1=" ALGOBATFMT ",b2=" ALGOBATFMT " (" LLFMT " usec)\n", ALGOBATPAR(b1), ALGOBATPAR(b2), GDKusec() - t0); return aux; + overflow: + GDKerror("22003!overflow in calculation.\n"); + return dbl_nil; } #define AGGR_STDEV(TYPE) \ @@ -3423,6 +3433,8 @@ BATcalccorrelation(BAT *b1, BAT *b2) } else if (cnts[i] == 1) { \ dbls[i] = issample ? dbl_nil : 0; \ nils2++; \ + } else if (isinf(m2[i])) { \ + goto overflow; \ } else if (variance) { \ dbls[i] = m2[i] / (cnts[i] - issample); \ } else { \ @@ -3593,7 +3605,8 @@ dogroupstdev(BAT **avgb, BAT *b, BAT *g, ALGOOPTBATPAR(bn), ALGOOPTBATPAR(an), func, GDKusec() - t0); return bn; - + overflow: + GDKerror("22003!overflow in calculation.\n"); alloc_fail: if (an) BBPreclaim(an); @@ -3675,6 +3688,8 @@ BATgroupvariance_population(BAT *b, BAT } else if (cnts[i] == 1) { \ dbls[i] = issample ? 
dbl_nil : 0; \ nils2++; \ + } else if (isinf(m2[i])) { \ + goto overflow; \ } else { \ dbls[i] = m2[i] / (cnts[i] - issample); \ } \ @@ -3813,6 +3828,8 @@ dogroupcovariance(BAT *b1, BAT *b2, BAT ALGOOPTBATPAR(bn), func, GDKusec() - t0); return bn; + overflow: + GDKerror("22003!overflow in calculation.\n"); alloc_fail: BBPreclaim(bn); GDKfree(mean1); @@ -3873,6 +3890,8 @@ BATgroupcovariance_population(BAT *b1, B if (cnts[i] <= 1 || cnts[i] == BUN_NONE || down1[i] == 0 || down2[i] == 0) { \ dbls[i] = dbl_nil; \ nils++; \ + } else if (isinf(up[i]) || isinf(down1[i]) || isinf(down2[i])) { \ + goto overflow; \ } else { \ dbls[i] = (up[i] / cnts[i]) / (sqrt(down1[i] / cnts[i]) * sqrt(down2[i] / cnts[i])); \ assert(!is_dbl_nil(dbls[i])); \ @@ -4013,6 +4032,8 @@ BATgroupcorrelation(BAT *b1, BAT *b2, BA ALGOOPTBATPAR(bn), GDKusec() - t0); return bn; + overflow: + GDKerror("22003!overflow in calculation.\n"); alloc_fail: BBPreclaim(bn); GDKfree(mean1); diff --git a/gdk/gdk_analytic_func.c b/gdk/gdk_analytic_func.c --- a/gdk/gdk_analytic_func.c +++ b/gdk/gdk_analytic_func.c @@ -1715,7 +1715,9 @@ GDKanalyticalavg(BAT *r, BAT *b, BAT *s, mean += delta / n; \ m2 += delta * ((dbl) v - mean); \ } \ - if (n > SAMPLE) { \ + if (isinf(m2)) { \ + goto overflow; \ + } else if (n > SAMPLE) { \ *rb = OP; \ } else { \ *rb = dbl_nil; \ @@ -1776,6 +1778,9 @@ GDKanalytical_##NAME(BAT *r, BAT *b, BAT r->tnonil = nils == 0; \ r->tnil = nils > 0; \ return GDK_SUCCEED; \ + overflow: \ + GDKerror("22003!overflow in calculation.\n"); \ + return GDK_FAIL; \ } GDK_ANALYTICAL_STDEV_VARIANCE(stddev_samp, 1, sqrt(m2 / (n - 1)), "standard deviation") @@ -1802,7 +1807,9 @@ GDK_ANALYTICAL_STDEV_VARIANCE(variance_p mean2 += delta2 / n; \ m2 += delta1 * ((dbl) v2 - mean2); \ } \ - if (n > SAMPLE) { \ + if (isinf(m2)) { \ + goto overflow; \ + } else if (n > SAMPLE) { \ *rb = OP; \ } else { \ *rb = dbl_nil; \ @@ -1864,6 +1871,9 @@ GDKanalytical_##NAME(BAT *r, BAT *b1, BA r->tnonil = nils == 0; \ r->tnil = 
nils > 0; \ return GDK_SUCCEED; \ + overflow: \ + GDKerror("22003!overflow in calculation.\n"); \ + return GDK_FAIL; \ } GDK_ANALYTICAL_COVARIANCE(covariance_samp, 1, m2 / (n - 1)) @@ -1891,7 +1901,9 @@ GDK_ANALYTICAL_COVARIANCE(covariance_pop down1 += delta1 * ((dbl) v1 - mean1); \ down2 += delta2 * aux; \ } \ - if (n != 0 && down1 != 0 && down2 != 0) { \ + if (isinf(up) || isinf(down1) || isinf(down2)) { \ + goto overflow; \ + } else if (n != 0 && down1 != 0 && down2 != 0) { \ *rb = (up / n) / (sqrt(down1 / n) * sqrt(down2 / n)); \ assert(!is_dbl_nil(*rb)); \ } else { \ @@ -1950,4 +1962,7 @@ GDKanalytical_correlation(BAT *r, BAT *b r->tnonil = nils == 0; r->tnil = nils > 0; return GDK_SUCCEED; + overflow: + GDKerror("22003!overflow in calculation.\n"); + return GDK_FAIL; } diff --git a/sql/test/analytics/Tests/analytics16.sql b/sql/test/analytics/Tests/analytics16.sql --- a/sql/test/analytics/Tests/analytics16.sql +++ b/sql/test/analytics/Tests/analytics16.sql @@ -137,3 +137,26 @@ select corr(a1.aa, a1.bb) from analytics select corr(a1.aa, a1.bb) from analytics a1 group by bb having a1.bb > (select corr(MAX(a1.aa) + a2.aa, MIN(a1.aa) + a2.aa) from analytics a2); rollback; + +CREATE TABLE t0(c0 DOUBLE, c1 INT); +INSERT INTO t0(c0,c1) VALUES(1E200, 1), (0, 1); + +SELECT VAR_POP(c0) FROM t0; --error, overflow +SELECT STDDEV_POP(c0) FROM t0; --error, overflow +SELECT COVAR_POP(c0,c0) FROM t0; --error, overflow +SELECT CORR(c0,c0) FROM t0; --error, overflow + +SELECT VAR_POP(c0) FROM t0 GROUP BY c0; +SELECT STDDEV_POP(c0) FROM t0 GROUP BY c0; +SELECT CORR(c0,c0) FROM t0 GROUP BY c0; + +SELECT VAR_POP(c0) FROM t0 GROUP BY c1;--error, overflow +SELECT STDDEV_POP(c0) FROM t0 GROUP BY c1; --error, overflow +SELECT CORR(c0,c0) FROM t0 GROUP BY c1; --error, overflow + +SELECT VAR_SAMP(c0) OVER () FROM t0; --error, overflow +SELECT STDDEV_SAMP(c0) OVER () FROM t0; --error, overflow +SELECT COVAR_SAMP(c0,c0) OVER () FROM t0; --error, overflow +SELECT CORR(c0,c0) OVER () FROM 
t0; --error, overflow + +DROP TABLE T0; diff --git a/sql/test/analytics/Tests/analytics16.stable.err b/sql/test/analytics/Tests/analytics16.stable.err --- a/sql/test/analytics/Tests/analytics16.stable.err +++ b/sql/test/analytics/Tests/analytics16.stable.err @@ -5,20 +5,39 @@ stderr of test 'analytics16` in director # 18:06:21 > "mserver5" "--debug=10" "--set" "gdk_nr_threads=0" "--set" "mapi_open=true" "--set" "mapi_port=34861" "--set" "mapi_usock=/var/tmp/mtest-290531/.s.monetdb.34861" "--forcemito" "--dbpath=/home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/MonetDB/mTests_sql_test_analytics" "--set" "embedded_c=true" # 18:06:21 > -# builtin opt gdk_dbpath = /home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/monetdb5/dbfarm/demo -# builtin opt mapi_port = 50000 -# builtin opt mapi_open = false -# builtin opt mapi_ipv6 = false -# builtin opt mapi_autosense = false -# builtin opt sql_optimizer = default_pipe -# builtin opt sql_debug = 0 -# builtin opt raw_strings = false -# cmdline opt gdk_nr_threads = 0 -# cmdline opt mapi_open = true -# cmdline opt mapi_port = 34861 -# cmdline opt mapi_usock = /var/tmp/mtest-290531/.s.monetdb.34861 -# cmdline opt gdk_dbpath = /home/ferreira/repositories/MonetDB-statistics-analytics/BUILD/var/MonetDB/mTests_sql_test_analytics -# cmdline opt embedded_c = true +MAPI = (monetdb) /var/tmp/mtest-10191/.s.monetdb.31213 +QUERY = SELECT VAR_POP(c0) FROM t0; --error, overflow +ERROR = !GDK reported error: calcvariance: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784 +QUERY = SELECT STDDEV_POP(c0) FROM t0; --error, overflow +ERROR = !GDK reported error: calcvariance: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784 +QUERY = SELECT COVAR_POP(c0,c0) FROM t0; --error, overflow +ERROR = !GDK reported error: calccovariance: 22003!overflow in calculation. 
+MAPI = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784 +QUERY = SELECT CORR(c0,c0) FROM t0; --error, overflow +ERROR = !GDK reported error: BATcalccorrelation: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842 +QUERY = SELECT VAR_POP(c0) FROM t0 GROUP BY c1;--error, overflow +ERROR = !GDK reported error: dogroupstdev: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842 +QUERY = SELECT STDDEV_POP(c0) FROM t0 GROUP BY c1; --error, overflow +ERROR = !GDK reported error: dogroupstdev: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842 +QUERY = SELECT CORR(c0,c0) FROM t0 GROUP BY c1; --error, overflow +ERROR = !GDK reported error: BATgroupcorrelation: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-11411/.s.monetdb.37842 +QUERY = SELECT VAR_SAMP(c0) OVER () FROM t0; --error, overflow +ERROR = !GDK reported error: GDKanalytical_variance_samp: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784 +QUERY = SELECT STDDEV_SAMP(c0) OVER () FROM t0; --error, overflow +ERROR = !GDK reported error: GDKanalytical_stddev_samp: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-10668/.s.monetdb.32784 +QUERY = SELECT COVAR_SAMP(c0,c0) OVER () FROM t0; --error, overflow +ERROR = !GDK reported error: GDKanalytical_covariance_samp: 22003!overflow in calculation. +MAPI = (monetdb) /var/tmp/mtest-10434/.s.monetdb.38486 +QUERY = SELECT CORR(c0,c0) OVER () FROM t0; --error, overflow +ERROR = !GDK reported error: GDKanalytical_correlation: 22003!overflow in calculation. 
# 18:06:22 > # 18:06:22 > "mclient" "-lsql" "-ftest" "-tnone" "-Eutf-8" "-i" "-e" "--host=/var/tmp/mtest-290531" "--port=34861" diff --git a/sql/test/analytics/Tests/analytics16.stable.out b/sql/test/analytics/Tests/analytics16.stable.out --- a/sql/test/analytics/Tests/analytics16.stable.out +++ b/sql/test/analytics/Tests/analytics16.stable.out @@ -553,6 +553,31 @@ stdout of test 'analytics16` in director [ NULL ] [ NULL ] #rollback; +#CREATE TABLE t0(c0 DOUBLE); +#INSERT INTO t0(c0) VALUES(1E200), (0); +[ 2 ] +#SELECT VAR_POP(c0) FROM t0 GROUP BY c0; +% sys.%1 # table_name +% %1 # name +% double # type +% 24 # length +[ 0 ] +[ 0 ] +#SELECT STDDEV_POP(c0) FROM t0 GROUP BY c0; +% sys.%1 # table_name +% %1 # name +% double # type +% 24 # length +[ 0 ] +[ 0 ] +#SELECT CORR(c0,c0) FROM t0 GROUP BY c0; +% sys.%1 # table_name +% %1 # name +% double # type +% 24 # length +[ NULL ] +[ NULL ] +#DROP TABLE T0; # 18:06:22 > # 18:06:22 > "Done." _______________________________________________ checkin-list mailing list checkin-list@monetdb.org https://www.monetdb.org/mailman/listinfo/checkin-list