MonetDB: Oct2020-merged-Jun2020 - Stricter decimal parsing.

Aris Koning Tue, 17 Nov 2020 07:32:55 -0800

Changeset: eacb1f23e00e for MonetDB
URL: https://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=eacb1f23e00e
Modified Files:
        sql/backends/monet5/sql_round_impl.h
        sql/server/sql_decimal.c
        sql/server/sql_decimal.h
        sql/server/sql_parser.y
Branch: Oct2020-merged-Jun2020
Log Message:


Stricter decimal parsing.


diffs (truncated from 317 to 300 lines):

diff --git a/sql/backends/monet5/sql_round_impl.h b/sql/backends/monet5/sql_round_impl.h
--- a/sql/backends/monet5/sql_round_impl.h
+++ b/sql/backends/monet5/sql_round_impl.h
@@ -309,48 +309,42 @@ nil_2dec(TYPE *res, const void *val, con
 static inline str
 str_2dec_body(TYPE *res, const str val, const int d, const int sc)
 {
-       char *s = val;
-       char *dot, *end;
+       char *s;
        int digits;
        int scale;
        BIG value;
 
-       dot = strchr(s, '.');
-       if (dot != NULL) {
-               s = strip_extra_zeros(s);
-               digits = _strlen(s) - 1;
-               scale = _strlen(dot + 1);
-       } else {
-               digits = _strlen(s);
-               scale = 0;
-       }
-       end = NULL;
+       if (*d < 0 || *d >= (int) (sizeof(scales) / sizeof(scales[0])))
+               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", *val, *d, *sc);
+
+       s = *val;
+
+       int has_errors;
        value = 0;
 
-       if (digits < 0)
-               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", s, d, sc);
-       if (d < 0 || (size_t) d >= sizeof(scales) / sizeof(scales[0]))
-               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", s, d, sc);
+       // s = strip_extra_zeros(s);
+
+       value = decimal_from_str(s, &digits, &scale, &has_errors);
+       if (has_errors)
+               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", *val, *d, *sc);
 
-       value = decimal_from_str(s, &end);
-       if (*s == '+' || *s == '-')
-               digits--;
-       if (scale < sc) {
+       // handle situations where the de facto scale is different from the formal scale.
+       if (scale < *sc) {
                /* the current scale is too small, increase it by adding 0's */
-               int dff = sc - scale;   /* CANNOT be 0! */
+               int dff = *sc - scale;  /* CANNOT be 0! */
                if (dff >= MAX_SCALE)
-                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", s, d, sc);
+                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", *val, *d, *sc);
 
                value *= scales[dff];
                scale += dff;
                digits += dff;
-       } else if (scale > sc) {
+       } else if (scale > *sc) {
                /* the current scale is too big, decrease it by correctly rounding */
                /* we should round properly, and check for overflow (res >= 10^digits+scale) */
-               int dff = scale - sc;   /* CANNOT be 0 */
+               int dff = scale - *sc;  /* CANNOT be 0 */
 
                if (dff >= MAX_SCALE)
-                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", s, d, sc);
+                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", *val, *d, *sc);
 
                BIG rnd = scales[dff] >> 1;
 
@@ -361,11 +355,13 @@ str_2dec_body(TYPE *res, const str val, 
                value /= scales[dff];
                scale -= dff;
                digits -= dff;
-               if (value >= scales[d] || value <= -scales[d])
-                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", s, d, sc);
+               if (value >= scales[*d] || value <= -scales[*d]) {
+                       throw(SQL, STRING(TYPE), SQLSTATE(42000) "Rounding of decimal (%s) doesn't fit format (%d.%d)", *val, *d, *sc);
+               }
        }
-       if (value <= -scales[d] || value >= scales[d]  || *end)
-               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", s, d, sc);
+       if (value <= -scales[*d] || value >= scales[*d]) {
+               throw(SQL, STRING(TYPE), SQLSTATE(42000) "Decimal (%s) doesn't have format (%d.%d)", *val, *d, *sc);
+       }
        *res = (TYPE) value;
        return MAL_SUCCEED;
 }
diff --git a/sql/server/sql_decimal.c b/sql/server/sql_decimal.c
--- a/sql/server/sql_decimal.c
+++ b/sql/server/sql_decimal.c
@@ -10,46 +10,97 @@
 
 #include "sql_decimal.h"
 
-#ifdef HAVE_HGE
-hge
-#else
-lng
-#endif
-decimal_from_str(char *dec, char **end)
+
+DEC_TPE
+decimal_from_str(char *dec, int* digits, int* scale, int* has_errors)
 {
 #ifdef HAVE_HGE
-       hge res = 0;
-       const hge max0 = GDK_hge_max / 10, max1 = GDK_hge_max % 10;
+    const hge max0 = GDK_hge_max / 10, max1 = GDK_hge_max % 10;
 #else
-       lng res = 0;
-       const lng max0 = GDK_lng_max / 10, max1 = GDK_lng_max % 10;
+    const lng max0 = GDK_lng_max / 10, max1 = GDK_lng_max % 10;
 #endif
-       int neg = 0, seen_dot = 0;
+
+       assert(digits);
+       assert(scale);
+       assert(has_errors);
 
+       DEC_TPE res = 0;
+       *has_errors = 0;
+
+       int _digits     = 0;
+       int _scale      = 0;
+
+// preceding whitespace:
+       int neg = 0;
        while(isspace((unsigned char) *dec))
                dec++;
+
+// optional sign:
        if (*dec == '-') {
                neg = 1;
                dec++;
        } else if (*dec == '+') {
                dec++;
        }
-       for (; *dec && (isdigit((unsigned char) *dec) || *dec == '.'); dec++) {
-               if (*dec != '.') {
-                       if (res > max0 || (res == max0 && *dec - '0' > max1))
-                               break;
-                       res *= 10;
-                       res += *dec - '0';
-               } else if (seen_dot) {
-                       break; /* dot cannot appear twice */
-               } else {
-                       seen_dot = 1;
-               }
+
+// optional fractional separator first opportunity
+       if (*dec == '.') {  // case: (+|-).456
+fractional_sep_first_opp:
+               dec++;
+               _digits++; // add one to digits for single implicit preceding 0, e.g. (+|-)0.456
+               goto trailing_digits;
+       }
+
+// preceding_digits:
+       if (!isdigit((unsigned char) *dec)) {
+               *has_errors = 1;
+               goto end_state;
+       }
+       while (*dec == '0'){
+               // skip leading zeros in preceding digits.
+               dec++;
+               if (*dec == '.')
+                       goto fractional_sep_first_opp;
        }
+       for (; *dec && (isdigit((unsigned char) *dec)); dec++) {
+               if (res > max0 || (res == max0 && *dec - '0' > max1))
+                       break;
+               res *= 10;
+               res += *dec - '0';
+               _digits++;
+       }
+
+// optional fractional separator second opportunity
+       if (*dec == '.')        // case: (+|-)123.(456)
+               dec++;
+       else                                    // case:  (+|-)123
+               goto trailing_whitespace;
+
+trailing_digits:
+       if (!isdigit((unsigned char) *dec))
+               goto trailing_whitespace;
+       for (; *dec && (isdigit((unsigned char) *dec)); dec++) {
+               if (res > max0 || (res == max0 && *dec - '0' > max1))
+                       break;
+               res *= 10;
+               res += *dec - '0';
+               _scale++;
+       }
+       _digits += _scale;
+
+trailing_whitespace:
        while(isspace((unsigned char) *dec))
                dec++;
-       if (end)
-               *end = dec;
+
+end_state:
+       /* When the string cannot be parsed up to and including the null terminator,
+        * the string is an invalid decimal representation. */
+       if (*dec != 0)
+               *has_errors = 1;
+
+       *digits = _digits;
+       *scale = _scale;
+
        if (neg)
                return -res;
        else
diff --git a/sql/server/sql_decimal.h b/sql/server/sql_decimal.h
--- a/sql/server/sql_decimal.h
+++ b/sql/server/sql_decimal.h
@@ -14,19 +14,14 @@
 #include "gdk.h"
 
 #ifdef HAVE_HGE
-extern hge decimal_from_str(char *dec, char **end);
-extern char * decimal_to_str(sql_allocator *sa, hge v, sql_subtype *t);
+#define DEC_TPE hge
 #else
-extern lng decimal_from_str(char *dec, char **end);
-extern char * decimal_to_str(sql_allocator *sa, lng v, sql_subtype *t);
+#define DEC_TPE lng
 #endif
 
-#ifdef HAVE_HGE
-extern hge
-#else
-extern lng
-#endif
-scale2value(int scale);
+extern DEC_TPE decimal_from_str(char *dec, int* digits, int* scale, int* has_errors);
+extern char * decimal_to_str(sql_allocator *sa, DEC_TPE v, sql_subtype *t);
+DEC_TPE scale2value(int scale);
 
 #endif /* _SQL_DECIMAL_H */
 
diff --git a/sql/server/sql_parser.y b/sql/server/sql_parser.y
--- a/sql/server/sql_parser.y
+++ b/sql/server/sql_parser.y
@@ -4710,37 +4710,43 @@ literal:
                }
  |  INTNUM
                { char *s = sa_strdup(SA, $1);
-                 char *dot = strchr(s, '.');
-                 int digits = _strlen(s) - 1;
-                 int scale = digits - (int) (dot-s);
-                 sql_subtype t;
-
-                 if (digits <= 0)
-                       digits = 1;
-                 if (digits <= MAX_DEC_DIGITS) {
-#ifdef HAVE_HGE
-                       hge value = decimal_from_str(s, NULL);
-#else
-                       lng value = decimal_from_str(s, NULL);
-#endif
-
-                       if (*s == '+' || *s == '-')
-                               digits --;
-                       sql_find_subtype(&t, "decimal", digits, scale );
-                       $$ = _newAtomNode( atom_dec(SA, &t, value));
-                  } else {
-                       char *p = $1;
-                       double val;
-
-                       errno = 0;
-                       val = strtod($1,&p);
-                       if (p == $1 || is_dbl_nil(val) || (errno == ERANGE && (val < -1 || val > 1))) {
-                               sqlformaterror(m, SQLSTATE(22003) "Double value too large or not a number (%s)", $1);
+                       int digits;
+                       int scale;
+                       int has_errors;
+                       sql_subtype t;
+
+                       DEC_TPE value = decimal_from_str(s, &digits, &scale, &has_errors);
+
+                       if (has_errors) {
+                               char *msg = sql_message(SQLSTATE(22003) "Double value too large or not a number (%s)", $1);
+
+                               yyerror(m, msg);
+                               _DELETE(msg);
                                $$ = NULL;
                                YYABORT;
                        }
-                       sql_find_subtype(&t, "double", 51, 0 );
-                       $$ = _newAtomNode(atom_float(SA, &t, val));
+
+                       if (digits <= MAX_DEC_DIGITS) {
+                               double val = strtod($1,NULL);
+                               sql_find_subtype(&t, "decimal", digits, scale );
+                               $$ = _newAtomNode( atom_dec(SA, &t, value, val));
+                       }
+                       else {
+                               char *p = $1;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

MonetDB: Oct2020-merged-Jun2020 - Stricter decimal parsing.

Reply via email to