Re: Non-decimal integer literals

Peter Eisentraut Sun, 31 Oct 2021 23:09:35 -0700

On 28.09.21 17:30, Peter Eisentraut wrote:

On 09.09.21 16:08, Vik Fearing wrote:
Even without that point, this patch *is* going to break valid queries,
because every one of those cases is a validnumber-followed-by-identifier
today,
Ah, true that.  So if this does go in, we may as well add the
underscores at the same time.
Yeah, looks like I'll need to look into the identifier lexing issuespreviously discussed. I'll attack that during the next commit fest.

Here is an updated patch for this. It's the previous patch polished abit more, and it contains changes so that numeric literals rejecttrailing identifier parts without whitespace in between, as discussed.Maybe I should split that into incremental patches, but for now I onlyhave the one. I don't have a patch for the underscores in numericliterals yet. It's in progress, but not ready.

From 6e081c44c04201ee9ded9dc6b689824ccabdfc28 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Sun, 31 Oct 2021 15:42:18 +0100
Subject: [PATCH v4] Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42F
    0o273
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.

Discussion: 
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
 doc/src/sgml/syntax.sgml                   |  26 ++++++
 src/backend/catalog/information_schema.sql |   6 +-
 src/backend/catalog/sql_features.txt       |   1 +
 src/backend/parser/scan.l                  | 103 ++++++++++++++++-----
 src/backend/utils/adt/int8.c               |  54 +++++++++++
 src/backend/utils/adt/numutils.c           |  97 +++++++++++++++++++
 src/fe_utils/psqlscan.l                    |  81 ++++++++++++----
 src/interfaces/ecpg/preproc/pgc.l          |  95 +++++++++++--------
 src/test/regress/expected/int2.out         |  19 ++++
 src/test/regress/expected/int4.out         |  75 +++++++++++++++
 src/test/regress/expected/int8.out         |  19 ++++
 src/test/regress/sql/int2.sql              |   7 ++
 src/test/regress/sql/int4.sql              |  26 ++++++
 src/test/regress/sql/int8.sql              |   7 ++
 14 files changed, 531 insertions(+), 85 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..a4f04199c6 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
 </literallayout>
     </para>
 
+    <para>
+     Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+     <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+     (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+     digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+     digits (0 or 1).  Hexadecimal digits and the radix prefixes can be in
+     upper or lower case.  Note that only integers can have non-decimal forms,
+     not numbers with fractional parts.
+    </para>
+
+    <para>
+     These are some examples of this:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+    </para>
+
     <para>
      <indexterm><primary>integer</primary></indexterm>
      <indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql 
b/src/backend/catalog/information_schema.sql
index 11d9dd60c2..ce88c483a2 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod 
int4) RETURNS integer
          WHEN 1700 /*numeric*/ THEN
               CASE WHEN $2 = -1
                    THEN null
-                   ELSE (($2 - 4) >> 16) & 65535
+                   ELSE (($2 - 4) >> 16) & 0xFFFF
                    END
          WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
          WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) 
RETURNS integer
        WHEN $1 IN (1700) THEN
             CASE WHEN $2 = -1
                  THEN null
-                 ELSE ($2 - 4) & 65535
+                 ELSE ($2 - 4) & 0xFFFF
                  END
        ELSE null
   END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod 
int4) RETURNS integer
        WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
            THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
        WHEN $1 IN (1186) /* interval */
-           THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 
END
+           THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 
0xFFFF END
        ELSE null
   END;
 
diff --git a/src/backend/catalog/sql_features.txt 
b/src/backend/catalog/sql_features.txt
index 9f424216e2..d6359503f3 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652        SQL-dynamic statements in SQL routines          
        NO
 T653   SQL-schema statements in external routines                      YES     
 T654   SQL-dynamic statements in external routines                     NO      
 T655   Cyclically dependent routines                   YES     
+T661   Non-decimal integer literals                    YES     SQL:202x draft
 T811   Basic SQL/JSON constructor functions                    NO      
 T812   SQL/JSON: JSON_OBJECTAGG                        NO      
 T813   SQL/JSON: JSON_ARRAYAGG with ORDER BY                   NO      
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 6e6824faeb..fe5ddbe2aa 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t 
yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t 
yyscanner);
-static int     process_integer_literal(const char *token, YYSTYPE *lval);
+static int     process_integer_literal(const char *token, YYSTYPE *lval, int 
base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@@ -262,7 +262,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -341,7 +341,7 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
+
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -380,24 +380,44 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail0               0[xX]
+octfail0               0[oO]
+binfail0               0[bB]
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+decfail         {decinteger}{ident_start}
+hexfail                        {hexinteger}{ident_start}
+octfail                        {octinteger}{ident_start}
+binfail                        {bininteger}{ident_start}
 
-param                  \${integer}
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 other                  .
 
@@ -973,20 +993,53 @@ other                     .
                                        return PARAM;
                                }
 
-{integer}              {
+{decinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext, 
yylval, 10);
+                               }
+{hexinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 16);
+                               }
+{octinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 8);
+                               }
+{bininteger}   {
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext + 
2, yylval, 2);
+                               }
+{hexfail0}             {
+                                       yyerror("invalid hexadecimal integer");
+                               }
+{octfail0}             {
+                                       yyerror("invalid octal integer");
+                               }
+{binfail0}             {
+                                       yyerror("invalid binary integer");
+                               }
+{decfail}              {
+                                       yyerror("trailing junk after decimal 
integer");
+                               }
+{hexfail}              {
+                                       yyerror("trailing junk after 
hexadecimal integer");
+                               }
+{octfail}              {
+                                       yyerror("trailing junk after octal 
integer");
+                               }
+{binfail}              {
+                                       yyerror("trailing junk after binary 
integer");
                                }
-{decimal}              {
+{numeric}              {
                                        SET_YYLLOC();
                                        yylval->str = pstrdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 {real}                 {
                                        SET_YYLLOC();
@@ -996,17 +1049,17 @@ other                    .
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is a {decinteger} or 
{numeric}.
                                         */
                                        yyless(yyleng - 1);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 {realfail2}            {
                                        /* throw back the [Ee][+-], and proceed 
as above */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 
 
@@ -1296,17 +1349,17 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
        int                     val;
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       val = strtoint(token, &endptr, base);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..c3ed944a6c 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -45,6 +45,17 @@ typedef struct
  * Formatting and conversion routines.
  *---------------------------------------------------------*/
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * scanint8 --- try to parse a string into an int8.
  *
@@ -84,6 +95,48 @@ scanint8(const char *str, bool errorOK, int64 *result)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -92,6 +145,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
                        unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..7c6520346e 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -173,6 +173,17 @@ pg_atoi(const char *s, int size, int c)
        return (int32) l;
 }
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -208,6 +219,48 @@ pg_strtoint16(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -216,6 +269,7 @@ pg_strtoint16(const char *s)
                        unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -284,6 +338,48 @@ pg_strtoint32(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -292,6 +388,7 @@ pg_strtoint32(const char *s)
                        unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 0fab48a382..4436509d88 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -200,7 +200,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -279,7 +279,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -318,24 +317,44 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail0               0[xX]
+octfail0               0[oO]
+binfail0               0[bB]
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+decfail                        {decinteger}{ident_start}
+hexfail                        {hexinteger}{ident_start}
+octfail                        {octinteger}{ident_start}
+binfail                        {bininteger}{ident_start}
 
-param                  \${integer}
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 /* psql-specific: characters allowed in variable names */
 variable_char  [A-Za-z\200-\377_0-9]
@@ -839,13 +858,43 @@ other                     .
                                        ECHO;
                                }
 
-{integer}              {
+{decinteger}   {
+                                       ECHO;
+                               }
+{hexinteger}   {
+                                       ECHO;
+                               }
+{octinteger}   {
+                                       ECHO;
+                               }
+{bininteger}   {
+                                       ECHO;
+                               }
+{hexfail0}             {
+                                       ECHO;
+                               }
+{octfail0}             {
+                                       ECHO;
+                               }
+{binfail0}             {
+                                       ECHO;
+                               }
+{decfail}              {
+                                       ECHO;
+                               }
+{hexfail}              {
+                                       ECHO;
+                               }
+{octfail}              {
+                                       ECHO;
+                               }
+{binfail}              {
                                        ECHO;
                                }
-{decimal}              {
+{numeric}              {
                                        ECHO;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        ECHO;
diff --git a/src/interfaces/ecpg/preproc/pgc.l 
b/src/interfaces/ecpg/preproc/pgc.l
index 7a0356638d..8d6e1cd76a 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -57,7 +57,7 @@ static bool           include_next;
 #define startlit()     (literalbuf[0] = '\0', literallen = 0)
 static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char);
-static int     process_integer_literal(const char *token, YYSTYPE *lval);
+static int     process_integer_literal(const char *token, YYSTYPE *lval, int 
base);
 static void parse_include(void);
 static bool ecpg_isspace(char ch);
 static bool isdefine(void);
@@ -305,7 +305,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -346,24 +345,44 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail0               0[xX]
+octfail0               0[oO]
+binfail0               0[bB]
+
+decfail                        {decinteger}{ident_start}
+hexfail                        {hexinteger}{ident_start}
+octfail                        {octinteger}{ident_start}
+binfail                        {bininteger}{ident_start}
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
 
-param                  \${integer}
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -393,9 +412,6 @@ include_next        
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import                 [iI][mM][pP][oO][rR][tT]
 undef                  [uU][nN][dD][eE][fF]
 
-/* C version of hex number */
-xch                            0[xX][0-9A-Fa-f]*
-
 ccomment               "//".*\n
 
 if                             [iI][fF]
@@ -408,7 +424,7 @@ endif                       [eE][nN][dD][iI][fF]
 struct                 [sS][tT][rR][uU][cC][tT]
 
 exec_sql               {exec}{space}*{sql}{space}*
-ipdigit                        ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit                        
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
 ip                             {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 
 /* we might want to parse all cpp include files */
@@ -923,17 +939,20 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 }  /* <SQL> */
 
 <C,SQL>{
-{integer}              {
-                                       return process_integer_literal(yytext, 
&base_yylval);
+{decinteger}   {
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
-{decimal}              {
+{hexinteger}   {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 16);
+                               }
+{numeric}              {
                                        base_yylval.str = mm_strdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
-                                       return process_integer_literal(yytext, 
&base_yylval);
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
 {real}                 {
                                        base_yylval.str = mm_strdup(yytext);
@@ -942,18 +961,25 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is an {decinteger} or 
{numeric}.
                                         */
                                        yyless(yyleng - 1);
-                                       return process_integer_literal(yytext, 
&base_yylval);
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
 {realfail2}            {
                                        /* throw back the [Ee][+-], and proceed 
as above */
                                        yyless(yyleng - 2);
-                                       return process_integer_literal(yytext, 
&base_yylval);
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
 } /* <C,SQL> */
 
+<SQL>{octinteger}      {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 8);
+                               }
+<SQL>{bininteger}      {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 2);
+                               }
+
 <SQL>{
 :{identifier}((("->"|\.){identifier})|(\[{array}\]))*  {
                                        base_yylval.str = mm_strdup(yytext+1);
@@ -1009,19 +1035,6 @@ cppline                  
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                        return S_ANYTHING;
                                         }
 <C>{ccomment}          { ECHO; }
-<C>{xch}                       {
-                                               char* endptr;
-
-                                               errno = 0;
-                                               base_yylval.ival = 
strtoul((char *)yytext,&endptr,16);
-                                               if (*endptr != '\0' || errno == 
ERANGE)
-                                               {
-                                                       errno = 0;
-                                                       base_yylval.str = 
mm_strdup(yytext);
-                                                       return SCONST;
-                                               }
-                                               return ICONST;
-                                       }
 <C>{cppinclude}                {
                                                if (system_includes)
                                                {
@@ -1546,17 +1559,17 @@ addlitchar(unsigned char ychar)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
        int                     val;
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       val = strtoint(token, &endptr, base);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/test/regress/expected/int2.out 
b/src/test/regress/expected/int2.out
index 55ea7202cd..220e1493e8 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
   2.5 |          3
 (7 rows)
 
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2 
+------
+   37
+(1 row)
+
+SELECT int2 '0o273';
+ int2 
+------
+  187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out 
b/src/test/regress/expected/int4.out
index 9d20b3380f..060b599705 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,78 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 ERROR:  integer out of range
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
 ERROR:  integer out of range
+-- non-decimal literals
+SELECT int4 '0b100101';
+ int4 
+------
+   37
+(1 row)
+
+SELECT int4 '0o273';
+ int4 
+------
+  187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4 
+------
+ 1071
+(1 row)
+
+-- lexer literals
+SELECT 0b100101;
+ ?column? 
+----------
+       37
+(1 row)
+
+SELECT 0o273;
+ ?column? 
+----------
+      187
+(1 row)
+
+SELECT 0x42F;
+ ?column? 
+----------
+     1071
+(1 row)
+
+-- error cases
+SELECT 0b;
+ERROR:  invalid binary integer at or near "SELECT 0b"
+LINE 1: SELECT 0b;
+        ^
+SELECT 1b;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1b"
+LINE 1: SELECT 1b;
+        ^
+SELECT 0b0x;
+ERROR:  trailing junk after binary integer at or near "SELECT 0b0x"
+LINE 1: SELECT 0b0x;
+        ^
+SELECT 0o;
+ERROR:  invalid octal integer at or near "SELECT 0o"
+LINE 1: SELECT 0o;
+        ^
+SELECT 1o;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1o"
+LINE 1: SELECT 1o;
+        ^
+SELECT 0o0x;
+ERROR:  trailing junk after octal integer at or near "SELECT 0o0x"
+LINE 1: SELECT 0o0x;
+        ^
+SELECT 0x;
+ERROR:  invalid hexadecimal integer at or near "SELECT 0x"
+LINE 1: SELECT 0x;
+        ^
+SELECT 1x;
+ERROR:  trailing junk after decimal integer at or near "SELECT 1x"
+LINE 1: SELECT 1x;
+        ^
+SELECT 0x0y;
+ERROR:  trailing junk after hexadecimal integer at or near "SELECT 0x0y"
+LINE 1: SELECT 0x0y;
+        ^
diff --git a/src/test/regress/expected/int8.out 
b/src/test/regress/expected/int8.out
index 36540ec456..edd15a4353 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- 
overflow
 ERROR:  bigint out of range
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
 ERROR:  bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8 
+------
+   37
+(1 row)
+
+SELECT int8 '0o273';
+ int8 
+------
+  187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..0dee22fe6d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
              (0.5::numeric),
              (1.5::numeric),
              (2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..d97d017fca 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,29 @@ CREATE TABLE INT4_TBL(f1 int4);
 
 SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
+
+-- lexer literals
+
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+-- error cases
+SELECT 0b;
+SELECT 1b;
+SELECT 0b0x;
+
+SELECT 0o;
+SELECT 1o;
+SELECT 0o0x;
+
+SELECT 0x;
+SELECT 1x;
+SELECT 0x0y;
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..b7ad696dd8 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
 
 SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
-- 
2.33.1

Re: Non-decimal integer literals

Reply via email to