Re: Non-decimal integer literals

Peter Eisentraut Tue, 28 Sep 2021 08:27:18 -0700

On 07.09.21 13:50, Zhihong Yu wrote:

    On 16.08.21 17:32, John Naylor wrote:
     > The one thing that jumped out at me on a cursory reading is
     > the {integer} rule, which seems to be used nowhere except to
     > call process_integer_literal, which must then inspect the token
    text to
     > figure out what type of integer it is. Maybe consider 4 separate
     > process_*_literal functions?


    Agreed, that can be done in a simpler way.  Here is an updated patch.

Hi,
Minor comment:

+SELECT int4 '0o112';

Maybe involve digits of up to 7 in the octal test case.


Good point, here is a lightly updated patch.

From 43957a1f48ed6f750f231ef8e3533d74d7ac4cc9 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Tue, 28 Sep 2021 17:14:44 +0200
Subject: [PATCH v3] Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42F
    0o273
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.

Discussion: 
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
 doc/src/sgml/syntax.sgml                   | 26 ++++++
 src/backend/catalog/information_schema.sql |  6 +-
 src/backend/catalog/sql_features.txt       |  1 +
 src/backend/parser/scan.l                  | 87 +++++++++++++------
 src/backend/utils/adt/int8.c               | 54 ++++++++++++
 src/backend/utils/adt/numutils.c           | 97 ++++++++++++++++++++++
 src/fe_utils/psqlscan.l                    | 55 ++++++++----
 src/interfaces/ecpg/preproc/pgc.l          | 64 +++++++++-----
 src/test/regress/expected/int2.out         | 19 +++++
 src/test/regress/expected/int4.out         | 37 +++++++++
 src/test/regress/expected/int8.out         | 19 +++++
 src/test/regress/sql/int2.sql              |  7 ++
 src/test/regress/sql/int4.sql              | 11 +++
 src/test/regress/sql/int8.sql              |  7 ++
 14 files changed, 425 insertions(+), 65 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..a4f04199c6 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
 </literallayout>
     </para>
 
+    <para>
+     Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+     <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+     (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+     digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+     digits (0 or 1).  Hexadecimal digits and the radix prefixes can be in
+     upper or lower case.  Note that only integers can have non-decimal forms,
+     not numbers with fractional parts.
+    </para>
+
+    <para>
+     These are some examples of this:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+    </para>
+
     <para>
      <indexterm><primary>integer</primary></indexterm>
      <indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql 
b/src/backend/catalog/information_schema.sql
index 11d9dd60c2..ce88c483a2 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod 
int4) RETURNS integer
          WHEN 1700 /*numeric*/ THEN
               CASE WHEN $2 = -1
                    THEN null
-                   ELSE (($2 - 4) >> 16) & 65535
+                   ELSE (($2 - 4) >> 16) & 0xFFFF
                    END
          WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
          WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) 
RETURNS integer
        WHEN $1 IN (1700) THEN
             CASE WHEN $2 = -1
                  THEN null
-                 ELSE ($2 - 4) & 65535
+                 ELSE ($2 - 4) & 0xFFFF
                  END
        ELSE null
   END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod 
int4) RETURNS integer
        WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
            THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
        WHEN $1 IN (1186) /* interval */
-           THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 
END
+           THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 
0xFFFF END
        ELSE null
   END;
 
diff --git a/src/backend/catalog/sql_features.txt 
b/src/backend/catalog/sql_features.txt
index 9f424216e2..d6359503f3 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652        SQL-dynamic statements in SQL routines          
        NO
 T653   SQL-schema statements in external routines                      YES     
 T654   SQL-dynamic statements in external routines                     NO      
 T655   Cyclically dependent routines                   YES     
+T661   Non-decimal integer literals                    YES     SQL:202x draft
 T811   Basic SQL/JSON constructor functions                    NO      
 T812   SQL/JSON: JSON_OBJECTAGG                        NO      
 T813   SQL/JSON: JSON_ARRAYAGG with ORDER BY                   NO      
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 6e6824faeb..a78fe7a2ed 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t 
yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t 
yyscanner);
-static int     process_integer_literal(const char *token, YYSTYPE *lval);
+static int     process_integer_literal(const char *token, YYSTYPE *lval, int 
base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@@ -262,7 +262,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -341,7 +341,7 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
+
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -380,24 +380,39 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
+ *
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
 
-param                  \${integer}
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 other                  .
 
@@ -973,20 +988,42 @@ other                     .
                                        return PARAM;
                                }
 
-{integer}              {
+{decinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext, 
yylval, 10);
+                               }
+{hexinteger}   {
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext + 
2, yylval, 16);
                                }
-{decimal}              {
+{octinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 8);
+                               }
+{bininteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 2);
+                               }
+{hexfail}              {
+                                       yyerror("invalid hexadecimal integer");
+                               }
+{octfail}              {
+                                       yyerror("invalid octal integer");
+                               }
+{binfail}              {
+                                       yyerror("invalid binary integer");
+                               }
+
+{numeric}              {
                                        SET_YYLLOC();
                                        yylval->str = pstrdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 {real}                 {
                                        SET_YYLLOC();
@@ -996,17 +1033,17 @@ other                    .
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is a {decinteger} or 
{numeric}.
                                         */
                                        yyless(yyleng - 1);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 {realfail2}            {
                                        /* throw back the [Ee][+-], and proceed 
as above */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 
 
@@ -1296,17 +1333,17 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
        int                     val;
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       val = strtoint(token, &endptr, base);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..c3ed944a6c 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -45,6 +45,17 @@ typedef struct
  * Formatting and conversion routines.
  *---------------------------------------------------------*/
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * scanint8 --- try to parse a string into an int8.
  *
@@ -84,6 +95,48 @@ scanint8(const char *str, bool errorOK, int64 *result)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -92,6 +145,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
                        unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..7c6520346e 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -173,6 +173,17 @@ pg_atoi(const char *s, int size, int c)
        return (int32) l;
 }
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -208,6 +219,48 @@ pg_strtoint16(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -216,6 +269,7 @@ pg_strtoint16(const char *s)
                        unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -284,6 +338,48 @@ pg_strtoint32(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -292,6 +388,7 @@ pg_strtoint32(const char *s)
                        unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 0fab48a382..729aec562b 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -200,7 +200,7 @@ quotecontinuefail   {whitespace}*"-"?
 xbstart                        [bB]{quote}
 xbinside               [^']*
 
-/* Hexadecimal number */
+/* Hexadecimal byte string */
 xhstart                        [xX]{quote}
 xhinside               [^']*
 
@@ -279,7 +279,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -318,24 +317,41 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+integer                        
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
 
-param                  \${integer}
+param                  \${decinteger}
 
 /* psql-specific: characters allowed in variable names */
 variable_char  [A-Za-z\200-\377_0-9]
@@ -842,10 +858,19 @@ other                     .
 {integer}              {
                                        ECHO;
                                }
-{decimal}              {
+{hexfail}              {
+                                       ECHO;
+                               }
+{octfail}              {
+                                       ECHO;
+                               }
+{binfail}              {
+                                       ECHO;
+                               }
+{numeric}              {
                                        ECHO;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        ECHO;
diff --git a/src/interfaces/ecpg/preproc/pgc.l 
b/src/interfaces/ecpg/preproc/pgc.l
index 7a0356638d..ebd1f3d7f4 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -305,7 +305,6 @@ xcstart                     \/\*{op_chars}*
 xcstop                 \*+\/
 xcinside               [^*/]+
 
-digit                  [0-9]
 ident_start            [A-Za-z\200-\377_]
 ident_cont             [A-Za-z\200-\377_0-9\$]
 
@@ -346,24 +345,41 @@ self                      [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars               [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator               {op_chars}+
 
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here.  Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
 
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1              ({integer}|{decimal})[Ee]
-realfail2              ({integer}|{decimal})[Ee][-+]
+integer                        
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
 
-param                  \${integer}
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1              ({decinteger}|{numeric})[Ee]
+realfail2              ({decinteger}|{numeric})[Ee][-+]
+
+param                  \${decinteger}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -393,9 +409,6 @@ include_next        
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import                 [iI][mM][pP][oO][rR][tT]
 undef                  [uU][nN][dD][eE][fF]
 
-/* C version of hex number */
-xch                            0[xX][0-9A-Fa-f]*
-
 ccomment               "//".*\n
 
 if                             [iI][fF]
@@ -408,7 +421,7 @@ endif                       [eE][nN][dD][iI][fF]
 struct                 [sS][tT][rR][uU][cC][tT]
 
 exec_sql               {exec}{space}*{sql}{space}*
-ipdigit                        ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit                        
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
 ip                             {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 
 /* we might want to parse all cpp include files */
@@ -926,11 +939,11 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {integer}              {
                                        return process_integer_literal(yytext, 
&base_yylval);
                                }
-{decimal}              {
+{numeric}              {
                                        base_yylval.str = mm_strdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        return process_integer_literal(yytext, 
&base_yylval);
@@ -942,7 +955,7 @@ cppline                     
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {realfail1}            {
                                        /*
                                         * throw back the [Ee], and figure out 
whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is an {integer} or {numeric}.
                                         */
                                        yyless(yyleng - 1);
                                        return process_integer_literal(yytext, 
&base_yylval);
@@ -1009,7 +1022,7 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                        return S_ANYTHING;
                                         }
 <C>{ccomment}          { ECHO; }
-<C>{xch}                       {
+<C>{hexinteger}                {
                                                char* endptr;
 
                                                errno = 0;
@@ -1546,7 +1559,7 @@ addlitchar(unsigned char ychar)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
@@ -1556,7 +1569,14 @@ process_integer_literal(const char *token, YYSTYPE *lval)
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       if (token[0] == '0' && (token[1] == 'X' || token[1] == 'x'))
+               val = strtoint(token + 2, &endptr, 16);
+       else if (token[0] == '0' && (token[1] == 'O' || token[1] == 'o'))
+               val = strtoint(token + 2, &endptr, 8);
+       else if (token[0] == '0' && (token[1] == 'B' || token[1] == 'b'))
+               val = strtoint(token + 2, &endptr, 2);
+       else
+               val = strtoint(token, &endptr, 10);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/test/regress/expected/int2.out 
b/src/test/regress/expected/int2.out
index 55ea7202cd..220e1493e8 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
   2.5 |          3
 (7 rows)
 
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2 
+------
+   37
+(1 row)
+
+SELECT int2 '0o273';
+ int2 
+------
+  187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out 
b/src/test/regress/expected/int4.out
index 9d20b3380f..bb23331c3e 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,40 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 ERROR:  integer out of range
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
 ERROR:  integer out of range
+-- non-decimal literals
+SELECT 0b100101;
+ ?column? 
+----------
+       37
+(1 row)
+
+SELECT 0o273;
+ ?column? 
+----------
+      187
+(1 row)
+
+SELECT 0x42F;
+ ?column? 
+----------
+     1071
+(1 row)
+
+SELECT int4 '0b100101';
+ int4 
+------
+   37
+(1 row)
+
+SELECT int4 '0o273';
+ int4 
+------
+  187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int8.out 
b/src/test/regress/expected/int8.out
index 36540ec456..edd15a4353 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- 
overflow
 ERROR:  bigint out of range
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
 ERROR:  bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8 
+------
+   37
+(1 row)
+
+SELECT int8 '0o273';
+ int8 
+------
+  187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..0dee22fe6d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
              (0.5::numeric),
              (1.5::numeric),
              (2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..3b214cdb65 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,14 @@ CREATE TABLE INT4_TBL(f1 int4);
 
 SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..b7ad696dd8 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
 
 SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
-- 
2.33.0

Re: Non-decimal integer literals

Reply via email to