Re: Non-decimal integer literals

Peter Eisentraut Mon, 10 Oct 2022 07:17:55 -0700

On 16.02.22 11:11, Peter Eisentraut wrote:

The remaining patches are material for PG16 at this point, and I willset the commit fest item to returned with feedback in the meantime.


Time to continue with this.

Attached is a rebased and cleaned up patch for non-decimal integerliterals. (I don't include the underscores-in-numeric literals patch.I'm keeping that for later.)


Two open issues from my notes:

Technically, numeric_in() should be made aware of this, but that seemsrelatively complicated and maybe not necessary for the first iteration.

Taking another look around ecpg to see how this interacts with C-syntaxinteger literals. I'm not aware of any particular issues, but it'sunderstandably tricky.


Other than that, this seems pretty complete as a start.

From d0bc72fa4c339ba2ea0bb8d1e5a3923d76ee8105 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Mon, 10 Oct 2022 16:03:15 +0200
Subject: [PATCH v9] Non-decimal integer literals

Add support for hexadecimal, octal, and binary integer literals:

    0x42F
    0o273
    0b100101

per SQL:202x draft.

This adds support in the lexer as well as in the integer type input
functions.

Discussion: 
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
 doc/src/sgml/syntax.sgml                   |  26 ++++
 src/backend/catalog/information_schema.sql |   6 +-
 src/backend/catalog/sql_features.txt       |   1 +
 src/backend/parser/scan.l                  |  99 +++++++++++----
 src/backend/utils/adt/numutils.c           | 140 +++++++++++++++++++++
 src/fe_utils/psqlscan.l                    |  78 +++++++++---
 src/interfaces/ecpg/preproc/pgc.l          | 108 +++++++++-------
 src/test/regress/expected/int2.out         |  19 +++
 src/test/regress/expected/int4.out         |  19 +++
 src/test/regress/expected/int8.out         |  19 +++
 src/test/regress/expected/numerology.out   |  59 ++++++++-
 src/test/regress/sql/int2.sql              |   7 ++
 src/test/regress/sql/int4.sql              |   7 ++
 src/test/regress/sql/int8.sql              |   7 ++
 src/test/regress/sql/numerology.sql        |  21 +++-
 15 files changed, 523 insertions(+), 93 deletions(-)

diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index 93ad71737f51..bba78c22f1a9 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
 </literallayout>
     </para>
 
+    <para>
+     Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+     <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+     (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+     digits (0-7), <replaceable>bindigits</replaceable> is one or more binary
+     digits (0 or 1).  Hexadecimal digits and the radix prefixes can be in
+     upper or lower case.  Note that only integers can have non-decimal forms,
+     not numbers with fractional parts.
+    </para>
+
+    <para>
+     These are some examples of this:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+    </para>
+
     <para>
      <indexterm><primary>integer</primary></indexterm>
      <indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql 
b/src/backend/catalog/information_schema.sql
index 18725a02d1fb..95c27a625e7e 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod 
int4) RETURNS integer
          WHEN 1700 /*numeric*/ THEN
               CASE WHEN $2 = -1
                    THEN null
-                   ELSE (($2 - 4) >> 16) & 65535
+                   ELSE (($2 - 4) >> 16) & 0xFFFF
                    END
          WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
          WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) 
RETURNS integer
        WHEN $1 IN (1700) THEN
             CASE WHEN $2 = -1
                  THEN null
-                 ELSE ($2 - 4) & 65535
+                 ELSE ($2 - 4) & 0xFFFF
                  END
        ELSE null
   END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod 
int4) RETURNS integer
        WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
            THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
        WHEN $1 IN (1186) /* interval */
-           THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535 
END
+           THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 & 
0xFFFF END
        ELSE null
   END;
 
diff --git a/src/backend/catalog/sql_features.txt 
b/src/backend/catalog/sql_features.txt
index da7c9c772e09..e897e28ed148 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -527,6 +527,7 @@ T652        SQL-dynamic statements in SQL routines          
        NO
 T653   SQL-schema statements in external routines                      YES     
 T654   SQL-dynamic statements in external routines                     NO      
 T655   Cyclically dependent routines                   YES     
+T661   Non-decimal integer literals                    YES     SQL:202x draft
 T811   Basic SQL/JSON constructor functions                    NO      
 T812   SQL/JSON: JSON_OBJECTAGG                        NO      
 T813   SQL/JSON: JSON_ARRAYAGG with ORDER BY                   NO      
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index db8b0fe8ebcc..8bb9d5fcc52d 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t 
yyscanner);
 static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 static char *litbufdup(core_yyscan_t yyscanner);
 static unsigned char unescape_single_char(unsigned char c, core_yyscan_t 
yyscanner);
-static int     process_integer_literal(const char *token, YYSTYPE *lval);
+static int     process_integer_literal(const char *token, YYSTYPE *lval, int 
base);
 static void addunicode(pg_wchar c, yyscan_t yyscanner);
 
 #define yyerror(msg)  scanner_yyerror(msg, yyscanner)
@@ -385,25 +385,40 @@ operator          {op_chars}+
  * Unary minus is not part of a number here.  Instead we pass it separately to
  * the parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
-digit                  [0-9]
-
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail               ({integer}|{decimal})[Ee][-+]
-
-integer_junk   {integer}{ident_start}
-decimal_junk   {decimal}{ident_start}
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail               ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk        {decinteger}{ident_start}
+hexinteger_junk        {hexinteger}{ident_start}
+octinteger_junk        {octinteger}{ident_start}
+bininteger_junk        {bininteger}{ident_start}
+numeric_junk   {numeric}{ident_start}
 real_junk              {real}{ident_start}
 
-param                  \${integer}
-param_junk             \${integer}{ident_start}
+param                  \${decinteger}
+param_junk             \${decinteger}{ident_start}
 
 other                  .
 
@@ -983,20 +998,44 @@ other                     .
                                        yyerror("trailing junk after 
parameter");
                                }
 
-{integer}              {
+{decinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext, 
yylval, 10);
+                               }
+{hexinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 16);
+                               }
+{octinteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 8);
+                               }
+{bininteger}   {
+                                       SET_YYLLOC();
+                                       return process_integer_literal(yytext + 
2, yylval, 2);
+                               }
+{hexfail}              {
+                                       SET_YYLLOC();
+                                       yyerror("invalid hexadecimal integer");
+                               }
+{octfail}              {
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       yyerror("invalid octal integer");
                                }
-{decimal}              {
+{binfail}              {
+                                       SET_YYLLOC();
+                                       yyerror("invalid binary integer");
+                               }
+{numeric}              {
                                        SET_YYLLOC();
                                        yylval->str = pstrdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        SET_YYLLOC();
-                                       return process_integer_literal(yytext, 
yylval);
+                                       return process_integer_literal(yytext, 
yylval, 10);
                                }
 {real}                 {
                                        SET_YYLLOC();
@@ -1007,11 +1046,23 @@ other                   .
                                        SET_YYLLOC();
                                        yyerror("trailing junk after numeric 
literal");
                                }
-{integer_junk} {
+{decinteger_junk}      {
+                                       SET_YYLLOC();
+                                       yyerror("trailing junk after numeric 
literal");
+                               }
+{hexinteger_junk}      {
+                                       SET_YYLLOC();
+                                       yyerror("trailing junk after numeric 
literal");
+                               }
+{octinteger_junk}      {
+                                       SET_YYLLOC();
+                                       yyerror("trailing junk after numeric 
literal");
+                               }
+{bininteger_junk}      {
                                        SET_YYLLOC();
                                        yyerror("trailing junk after numeric 
literal");
                                }
-{decimal_junk} {
+{numeric_junk} {
                                        SET_YYLLOC();
                                        yyerror("trailing junk after numeric 
literal");
                                }
@@ -1307,17 +1358,17 @@ litbufdup(core_yyscan_t yyscanner)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
        int                     val;
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       val = strtoint(token, &endptr, base);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 834ec0b5882c..55f0a20839db 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -85,6 +85,17 @@ decimalLength64(const uint64 v)
        return t + (v >= PowersOfTen[t]);
 }
 
+static const int8 hexlookup[128] = {
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
 /*
  * Convert input string to a signed 16 bit integer.
  *
@@ -120,6 +131,48 @@ pg_strtoint16(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -128,6 +181,7 @@ pg_strtoint16(const char *s)
                        unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -196,6 +250,48 @@ pg_strtoint32(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -204,6 +300,7 @@ pg_strtoint32(const char *s)
                        unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -280,6 +377,48 @@ pg_strtoint64(const char *s)
                goto invalid_syntax;
 
        /* process digits */
+       if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+       {
+               ptr += 2;
+               while (*ptr && isxdigit((unsigned char) *ptr))
+               {
+                       int8            digit = hexlookup[(unsigned char) *ptr];
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+
+                       ptr++;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+       {
+               ptr += 2;
+
+               while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+               {
+                       int8            digit = (*ptr++ - '0');
+
+                       if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+                               unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+                               goto out_of_range;
+               }
+       }
+       else
+       {
        while (*ptr && isdigit((unsigned char) *ptr))
        {
                int8            digit = (*ptr++ - '0');
@@ -288,6 +427,7 @@ pg_strtoint64(const char *s)
                        unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
                        goto out_of_range;
        }
+       }
 
        /* allow trailing whitespace, but not other trailing chars */
        while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index ae531ec24077..cb1fc5213844 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -323,25 +323,40 @@ operator          {op_chars}+
  * Unary minus is not part of a number here.  Instead we pass it separately to
  * the parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
-digit                  [0-9]
-
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail               ({integer}|{decimal})[Ee][-+]
-
-integer_junk   {integer}{ident_start}
-decimal_junk   {decimal}{ident_start}
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail               ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk        {decinteger}{ident_start}
+hexinteger_junk        {hexinteger}{ident_start}
+octinteger_junk        {octinteger}{ident_start}
+bininteger_junk        {bininteger}{ident_start}
+numeric_junk   {numeric}{ident_start}
 real_junk              {real}{ident_start}
 
-param                  \${integer}
-param_junk             \${integer}{ident_start}
+param                  \${decinteger}
+param_junk             \${decinteger}{ident_start}
 
 /* psql-specific: characters allowed in variable names */
 variable_char  [A-Za-z\200-\377_0-9]
@@ -847,13 +862,31 @@ other                     .
                                        ECHO;
                                }
 
-{integer}              {
+{decinteger}   {
+                                       ECHO;
+                               }
+{hexinteger}   {
+                                       ECHO;
+                               }
+{octinteger}   {
+                                       ECHO;
+                               }
+{bininteger}   {
+                                       ECHO;
+                               }
+{hexfail}              {
                                        ECHO;
                                }
-{decimal}              {
+{octfail}              {
                                        ECHO;
                                }
-{decimalfail}  {
+{binfail}              {
+                                       ECHO;
+                               }
+{numeric}              {
+                                       ECHO;
+                               }
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
                                        ECHO;
@@ -864,10 +897,19 @@ other                     .
 {realfail}             {
                                        ECHO;
                                }
-{integer_junk} {
+{decinteger_junk}      {
+                                       ECHO;
+                               }
+{hexinteger_junk}      {
+                                       ECHO;
+                               }
+{octinteger_junk}      {
+                                       ECHO;
+                               }
+{bininteger_junk}      {
                                        ECHO;
                                }
-{decimal_junk} {
+{numeric_junk} {
                                        ECHO;
                                }
 {real_junk}            {
diff --git a/src/interfaces/ecpg/preproc/pgc.l 
b/src/interfaces/ecpg/preproc/pgc.l
index c145c9698f1a..b6ada6ef1f5e 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -57,7 +57,7 @@ static bool           include_next;
 #define startlit()     (literalbuf[0] = '\0', literallen = 0)
 static void addlit(char *ytext, int yleng);
 static void addlitchar(unsigned char ychar);
-static int     process_integer_literal(const char *token, YYSTYPE *lval);
+static int     process_integer_literal(const char *token, YYSTYPE *lval, int 
base);
 static void parse_include(void);
 static bool ecpg_isspace(char ch);
 static bool isdefine(void);
@@ -351,25 +351,40 @@ operator          {op_chars}+
  * Unary minus is not part of a number here.  Instead we pass it separately to
  * the parser, and there it gets coerced via doNegate().
  *
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 
10.
  *
  * {realfail} is added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */
-digit                  [0-9]
-
-integer                        {digit}+
-decimal                        (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail            {digit}+\.\.
-real                   ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail               ({integer}|{decimal})[Ee][-+]
-
-integer_junk   {integer}{ident_start}
-decimal_junk   {decimal}{ident_start}
+decdigit               [0-9]
+hexdigit               [0-9A-Fa-f]
+octdigit               [0-7]
+bindigit               [0-1]
+
+decinteger             {decdigit}+
+hexinteger             0[xX]{hexdigit}+
+octinteger             0[oO]{octdigit}+
+bininteger             0[bB]{bindigit}+
+
+hexfail                        0[xX]
+octfail                        0[oO]
+binfail                        0[bB]
+
+numeric                        (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail            {decdigit}+\.\.
+
+real                   ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail               ({decinteger}|{numeric})[Ee][-+]
+
+decinteger_junk        {decinteger}{ident_start}
+hexinteger_junk        {hexinteger}{ident_start}
+octinteger_junk        {octinteger}{ident_start}
+bininteger_junk        {bininteger}{ident_start}
+numeric_junk   {numeric}{ident_start}
 real_junk              {real}{ident_start}
 
-param                  \${integer}
-param_junk             \${integer}{ident_start}
+param                  \${decinteger}
+param_junk             \${decinteger}{ident_start}
 
 /* special characters for other dbms */
 /* we have to react differently in compat mode */
@@ -399,9 +414,6 @@ include_next        
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
 import                 [iI][mM][pP][oO][rR][tT]
 undef                  [uU][nN][dD][eE][fF]
 
-/* C version of hex number */
-xch                            0[xX][0-9A-Fa-f]*
-
 ccomment               "//".*\n
 
 if                             [iI][fF]
@@ -414,7 +426,7 @@ endif                       [eE][nN][dD][iI][fF]
 struct                 [sS][tT][rR][uU][cC][tT]
 
 exec_sql               {exec}{space}*{sql}{space}*
-ipdigit                        ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit                        
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
 ip                             {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
 
 /* we might want to parse all cpp include files */
@@ -932,17 +944,20 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 }  /* <SQL> */
 
 <C,SQL>{
-{integer}              {
-                                       return process_integer_literal(yytext, 
&base_yylval);
+{decinteger}   {
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
-{decimal}              {
+{hexinteger}   {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 16);
+                               }
+{numeric}              {
                                        base_yylval.str = mm_strdup(yytext);
                                        return FCONST;
                                }
-{decimalfail}  {
+{numericfail}  {
                                        /* throw back the .., and treat as 
integer */
                                        yyless(yyleng - 2);
-                                       return process_integer_literal(yytext, 
&base_yylval);
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
 {real}                 {
                                        base_yylval.str = mm_strdup(yytext);
@@ -951,22 +966,38 @@ cppline                   
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 {realfail}             {
                                        /*
                                         * throw back the [Ee][+-], and figure 
out whether what
-                                        * remains is an {integer} or {decimal}.
+                                        * remains is an {decinteger} or 
{numeric}.
                                         */
                                        yyless(yyleng - 2);
-                                       return process_integer_literal(yytext, 
&base_yylval);
+                                       return process_integer_literal(yytext, 
&base_yylval, 10);
                                }
 } /* <C,SQL> */
 
 <SQL>{
+{octinteger}   {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 8);
+                               }
+{bininteger}   {
+                                       return process_integer_literal(yytext + 
2, &base_yylval, 2);
+                               }
+
        /*
-        * Note that some trailing junk is valid in C (such as 100LL), so we
-        * contain this to SQL mode.
+        * Note that some trailing junk is valid in C (such as 100LL), so we 
contain
+        * this to SQL mode.
         */
-{integer_junk} {
+{decinteger_junk}      {
                                        mmfatal(PARSE_ERROR, "trailing junk 
after numeric literal");
                                }
-{decimal_junk} {
+{hexinteger_junk}      {
+                                       mmfatal(PARSE_ERROR, "trailing junk 
after numeric literal");
+                               }
+{octinteger_junk}      {
+                                       mmfatal(PARSE_ERROR, "trailing junk 
after numeric literal");
+                               }
+{bininteger_junk}      {
+                                       mmfatal(PARSE_ERROR, "trailing junk 
after numeric literal");
+                               }
+{numeric_junk} {
                                        mmfatal(PARSE_ERROR, "trailing junk 
after numeric literal");
                                }
 {real_junk}            {
@@ -1036,19 +1067,6 @@ cppline                  
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
                                                        return S_ANYTHING;
                                         }
 <C>{ccomment}          { ECHO; }
-<C>{xch}                       {
-                                               char* endptr;
-
-                                               errno = 0;
-                                               base_yylval.ival = 
strtoul((char *) yytext, &endptr, 16);
-                                               if (*endptr != '\0' || errno == 
ERANGE)
-                                               {
-                                                       errno = 0;
-                                                       base_yylval.str = 
mm_strdup(yytext);
-                                                       return SCONST;
-                                               }
-                                               return ICONST;
-                                       }
 <C>{cppinclude}                {
                                                if (system_includes)
                                                {
@@ -1573,17 +1591,17 @@ addlitchar(unsigned char ychar)
 }
 
 /*
- * Process {integer}.  Note this will also do the right thing with {decimal},
+ * Process {*integer}.  Note this will also do the right thing with {numeric},
  * ie digits and a decimal point.
  */
 static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
 {
        int                     val;
        char       *endptr;
 
        errno = 0;
-       val = strtoint(token, &endptr, 10);
+       val = strtoint(token, &endptr, base);
        if (*endptr != '\0' || errno == ERANGE)
        {
                /* integer too large (or contains decimal pt), treat it as a 
float */
diff --git a/src/test/regress/expected/int2.out 
b/src/test/regress/expected/int2.out
index 109cf9baaaca..39bd2684ad3d 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -304,3 +304,22 @@ FROM (VALUES (-2.5::numeric),
   2.5 |          3
 (7 rows)
 
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2 
+------
+   37
+(1 row)
+
+SELECT int2 '0o273';
+ int2 
+------
+  187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out 
b/src/test/regress/expected/int4.out
index fbcc0e8d9e68..6ec54835b4aa 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -431,3 +431,22 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 ERROR:  integer out of range
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
 ERROR:  integer out of range
+-- non-decimal literals
+SELECT int4 '0b100101';
+ int4 
+------
+   37
+(1 row)
+
+SELECT int4 '0o273';
+ int4 
+------
+  187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int8.out 
b/src/test/regress/expected/int8.out
index 1ae23cf3f94f..765df1de52ae 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -927,3 +927,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- 
overflow
 ERROR:  bigint out of range
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
 ERROR:  bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8 
+------
+   37
+(1 row)
+
+SELECT int8 '0o273';
+ int8 
+------
+  187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8 
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/numerology.out 
b/src/test/regress/expected/numerology.out
index 77d48434173b..d95b24c7b329 100644
--- a/src/test/regress/expected/numerology.out
+++ b/src/test/regress/expected/numerology.out
@@ -3,14 +3,33 @@
 -- Test various combinations of numeric types and functions.
 --
 --
--- Trailing junk in numeric literals
+-- numeric literals
 --
+SELECT 0b100101;
+ ?column? 
+----------
+       37
+(1 row)
+
+SELECT 0o273;
+ ?column? 
+----------
+      187
+(1 row)
+
+SELECT 0x42F;
+ ?column? 
+----------
+     1071
+(1 row)
+
+-- error cases
 SELECT 123abc;
 ERROR:  trailing junk after numeric literal at or near "123a"
 LINE 1: SELECT 123abc;
                ^
 SELECT 0x0o;
-ERROR:  trailing junk after numeric literal at or near "0x"
+ERROR:  trailing junk after numeric literal at or near "0x0o"
 LINE 1: SELECT 0x0o;
                ^
 SELECT 1_2_3;
@@ -45,6 +64,42 @@ PREPARE p1 AS SELECT $1a;
 ERROR:  trailing junk after parameter at or near "$1a"
 LINE 1: PREPARE p1 AS SELECT $1a;
                              ^
+SELECT 0b;
+ERROR:  invalid binary integer at or near "0b"
+LINE 1: SELECT 0b;
+               ^
+SELECT 1b;
+ERROR:  trailing junk after numeric literal at or near "1b"
+LINE 1: SELECT 1b;
+               ^
+SELECT 0b0x;
+ERROR:  trailing junk after numeric literal at or near "0b0x"
+LINE 1: SELECT 0b0x;
+               ^
+SELECT 0o;
+ERROR:  invalid octal integer at or near "0o"
+LINE 1: SELECT 0o;
+               ^
+SELECT 1o;
+ERROR:  trailing junk after numeric literal at or near "1o"
+LINE 1: SELECT 1o;
+               ^
+SELECT 0o0x;
+ERROR:  trailing junk after numeric literal at or near "0o0x"
+LINE 1: SELECT 0o0x;
+               ^
+SELECT 0x;
+ERROR:  invalid hexadecimal integer at or near "0x"
+LINE 1: SELECT 0x;
+               ^
+SELECT 1x;
+ERROR:  trailing junk after numeric literal at or near "1x"
+LINE 1: SELECT 1x;
+               ^
+SELECT 0x0y;
+ERROR:  trailing junk after numeric literal at or near "0x0y"
+LINE 1: SELECT 0x0y;
+               ^
 --
 -- Test implicit type conversions
 -- This fails for Postgres v6.1 (and earlier?)
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index ea29066b78ee..405617fa7829 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -104,3 +104,10 @@
              (0.5::numeric),
              (1.5::numeric),
              (2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index f19077f3da21..1843b718a705 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -164,3 +164,10 @@
 
 SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
 SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 38b771964d79..dd82040b0b37 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -245,3 +245,10 @@
 
 SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
 SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
diff --git a/src/test/regress/sql/numerology.sql 
b/src/test/regress/sql/numerology.sql
index be7d6dfe0c26..0e12bcc7b709 100644
--- a/src/test/regress/sql/numerology.sql
+++ b/src/test/regress/sql/numerology.sql
@@ -3,10 +3,16 @@
 -- Test various combinations of numeric types and functions.
 --
 
+
 --
--- Trailing junk in numeric literals
+-- numeric literals
 --
 
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+-- error cases
 SELECT 123abc;
 SELECT 0x0o;
 SELECT 1_2_3;
@@ -18,6 +24,19 @@
 SELECT 0.0e+a;
 PREPARE p1 AS SELECT $1a;
 
+SELECT 0b;
+SELECT 1b;
+SELECT 0b0x;
+
+SELECT 0o;
+SELECT 1o;
+SELECT 0o0x;
+
+SELECT 0x;
+SELECT 1x;
+SELECT 0x0y;
+
+
 --
 -- Test implicit type conversions
 -- This fails for Postgres v6.1 (and earlier?)

base-commit: 06dbd619bfbfe03fefa7223838690d4012f874ad
-- 
2.37.3

Re: Non-decimal integer literals

Reply via email to