On 07.09.21 13:50, Zhihong Yu wrote:
On 16.08.21 17:32, John Naylor wrote:
> The one thing that jumped out at me on a cursory reading is
> the {integer} rule, which seems to be used nowhere except to
> call process_integer_literal, which must then inspect the token
> text to
> figure out what type of integer it is. Maybe consider 4 separate
> process_*_literal functions?
Agreed, that can be done in a simpler way. Here is an updated patch.
Hi,
Minor comment:
+SELECT int4 '0o112';
Maybe involve digits of up to 7 in the octal test case.
Good point, here is a lightly updated patch.
From 43957a1f48ed6f750f231ef8e3533d74d7ac4cc9 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <pe...@eisentraut.org>
Date: Tue, 28 Sep 2021 17:14:44 +0200
Subject: [PATCH v3] Non-decimal integer literals
Add support for hexadecimal, octal, and binary integer literals:
0x42F
0o273
0b100101
per SQL:202x draft.
This adds support in the lexer as well as in the integer type input
functions.
Discussion:
https://www.postgresql.org/message-id/flat/b239564c-cad0-b23e-c57e-166d883cb...@enterprisedb.com
---
doc/src/sgml/syntax.sgml | 26 ++++++
src/backend/catalog/information_schema.sql | 6 +-
src/backend/catalog/sql_features.txt | 1 +
src/backend/parser/scan.l | 87 +++++++++++++------
src/backend/utils/adt/int8.c | 54 ++++++++++++
src/backend/utils/adt/numutils.c | 97 ++++++++++++++++++++++
src/fe_utils/psqlscan.l | 55 ++++++++----
src/interfaces/ecpg/preproc/pgc.l | 64 +++++++++-----
src/test/regress/expected/int2.out | 19 +++++
src/test/regress/expected/int4.out | 37 +++++++++
src/test/regress/expected/int8.out | 19 +++++
src/test/regress/sql/int2.sql | 7 ++
src/test/regress/sql/int4.sql | 11 +++
src/test/regress/sql/int8.sql | 7 ++
14 files changed, 425 insertions(+), 65 deletions(-)
diff --git a/doc/src/sgml/syntax.sgml b/doc/src/sgml/syntax.sgml
index d66560b587..a4f04199c6 100644
--- a/doc/src/sgml/syntax.sgml
+++ b/doc/src/sgml/syntax.sgml
@@ -694,6 +694,32 @@ <title>Numeric Constants</title>
</literallayout>
</para>
+ <para>
+ Additionally, non-decimal integer constants can be used in these forms:
+<synopsis>
+0x<replaceable>hexdigits</replaceable>
+0o<replaceable>octdigits</replaceable>
+0b<replaceable>bindigits</replaceable>
+</synopsis>
+ <replaceable>hexdigits</replaceable> is one or more hexadecimal digits
+ (0-9, A-F), <replaceable>octdigits</replaceable> is one or more octal
+ digits (0-7), and <replaceable>bindigits</replaceable> is one or more
+ binary digits (0 or 1). Hexadecimal digits and the radix prefixes can be
+ in upper or lower case. Note that only integers can have non-decimal
+ forms, not numbers with fractional parts.
+ </para>
+
+ <para>
+ Some examples of valid non-decimal integer constants:
+<literallayout>0b100101
+0B10011001
+0o273
+0O755
+0x42f
+0XFFFF
+</literallayout>
+ </para>
+
<para>
<indexterm><primary>integer</primary></indexterm>
<indexterm><primary>bigint</primary></indexterm>
diff --git a/src/backend/catalog/information_schema.sql b/src/backend/catalog/information_schema.sql
index 11d9dd60c2..ce88c483a2 100644
--- a/src/backend/catalog/information_schema.sql
+++ b/src/backend/catalog/information_schema.sql
@@ -119,7 +119,7 @@ CREATE FUNCTION _pg_numeric_precision(typid oid, typmod int4) RETURNS integer
WHEN 1700 /*numeric*/ THEN
CASE WHEN $2 = -1
THEN null
- ELSE (($2 - 4) >> 16) & 65535
+ ELSE (($2 - 4) >> 16) & 0xFFFF
END
WHEN 700 /*float4*/ THEN 24 /*FLT_MANT_DIG*/
WHEN 701 /*float8*/ THEN 53 /*DBL_MANT_DIG*/
@@ -147,7 +147,7 @@ CREATE FUNCTION _pg_numeric_scale(typid oid, typmod int4) RETURNS integer
WHEN $1 IN (1700) THEN
CASE WHEN $2 = -1
THEN null
- ELSE ($2 - 4) & 65535
+ ELSE ($2 - 4) & 0xFFFF
END
ELSE null
END;
@@ -163,7 +163,7 @@ CREATE FUNCTION _pg_datetime_precision(typid oid, typmod int4) RETURNS integer
WHEN $1 IN (1083, 1114, 1184, 1266) /* time, timestamp, same + tz */
THEN CASE WHEN $2 < 0 THEN 6 ELSE $2 END
WHEN $1 IN (1186) /* interval */
- THEN CASE WHEN $2 < 0 OR $2 & 65535 = 65535 THEN 6 ELSE $2 & 65535
END
+ THEN CASE WHEN $2 < 0 OR $2 & 0xFFFF = 0xFFFF THEN 6 ELSE $2 &
0xFFFF END
ELSE null
END;
diff --git a/src/backend/catalog/sql_features.txt b/src/backend/catalog/sql_features.txt
index 9f424216e2..d6359503f3 100644
--- a/src/backend/catalog/sql_features.txt
+++ b/src/backend/catalog/sql_features.txt
@@ -526,6 +526,7 @@ T652 SQL-dynamic statements in SQL routines NO
T653 SQL-schema statements in external routines YES
T654 SQL-dynamic statements in external routines NO
T655 Cyclically dependent routines YES
+T661 Non-decimal integer literals YES SQL:202x draft
T811 Basic SQL/JSON constructor functions NO
T812 SQL/JSON: JSON_OBJECTAGG NO
T813 SQL/JSON: JSON_ARRAYAGG with ORDER BY NO
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 6e6824faeb..a78fe7a2ed 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -124,7 +124,7 @@ static void addlit(char *ytext, int yleng, core_yyscan_t
yyscanner);
static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
static char *litbufdup(core_yyscan_t yyscanner);
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t
yyscanner);
-static int process_integer_literal(const char *token, YYSTYPE *lval);
+static int process_integer_literal(const char *token, YYSTYPE *lval, int
base);
static void addunicode(pg_wchar c, yyscan_t yyscanner);
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
@@ -262,7 +262,7 @@ quotecontinuefail {whitespace}*"-"?
xbstart [bB]{quote}
xbinside [^']*
-/* Hexadecimal number */
+/* Hexadecimal byte string */
xhstart [xX]{quote}
xhinside [^']*
@@ -341,7 +341,7 @@ xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside [^*/]+
-digit [0-9]
+
ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$]
@@ -380,24 +380,39 @@ self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator {op_chars}+
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * Unary minus is not part of a number here. Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
+ *
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
-param \${integer}
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
+
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
+
+param \${decinteger}
other .
@@ -973,20 +988,42 @@ other .
return PARAM;
}
-{integer} {
+{decinteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext,
yylval, 10);
+ }
+{hexinteger} {
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ return process_integer_literal(yytext +
2, yylval, 16);
}
-{decimal} {
+{octinteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext +
2, yylval, 8);
+ }
+{bininteger} {
+ SET_YYLLOC();
+ return process_integer_literal(yytext +
2, yylval, 2);
+ }
+{hexfail} {
+ yyerror("invalid hexadecimal integer");
+ }
+{octfail} {
+ yyerror("invalid octal integer");
+ }
+{binfail} {
+ yyerror("invalid binary integer");
+ }
+
+{numeric} {
SET_YYLLOC();
yylval->str = pstrdup(yytext);
return FCONST;
}
-{decimalfail} {
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ return process_integer_literal(yytext,
yylval, 10);
}
{real} {
SET_YYLLOC();
@@ -996,17 +1033,17 @@ other .
{realfail1} {
/*
* throw back the [Ee], and figure out
whether what
- * remains is an {integer} or {decimal}.
+ * remains is a {decinteger} or
{numeric}.
*/
yyless(yyleng - 1);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ return process_integer_literal(yytext,
yylval, 10);
}
{realfail2} {
/* throw back the [Ee][+-], and proceed
as above */
yyless(yyleng - 2);
SET_YYLLOC();
- return process_integer_literal(yytext,
yylval);
+ return process_integer_literal(yytext,
yylval, 10);
}
@@ -1296,17 +1333,17 @@ litbufdup(core_yyscan_t yyscanner)
}
/*
- * Process {integer}. Note this will also do the right thing with {decimal},
+ * Process {*integer}. Note this will also do the right thing with {numeric},
* ie digits and a decimal point.
*/
static int
-process_integer_literal(const char *token, YYSTYPE *lval)
+process_integer_literal(const char *token, YYSTYPE *lval, int base)
{
int val;
char *endptr;
errno = 0;
- val = strtoint(token, &endptr, 10);
+ val = strtoint(token, &endptr, base);
if (*endptr != '\0' || errno == ERANGE)
{
/* integer too large (or contains decimal pt), treat it as a
float */
diff --git a/src/backend/utils/adt/int8.c b/src/backend/utils/adt/int8.c
index 2168080dcc..c3ed944a6c 100644
--- a/src/backend/utils/adt/int8.c
+++ b/src/backend/utils/adt/int8.c
@@ -45,6 +45,17 @@ typedef struct
* Formatting and conversion routines.
*---------------------------------------------------------*/
+static const int8 hexlookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
/*
* scanint8 --- try to parse a string into an int8.
*
@@ -84,6 +95,48 @@ scanint8(const char *str, bool errorOK, int64 *result)
goto invalid_syntax;
/* process digits */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s64_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -92,6 +145,7 @@ scanint8(const char *str, bool errorOK, int64 *result)
unlikely(pg_sub_s64_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index b93096f288..7c6520346e 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -173,6 +173,17 @@ pg_atoi(const char *s, int size, int c)
return (int32) l;
}
+static const int8 hexlookup[128] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+};
+
/*
* Convert input string to a signed 16 bit integer.
*
@@ -208,6 +219,48 @@ pg_strtoint16(const char *s)
goto invalid_syntax;
/* process digits */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s16_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -216,6 +269,7 @@ pg_strtoint16(const char *s)
unlikely(pg_sub_s16_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
@@ -284,6 +338,48 @@ pg_strtoint32(const char *s)
goto invalid_syntax;
/* process digits */
+ if (ptr[0] == '0' && (ptr[1] == 'x' || ptr[1] == 'X'))
+ {
+ ptr += 2;
+ while (*ptr && isxdigit((unsigned char) *ptr))
+ {
+ int8 digit = hexlookup[(unsigned char) *ptr];
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 16, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+
+ ptr++;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'o' || ptr[1] == 'O'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '7'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 8, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else if (ptr[0] == '0' && (ptr[1] == 'b' || ptr[1] == 'B'))
+ {
+ ptr += 2;
+
+ while (*ptr && (*ptr >= '0' && *ptr <= '1'))
+ {
+ int8 digit = (*ptr++ - '0');
+
+ if (unlikely(pg_mul_s32_overflow(tmp, 2, &tmp)) ||
+ unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
+ goto out_of_range;
+ }
+ }
+ else
+ {
while (*ptr && isdigit((unsigned char) *ptr))
{
int8 digit = (*ptr++ - '0');
@@ -292,6 +388,7 @@ pg_strtoint32(const char *s)
unlikely(pg_sub_s32_overflow(tmp, digit, &tmp)))
goto out_of_range;
}
+ }
/* allow trailing whitespace, but not other trailing chars */
while (*ptr != '\0' && isspace((unsigned char) *ptr))
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 0fab48a382..729aec562b 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -200,7 +200,7 @@ quotecontinuefail {whitespace}*"-"?
xbstart [bB]{quote}
xbinside [^']*
-/* Hexadecimal number */
+/* Hexadecimal byte string */
xhstart [xX]{quote}
xhinside [^']*
@@ -279,7 +279,6 @@ xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside [^*/]+
-digit [0-9]
ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$]
@@ -318,24 +317,41 @@ self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator {op_chars}+
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here. Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
+
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
+
+integer
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
+
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
-param \${integer}
+param \${decinteger}
/* psql-specific: characters allowed in variable names */
variable_char [A-Za-z\200-\377_0-9]
@@ -842,10 +858,19 @@ other .
{integer} {
ECHO;
}
-{decimal} {
+{hexfail} {
+ ECHO;
+ }
+{octfail} {
+ ECHO;
+ }
+{binfail} {
+ ECHO;
+ }
+{numeric} {
ECHO;
}
-{decimalfail} {
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
ECHO;
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index 7a0356638d..ebd1f3d7f4 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -305,7 +305,6 @@ xcstart \/\*{op_chars}*
xcstop \*+\/
xcinside [^*/]+
-digit [0-9]
ident_start [A-Za-z\200-\377_]
ident_cont [A-Za-z\200-\377_0-9\$]
@@ -346,24 +345,41 @@ self [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
op_chars [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
operator {op_chars}+
-/* we no longer allow unary minus in numbers.
- * instead we pass it separately to parser. there it gets
- * coerced via doNegate() -- Leon aug 20 1999
+/*
+ * Numbers
+ *
+ * Unary minus is not part of a number here. Instead we pass it separately to
+ * parser, and there it gets coerced via doNegate().
*
- * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
+ * {numericfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
*
* {realfail1} and {realfail2} are added to prevent the need for scanner
* backup when the {real} rule fails to match completely.
*/
+decdigit [0-9]
+hexdigit [0-9A-Fa-f]
+octdigit [0-7]
+bindigit [0-1]
+
+decinteger {decdigit}+
+hexinteger 0[xX]{hexdigit}+
+octinteger 0[oO]{octdigit}+
+bininteger 0[bB]{bindigit}+
+
+hexfail 0[xX]
+octfail 0[oO]
+binfail 0[bB]
-integer {digit}+
-decimal (({digit}*\.{digit}+)|({digit}+\.{digit}*))
-decimalfail {digit}+\.\.
-real ({integer}|{decimal})[Ee][-+]?{digit}+
-realfail1 ({integer}|{decimal})[Ee]
-realfail2 ({integer}|{decimal})[Ee][-+]
+integer
({decinteger}|{hexinteger}|{octinteger}|{bininteger})
-param \${integer}
+numeric (({decinteger}\.{decinteger}?)|(\.{decinteger}))
+numericfail {decdigit}+\.\.
+
+real ({decinteger}|{numeric})[Ee][-+]?{decdigit}+
+realfail1 ({decinteger}|{numeric})[Ee]
+realfail2 ({decinteger}|{numeric})[Ee][-+]
+
+param \${decinteger}
/* special characters for other dbms */
/* we have to react differently in compat mode */
@@ -393,9 +409,6 @@ include_next
[iI][nN][cC][lL][uU][dD][eE]_[nN][eE][xX][tT]
import [iI][mM][pP][oO][rR][tT]
undef [uU][nN][dD][eE][fF]
-/* C version of hex number */
-xch 0[xX][0-9A-Fa-f]*
-
ccomment "//".*\n
if [iI][fF]
@@ -408,7 +421,7 @@ endif [eE][nN][dD][iI][fF]
struct [sS][tT][rR][uU][cC][tT]
exec_sql {exec}{space}*{sql}{space}*
-ipdigit ({digit}|{digit}{digit}|{digit}{digit}{digit})
+ipdigit
({decdigit}|{decdigit}{decdigit}|{decdigit}{decdigit}{decdigit})
ip {ipdigit}\.{ipdigit}\.{ipdigit}\.{ipdigit}
/* we might want to parse all cpp include files */
@@ -926,11 +939,11 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
{integer} {
return process_integer_literal(yytext,
&base_yylval);
}
-{decimal} {
+{numeric} {
base_yylval.str = mm_strdup(yytext);
return FCONST;
}
-{decimalfail} {
+{numericfail} {
/* throw back the .., and treat as
integer */
yyless(yyleng - 2);
return process_integer_literal(yytext,
&base_yylval);
@@ -942,7 +955,7 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
{realfail1} {
/*
* throw back the [Ee], and figure out
whether what
- * remains is an {integer} or {decimal}.
+ * remains is an {integer} or {numeric}.
*/
yyless(yyleng - 1);
return process_integer_literal(yytext,
&base_yylval);
@@ -1009,7 +1022,7 @@ cppline
{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
return S_ANYTHING;
}
<C>{ccomment} { ECHO; }
-<C>{xch} {
+<C>{hexinteger} {
char* endptr;
errno = 0;
@@ -1546,7 +1559,7 @@ addlitchar(unsigned char ychar)
}
/*
- * Process {integer}. Note this will also do the right thing with {decimal},
+ * Process {integer}. Note this will also do the right thing with {numeric},
* ie digits and a decimal point.
*/
static int
@@ -1556,7 +1569,14 @@ process_integer_literal(const char *token, YYSTYPE *lval)
char *endptr;
errno = 0;
- val = strtoint(token, &endptr, 10);
+ if (token[0] == '0' && (token[1] == 'X' || token[1] == 'x'))
+ val = strtoint(token + 2, &endptr, 16);
+ else if (token[0] == '0' && (token[1] == 'O' || token[1] == 'o'))
+ val = strtoint(token + 2, &endptr, 8);
+ else if (token[0] == '0' && (token[1] == 'B' || token[1] == 'b'))
+ val = strtoint(token + 2, &endptr, 2);
+ else
+ val = strtoint(token, &endptr, 10);
if (*endptr != '\0' || errno == ERANGE)
{
/* integer too large (or contains decimal pt), treat it as a
float */
diff --git a/src/test/regress/expected/int2.out b/src/test/regress/expected/int2.out
index 55ea7202cd..220e1493e8 100644
--- a/src/test/regress/expected/int2.out
+++ b/src/test/regress/expected/int2.out
@@ -306,3 +306,22 @@ FROM (VALUES (-2.5::numeric),
2.5 | 3
(7 rows)
+-- non-decimal literals
+SELECT int2 '0b100101';
+ int2
+------
+ 37
+(1 row)
+
+SELECT int2 '0o273';
+ int2
+------
+ 187
+(1 row)
+
+SELECT int2 '0x42F';
+ int2
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int4.out b/src/test/regress/expected/int4.out
index 9d20b3380f..bb23331c3e 100644
--- a/src/test/regress/expected/int4.out
+++ b/src/test/regress/expected/int4.out
@@ -437,3 +437,40 @@ SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
ERROR: integer out of range
SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
ERROR: integer out of range
+-- non-decimal literals
+SELECT 0b100101;
+ ?column?
+----------
+ 37
+(1 row)
+
+SELECT 0o273;
+ ?column?
+----------
+ 187
+(1 row)
+
+SELECT 0x42F;
+ ?column?
+----------
+ 1071
+(1 row)
+
+SELECT int4 '0b100101';
+ int4
+------
+ 37
+(1 row)
+
+SELECT int4 '0o273';
+ int4
+------
+ 187
+(1 row)
+
+SELECT int4 '0x42F';
+ int4
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/expected/int8.out b/src/test/regress/expected/int8.out
index 36540ec456..edd15a4353 100644
--- a/src/test/regress/expected/int8.out
+++ b/src/test/regress/expected/int8.out
@@ -932,3 +932,22 @@ SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
ERROR: bigint out of range
SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
ERROR: bigint out of range
+-- non-decimal literals
+SELECT int8 '0b100101';
+ int8
+------
+ 37
+(1 row)
+
+SELECT int8 '0o273';
+ int8
+------
+ 187
+(1 row)
+
+SELECT int8 '0x42F';
+ int8
+------
+ 1071
+(1 row)
+
diff --git a/src/test/regress/sql/int2.sql b/src/test/regress/sql/int2.sql
index 613b344704..0dee22fe6d 100644
--- a/src/test/regress/sql/int2.sql
+++ b/src/test/regress/sql/int2.sql
@@ -112,3 +112,10 @@ CREATE TABLE INT2_TBL(f1 int2);
(0.5::numeric),
(1.5::numeric),
(2.5::numeric)) t(x);
+
+
+-- non-decimal literals
+
+SELECT int2 '0b100101';
+SELECT int2 '0o273';
+SELECT int2 '0x42F';
diff --git a/src/test/regress/sql/int4.sql b/src/test/regress/sql/int4.sql
index 55ec07a147..3b214cdb65 100644
--- a/src/test/regress/sql/int4.sql
+++ b/src/test/regress/sql/int4.sql
@@ -176,3 +176,14 @@ CREATE TABLE INT4_TBL(f1 int4);
SELECT lcm((-2147483648)::int4, 1::int4); -- overflow
SELECT lcm(2147483647::int4, 2147483646::int4); -- overflow
+
+
+-- non-decimal literals
+
+SELECT 0b100101;
+SELECT 0o273;
+SELECT 0x42F;
+
+SELECT int4 '0b100101';
+SELECT int4 '0o273';
+SELECT int4 '0x42F';
diff --git a/src/test/regress/sql/int8.sql b/src/test/regress/sql/int8.sql
index 32940b4daa..b7ad696dd8 100644
--- a/src/test/regress/sql/int8.sql
+++ b/src/test/regress/sql/int8.sql
@@ -250,3 +250,10 @@ CREATE TABLE INT8_TBL(q1 int8, q2 int8);
SELECT lcm((-9223372036854775808)::int8, 1::int8); -- overflow
SELECT lcm(9223372036854775807::int8, 9223372036854775806::int8); -- overflow
+
+
+-- non-decimal literals
+
+SELECT int8 '0b100101';
+SELECT int8 '0o273';
+SELECT int8 '0x42F';
--
2.33.0