Hi Dean, > IMO big-endian is the most convenient byte-ordering to use here, > because then the string representation of the bytea is consistent with > the hex representation of the integer. It's also consistent with the > integer-to-bit casts, which output the most significant bits first, > starting with the sign bit. > > As far as the docs go, it's important to document precisely what > format is used, but I don't think it needs to explain why that choice > was made. It should also mention the size of the result and that it's > the two's complement representation, since there are other possible > representations of integers. So I think it would be sufficient for the > initial paragraph to say something like "Casting an integer to a bytea > produces 2, 4, or 8 bytes, depending on the width of the integer type. > The result is the two's complement representation of the integer, with > the most significant byte first.", and then list the examples to > demonstrate that.
Thank you. Here is the corrected patch. -- Best regards, Aleksander Alekseev
From 731dadc5f87d4e62d4f064c7d0ce91bdbafd1888 Mon Sep 17 00:00:00 2001 From: Aleksander Alekseev <aleksander@timescale.com> Date: Mon, 26 Aug 2024 12:09:59 +0300 Subject: [PATCH v9] Allow casting between bytea and integer types. For instance: SELECT '\x12345678'::bytea::integer; SELECT 0x12345678::bytea; This works with int2's, int4's and int8's. Author: Aleksander Alekseev Reviewed-by: Peter Eisentraut, Michael Paquier, Dean Rasheed Discussion: https://postgr.es/m/CAJ7c6TPtOp6%2BkFX5QX3fH1SVr7v65uHr-7yEJ%3DGMGQi5uhGtcA%40mail.gmail.com BUMP CATVERSION --- doc/src/sgml/func.sgml | 18 ++++ src/backend/utils/adt/varlena.c | 84 ++++++++++++++++ src/include/catalog/pg_cast.dat | 14 +++ src/include/catalog/pg_proc.dat | 18 ++++ src/test/regress/expected/opr_sanity.out | 3 + src/test/regress/expected/strings.out | 120 +++++++++++++++++++++++ src/test/regress/sql/strings.sql | 34 +++++++ 7 files changed, 291 insertions(+) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 5678e7621a5..0c1790a5e46 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -4997,6 +4997,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); </variablelist> </para> + <para> + In addition, it is possible to cast integral values to and from type + <type>bytea</type>. Casting an integer to a <type>bytea</type> produces + 2, 4, or 8 bytes, depending on the width of the integer type. The result + is the two's complement representation of the integer, with the most + significant byte first. Examples: +<programlisting> +''::bytea::int4 <lineannotation>0</lineannotation> +0x1234::int2::bytea <lineannotation>\x1234</lineannotation> +'\x12'::bytea::int4 <lineannotation>18</lineannotation> +'\x8000'::bytea::int2 <lineannotation>-32768</lineannotation> +'\x123456'::bytea::int2 <lineannotation>ERROR</lineannotation> +</programlisting> + Note that casting <type>bytea</type> to an integer type that is too small + to represent the given value produces an error. 
Casting integer types to + <type>bytea</type> never fails. + </para> + <para> See also the aggregate function <function>string_agg</function> in <xref linkend="functions-aggregate"/> and the large object functions diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 34796f2e27c..eceecb25124 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -3985,6 +3985,90 @@ bytea_sortsupport(PG_FUNCTION_ARGS) PG_RETURN_VOID(); } +/* Common code for bytea_int{2,4,8}: MSB first; uint64 avoids signed-shift UB */ +static int64 +bytea_integer(const bytea *v, int len) +{ + int offset = 0; + uint64 result = 0; + + while (len--) + { + result = result << 8; + result |= ((unsigned char *) VARDATA_ANY(v))[offset]; + offset++; + } + + return (int64) result; +} + +/* Cast bytea -> int2 */ +Datum +bytea_int2(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + + if (len > sizeof(int16)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("smallint out of range"))); + + PG_RETURN_INT16((int16) bytea_integer(v, len)); +} + +/* Cast bytea -> int4 */ +Datum +bytea_int4(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + + if (len > sizeof(int32)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("integer out of range"))); + + PG_RETURN_INT32((int32) bytea_integer(v, len)); +} + +/* Cast bytea -> int8 */ +Datum +bytea_int8(PG_FUNCTION_ARGS) +{ + bytea *v = PG_GETARG_BYTEA_PP(0); + int len = VARSIZE_ANY_EXHDR(v); + + if (len > sizeof(int64)) + ereport(ERROR, + (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), + errmsg("bigint out of range"))); + + + PG_RETURN_INT64(bytea_integer(v, len)); +} + +/* Cast int2 -> bytea; currently just a wrapper for int2send() */ +Datum +int2_bytea(PG_FUNCTION_ARGS) +{ + return int2send(fcinfo); +} + +/* Cast int4 -> bytea; currently just a wrapper for int4send() */ +Datum +int4_bytea(PG_FUNCTION_ARGS) +{ + 
return int4send(fcinfo); +} + +/* Cast int8 -> bytea; currently just a wrapper for int8send() */ +Datum +int8_bytea(PG_FUNCTION_ARGS) +{ + return int8send(fcinfo); +} + /* * appendStringInfoText * diff --git a/src/include/catalog/pg_cast.dat b/src/include/catalog/pg_cast.dat index a26ba34e869..ab46be606f0 100644 --- a/src/include/catalog/pg_cast.dat +++ b/src/include/catalog/pg_cast.dat @@ -320,6 +320,20 @@ { castsource => 'varchar', casttarget => 'name', castfunc => 'name(varchar)', castcontext => 'i', castmethod => 'f' }, +# Allow explicit coercions between bytea and integer types +{ castsource => 'int2', casttarget => 'bytea', castfunc => 'bytea(int2)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'int4', casttarget => 'bytea', castfunc => 'bytea(int4)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'int8', casttarget => 'bytea', castfunc => 'bytea(int8)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int2', castfunc => 'int2(bytea)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int4', castfunc => 'int4(bytea)', + castcontext => 'e', castmethod => 'f' }, +{ castsource => 'bytea', casttarget => 'int8', castfunc => 'int8(bytea)', + castcontext => 'e', castmethod => 'f' }, + # Allow explicit coercions between int4 and "char" { castsource => 'char', casttarget => 'int4', castfunc => 'int4(char)', castcontext => 'e', castmethod => 'f' }, diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 18560755d26..ce3b979d676 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -1164,6 +1164,24 @@ { oid => '409', descr => 'convert char(n) to name', proname => 'name', proleakproof => 't', prorettype => 'name', proargtypes => 'bpchar', prosrc => 'bpchar_name' }, +{ oid => '8577', descr => 'convert int2 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 'bytea', + proargtypes => 'int2', prosrc => 
'int2_bytea' }, +{ oid => '8578', descr => 'convert int4 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 'bytea', + proargtypes => 'int4', prosrc => 'int4_bytea' }, +{ oid => '8579', descr => 'convert int8 to bytea', + proname => 'bytea', proleakproof => 't', prorettype => 'bytea', + proargtypes => 'int8', prosrc => 'int8_bytea' }, +{ oid => '8580', descr => 'convert bytea to int2', + proname => 'int2', proleakproof => 'f', prorettype => 'int2', + proargtypes => 'bytea', prosrc => 'bytea_int2' }, +{ oid => '8581', descr => 'convert bytea to int4', + proname => 'int4', proleakproof => 'f', prorettype => 'int4', + proargtypes => 'bytea', prosrc => 'bytea_int4' }, +{ oid => '8582', descr => 'convert bytea to int8', + proname => 'int8', proleakproof => 'f', prorettype => 'int8', + proargtypes => 'bytea', prosrc => 'bytea_int8' }, { oid => '449', descr => 'hash', proname => 'hashint2', prorettype => 'int4', proargtypes => 'int2', diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index b673642ad1d..20bf9ea9cdf 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -875,6 +875,9 @@ uuid_extract_timestamp(uuid) uuid_extract_version(uuid) crc32(bytea) crc32c(bytea) +bytea(smallint) +bytea(integer) +bytea(bigint) bytea_larger(bytea,bytea) bytea_smaller(bytea,bytea) -- Check that functions without argument are not marked as leakproof. diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index b65bb2d5368..69f1e66aa98 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -2690,3 +2690,123 @@ ERROR: invalid Unicode code point: 2FFFFF SELECT unistr('wrong: \xyz'); ERROR: invalid Unicode escape HINT: Unicode escapes must be \XXXX, \+XXXXXX, \uXXXX, or \UXXXXXXXX. 
+-- +-- Test coercions between bytea and integer types +-- +SET bytea_output TO hex; +SELECT 0x1234::int2::bytea; + bytea +-------- + \x1234 +(1 row) + +SELECT 0x12345678::int4::bytea; + bytea +------------ + \x12345678 +(1 row) + +SELECT 0x1122334455667788::int8::bytea; + bytea +-------------------- + \x1122334455667788 +(1 row) + +SELECT ''::bytea::int2 = 0; + ?column? +---------- + t +(1 row) + +SELECT '\x12'::bytea::int2 = 0x12; + ?column? +---------- + t +(1 row) + +SELECT '\x1234'::bytea::int2 = 0x1234; + ?column? +---------- + t +(1 row) + +SELECT '\x123456'::bytea::int2; -- error +ERROR: smallint out of range +SELECT ''::bytea::int4 = 0; + ?column? +---------- + t +(1 row) + +SELECT '\x12'::bytea::int4 = 0x12; + ?column? +---------- + t +(1 row) + +SELECT '\x12345678'::bytea::int4 = 0x12345678; + ?column? +---------- + t +(1 row) + +SELECT '\x123456789A'::bytea::int4; -- error +ERROR: integer out of range +SELECT ''::bytea::int8 = 0; + ?column? +---------- + t +(1 row) + +SELECT '\x12'::bytea::int8 = 0x12; + ?column? +---------- + t +(1 row) + +SELECT '\x1122334455667788'::bytea::int8 = 0x1122334455667788; + ?column? +---------- + t +(1 row) + +SELECT '\x112233445566778899'::bytea::int8; -- error +ERROR: bigint out of range +-- max integer values +SELECT '\x7FFF'::bytea::int2 = 0x7FFF; + ?column? +---------- + t +(1 row) + +SELECT '\x7FFFFFFF'::bytea::int4 = 0x7FFFFFFF; + ?column? +---------- + t +(1 row) + +SELECT '\x7FFFFFFFFFFFFFFF'::bytea::int8 = 0x7FFFFFFFFFFFFFFF; + ?column? +---------- + t +(1 row) + +-- min integer values +SELECT '\x8000'::bytea::int2 = -0x8000; + ?column? +---------- + t +(1 row) + +SELECT '\x80000000'::bytea::int4 = -0x80000000; + ?column? +---------- + t +(1 row) + +SELECT '\x8000000000000000'::bytea::int8 = -0x8000000000000000; + ?column? 
+---------- + t +(1 row) + diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 8e0f3a0e75f..16cceb5c746 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -848,3 +848,37 @@ SELECT unistr('wrong: \udb99\u0061'); SELECT unistr('wrong: \U0000db99\U00000061'); SELECT unistr('wrong: \U002FFFFF'); SELECT unistr('wrong: \xyz'); + +-- +-- Test coercions between bytea and integer types +-- +SET bytea_output TO hex; + +SELECT 0x1234::int2::bytea; +SELECT 0x12345678::int4::bytea; +SELECT 0x1122334455667788::int8::bytea; + +SELECT ''::bytea::int2 = 0; +SELECT '\x12'::bytea::int2 = 0x12; +SELECT '\x1234'::bytea::int2 = 0x1234; +SELECT '\x123456'::bytea::int2; -- error + +SELECT ''::bytea::int4 = 0; +SELECT '\x12'::bytea::int4 = 0x12; +SELECT '\x12345678'::bytea::int4 = 0x12345678; +SELECT '\x123456789A'::bytea::int4; -- error + +SELECT ''::bytea::int8 = 0; +SELECT '\x12'::bytea::int8 = 0x12; +SELECT '\x1122334455667788'::bytea::int8 = 0x1122334455667788; +SELECT '\x112233445566778899'::bytea::int8; -- error + +-- max integer values +SELECT '\x7FFF'::bytea::int2 = 0x7FFF; +SELECT '\x7FFFFFFF'::bytea::int4 = 0x7FFFFFFF; +SELECT '\x7FFFFFFFFFFFFFFF'::bytea::int8 = 0x7FFFFFFFFFFFFFFF; + +-- min integer values +SELECT '\x8000'::bytea::int2 = -0x8000; +SELECT '\x80000000'::bytea::int4 = -0x80000000; +SELECT '\x8000000000000000'::bytea::int8 = -0x8000000000000000; -- 2.48.1