There was a complaint recently about the documentation using the widely frowned-upon md5() function in an unrelated context as an example hash function. This is quite common in many examples, such as hashing row values to compare them, or hashing datums if they don't fit into an index. But there is nothing we can easily replace md5 with, without going to things like pgcrypto.
I also noticed while working on some SSL code that we have perfectly good SHA-2 functionality in the server already, but it has no test coverage outside the SCRAM tests. So I suggest these patches that expose the new functions sha224(), sha256(), sha384(), sha512(). That allows us to make the SSL and SCRAM tests more robust, and it will allow them to be used in general purpose contexts over md5(). -- Peter Eisentraut http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
From 52edfab8f1175c69ba791139fde26feb9279364a Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 6 Feb 2018 21:46:46 -0500 Subject: [PATCH 1/2] Add user-callable SHA-2 functions Add the user-callable functions sha224, sha256, sha384, sha512. We already had these in the C code to support SCRAM, but there was no test coverage outside of the SCRAM tests. Adding these as user-callable functions allows writing some tests. Also, we have a user-callable md5 function but no more modern alternative, which led to wide use of md5 as a general-purpose hash function, which leads to occasional complaints about using md5. Also mark the existing md5 functions as leak-proof. --- doc/src/sgml/func.sgml | 70 ++++++++++++++++++++- src/backend/utils/adt/varlena.c | 101 +++++++++++++++++++++++++++++++ src/include/catalog/pg_proc.h | 12 +++- src/test/regress/expected/opr_sanity.out | 6 ++ src/test/regress/expected/strings.out | 53 ++++++++++++++++ src/test/regress/sql/strings.sql | 18 ++++++ 6 files changed, 257 insertions(+), 3 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 1e535cf215..bab828d507 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -3640,7 +3640,7 @@ <title>Other Binary String Functions</title> returning the result in hexadecimal </entry> <entry><literal>md5(E'Th\\000omas'::bytea)</literal></entry> - <entry><literal>8ab2d3c9689aaf18 b4958c334c82d8b1</literal></entry> + <entry><literal>8ab2d3c9689aaf18​b4958c334c82d8b1</literal></entry> </row> <row> @@ -3674,6 +3674,66 @@ <title>Other Binary String Functions</title> <entry><literal>set_byte(E'Th\\000omas'::bytea, 4, 64)</literal></entry> <entry><literal>Th\000o@as</literal></entry> </row> + + <row> + <entry> + <indexterm> + <primary>sha224</primary> + </indexterm> + <literal><function>sha224(<type>bytea</type>)</function></literal> + </entry> + <entry><type>bytea</type></entry> + <entry> + SHA-224 hash + </entry> + <entry><literal>sha224('abc')</literal></entry> + <entry><literal>\x23097d223405d8228642a477bda2​55b32aadbce4bda0b3f7e36c9da7</literal></entry> + </row> + + <row> + <entry> + <indexterm> + <primary>sha256</primary> + </indexterm> + <literal><function>sha256(<type>bytea</type>)</function></literal> + </entry> + <entry><type>bytea</type></entry> + <entry> + SHA-256 hash + </entry> + <entry><literal>sha256('abc')</literal></entry> + <entry><literal>\xba7816bf8f01cfea414140de5dae2223​b00361a396177a9cb410ff61f20015ad</literal></entry> + </row> + + <row> + <entry> + <indexterm> + <primary>sha384</primary> + </indexterm> + <literal><function>sha384(<type>bytea</type>)</function></literal> + </entry> + <entry><type>bytea</type></entry> + <entry> + SHA-384 hash + </entry> + <entry><literal>sha384('abc')</literal></entry> + <entry><literal>\xcb00753f45a35e8bb5a03d699ac65007​272c32ab0eded1631a8b605a43ff5bed​8086072ba1e7cc2358baeca134c825a7</literal></entry> + </row> + + <row> + <entry> + <indexterm> + <primary>sha512</primary> + </indexterm> + <literal><function>sha512(<type>bytea</type>)</function></literal> + </entry> + <entry><type>bytea</type></entry> + <entry> + SHA-512 hash + </entry> + <entry><literal>sha512('abc')</literal></entry> + <entry><literal>\xddaf35a193617abacc417349ae204131​12e6fa4e89a97ea20a9eeee64b55d39a​2192992a274fc1a836ba3c23a3feebbd​454d4423643ce80e2a9ac94fa54ca49f</literal></entry> + </row> </tbody> </tgroup> </table> @@ -3686,6 +3746,14 @@ <title>Other Binary String Functions</title> the first byte, and bit 15 is the most significant bit of the second byte. </para> + <para> + Note that for historic reasons, the function <function>md5</function> + returns a hex-encoded value of type <type>text</type> whereas the SHA-2 + functions return type <type>bytea</type>. Use the functions + <function>encode</function> and <function>decode</function> to convert + between the two. + </para> + <para> See also the aggregate function <function>string_agg</function> in <xref linkend="functions-aggregate"/> and the large object functions diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 304cb26691..d1266f26f5 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -23,6 +23,7 @@ #include "catalog/pg_type.h" #include "common/int.h" #include "common/md5.h" +#include "common/sha2.h" #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "miscadmin.h" @@ -4611,6 +4612,106 @@ md5_bytea(PG_FUNCTION_ARGS) PG_RETURN_TEXT_P(cstring_to_text(hexsum)); } +/* + * SHA-2 variants + */ + +Datum +sha224_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha224_ctx ctx; + unsigned char buf[PG_SHA224_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha224_init(&ctx); + pg_sha224_update(&ctx, data, len); + pg_sha224_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha256_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha256_ctx ctx; + unsigned char buf[PG_SHA256_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha256_init(&ctx); + pg_sha256_update(&ctx, data, len); + pg_sha256_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha384_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha384_ctx ctx; + unsigned char buf[PG_SHA384_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha384_init(&ctx); + pg_sha384_update(&ctx, data, len); + pg_sha384_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + +Datum +sha512_bytea(PG_FUNCTION_ARGS) +{ + bytea *in = PG_GETARG_BYTEA_PP(0); + const uint8 *data; + size_t len; + pg_sha512_ctx ctx; + unsigned char buf[PG_SHA512_DIGEST_LENGTH]; + bytea *result; + + len = VARSIZE_ANY_EXHDR(in); + data = (unsigned char *) VARDATA_ANY(in); + + pg_sha512_init(&ctx); + pg_sha512_update(&ctx, data, len); + pg_sha512_final(&ctx, buf); + + result = palloc(sizeof(buf) + VARHDRSZ); + SET_VARSIZE(result, sizeof(buf) + VARHDRSZ); + memcpy(VARDATA(result), buf, sizeof(buf)); + + PG_RETURN_BYTEA_P(result); +} + /* * Return the size of a datum, possibly compressed * diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 2a5321315a..62e16514cc 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3917,10 +3917,18 @@ DATA(insert OID = 3314 ( system PGNSP PGUID 12 1 0 0 0 f f f f t f v s 1 0 33 DESCR("SYSTEM tablesample method handler"); /* cryptographic */ -DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ )); +DATA(insert OID = 2311 ( md5 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 25 "25" _null_ _null_ _null_ _null_ _null_ md5_text _null_ _null_ _null_ )); DESCR("MD5 hash"); -DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 1 0 0 0 f f f f t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ md5_bytea _null_ _null_ _null_ )); +DATA(insert OID = 2321 ( md5 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 25 "17" _null_ _null_ _null_ _null_ _null_ md5_bytea _null_ _null_ _null_ )); DESCR("MD5 hash"); +DATA(insert OID = 3419 ( sha224 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha224_bytea _null_ _null_ _null_ )); +DESCR("SHA-224 hash"); +DATA(insert OID = 3420 ( sha256 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha256_bytea _null_ _null_ _null_ )); +DESCR("SHA-256 hash"); +DATA(insert OID = 3421 ( sha384 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha384_bytea _null_ _null_ _null_ )); +DESCR("SHA-384 hash"); +DATA(insert OID = 3422 ( sha512 PGNSP PGUID 12 1 0 0 0 f f f t t f i s 1 0 17 "17" _null_ _null_ _null_ _null_ _null_ sha512_bytea _null_ _null_ _null_ )); +DESCR("SHA-512 hash"); /* crosstype operations for date vs. timestamp and timestamptz */ DATA(insert OID = 2338 ( date_lt_timestamp PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 16 "1082 1114" _null_ _null_ _null_ _null_ _null_ date_lt_timestamp _null_ _null_ _null_ )); diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 684f7f20a8..6616cc1bf0 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -699,6 +699,8 @@ timestamp_lt(timestamp without time zone,timestamp without time zone) timestamp_le(timestamp without time zone,timestamp without time zone) timestamp_ge(timestamp without time zone,timestamp without time zone) timestamp_gt(timestamp without time zone,timestamp without time zone) +md5(text) +md5(bytea) tidgt(tid,tid) tidlt(tid,tid) tidge(tid,tid) @@ -711,6 +713,10 @@ uuid_gt(uuid,uuid) uuid_ne(uuid,uuid) xidneq(xid,xid) xidneqint4(xid,integer) +sha224(bytea) +sha256(bytea) +sha384(bytea) +sha512(bytea) macaddr8_eq(macaddr8,macaddr8) macaddr8_lt(macaddr8,macaddr8) macaddr8_le(macaddr8,macaddr8) diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out index 8073eb4fad..cbe66c375c 100644 --- a/src/test/regress/expected/strings.out +++ b/src/test/regress/expected/strings.out @@ -1439,6 +1439,58 @@ select md5('12345678901234567890123456789012345678901234567890123456789012345678 t (1 row) +-- +-- SHA-2 +-- +SET bytea_output TO hex; +SELECT sha224(''); + sha224 +------------------------------------------------------------ + \xd14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f +(1 row) + +SELECT sha224('The quick brown fox jumps over the lazy dog.'); + sha224 +------------------------------------------------------------ + \x619cba8e8e05826e9b8c519c0a5c68f4fb653e8a3d8aa04bb2c8cd4c +(1 row) + +SELECT sha256(''); + sha256 +-------------------------------------------------------------------- + \xe3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +(1 row) + +SELECT sha256('The quick brown fox jumps over the lazy dog.'); + sha256 +-------------------------------------------------------------------- + \xef537f25c895bfa782526529a9b63d97aa631564d5d789c2b765448c8635fb6c +(1 row) + +SELECT sha384(''); + sha384 +---------------------------------------------------------------------------------------------------- + \x38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b +(1 row) + +SELECT sha384('The quick brown fox jumps over the lazy dog.'); + sha384 +---------------------------------------------------------------------------------------------------- + \xed892481d8272ca6df370bf706e4d7bc1b5739fa2177aae6c50e946678718fc67a7af2819a021c2fc34e91bdb63409d7 +(1 row) + +SELECT sha512(''); + sha512 +------------------------------------------------------------------------------------------------------------------------------------ + \xcf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e +(1 row) + +SELECT sha512('The quick brown fox jumps over the lazy dog.'); + sha512 +------------------------------------------------------------------------------------------------------------------------------------ + \x91ea1245f20d46ae9a037a989f54f1f790f0a47607eeb8a14d12890cea77a1bbc6c7ed9cf205e67b7f2b8fd4c7dfd3a7a8617e45f3c463d481c7e586c39ac1ed +(1 row) + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- @@ -1525,6 +1577,7 @@ select 'a\\bcd' as f1, 'a\\b\'cd' as f2, 'a\\b\'''cd' as f3, 'abcd\\' as f4, ' -- -- Additional string functions -- +SET bytea_output TO escape; SELECT initcap('hi THOMAS'); initcap ----------- diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql index 9ed242208f..5a82237870 100644 --- a/src/test/regress/sql/strings.sql +++ b/src/test/regress/sql/strings.sql @@ -506,6 +506,23 @@ CREATE TABLE toasttest (c char(4096)); select md5('12345678901234567890123456789012345678901234567890123456789012345678901234567890'::bytea) = '57edf4a22be3c955ac49da2e2107b67a' AS "TRUE"; +-- +-- SHA-2 +-- +SET bytea_output TO hex; + +SELECT sha224(''); +SELECT sha224('The quick brown fox jumps over the lazy dog.'); + +SELECT sha256(''); +SELECT sha256('The quick brown fox jumps over the lazy dog.'); + +SELECT sha384(''); +SELECT sha384('The quick brown fox jumps over the lazy dog.'); + +SELECT sha512(''); +SELECT sha512('The quick brown fox jumps over the lazy dog.'); + -- -- test behavior of escape_string_warning and standard_conforming_strings options -- @@ -540,6 +557,7 @@ CREATE TABLE toasttest (c char(4096)); -- -- Additional string functions -- +SET bytea_output TO escape; SELECT initcap('hi THOMAS'); -- 2.16.2
From a3dd53816093a4aea47c999280813c3e319771d0 Mon Sep 17 00:00:00 2001 From: Peter Eisentraut <pete...@gmx.net> Date: Tue, 6 Feb 2018 21:59:40 -0500 Subject: [PATCH 2/2] Update gratuitous use of MD5 in documentation --- doc/src/sgml/citext.sgml | 10 +++++----- doc/src/sgml/sepgsql.sgml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/citext.sgml b/doc/src/sgml/citext.sgml index 82251de852..b1fe7101b2 100644 --- a/doc/src/sgml/citext.sgml +++ b/doc/src/sgml/citext.sgml @@ -80,11 +80,11 @@ <title>How to Use It</title> pass TEXT NOT NULL ); -INSERT INTO users VALUES ( 'larry', md5(random()::text) ); -INSERT INTO users VALUES ( 'Tom', md5(random()::text) ); -INSERT INTO users VALUES ( 'Damian', md5(random()::text) ); -INSERT INTO users VALUES ( 'NEAL', md5(random()::text) ); -INSERT INTO users VALUES ( 'Bjørn', md5(random()::text) ); +INSERT INTO users VALUES ( 'larry', sha256(random()::text::bytea) ); +INSERT INTO users VALUES ( 'Tom', sha256(random()::text::bytea) ); +INSERT INTO users VALUES ( 'Damian', sha256(random()::text::bytea) ); +INSERT INTO users VALUES ( 'NEAL', sha256(random()::text::bytea) ); +INSERT INTO users VALUES ( 'Bjørn', sha256(random()::text::bytea) ); SELECT * FROM users WHERE nick = 'Larry'; </programlisting> diff --git a/doc/src/sgml/sepgsql.sgml b/doc/src/sgml/sepgsql.sgml index 273efc6e27..f8c99e1b00 100644 --- a/doc/src/sgml/sepgsql.sgml +++ b/doc/src/sgml/sepgsql.sgml @@ -370,7 +370,7 @@ <title>DML Permissions</title> <para> For example, consider: <synopsis> -UPDATE t1 SET x = 2, y = md5sum(y) WHERE z = 100; +UPDATE t1 SET x = 2, y = func1(y) WHERE z = 100; </synopsis> Here, <literal>db_column:update</literal> will be checked for -- 2.16.2