On Wed, Dec 30, 2020 at 05:27:04PM +0100, Daniel Verite wrote: > David Fetter wrote: > > +Datum > +byteapopcount(PG_FUNCTION_ARGS) > +{ > + bytea *t1 = PG_GETARG_BYTEA_PP(0); > + int len, result; > + > + len = VARSIZE_ANY_EXHDR(t1); > + result = pg_popcount(VARDATA_ANY(t1), len); > + > + PG_RETURN_INT32(result); > +} > > The input may have more than 2 billion bits set to 1. The biggest possible > result should be 8 billion for bytea (1 GB with all bits set to 1). > So shouldn't this function return an int8?
It does now, and thanks for looking at this. > There is a pre-existing function in core: bit_length(bytea) returning int4, > but it errors out for large values (in fact it's a SQL function that returns > octet_length($1)*8). > > For the varbit case, it doesn't seem like it can hold so many bits, although > I don't see the limit documented in > https://www.postgresql.org/docs/current/datatype-bit.html Out of an abundance of caution, I changed that one, too. I'd noticed earlier that ceil() doesn't need to be quite as wasteful as the way I had it earlier, and fixed that with help from Andrew Gierth. Best, David. -- David Fetter <david(at)fetter(dot)org> http://fetter.org/ Phone: +1 415 235 3778 Remember to vote! Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 6d35100b3bc16c1c7d3bf08c62589f3e039cee76 Mon Sep 17 00:00:00 2001 From: David Fetter <da...@fetter.org> Date: Wed, 30 Dec 2020 02:51:46 -0800 Subject: [PATCH v2] popcount To: hackers MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------2.29.2" This is a multi-part message in MIME format. --------------2.29.2 Content-Type: text/plain; charset=UTF-8; format=fixed Content-Transfer-Encoding: 8bit Now it's accessible to SQL for the BIT VARYING and BYTEA types. diff --git doc/src/sgml/func.sgml doc/src/sgml/func.sgml index 5021ac1ca9..439a3a4a06 100644 --- doc/src/sgml/func.sgml +++ doc/src/sgml/func.sgml @@ -4069,6 +4069,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); </para></entry> </row> + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>popcount</primary> + </indexterm> + <function>popcount</function> ( <parameter>bytes</parameter> <type>bytea</type> ) + <returnvalue>integer</returnvalue> + </para> + <para> + Counts the number of bits set in a binary string. + </para> + <para> + <literal>popcount('\xdeadbeef'::bytea)</literal> + <returnvalue>24</returnvalue> + </para></entry> + </row> + <row> <entry role="func_table_entry"><para role="func_signature"> <indexterm> @@ -4830,6 +4847,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three'); <returnvalue>101010001010101010</returnvalue> </para></entry> </row> + + <row> + <entry role="func_table_entry"><para role="func_signature"> + <indexterm> + <primary>popcount</primary> + </indexterm> + <function>popcount</function> ( <parameter>bits</parameter> <type>bit</type> ) + <returnvalue>int4</returnvalue> + </para> + <para> + Counts the bits set in a bit string. + </para> + <para> + <literal>popcount(B'101010101010101010')</literal> + <returnvalue>9</returnvalue> + </para></entry> + </row> + </tbody> </tgroup> </table> diff --git src/include/catalog/pg_proc.dat src/include/catalog/pg_proc.dat index 139f4a08bd..82e3d50114 100644 --- src/include/catalog/pg_proc.dat +++ src/include/catalog/pg_proc.dat @@ -1446,6 +1446,9 @@ { oid => '752', descr => 'substitute portion of string', proname => 'overlay', prorettype => 'bytea', proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' }, +{ oid => '8436', descr => 'count set bits', + proname => 'popcount', prorettype => 'int8', proargtypes => 'bytea', + prosrc => 'byteapopcount'}, { oid => '725', proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line', @@ -3865,6 +3868,9 @@ { oid => '3033', descr => 'set bit', proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4', prosrc => 'bitsetbit' }, +{ oid => '8435', descr => 'count set bits', + proname => 'popcount', prorettype => 'int8', proargtypes => 'bit', + prosrc => 'bitpopcount'}, # for macaddr type support { oid => '436', descr => 'I/O', diff --git src/backend/utils/adt/varbit.c src/backend/utils/adt/varbit.c index 3c03459f51..a71c2b6bf4 100644 --- src/backend/utils/adt/varbit.c +++ src/backend/utils/adt/varbit.c @@ -36,6 +36,7 @@ #include "libpq/pqformat.h" #include "nodes/nodeFuncs.h" #include "nodes/supportnodes.h" +#include "port/pg_bitutils.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/varbit.h" @@ -1872,3 +1873,29 @@ bitgetbit(PG_FUNCTION_ARGS) else PG_RETURN_INT32(0); } + +/* + * bitpopcount + * + * Returns the number of bits set in a bit array. + * + */ +Datum +bitpopcount(PG_FUNCTION_ARGS) +{ + VarBit *arg1 = PG_GETARG_VARBIT_P(0); + bits8 *p; + int len; + int8 popcount; + + /* + * ceil(VARBITLEN(ARG1)/BITS_PER_BYTE) + * done to minimize branches and instructions. + */ + len = (VARBITLEN(arg1) + BITS_PER_BYTE + 1) / BITS_PER_BYTE; + p = VARBITS(arg1); + + popcount = pg_popcount((const char *)p, len); + + PG_RETURN_INT64(popcount); +} diff --git src/backend/utils/adt/varlena.c src/backend/utils/adt/varlena.c index 9300d19e0c..9c5e5b5592 100644 --- src/backend/utils/adt/varlena.c +++ src/backend/utils/adt/varlena.c @@ -3415,6 +3415,22 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl) return result; } +/* + * popcount + */ +Datum +byteapopcount(PG_FUNCTION_ARGS) +{ + bytea *t1 = PG_GETARG_BYTEA_PP(0); + int len; + int8 result; + + len = VARSIZE_ANY_EXHDR(t1); + result = pg_popcount(VARDATA_ANY(t1), len); + + PG_RETURN_INT64(result); +} + /* * byteapos - * Return the position of the specified substring. diff --git src/test/regress/expected/bit.out src/test/regress/expected/bit.out index a1fab7ebcb..c91ad3d0d5 100644 --- src/test/regress/expected/bit.out +++ src/test/regress/expected/bit.out @@ -681,6 +681,19 @@ SELECT overlay(B'0101011100' placing '001' from 20); 0101011100001 (1 row) +-- Popcount +SELECT popcount(B'0101011100'::bit(10)); + popcount +---------- + 5 +(1 row) + +SELECT popcount(B'1111111111'::bit(10)); + popcount +---------- + 10 +(1 row) + -- This table is intentionally left around to exercise pg_dump/pg_upgrade CREATE TABLE bit_defaults( b1 bit(4) DEFAULT '1001', diff --git src/test/regress/expected/strings.out src/test/regress/expected/strings.out index 298b6c48c2..9780639038 100644 --- src/test/regress/expected/strings.out +++ src/test/regress/expected/strings.out @@ -2126,3 +2126,10 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 Th\000o\x02\x03 (1 row) +SET bytea_output TO hex; +SELECT popcount(E'\\xdeadbeef'::bytea); + popcount +---------- + 24 +(1 row) + diff --git src/test/regress/sql/bit.sql src/test/regress/sql/bit.sql index 7681d4ab4d..3cd9fb65d3 100644 --- src/test/regress/sql/bit.sql +++ src/test/regress/sql/bit.sql @@ -207,6 +207,10 @@ SELECT overlay(B'0101011100' placing '101' from 6); SELECT overlay(B'0101011100' placing '001' from 11); SELECT overlay(B'0101011100' placing '001' from 20); +-- Popcount +SELECT popcount(B'0101011100'::bit(10)); +SELECT popcount(B'1111111111'::bit(10)); + -- This table is intentionally left around to exercise pg_dump/pg_upgrade CREATE TABLE bit_defaults( b1 bit(4) DEFAULT '1001', diff --git src/test/regress/sql/strings.sql src/test/regress/sql/strings.sql index ad5221ab6b..a35e993ab0 100644 --- src/test/regress/sql/strings.sql +++ src/test/regress/sql/strings.sql @@ -717,3 +717,6 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape'); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape'); SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape'); + +SET bytea_output TO hex; +SELECT popcount(E'\\xdeadbeef'::bytea); --------------2.29.2--