On Wed, Dec 30, 2020 at 05:27:04PM +0100, Daniel Verite wrote:
> David Fetter wrote:
>
> +Datum
> +byteapopcount(PG_FUNCTION_ARGS)
> +{
> + bytea *t1 = PG_GETARG_BYTEA_PP(0);
> + int len, result;
> +
> + len = VARSIZE_ANY_EXHDR(t1);
> + result = pg_popcount(VARDATA_ANY(t1), len);
> +
> + PG_RETURN_INT32(result);
> +}
>
> The input may have more than 2 billion bits set to 1. The biggest possible
> result should be 8 billion for bytea (1 GB with all bits set to 1).
> So shouldn't this function return an int8?
It does now, and thanks for looking at this.
> There is a pre-existing function in core: bit_length(bytea) returning int4,
> but it errors out for large values (in fact it's a SQL function that returns
> octet_length($1)*8).
>
> For the varbit case, it doesn't seem like it can hold so many bits, although
> I don't see the limit documented in
> https://www.postgresql.org/docs/current/datatype-bit.html
Out of an abundance of caution, I changed that one, too.
I'd noticed earlier that ceil() doesn't need to be quite as wasteful
as the way I had it earlier, and fixed that with help from Andrew
Gierth.
Best,
David.
--
David Fetter <david(at)fetter(dot)org> http://fetter.org/
Phone: +1 415 235 3778
Remember to vote!
Consider donating to Postgres: http://www.postgresql.org/about/donate
>From 6d35100b3bc16c1c7d3bf08c62589f3e039cee76 Mon Sep 17 00:00:00 2001
From: David Fetter <[email protected]>
Date: Wed, 30 Dec 2020 02:51:46 -0800
Subject: [PATCH v2] popcount
To: hackers
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary="------------2.29.2"
This is a multi-part message in MIME format.
--------------2.29.2
Content-Type: text/plain; charset=UTF-8; format=fixed
Content-Transfer-Encoding: 8bit
Now it's accessible to SQL for the BIT VARYING and BYTEA types.
diff --git doc/src/sgml/func.sgml doc/src/sgml/func.sgml
index 5021ac1ca9..439a3a4a06 100644
--- doc/src/sgml/func.sgml
+++ doc/src/sgml/func.sgml
@@ -4069,6 +4069,23 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>popcount</primary>
+ </indexterm>
+ <function>popcount</function> ( <parameter>bytes</parameter> <type>bytea</type> )
+ <returnvalue>integer</returnvalue>
+ </para>
+ <para>
+ Counts the number of bits set in a binary string.
+ </para>
+ <para>
+ <literal>popcount('\xdeadbeef'::bytea)</literal>
+ <returnvalue>24</returnvalue>
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@@ -4830,6 +4847,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
<returnvalue>101010001010101010</returnvalue>
</para></entry>
</row>
+
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <indexterm>
+ <primary>popcount</primary>
+ </indexterm>
+ <function>popcount</function> ( <parameter>bits</parameter> <type>bit</type> )
+ <returnvalue>int4</returnvalue>
+ </para>
+ <para>
+ Counts the bits set in a bit string.
+ </para>
+ <para>
+ <literal>popcount(B'101010101010101010')</literal>
+ <returnvalue>9</returnvalue>
+ </para></entry>
+ </row>
+
</tbody>
</tgroup>
</table>
diff --git src/include/catalog/pg_proc.dat src/include/catalog/pg_proc.dat
index 139f4a08bd..82e3d50114 100644
--- src/include/catalog/pg_proc.dat
+++ src/include/catalog/pg_proc.dat
@@ -1446,6 +1446,9 @@
{ oid => '752', descr => 'substitute portion of string',
proname => 'overlay', prorettype => 'bytea',
proargtypes => 'bytea bytea int4', prosrc => 'byteaoverlay_no_len' },
+{ oid => '8436', descr => 'count set bits',
+ proname => 'popcount', prorettype => 'int8', proargtypes => 'bytea',
+ prosrc => 'byteapopcount'},
{ oid => '725',
proname => 'dist_pl', prorettype => 'float8', proargtypes => 'point line',
@@ -3865,6 +3868,9 @@
{ oid => '3033', descr => 'set bit',
proname => 'set_bit', prorettype => 'bit', proargtypes => 'bit int4 int4',
prosrc => 'bitsetbit' },
+{ oid => '8435', descr => 'count set bits',
+ proname => 'popcount', prorettype => 'int8', proargtypes => 'bit',
+ prosrc => 'bitpopcount'},
# for macaddr type support
{ oid => '436', descr => 'I/O',
diff --git src/backend/utils/adt/varbit.c src/backend/utils/adt/varbit.c
index 3c03459f51..a71c2b6bf4 100644
--- src/backend/utils/adt/varbit.c
+++ src/backend/utils/adt/varbit.c
@@ -36,6 +36,7 @@
#include "libpq/pqformat.h"
#include "nodes/nodeFuncs.h"
#include "nodes/supportnodes.h"
+#include "port/pg_bitutils.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/varbit.h"
@@ -1872,3 +1873,29 @@ bitgetbit(PG_FUNCTION_ARGS)
else
PG_RETURN_INT32(0);
}
+
+/*
+ * bitpopcount
+ *
+ * Returns the number of bits set in a bit array.
+ *
+ */
+Datum
+bitpopcount(PG_FUNCTION_ARGS)
+{
+ VarBit *arg1 = PG_GETARG_VARBIT_P(0);
+ bits8 *p;
+ int len;
+ int8 popcount;
+
+ /*
+ * ceil(VARBITLEN(ARG1)/BITS_PER_BYTE)
+ * done to minimize branches and instructions.
+ */
+ len = (VARBITLEN(arg1) + BITS_PER_BYTE + 1) / BITS_PER_BYTE;
+ p = VARBITS(arg1);
+
+ popcount = pg_popcount((const char *)p, len);
+
+ PG_RETURN_INT64(popcount);
+}
diff --git src/backend/utils/adt/varlena.c src/backend/utils/adt/varlena.c
index 9300d19e0c..9c5e5b5592 100644
--- src/backend/utils/adt/varlena.c
+++ src/backend/utils/adt/varlena.c
@@ -3415,6 +3415,22 @@ bytea_overlay(bytea *t1, bytea *t2, int sp, int sl)
return result;
}
+/*
+ * popcount
+ */
+Datum
+byteapopcount(PG_FUNCTION_ARGS)
+{
+ bytea *t1 = PG_GETARG_BYTEA_PP(0);
+ int len;
+ int8 result;
+
+ len = VARSIZE_ANY_EXHDR(t1);
+ result = pg_popcount(VARDATA_ANY(t1), len);
+
+ PG_RETURN_INT64(result);
+}
+
/*
* byteapos -
* Return the position of the specified substring.
diff --git src/test/regress/expected/bit.out src/test/regress/expected/bit.out
index a1fab7ebcb..c91ad3d0d5 100644
--- src/test/regress/expected/bit.out
+++ src/test/regress/expected/bit.out
@@ -681,6 +681,19 @@ SELECT overlay(B'0101011100' placing '001' from 20);
0101011100001
(1 row)
+-- Popcount
+SELECT popcount(B'0101011100'::bit(10));
+ popcount
+----------
+ 5
+(1 row)
+
+SELECT popcount(B'1111111111'::bit(10));
+ popcount
+----------
+ 10
+(1 row)
+
-- This table is intentionally left around to exercise pg_dump/pg_upgrade
CREATE TABLE bit_defaults(
b1 bit(4) DEFAULT '1001',
diff --git src/test/regress/expected/strings.out src/test/regress/expected/strings.out
index 298b6c48c2..9780639038 100644
--- src/test/regress/expected/strings.out
+++ src/test/regress/expected/strings.out
@@ -2126,3 +2126,10 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5
Th\000o\x02\x03
(1 row)
+SET bytea_output TO hex;
+SELECT popcount(E'\\xdeadbeef'::bytea);
+ popcount
+----------
+ 24
+(1 row)
+
diff --git src/test/regress/sql/bit.sql src/test/regress/sql/bit.sql
index 7681d4ab4d..3cd9fb65d3 100644
--- src/test/regress/sql/bit.sql
+++ src/test/regress/sql/bit.sql
@@ -207,6 +207,10 @@ SELECT overlay(B'0101011100' placing '101' from 6);
SELECT overlay(B'0101011100' placing '001' from 11);
SELECT overlay(B'0101011100' placing '001' from 20);
+-- Popcount
+SELECT popcount(B'0101011100'::bit(10));
+SELECT popcount(B'1111111111'::bit(10));
+
-- This table is intentionally left around to exercise pg_dump/pg_upgrade
CREATE TABLE bit_defaults(
b1 bit(4) DEFAULT '1001',
diff --git src/test/regress/sql/strings.sql src/test/regress/sql/strings.sql
index ad5221ab6b..a35e993ab0 100644
--- src/test/regress/sql/strings.sql
+++ src/test/regress/sql/strings.sql
@@ -717,3 +717,6 @@ SELECT btrim(E'\\000trim\\000'::bytea, ''::bytea);
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'Th\\001omas'::bytea from 2),'escape');
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8),'escape');
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
+
+SET bytea_output TO hex;
+SELECT popcount(E'\\xdeadbeef'::bytea);
--------------2.29.2--