On Tue, Aug 09, 2022 at 01:00:37PM -0700, Nathan Bossart wrote: > Your adjustments in 0002 seem reasonable to me. I think it makes sense to > ensure there is test coverage for pg_lfind32(), but I don't know if that > syscache code is the right choice. For non-USE_SSE2 builds, it might make > these lookups more expensive. I'll look around to see if there are any > other suitable candidates.
One option might be to create a small test module for pg_lfind32(). Here is an example. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From 0b15a0569318cc846980b7771ff809ef5d2d505c Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Wed, 3 Aug 2022 09:49:04 -0700 Subject: [PATCH v12 1/2] Introduce optimized routine for linear searches through an array of integers. If SSE2 is available, this function uses it to speed up the search. Otherwise, it uses a simple 'for' loop. This is a prerequisite for a follow-up commit that will use this function to optimize [sub]xip lookups in XidInMVCCSnapshot(), but it can be used anywhere that might benefit from such an optimization. It might be worthwhile to add an ARM-specific code path to this function in the future. Author: Nathan Bossart Reviewed by: Andres Freund, John Naylor, Bharath Rupireddy, Masahiko Sawada Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13 --- src/include/port/pg_lfind.h | 103 ++++++++++++++++++ src/test/modules/Makefile | 1 + src/test/modules/test_lfind/.gitignore | 4 + src/test/modules/test_lfind/Makefile | 23 ++++ .../test_lfind/expected/test_lfind.out | 12 ++ .../modules/test_lfind/sql/test_lfind.sql | 8 ++ .../modules/test_lfind/test_lfind--1.0.sql | 8 ++ src/test/modules/test_lfind/test_lfind.c | 52 +++++++++ .../modules/test_lfind/test_lfind.control | 4 + 9 files changed, 215 insertions(+) create mode 100644 src/include/port/pg_lfind.h create mode 100644 src/test/modules/test_lfind/.gitignore create mode 100644 src/test/modules/test_lfind/Makefile create mode 100644 src/test/modules/test_lfind/expected/test_lfind.out create mode 100644 src/test/modules/test_lfind/sql/test_lfind.sql create mode 100644 src/test/modules/test_lfind/test_lfind--1.0.sql create mode 100644 src/test/modules/test_lfind/test_lfind.c create mode 100644 src/test/modules/test_lfind/test_lfind.control diff --git a/src/include/port/pg_lfind.h b/src/include/port/pg_lfind.h new file mode 100644 index 0000000000..fb125977b2 --- /dev/null +++ b/src/include/port/pg_lfind.h @@ -0,0 +1,103 @@ +/*------------------------------------------------------------------------- + * + * pg_lfind.h + * Optimized linear search routines. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/include/port/pg_lfind.h + * + *------------------------------------------------------------------------- + */ +#ifndef PG_LFIND_H +#define PG_LFIND_H + +#include "port/simd.h" + +/* + * pg_lfind32 + * + * Return true if there is an element in 'base' that equals 'key', otherwise + * return false. + */ +static inline bool +pg_lfind32(uint32 key, uint32 *base, uint32 nelem) +{ + uint32 i = 0; + + /* Use SIMD intrinsics where available. */ +#ifdef USE_SSE2 + + /* + * A 16-byte register only has four 4-byte lanes. For better + * instruction-level parallelism, each loop iteration operates on a block + * of four registers. Testing has showed this is ~40% faster than using a + * block of two registers. + */ + const __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ + uint32 iterations = nelem & ~0xF; /* round down to multiple of 16 */ + +#if defined(USE_ASSERT_CHECKING) + bool assert_result = false; + + /* pre-compute the result for assert checking */ + for (i = 0; i < nelem; i++) + { + if (key == base[i]) + { + assert_result = true; + break; + } + } +#endif + + for (i = 0; i < iterations; i += 16) + { + /* load the next block into 4 registers holding 4 values each */ + const __m128i vals1 = _mm_loadu_si128((__m128i *) & base[i]); + const __m128i vals2 = _mm_loadu_si128((__m128i *) & base[i + 4]); + const __m128i vals3 = _mm_loadu_si128((__m128i *) & base[i + 8]); + const __m128i vals4 = _mm_loadu_si128((__m128i *) & base[i + 12]); + + /* compare each value to the key */ + const __m128i result1 = _mm_cmpeq_epi32(keys, vals1); + const __m128i result2 = _mm_cmpeq_epi32(keys, vals2); + const __m128i result3 = _mm_cmpeq_epi32(keys, vals3); + const __m128i result4 = _mm_cmpeq_epi32(keys, vals4); + + /* combine the results into a single variable */ + const __m128i tmp1 = _mm_or_si128(result1, result2); + const __m128i tmp2 = _mm_or_si128(result3, result4); + const __m128i result = _mm_or_si128(tmp1, tmp2); + + /* see if there was a match */ + if (_mm_movemask_epi8(result) != 0) + { +#if defined(USE_ASSERT_CHECKING) + Assert(assert_result == true); +#endif + return true; + } + } +#endif /* USE_SSE2 */ + + /* Process the remaining elements one at a time. */ + for (; i < nelem; i++) + { + if (key == base[i]) + { +#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) + Assert(assert_result == true); +#endif + return true; + } + } + +#if defined(USE_SSE2) && defined(USE_ASSERT_CHECKING) + Assert(assert_result == false); +#endif + return false; +} + +#endif /* PG_LFIND_H */ diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index 9090226daa..6c31c8707c 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -19,6 +19,7 @@ SUBDIRS = \ test_extensions \ test_ginpostinglist \ test_integerset \ + test_lfind \ test_misc \ test_oat_hooks \ test_parser \ diff --git a/src/test/modules/test_lfind/.gitignore b/src/test/modules/test_lfind/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/src/test/modules/test_lfind/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_lfind/Makefile b/src/test/modules/test_lfind/Makefile new file mode 100644 index 0000000000..00ba56ff74 --- /dev/null +++ b/src/test/modules/test_lfind/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_lfind/Makefile + +MODULE_big = test_lfind +OBJS = \ + $(WIN32RES) \ + test_lfind.o +PGFILEDESC = "test_lfind - test code for optimized linear search functions" + +EXTENSION = test_lfind +DATA = test_lfind--1.0.sql + +REGRESS = test_lfind + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_lfind +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_lfind/expected/test_lfind.out b/src/test/modules/test_lfind/expected/test_lfind.out new file mode 100644 index 0000000000..222c8fd7ff --- /dev/null +++ b/src/test/modules/test_lfind/expected/test_lfind.out @@ -0,0 +1,12 @@ +CREATE EXTENSION test_lfind; +-- +-- These tests don't produce any interesting output. We're checking that +-- the operations complete without crashing or hanging and that none of their +-- internal sanity tests fail. +-- +SELECT test_lfind(); + test_lfind +------------ + +(1 row) + diff --git a/src/test/modules/test_lfind/sql/test_lfind.sql b/src/test/modules/test_lfind/sql/test_lfind.sql new file mode 100644 index 0000000000..899f1dd49b --- /dev/null +++ b/src/test/modules/test_lfind/sql/test_lfind.sql @@ -0,0 +1,8 @@ +CREATE EXTENSION test_lfind; + +-- +-- These tests don't produce any interesting output. We're checking that +-- the operations complete without crashing or hanging and that none of their +-- internal sanity tests fail. +-- +SELECT test_lfind(); diff --git a/src/test/modules/test_lfind/test_lfind--1.0.sql b/src/test/modules/test_lfind/test_lfind--1.0.sql new file mode 100644 index 0000000000..d82ab0567e --- /dev/null +++ b/src/test/modules/test_lfind/test_lfind--1.0.sql @@ -0,0 +1,8 @@ +/* src/test/modules/test_lfind/test_lfind--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_lfind" to load this file. \quit + +CREATE FUNCTION test_lfind() + RETURNS pg_catalog.void + AS 'MODULE_PATHNAME' LANGUAGE C; diff --git a/src/test/modules/test_lfind/test_lfind.c b/src/test/modules/test_lfind/test_lfind.c new file mode 100644 index 0000000000..a000746fb8 --- /dev/null +++ b/src/test/modules/test_lfind/test_lfind.c @@ -0,0 +1,52 @@ +/*-------------------------------------------------------------------------- + * + * test_lfind.c + * Test correctness of optimized linear search functions. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/test/modules/test_lfind/test_lfind.c + * + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "fmgr.h" +#include "port/pg_lfind.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(test_lfind); + +Datum +test_lfind(PG_FUNCTION_ARGS) +{ +#define TEST_ARRAY_SIZE 135 + uint32 test_array[TEST_ARRAY_SIZE] = {0}; + + test_array[8] = 1; + test_array[64] = 2; + test_array[TEST_ARRAY_SIZE - 1] = 3; + + if (pg_lfind32(1, test_array, 4)) + elog(ERROR, "pg_lfind32() found nonexistent element"); + if (!pg_lfind32(1, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind32() did not find existing element"); + + if (pg_lfind32(2, test_array, 32)) + elog(ERROR, "pg_lfind32() found nonexistent element"); + if (!pg_lfind32(2, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind32() did not find existing element"); + + if (pg_lfind32(3, test_array, 96)) + elog(ERROR, "pg_lfind32() found nonexistent element"); + if (!pg_lfind32(3, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind32() did not find existing element"); + + if (pg_lfind32(4, test_array, TEST_ARRAY_SIZE)) + elog(ERROR, "pg_lfind32() found nonexistent element"); + + PG_RETURN_VOID(); +} diff --git a/src/test/modules/test_lfind/test_lfind.control b/src/test/modules/test_lfind/test_lfind.control new file mode 100644 index 0000000000..d8b57dfca2 --- /dev/null +++ b/src/test/modules/test_lfind/test_lfind.control @@ -0,0 +1,4 @@ +comment = 'Test code for optimized linear search functions' +default_version = '1.0' +module_pathname = '$libdir/test_lfind' +relocatable = true -- 2.25.1
>From 89b0ec186f7013aa6ba66c1733130d715161d8f0 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Wed, 3 Aug 2022 09:59:28 -0700 Subject: [PATCH v12 2/2] Optimize linear searches in XidInMVCCSnapshot(). This change makes use of the recently-introduced optimized linear search routine to speed up searches through the [sub]xip arrays when possible, which should improve performance significantly when the arrays are large. Author: Nathan Bossart Reviewed by: Andres Freund, John Naylor, Bharath Rupireddy, Masahiko Sawada Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13 --- src/backend/utils/time/snapmgr.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 5bc2a15160..9b504c9745 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -56,6 +56,7 @@ #include "datatype/timestamp.h" #include "lib/pairingheap.h" #include "miscadmin.h" +#include "port/pg_lfind.h" #include "storage/predicate.h" #include "storage/proc.h" #include "storage/procarray.h" @@ -2284,8 +2285,6 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { - uint32 i; - /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -2317,13 +2316,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - int32 j; - - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; /* not there, fall through to search xip[] */ } @@ -2344,16 +2338,11 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - for (i = 0; i < snapshot->xcnt; i++) - { - if (TransactionIdEquals(xid, snapshot->xip[i])) - return true; - } + if (pg_lfind32(xid, snapshot->xip, snapshot->xcnt)) + return true; } else { - int32 j; - /* * In recovery we store all xids in the subxact array because it is by * far the bigger array, and we mostly don't know which xids are @@ -2383,11 +2372,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_lfind32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; } return false; -- 2.25.1