On Fri, Jul 29, 2022 at 10:38:11PM -0700, Nathan Bossart wrote: > On Sat, Jul 30, 2022 at 12:02:02PM +0700, John Naylor wrote: >> I'll be the first to say it's not committable and needs some thought. Since >> there are several recently proposed patches that take advantage of SSE2, it >> seems time for me to open a new thread and get that prerequisite settled. >> I'll do that next week. > > Awesome. I will help test and review.
While this prerequisite is worked out [0], here is a new patch set. I've added an 0002 in which I've made use of the proposed SSE2 linear search function in several other areas. I haven't done any additional performance analysis, and it's likely I'm missing some eligible code locations, but at the very least, this demonstrates the reusability of the new function. [0] https://postgr.es/m/CAFBsxsE2G_H_5Wbw%2BNOPm70-BK4xxKf86-mRzY%3DL2sLoQqM%2B-Q%40mail.gmail.com -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From a2b2ab3777f689775c2e731822aefe2ab500e8ee Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Thu, 28 Jul 2022 12:15:47 -0700 Subject: [PATCH v4 1/2] Use SSE2 intrinsics for XidInMVCCSnapshot(). This optimizes the linear searches through the [sub]xip arrays when possible, which should improve performance significantly when the arrays are large. --- src/backend/utils/time/snapmgr.c | 28 +++--------- src/include/c.h | 8 ++++ src/include/utils/linearsearch.h | 78 ++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 21 deletions(-) create mode 100644 src/include/utils/linearsearch.h diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 5bc2a15160..834c8867d4 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -63,6 +63,7 @@ #include "storage/sinvaladt.h" #include "storage/spin.h" #include "utils/builtins.h" +#include "utils/linearsearch.h" #include "utils/memutils.h" #include "utils/old_snapshot.h" #include "utils/rel.h" @@ -2284,8 +2285,6 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { - uint32 i; - /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -2317,13 +2316,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - int32 j; - - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; /* not there, fall through to search xip[] */ } @@ -2344,16 +2338,11 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - for (i = 0; i < snapshot->xcnt; i++) - { - if (TransactionIdEquals(xid, snapshot->xip[i])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->xip, snapshot->xcnt)) + return true; } else { - int32 j; - /* * In recovery we store all xids in the subxact array because it is by * far the bigger array, and we mostly don't know which xids are @@ -2383,11 +2372,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; } return false; diff --git a/src/include/c.h b/src/include/c.h index d35405f191..2c1a47bc28 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -371,6 +371,14 @@ typedef void (*pg_funcptr_t) (void); #endif #endif +/* + * Are SSE2 intrinsics available? + */ +#if (defined(__x86_64__) || defined(_M_AMD64)) +#include <emmintrin.h> +#define USE_SSE2 +#endif + /* ---------------------------------------------------------------- * Section 2: bool, true, false diff --git a/src/include/utils/linearsearch.h b/src/include/utils/linearsearch.h new file mode 100644 index 0000000000..65b0092a65 --- /dev/null +++ b/src/include/utils/linearsearch.h @@ -0,0 +1,78 @@ +/*------------------------------------------------------------------------- + * + * linearsearch.h + * Optimized linear search routines. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/include/utils/linearsearch.h + * + *------------------------------------------------------------------------- + */ +#ifndef LINEARSEARCH_H +#define LINEARSEARCH_H + +#include "c.h" + +#ifdef USE_SSE2 +#include "port/pg_bitutils.h" +#endif + +/* + * pg_linearsearch_uint32 + * + * Returns the address of the first element in 'base' that equals 'key', or + * NULL if no match is found. + */ +#ifdef USE_SSE2 +pg_attribute_no_sanitize_alignment() +#endif +static inline uint32 * +pg_linearsearch_uint32(uint32 key, uint32 *base, uint32 nelem) +{ + uint32 i = 0; + + /* If possible, use SSE2 intrinsics to speed up the search. */ +#ifdef USE_SSE2 + __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ + uint32 its = nelem & ~0xF; /* round down to multiple of 16 */ + + for (; i < its; i += 16) + { + /* load the next 16 values into __m128i variables */ + __m128i vals1 = _mm_loadu_si128((__m128i *) &base[i]); + __m128i vals2 = _mm_loadu_si128((__m128i *) &base[i + 4]); + __m128i vals3 = _mm_loadu_si128((__m128i *) &base[i + 8]); + __m128i vals4 = _mm_loadu_si128((__m128i *) &base[i + 12]); + + /* perform the comparisons */ + __m128i result1 = _mm_cmpeq_epi32(keys, vals1); + __m128i result2 = _mm_cmpeq_epi32(keys, vals2); + __m128i result3 = _mm_cmpeq_epi32(keys, vals3); + __m128i result4 = _mm_cmpeq_epi32(keys, vals4); + + /* shrink the results into an integer */ + __m128i tmp1 = _mm_packs_epi32(result1, result2); + __m128i tmp2 = _mm_packs_epi32(result3, result4); + __m128i tmp3 = _mm_packs_epi16(tmp1, tmp2); + uint32 result = _mm_movemask_epi8(tmp3); + + /* if found, return a pointer to the matching element */ + if (result != 0) + return &base[i + pg_rightmost_one_pos32(result)]; + } +#endif + + /* Process the remaining elements the slow way. */ + for (; i < nelem; i++) + { + if (key == base[i]) + return &base[i]; + } + + return NULL; +} + +#endif /* LINEARSEARCH_H */ -- 2.25.1
>From 571f831fc1d55660afb1531c17572847ec57b446 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Tue, 2 Aug 2022 14:56:53 -0700 Subject: [PATCH v4 2/2] SIMD-ify various linear searches through arrays of integers. --- src/backend/access/transam/slru.c | 15 ++++++--------- src/backend/commands/extension.c | 25 +++++++------------------ src/backend/commands/tsearchcmds.c | 13 +++++-------- src/backend/executor/execParallel.c | 23 +++++++++++++++-------- src/backend/parser/parse_coerce.c | 11 ++++++----- src/backend/storage/ipc/procarray.c | 8 +++----- src/backend/storage/lmgr/predicate.c | 9 +++------ src/backend/utils/adt/lockfuncs.c | 27 ++++++++++++--------------- 8 files changed, 57 insertions(+), 74 deletions(-) diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index b65cb49d7f..84b0487f4d 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -59,6 +59,7 @@ #include "pgstat.h" #include "storage/fd.h" #include "storage/shmem.h" +#include "utils/linearsearch.h" #define SlruFileName(ctl, path, seg) \ snprintf(path, MAXPGPATH, "%s/%04X", (ctl)->Dir, seg) @@ -812,16 +813,12 @@ SlruPhysicalWritePage(SlruCtl ctl, int pageno, int slotno, SlruWriteAll fdata) */ if (fdata) { - int i; + int *match; - for (i = 0; i < fdata->num_files; i++) - { - if (fdata->segno[i] == segno) - { - fd = fdata->fd[i]; - break; - } - } + if ((match = (int *) pg_linearsearch_uint32(segno, + (uint32 *) fdata->segno, + fdata->num_files))) + fd = fdata->fd[match - fdata->segno]; } if (fd < 0) diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index 6b6720c690..06e045c0e0 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -60,6 +60,7 @@ #include "utils/acl.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/linearsearch.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" @@ -2446,7 +2447,7 @@ pg_extension_config_dump(PG_FUNCTION_ARGS) { /* Modify or extend existing extconfig array */ Oid *arrayData; - int i; + Oid *match; a = DatumGetArrayTypeP(arrayDatum); @@ -2461,14 +2462,8 @@ pg_extension_config_dump(PG_FUNCTION_ARGS) arrayIndex = arrayLength + 1; /* set up to add after end */ - for (i = 0; i < arrayLength; i++) - { - if (arrayData[i] == tableoid) - { - arrayIndex = i + 1; /* replace this element instead */ - break; - } - } + if ((match = pg_linearsearch_uint32(tableoid, arrayData, arrayLength))) + arrayIndex = (match - arrayData) + 1; /* replace this element instead */ a = array_set(a, 1, &arrayIndex, elementDatum, @@ -2582,7 +2577,7 @@ extension_config_remove(Oid extensionoid, Oid tableoid) else { Oid *arrayData; - int i; + Oid *match; a = DatumGetArrayTypeP(arrayDatum); @@ -2597,14 +2592,8 @@ extension_config_remove(Oid extensionoid, Oid tableoid) arrayIndex = -1; /* flag for no deletion needed */ - for (i = 0; i < arrayLength; i++) - { - if (arrayData[i] == tableoid) - { - arrayIndex = i; /* index to remove */ - break; - } - } + if ((match = pg_linearsearch_uint32(tableoid, arrayData, arrayLength))) + arrayIndex = match - arrayData; /* index to remove */ } /* If tableoid is not in extconfig, nothing to do */ diff --git a/src/backend/commands/tsearchcmds.c b/src/backend/commands/tsearchcmds.c index 4cc4e3c00f..6ae41bdd56 100644 --- a/src/backend/commands/tsearchcmds.c +++ b/src/backend/commands/tsearchcmds.c @@ -44,6 +44,7 @@ #include "tsearch/ts_utils.h" #include "utils/builtins.h" #include "utils/fmgroids.h" +#include "utils/linearsearch.h" #include "utils/lsyscache.h" #include "utils/rel.h" #include "utils/syscache.h" @@ -1302,14 +1303,10 @@ MakeConfigurationMapping(AlterTSConfigurationStmt *stmt, { bool tokmatch = false; - for (j = 0; j < ntoken; j++) - { - if (cfgmap->maptokentype == tokens[j]) - { - tokmatch = true; - break; - } - } + if (pg_linearsearch_uint32(cfgmap->maptokentype, + (uint32 *) tokens, ntoken)) + tokmatch = true; + if (!tokmatch) continue; } diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c index f1fd7f7e8b..2c9cbe6d52 100644 --- a/src/backend/executor/execParallel.c +++ b/src/backend/executor/execParallel.c @@ -47,6 +47,7 @@ #include "tcop/tcopprot.h" #include "utils/datum.h" #include "utils/dsa.h" +#include "utils/linearsearch.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/snapmgr.h" @@ -1022,12 +1023,15 @@ ExecParallelRetrieveInstrumentation(PlanState *planstate, int ibytes; int plan_node_id = planstate->plan->plan_node_id; MemoryContext oldcontext; + int *match; /* Find the instrumentation for this node. */ - for (i = 0; i < instrumentation->num_plan_nodes; ++i) - if (instrumentation->plan_node_id[i] == plan_node_id) - break; - if (i >= instrumentation->num_plan_nodes) + match = (int *) pg_linearsearch_uint32(plan_node_id, + (uint32 *) instrumentation->plan_node_id, + instrumentation->num_plan_nodes); + if (match) + i = match - instrumentation->plan_node_id; + else elog(ERROR, "plan node %d not found", plan_node_id); /* Accumulate the statistics from all workers. */ @@ -1265,6 +1269,7 @@ ExecParallelReportInstrumentation(PlanState *planstate, int i; int plan_node_id = planstate->plan->plan_node_id; Instrumentation *instrument; + int *match; InstrEndLoop(planstate->instrument); @@ -1274,10 +1279,12 @@ ExecParallelReportInstrumentation(PlanState *planstate, * we're pushing down sufficiently large plan trees. For now, do it the * slow, dumb way. */ - for (i = 0; i < instrumentation->num_plan_nodes; ++i) - if (instrumentation->plan_node_id[i] == plan_node_id) - break; - if (i >= instrumentation->num_plan_nodes) + match = (int *) pg_linearsearch_uint32(plan_node_id, + (uint32 *) instrumentation->plan_node_id, + instrumentation->num_plan_nodes); + if (match) + i = match - instrumentation->plan_node_id; + else elog(ERROR, "plan node %d not found", plan_node_id); /* diff --git a/src/backend/parser/parse_coerce.c b/src/backend/parser/parse_coerce.c index c4e958e4aa..fd9291a4a4 100644 --- a/src/backend/parser/parse_coerce.c +++ b/src/backend/parser/parse_coerce.c @@ -27,6 +27,7 @@ #include "utils/builtins.h" #include "utils/datum.h" /* needed for datumIsEqual() */ #include "utils/fmgroids.h" +#include "utils/linearsearch.h" #include "utils/lsyscache.h" #include "utils/syscache.h" #include "utils/typcache.h" @@ -2920,11 +2921,11 @@ check_valid_internal_signature(Oid ret_type, { if (ret_type == INTERNALOID) { - for (int i = 0; i < nargs; i++) - { - if (declared_arg_types[i] == ret_type) - return NULL; /* OK */ - } + if (pg_linearsearch_uint32(ret_type, + unconstify(Oid *, declared_arg_types), + nargs)) + return NULL; /* OK */ + return pstrdup(_("A result of type internal requires at least one input of type internal.")); } else diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c index 0555b02a8d..157dab61d1 100644 --- a/src/backend/storage/ipc/procarray.c +++ b/src/backend/storage/ipc/procarray.c @@ -63,6 +63,7 @@ #include "storage/spin.h" #include "utils/acl.h" #include "utils/builtins.h" +#include "utils/linearsearch.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -1588,11 +1589,8 @@ TransactionIdIsInProgress(TransactionId xid) Assert(TransactionIdIsValid(topxid)); if (!TransactionIdEquals(topxid, xid)) { - for (int i = 0; i < nxids; i++) - { - if (TransactionIdEquals(xids[i], topxid)) - return true; - } + if (pg_linearsearch_uint32(topxid, xids, nxids)) + return true; } cachedXidIsNotInProgress = xid; diff --git a/src/backend/storage/lmgr/predicate.c b/src/backend/storage/lmgr/predicate.c index 5136da6ea3..11d3412650 100644 --- a/src/backend/storage/lmgr/predicate.c +++ b/src/backend/storage/lmgr/predicate.c @@ -207,6 +207,7 @@ #include "storage/predicate_internals.h" #include "storage/proc.h" #include "storage/procarray.h" +#include "utils/linearsearch.h" #include "utils/rel.h" #include "utils/snapmgr.h" @@ -4065,7 +4066,6 @@ static bool XidIsConcurrent(TransactionId xid) { Snapshot snap; - uint32 i; Assert(TransactionIdIsValid(xid)); Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny())); @@ -4078,11 +4078,8 @@ XidIsConcurrent(TransactionId xid) if (TransactionIdFollowsOrEquals(xid, snap->xmax)) return true; - for (i = 0; i < snap->xcnt; i++) - { - if (xid == snap->xip[i]) - return true; - } + if (pg_linearsearch_uint32(xid, snap->xip, snap->xcnt)) + return true; return false; } diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index f9b324efec..48cb0c87bb 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -20,6 +20,7 @@ #include "storage/predicate_internals.h" #include "utils/array.h" #include "utils/builtins.h" +#include "utils/linearsearch.h" /* @@ -506,17 +507,15 @@ pg_blocking_pids(PG_FUNCTION_ARGS) { /* conflict in lock requests; who's in front in wait queue? */ bool ahead = false; - int k; - for (k = 0; k < bproc->num_waiters; k++) + if (pg_linearsearch_uint32(instance->pid, + (uint32 *) preceding_waiters, + bproc->num_waiters)) { - if (preceding_waiters[k] == instance->pid) - { - /* soft block: this entry is ahead of blocked proc */ - ahead = true; - break; - } + /* soft block: this entry is ahead of blocked proc */ + ahead = true; } + if (!ahead) continue; /* not blocked by this entry */ } @@ -598,8 +597,7 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) int num_interesting_pids; int num_blocking_pids; int dummy; - int i, - j; + int i; /* Validate the passed-in array */ Assert(ARR_ELEMTYPE(interesting_pids_a) == INT4OID); @@ -632,11 +630,10 @@ pg_isolation_test_session_is_blocked(PG_FUNCTION_ARGS) * for a match. */ for (i = 0; i < num_blocking_pids; i++) - for (j = 0; j < num_interesting_pids; j++) - { - if (blocking_pids[i] == interesting_pids[j]) - PG_RETURN_BOOL(true); - } + if (pg_linearsearch_uint32(blocking_pids[i], + (uint32 *) interesting_pids, + num_interesting_pids)) + PG_RETURN_BOOL(true); /* * Check if blocked_pid is waiting for a safe snapshot. We could in -- 2.25.1