On Wed, Aug 03, 2022 at 11:06:58AM -0700, Andres Freund wrote: > On 2022-08-02 16:43:57 -0700, Nathan Bossart wrote: >> >> +#ifdef USE_SSE2 >> >> +pg_attribute_no_sanitize_alignment() >> >> +#endif >> > >> > What's the deal with this annotation? Needs a comment. >> >> Will do. c.h suggests that this should only be used for x86-specific code. > > What I'm asking is why the annotation is needed at all?
Upon further inspection, I don't think this is needed. I originally borrowed it from the SSE version of the CRC code, but while it is trivial to produce alignment failures with the CRC code, I haven't been able to generate any with my patches. Looking at the code, I'm not sure why I was worried about this in the first place. Please pardon the brain fade. Here is a new patch set without the annotation. -- Nathan Bossart Amazon Web Services: https://aws.amazon.com
>From bd523948876f801b7f1b909f399b2cc41acf06cf Mon Sep 17 00:00:00 2001 From: John Naylor <john.nay...@postgresql.org> Date: Wed, 3 Aug 2022 11:07:40 +0700 Subject: [PATCH v6 1/3] Support SSE2 intrinsics where available SSE2 vector instructions are part of the spec for the 64-bit x86 architecture. Until now we have relied on the compiler to autovectorize in some limited situations, but some useful coding idioms can only be expressed explicitly via compiler intrinsics. To this end, add a header that defines USE_SSE2 when available. While x86-only for now, we can add other architectures in the future. This will also be the intended place for low-level hepler functions that use vector operations. Reviewed by Nathan Bossart Discussion: https://www.postgresql.org/message-id/CAFBsxsE2G_H_5Wbw%2BNOPm70-BK4xxKf86-mRzY%3DL2sLoQqM%2B-Q%40mail.gmail.com --- src/include/port/simd.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 src/include/port/simd.h diff --git a/src/include/port/simd.h b/src/include/port/simd.h new file mode 100644 index 0000000000..a571e79f57 --- /dev/null +++ b/src/include/port/simd.h @@ -0,0 +1,30 @@ +/*------------------------------------------------------------------------- + * + * simd.h + * Support for platform-specific vector operations. + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * src/include/port/simd.h + * + *------------------------------------------------------------------------- + */ +#ifndef SIMD_H +#define SIMD_H + +/* + * SSE2 instructions are part of the spec for the 64-bit x86 ISA. We assume + * that compilers targeting this architecture understand SSE2 intrinsics. + * + * We use emmintrin.h rather than the comprehensive header immintrin.h in + * order to exclude extensions beyond SSE2. This is because MSVC, at least, + * will allow the use of intrinsics that haven't been enabled at compile + * time. + */ +#if (defined(__x86_64__) || defined(_M_AMD64)) +#include <emmintrin.h> +#define USE_SSE2 +#endif + +#endif /* SIMD_H */ -- 2.25.1
>From 9b70c265fa7a254117436eed59c2d0effd07a00d Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Wed, 3 Aug 2022 09:49:04 -0700 Subject: [PATCH v6 2/3] Introduce optimized routine for linear searches through an array of integers. If SSE2 is available, this function uses it to speed up the search. Otherwise, it uses a simple 'for' loop. This is a prerequisite for a follow-up commit that will use this function to optimize [sub]xip lookups in XidInMVCCSnapshot(), but it can be used anywhere that might benefit from such an optimization. It might be worthwhile to add an ARM-specific code path to this function in the future. Author: Nathan Bossart Reviewed by: Andres Freund, John Naylor Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13 --- src/include/utils/linearsearch.h | 70 ++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 src/include/utils/linearsearch.h diff --git a/src/include/utils/linearsearch.h b/src/include/utils/linearsearch.h new file mode 100644 index 0000000000..a23ad45d82 --- /dev/null +++ b/src/include/utils/linearsearch.h @@ -0,0 +1,70 @@ +/*------------------------------------------------------------------------- + * + * linearsearch.h + * Optimized linear search routines. + * + * Copyright (c) 2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/include/utils/linearsearch.h + * + *------------------------------------------------------------------------- + */ +#ifndef LINEARSEARCH_H +#define LINEARSEARCH_H + +#include "port/simd.h" + +/* + * pg_linearsearch_uint32 + * + * Returns true if there is an element in 'base' that equals 'key'. Otherwise, + * returns false. + */ +static inline bool +pg_linearsearch_uint32(uint32 key, uint32 *base, uint32 nelem) +{ + uint32 i = 0; + + /* If possible, use SSE2 intrinsics to speed up the search. */ +#ifdef USE_SSE2 + __m128i keys = _mm_set1_epi32(key); /* load 4 copies of key */ + uint32 its = nelem & ~0xF; /* round down to multiple of 16 */ + + for (; i < its; i += 16) + { + /* load the next 16 values into __m128i variables */ + __m128i vals1 = _mm_loadu_si128((__m128i *) &base[i]); + __m128i vals2 = _mm_loadu_si128((__m128i *) &base[i + 4]); + __m128i vals3 = _mm_loadu_si128((__m128i *) &base[i + 8]); + __m128i vals4 = _mm_loadu_si128((__m128i *) &base[i + 12]); + + /* perform the comparisons */ + __m128i result1 = _mm_cmpeq_epi32(keys, vals1); + __m128i result2 = _mm_cmpeq_epi32(keys, vals2); + __m128i result3 = _mm_cmpeq_epi32(keys, vals3); + __m128i result4 = _mm_cmpeq_epi32(keys, vals4); + + /* shrink the results into a single variable */ + __m128i tmp1 = _mm_packs_epi32(result1, result2); + __m128i tmp2 = _mm_packs_epi32(result3, result4); + __m128i result = _mm_packs_epi16(tmp1, tmp2); + + /* see if there was a match */ + if (_mm_movemask_epi8(result) != 0) + return true; + } +#endif + + /* Process the remaining elements the slow way. */ + for (; i < nelem; i++) + { + if (key == base[i]) + return true; + } + + return false; +} + +#endif /* LINEARSEARCH_H */ -- 2.25.1
>From 1929db646a2b424bedb7f36779a6ea9da5ae0b08 Mon Sep 17 00:00:00 2001 From: Nathan Bossart <nathandboss...@gmail.com> Date: Wed, 3 Aug 2022 09:59:28 -0700 Subject: [PATCH v6 3/3] Optimize linear searches in XidInMVCCSnapshot(). This change makes use of the recently-introduced optimized linear search routine to speed up searches through the [sub]xip arrays when possible, which should improve performance significantly when the arrays are large. Author: Nathan Bossart Reviewed by: Andres Freund, John Naylor Discussion: https://postgr.es/m/20220713170950.GA3116318%40nathanxps13 --- src/backend/utils/time/snapmgr.c | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/src/backend/utils/time/snapmgr.c b/src/backend/utils/time/snapmgr.c index 5bc2a15160..834c8867d4 100644 --- a/src/backend/utils/time/snapmgr.c +++ b/src/backend/utils/time/snapmgr.c @@ -63,6 +63,7 @@ #include "storage/sinvaladt.h" #include "storage/spin.h" #include "utils/builtins.h" +#include "utils/linearsearch.h" #include "utils/memutils.h" #include "utils/old_snapshot.h" #include "utils/rel.h" @@ -2284,8 +2285,6 @@ RestoreTransactionSnapshot(Snapshot snapshot, void *source_pgproc) bool XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) { - uint32 i; - /* * Make a quick range check to eliminate most XIDs without looking at the * xip arrays. Note that this is OK even if we convert a subxact XID to @@ -2317,13 +2316,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) if (!snapshot->suboverflowed) { /* we have full data, so search subxip */ - int32 j; - - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; /* not there, fall through to search xip[] */ } @@ -2344,16 +2338,11 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) return false; } - for (i = 0; i < snapshot->xcnt; i++) - { - if (TransactionIdEquals(xid, snapshot->xip[i])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->xip, snapshot->xcnt)) + return true; } else { - int32 j; - /* * In recovery we store all xids in the subxact array because it is by * far the bigger array, and we mostly don't know which xids are @@ -2383,11 +2372,8 @@ XidInMVCCSnapshot(TransactionId xid, Snapshot snapshot) * indeterminate xid. We don't know whether it's top level or subxact * but it doesn't matter. If it's present, the xid is visible. */ - for (j = 0; j < snapshot->subxcnt; j++) - { - if (TransactionIdEquals(xid, snapshot->subxip[j])) - return true; - } + if (pg_linearsearch_uint32(xid, snapshot->subxip, snapshot->subxcnt)) + return true; } return false; -- 2.25.1