Hi,

On 2022-10-31 14:37:39 -0400, Melanie Plageman wrote:
> and heapgetpage(). heapgettup() and heapgettup_pagemode() have a lot of
> duplicated code, confusingly nested if statements, and unnecessary local
> variables. While working on a feature for the AIO/DIO patchset, I
> noticed that it was difficult to add new code to heapgettup() and
> heapgettup_pagemode() because of how the functions are written.

Thanks for working on this - the current state is quite painful.


> From cde2d6720f4f5ab2531c22ad4a5f0d9e6ec1039d Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Wed, 26 Oct 2022 20:00:34 -0400
> Subject: [PATCH v1 1/3] Remove breaks in HeapTupleSatisfiesVisibility
>
> breaks in HeapTupleSatisfiesVisibility were superfluous
> ---
>  src/backend/access/heap/heapam_visibility.c | 7 -------
>  1 file changed, 7 deletions(-)
>
> diff --git a/src/backend/access/heap/heapam_visibility.c 
> b/src/backend/access/heap/heapam_visibility.c
> index 6e33d1c881..dd5d5da190 100644
> --- a/src/backend/access/heap/heapam_visibility.c
> +++ b/src/backend/access/heap/heapam_visibility.c
> @@ -1769,25 +1769,18 @@ HeapTupleSatisfiesVisibility(HeapTuple htup, Snapshot 
> snapshot, Buffer buffer)
>       {
>               case SNAPSHOT_MVCC:
>                       return HeapTupleSatisfiesMVCC(htup, snapshot, buffer);
> -                     break;
>               case SNAPSHOT_SELF:
>                       return HeapTupleSatisfiesSelf(htup, snapshot, buffer);
> -                     break;
>               case SNAPSHOT_ANY:
>                       return HeapTupleSatisfiesAny(htup, snapshot, buffer);
> -                     break;
>               case SNAPSHOT_TOAST:
>                       return HeapTupleSatisfiesToast(htup, snapshot, buffer);
> -                     break;
>               case SNAPSHOT_DIRTY:
>                       return HeapTupleSatisfiesDirty(htup, snapshot, buffer);
> -                     break;
>               case SNAPSHOT_HISTORIC_MVCC:
>                       return HeapTupleSatisfiesHistoricMVCC(htup, snapshot, 
> buffer);
> -                     break;
>               case SNAPSHOT_NON_VACUUMABLE:
>                       return HeapTupleSatisfiesNonVacuumable(htup, snapshot, 
> buffer);
> -                     break;
>       }

Not sure what the author of this code, a certain Mr Freund, was thinking when
he added those returns...


> From 9d8b01960463dc64ff5b111d523ff80fce3017af Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Mon, 31 Oct 2022 13:40:06 -0400
> Subject: [PATCH v1 2/3] Turn HeapKeyTest macro into function
>
> This should always be inlined appropriately now. It is easier to read as
> a function. Also, remove unused include in catcache.c.
> ---
>  src/backend/access/heap/heapam.c   | 10 ++--
>  src/backend/utils/cache/catcache.c |  1 -
>  src/include/access/valid.h         | 76 ++++++++++++------------------
>  3 files changed, 36 insertions(+), 51 deletions(-)
>
> diff --git a/src/backend/access/heap/heapam.c 
> b/src/backend/access/heap/heapam.c
> index 12be87efed..1c995faa12 100644
> --- a/src/backend/access/heap/heapam.c
> +++ b/src/backend/access/heap/heapam.c
> @@ -716,8 +716,10 @@ heapgettup(HeapScanDesc scan,
>                                                                               
>                         snapshot);
>
>                               if (valid && key != NULL)
> -                                     HeapKeyTest(tuple, 
> RelationGetDescr(scan->rs_base.rs_rd),
> -                                                             nkeys, key, 
> valid);
> +                             {
> +                                     valid = HeapKeyTest(tuple, 
> RelationGetDescr(scan->rs_base.rs_rd),
> +                                                             nkeys, key);
> +                             }
>
>                               if (valid)
>                               {

superfluous parens.



> --- a/src/include/access/valid.h
> +++ b/src/include/access/valid.h
> @@ -19,51 +19,35 @@
>   *
>   *           Test a heap tuple to see if it satisfies a scan key.
>   */
> -#define HeapKeyTest(tuple, \
> -                                     tupdesc, \
> -                                     nkeys, \
> -                                     keys, \
> -                                     result) \
> -do \
> -{ \
> -     /* Use underscores to protect the variables passed in as parameters */ \
> -     int                     __cur_nkeys = (nkeys); \
> -     ScanKey         __cur_keys = (keys); \
> - \
> -     (result) = true; /* may change */ \
> -     for (; __cur_nkeys--; __cur_keys++) \
> -     { \
> -             Datum   __atp; \
> -             bool    __isnull; \
> -             Datum   __test; \
> - \
> -             if (__cur_keys->sk_flags & SK_ISNULL) \
> -             { \
> -                     (result) = false; \
> -                     break; \
> -             } \
> - \
> -             __atp = heap_getattr((tuple), \
> -                                                      __cur_keys->sk_attno, \
> -                                                      (tupdesc), \
> -                                                      &__isnull); \
> - \
> -             if (__isnull) \
> -             { \
> -                     (result) = false; \
> -                     break; \
> -             } \
> - \
> -             __test = FunctionCall2Coll(&__cur_keys->sk_func, \
> -                                                                
> __cur_keys->sk_collation, \
> -                                                                __atp, 
> __cur_keys->sk_argument); \
> - \
> -             if (!DatumGetBool(__test)) \
> -             { \
> -                     (result) = false; \
> -                     break; \
> -             } \
> -     } \
> -} while (0)
> +static inline bool
> +HeapKeyTest(HeapTuple tuple, TupleDesc tupdesc, int nkeys, ScanKey keys)
> +{
> +     int cur_nkeys = nkeys;
> +     ScanKey cur_key = keys;
> +
> +     for (; cur_nkeys--; cur_key++)
> +     {
> +             Datum atp;
> +             bool isnull;
> +             Datum test;
> +
> +             if (cur_key->sk_flags & SK_ISNULL)
> +                     return false;
> +
> +             atp = heap_getattr(tuple, cur_key->sk_attno, tupdesc, &isnull);
> +
> +             if (isnull)
> +                     return false;
> +
> +             test = FunctionCall2Coll(&cur_key->sk_func,
> +                                                             
> cur_key->sk_collation,
> +                                                             atp, 
> cur_key->sk_argument);
> +
> +             if (!DatumGetBool(test))
> +                     return false;
> +     }
> +
> +     return true;
> +}

Seems like a simple and nice win in readability.

I recall looking at this in the past and thinking that there was some
additional subtlety here, but I can't see what that'd be.



> From a894ce38c488df6546392b9f3bd894b67edf951e Mon Sep 17 00:00:00 2001
> From: Melanie Plageman <melanieplage...@gmail.com>
> Date: Mon, 31 Oct 2022 13:40:29 -0400
> Subject: [PATCH v1 3/3] Refactor heapgettup* and heapgetpage
>
> Simplify heapgettup(), heapgettup_pagemode(), and heapgetpage(). All
> three contained several unnecessary local variables, duplicate code, and
> nested if statements. Streamlining these improves readability and
> extensibility.

It'd be nice to break this into slightly smaller chunks.


> +
> +static inline void heapgettup_no_movement(HeapScanDesc scan)
> +{

FWIW, for function definitions we keep the return type (and with that also the
the "static inline") on a separate line.


> +     ItemId          lpp;
> +     OffsetNumber lineoff;
> +     BlockNumber page;
> +     Page dp;
> +     HeapTuple       tuple = &(scan->rs_ctup);
> +     /*
> +     * ``no movement'' scan direction: refetch prior tuple
> +     */
> +
> +     /* Since the tuple was previously fetched, needn't lock page here */
> +     if (!scan->rs_inited)
> +     {
> +             Assert(!BufferIsValid(scan->rs_cbuf));
> +             tuple->t_data = NULL;
> +             return;

Is it possible to have a no-movement scan with an uninitialized scan? That
doesn't really seem to make sense. At least that's how I understand the
explanation for NoMovement nearby:
 * dir == NoMovementScanDirection means "re-fetch the tuple indicated
 * by scan->rs_ctup".

We can't have a rs_ctup without an already started scan, no?

Looks like this is pre-existing code that you just moved, but it still seems
wrong.


> +     }
> +     page = ItemPointerGetBlockNumber(&(tuple->t_self));
> +     if (page != scan->rs_cblock)
> +             heapgetpage((TableScanDesc) scan, page);


We have a
        BlockNumber page;
and
        Page            dp;
in this code which seems almost intentionally confusing. This again is a
pre-existing issue but perhaps we could clean it up first?



> +static inline Page heapgettup_continue_page(HeapScanDesc scan, BlockNumber 
> page, ScanDirection dir,
> +             int *linesleft, OffsetNumber *lineoff)
> +{
> +     HeapTuple       tuple = &(scan->rs_ctup);

Hm. Finding the next offset via rs_ctup doesn't seem quite right. For one,
it's not actually that cheap to extract the offset from an ItemPointer because
of the the way we pack it into ItemPointerData.


> +     Page dp = BufferGetPage(scan->rs_cbuf);
> +     TestForOldSnapshot(scan->rs_base.rs_snapshot, scan->rs_base.rs_rd, dp);

Newlines between definitions and code :)

Perhaps worth asserting that the scan is initialized and that rs_cbuf is valid?


> +     if (ScanDirectionIsForward(dir))
> +     {
> +             *lineoff = 
> OffsetNumberNext(ItemPointerGetOffsetNumber(&(tuple->t_self)));
> +             *linesleft = PageGetMaxOffsetNumber(dp) - (*lineoff) + 1;

We can't access PageGetMaxOffsetNumber etc without holding a lock on the
page. It's not immediately obvious that that is held in all paths.


> +static inline BlockNumber heapgettup_initial_page(HeapScanDesc scan, 
> ScanDirection dir)
> +{
> +     Assert(!ScanDirectionIsNoMovement(dir));
> +     Assert(!scan->rs_inited);

Is there a reason we couldn't set rs_inited in here, rather than reapeating
that in all callers?


ISTM that this function should deal with the
                        /*
                         * return null immediately if relation is empty
                         */

logic, I think you now are repeating that check on every call to heapgettup().


> @@ -511,182 +711,55 @@ heapgettup(HeapScanDesc scan,
>                  ScanKey key)
>  {
>       HeapTuple       tuple = &(scan->rs_ctup);
> -     Snapshot        snapshot = scan->rs_base.rs_snapshot;
> -     bool            backward = ScanDirectionIsBackward(dir);
>       BlockNumber page;
> -     bool            finished;
>       Page            dp;
> -     int                     lines;
>       OffsetNumber lineoff;
>       int                     linesleft;
> -     ItemId          lpp;
> +
> +     if (ScanDirectionIsNoMovement(dir))
> +             return heapgettup_no_movement(scan);

Maybe add an unlikely() - this path is barely ever used...


>       /*
> -      * calculate next starting lineoff, given scan direction
> +      * return null immediately if relation is empty
>        */
> -     if (ScanDirectionIsForward(dir))
> +     if (scan->rs_nblocks == 0 || scan->rs_numblocks == 0)
>       {

As mentioned above, I don't think we should repeat the nblocks check on every
call.


> +             page = scan->rs_cblock;
> +             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
> +             dp = heapgettup_continue_page(scan, page, dir, &linesleft, 
> &lineoff);
> +             goto continue_page;
>       }
>
>       /*
>        * advance the scan until we find a qualifying tuple or run out of stuff
>        * to scan
>        */
> -     lpp = PageGetItemId(dp, lineoff);
> -     for (;;)
> +     while (page != InvalidBlockNumber)
>       {
> +             heapgetpage((TableScanDesc) scan, page);
> +             LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
> +             dp = heapgettup_start_page(scan, page, dir, &linesleft, 
> &lineoff);
> +     continue_page:


I don't like the goto continue_page at all.  Seems that the paths leading here
should call LockBuffer(), heapgettup_start_page() etc?  Possibly a do {} while
() loop could do the trick as well.



Greetings,

Andres Freund


Reply via email to