Re: Tid scan improvements

Andres Freund Tue, 16 Feb 2021 14:05:59 -0800

Hi,

On 2021-02-04 23:51:39 +1300, David Rowley wrote:


> I ended up adding just two new API functions to table AM.
> 
> void (*scan_set_tid_range) (TableScanDesc sscan,
>    ItemPointer mintid,
>    ItemPointer maxtid);
> 
> and
> bool (*scan_tid_range_nextslot) (TableScanDesc sscan,
> ScanDirection direction,
> TupleTableSlot *slot);
> 
> I added an additional function in tableam.h that does not have a
> corresponding API function:
> 
> static inline TableScanDesc
> table_tid_range_start(Relation rel, Snapshot snapshot,
>   ItemPointer mintid,
>   ItemPointer maxtid)
> 
> This just calls the standard scan_begin then calls scan_set_tid_range
> setting the specified mintid and maxtid.

Hm. But that means we can't rescan?


> I also added 2 new fields to TableScanDesc:
> 
> ItemPointerData rs_mintid;
> ItemPointerData rs_maxtid;
> 
> I didn't quite see a need to have a new start and end scan API function.

Yea. I guess they're not that large. Avoiding that was one of the two
reasons to have a separate scan state somewhere. The other that it
seemed like it'd possibly a bit cleaner API wise to deal with rescan.


> +bool
> +heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
> +                                               TupleTableSlot *slot)
> +{
> +     HeapScanDesc scan = (HeapScanDesc) sscan;
> +     ItemPointer mintid = &sscan->rs_mintid;
> +     ItemPointer maxtid = &sscan->rs_maxtid;
> +
> +     /* Note: no locking manipulations needed */
> +     for (;;)
> +     {
> +             if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
> +                     heapgettup_pagemode(scan, direction, sscan->rs_nkeys, 
> sscan->rs_key);
> +             else
> +                     heapgettup(scan, direction, sscan->rs_nkeys, 
> sscan->rs_key);
> +
> +             if (scan->rs_ctup.t_data == NULL)
> +             {
> +                     ExecClearTuple(slot);
> +                     return false;
> +             }
> +
> +             /*
> +              * heap_set_tidrange will have used heap_setscanlimits to limit 
> the
> +              * range of pages we scan to only ones that can contain the TID 
> range
> +              * we're scanning for.  Here we must filter out any tuples from 
> these
> +              * pages that are outwith that range.
> +              */
> +             if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
> +             {
> +                     ExecClearTuple(slot);
> +
> +                     /*
> +                      * When scanning backwards, the TIDs will be in 
> descending order.
> +                      * Future tuples in this direction will be lower still, 
> so we can
> +                      * just return false to indicate there will be no more 
> tuples.
> +                      */
> +                     if (ScanDirectionIsBackward(direction))
> +                             return false;
> +
> +                     continue;
> +             }
> +
> +             /*
> +              * Likewise for the final page, we must filter out TIDs greater 
> than
> +              * maxtid.
> +              */
> +             if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
> +             {
> +                     ExecClearTuple(slot);
> +
> +                     /*
> +                      * When scanning forward, the TIDs will be in ascending 
> order.
> +                      * Future tuples in this direction will be higher 
> still, so we can
> +                      * just return false to indicate there will be no more 
> tuples.
> +                      */
> +                     if (ScanDirectionIsForward(direction))
> +                             return false;
> +                     continue;
> +             }
> +
> +             break;
> +     }
> +
> +     /*
> +      * if we get here it means we have a new current scan tuple, so point to
> +      * the proper return buffer and return the tuple.
> +      */
> +     pgstat_count_heap_getnext(scan->rs_base.rs_rd);

I wonder if there's an argument for counting the misses above via
pgstat_count_heap_fetch()? Probably not, right?


> +#define IsCTIDVar(node)  \
> +     ((node) != NULL && \
> +      IsA((node), Var) && \
> +      ((Var *) (node))->varattno == SelfItemPointerAttributeNumber && \
> +      ((Var *) (node))->varlevelsup == 0)
> +
> +typedef enum
> +{
> +     TIDEXPR_UPPER_BOUND,
> +     TIDEXPR_LOWER_BOUND
> +} TidExprType;
> +
> +/* Upper or lower range bound for scan */
> +typedef struct TidOpExpr
> +{
> +     TidExprType exprtype;           /* type of op; lower or upper */
> +     ExprState  *exprstate;          /* ExprState for a TID-yielding subexpr 
> */
> +     bool            inclusive;              /* whether op is inclusive */
> +} TidOpExpr;
> +
> +/*
> + * For the given 'expr', build and return an appropriate TidOpExpr taking 
> into
> + * account the expr's operator and operand order.
> + */
> +static TidOpExpr *
> +MakeTidOpExpr(OpExpr *expr, TidRangeScanState *tidstate)
> +{
> +     Node       *arg1 = get_leftop((Expr *) expr);
> +     Node       *arg2 = get_rightop((Expr *) expr);
> +     ExprState  *exprstate = NULL;
> +     bool            invert = false;
> +     TidOpExpr  *tidopexpr;
> +
> +     if (IsCTIDVar(arg1))
> +             exprstate = ExecInitExpr((Expr *) arg2, &tidstate->ss.ps);
> +     else if (IsCTIDVar(arg2))
> +     {
> +             exprstate = ExecInitExpr((Expr *) arg1, &tidstate->ss.ps);
> +             invert = true;
> +     }
> +     else
> +             elog(ERROR, "could not identify CTID variable");
> +
> +     tidopexpr = (TidOpExpr *) palloc(sizeof(TidOpExpr));
> +     tidopexpr->inclusive = false;   /* for now */
> +
> +     switch (expr->opno)
> +     {
> +             case TIDLessEqOperator:
> +                     tidopexpr->inclusive = true;
> +                     /* fall through */
> +             case TIDLessOperator:
> +                     tidopexpr->exprtype = invert ? TIDEXPR_LOWER_BOUND : 
> TIDEXPR_UPPER_BOUND;
> +                     break;
> +             case TIDGreaterEqOperator:
> +                     tidopexpr->inclusive = true;
> +                     /* fall through */
> +             case TIDGreaterOperator:
> +                     tidopexpr->exprtype = invert ? TIDEXPR_UPPER_BOUND : 
> TIDEXPR_LOWER_BOUND;
> +                     break;
> +             default:
> +                     elog(ERROR, "could not identify CTID operator");
> +     }
> +
> +     tidopexpr->exprstate = exprstate;
> +
> +     return tidopexpr;
> +}
> +
> +/*
> + * Extract the qual subexpressions that yield TIDs to search for,
> + * and compile them into ExprStates if they're ordinary expressions.
> + */
> +static void
> +TidExprListCreate(TidRangeScanState *tidrangestate)
> +{
> +     TidRangeScan *node = (TidRangeScan *) tidrangestate->ss.ps.plan;
> +     List       *tidexprs = NIL;
> +     ListCell   *l;
> +
> +     foreach(l, node->tidrangequals)
> +     {
> +             OpExpr     *opexpr = lfirst(l);
> +             TidOpExpr  *tidopexpr;
> +
> +             if (!IsA(opexpr, OpExpr))
> +                     elog(ERROR, "could not identify CTID expression");
> +
> +             tidopexpr = MakeTidOpExpr(opexpr, tidrangestate);
> +             tidexprs = lappend(tidexprs, tidopexpr);
> +     }
> +
> +     tidrangestate->trss_tidexprs = tidexprs;
> +}

Architecturally it feels like this is something that really belongs more
into plan time?


> +/*
> + * table_set_tidrange resets the minimum and maximum TID range to scan for a
> + * TableScanDesc created by table_beginscan_tidrange.
> + */
> +static inline void
> +table_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
> +                                ItemPointer maxtid)
> +{
> +     /* Ensure table_beginscan_tidrange() was used. */
> +     Assert((sscan->rs_flags & SO_TYPE_TIDRANGESCAN) != 0);
> +
> +     sscan->rs_rd->rd_tableam->scan_set_tidrange(sscan, mintid, maxtid);
> +}

How does this interact with rescans?

Greetings,

Andres Freund

Re: Tid scan improvements

Reply via email to