Re: ALTER TABLE ADD COLUMN fast default

Andres Freund Wed, 28 Mar 2018 00:02:05 -0700

Hi,

On 2018-03-26 09:02:10 +1030, Andrew Dunstan wrote:
> Thanks for this, all looks good. Here is the consolidate patch
> rebased. If there are no further comments I propose to commit this in
> a few days time.


Here's a bit of post commit review:

@@ -1310,13 +1679,11 @@ slot_getsomeattrs(TupleTableSlot *slot, int attnum)
 
        /*
         * If tuple doesn't have all the atts indicated by tupleDesc, read the
-        * rest as null
+        * rest as NULLs or missing values
         */
-       for (; attno < attnum; attno++)
-       {
-               slot->tts_values[attno] = (Datum) 0;
-               slot->tts_isnull[attno] = true;
-       }
+       if (attno < attnum)
+               slot_getmissingattrs(slot, attno, attnum);
+
        slot->tts_nvalid = attnum;
 }

It's worthwhile to note that this'll re-process *all* missing values,
even if they've already been deformed. I.e. if
slot_getmissingattrs(.., first-missing)
slot_getmissingattrs(.., first-missing + 1)
slot_getmissingattrs(.., first-missing + 2)
is called, all three missing values will be copied every time. That's
because tts_nvalid isn't taken into account.  I wonder if slot_getmissingattrs
could take tts_nvalid into account?

I also wonder if this doesn't deserve an unlikely(), to avoid the cost
of spilling registers in the hot branch of not missing any values.


+       else
+       {
+               /* if there is a missing values array we must process them one 
by one */
+               for (missattnum = lastAttNum - 1;
+                        missattnum >= startAttNum;
+                        missattnum--)
+               {
+                       slot->tts_values[missattnum] = 
attrmiss[missattnum].ammissing;
+                       slot->tts_isnull[missattnum] =
+                               !attrmiss[missattnum].ammissingPresent;
+               }
+       }
+}

Why is this done backwards? It's noticeably slower to walk arrays
backwards on some CPU microarchitectures.



@@ -176,6 +179,23 @@ CreateTupleDescCopyConstr(TupleDesc tupdesc)
                        }
                }
 


@@ -469,6 +503,29 @@ equalTupleDescs(TupleDesc tupdesc1, TupleDesc tupdesc2)
                        if (strcmp(defval1->adbin, defval2->adbin) != 0)
                                return false;
                }
+               if (constr1->missing)
+               {
+                       if (!constr2->missing)
+                               return false;
+                       for (i = 0; i < tupdesc1->natts; i++)
+                       {
+                               AttrMissing *missval1 = constr1->missing + i;
+                               AttrMissing *missval2 = constr2->missing + i;

It's a bit odd to not use array indexing here?


@@ -625,7 +625,15 @@ ExecCopySlotMinimalTuple(TupleTableSlot *slot)
        if (slot->tts_mintuple)
                return heap_copy_minimal_tuple(slot->tts_mintuple);
        if (slot->tts_tuple)
-               return minimal_tuple_from_heap_tuple(slot->tts_tuple);
+       {
+               if (TTS_HAS_PHYSICAL_TUPLE(slot) &&
+                       HeapTupleHeaderGetNatts(slot->tts_tuple->t_data)
+                       < slot->tts_tupleDescriptor->natts)
+                       return minimal_expand_tuple(slot->tts_tuple,
+                                                                               
slot->tts_tupleDescriptor);
+               else
+                       return minimal_tuple_from_heap_tuple(slot->tts_tuple);
+       }
 

What's the TTS_HAS_PHYSICAL_TUPLE doing here? How can this be relevant
given the previous tts_mintuple check? Am I missing something?



@@ -563,10 +569,63 @@ RelationBuildTupleDesc(Relation relation)
                                        
MemoryContextAllocZero(CacheMemoryContext,
                                                                                
   relation->rd_rel->relnatts *
                                                                                
   sizeof(AttrDefault));
-                       attrdef[ndef].adnum = attp->attnum;
+                       attrdef[ndef].adnum = attnum;
                        attrdef[ndef].adbin = NULL;
+
                        ndef++;
                }
+
+               /* Likewise for a missing value */
+               if (attp->atthasmissing)
+               {
+                       Datum           missingval;
+                       bool            missingNull;
+
+                       /* Do we have a missing value? */
+                       missingval = heap_getattr(pg_attribute_tuple,
+                                                                         
Anum_pg_attribute_attmissingval,
+                                                                         
pg_attribute_desc->rd_att,
+                                                                         
&missingNull);
+                       if (!missingNull)
+                       {
+                               /* Yes, fetch from the array */
+                               MemoryContext oldcxt;
+                               bool            is_null;
+                               int                     one = 1;
+                               Datum           missval;
+
+                               if (attrmiss == NULL)
+                                       attrmiss = (AttrMissing *)
+                                               
MemoryContextAllocZero(CacheMemoryContext,
+                                                                               
           relation->rd_rel->relnatts *
+                                                                               
           sizeof(AttrMissing));
+
+                               missval = array_get_element(missingval,
+                                                                               
        1,
+                                                                               
        &one,
+                                                                               
        -1,
+                                                                               
        attp->attlen,
+                                                                               
        attp->attbyval,
+                                                                               
        attp->attalign,
+                                                                               
        &is_null);
+                               Assert(!is_null);
+                               if (attp->attbyval)
+                               {
+                                       /* for copy by val just copy the datum 
direct */
+                                       attrmiss[attnum - 1].ammissing = 
missval;
+                               }
+                               else
+                               {
+                                       /* otherwise copy in the correct 
context */
+                                       oldcxt = 
MemoryContextSwitchTo(CacheMemoryContext);
+                                       attrmiss[attnum - 1].ammissing = 
datumCopy(missval,
+                                                                               
                                           attp->attbyval,
+                                                                               
                                           attp->attlen);
+                                       MemoryContextSwitchTo(oldcxt);
+                               }
+                               attrmiss[attnum - 1].ammissingPresent = true;
+                       }
+               }
                need--;
                if (need == 0)
                        break;

I'm still strongly object to do doing this unconditionally for queries
that never need any of this.  We're can end up with a significant number
of large tuples in memory here, and then copy that through dozens of
tupledesc copies in queries.


@@ -167,6 +170,12 @@ CATALOG(pg_attribute,1249) BKI_BOOTSTRAP BKI_WITHOUT_OIDS 
BKI_ROWTYPE_OID(75) BK
 
        /* Column-level FDW options */
        text            attfdwoptions[1] BKI_DEFAULT(_null_);
+
+       /*
+        * Missing value for added columns. This is a one element array which 
lets
+        * us store a value of the attribute type here.
+        */
+       anyarray        attmissingval BKI_DEFAULT(_null_);
 #endif
 } FormData_pg_attribute;
 

Still think this is a bad location, and it'll reduce cache hit ratio for
catalog lookups.

Greetings,

Andres Freund

Re: ALTER TABLE ADD COLUMN fast default

Reply via email to