Pavel Stehule <pavel.steh...@gmail.com> writes:
> I am sending a patch that is years used in GoodData.

I'm quite unexcited about that.  I'd be the first to agree that the
ten-pages estimate is a hack, but it's not an improvement to ask users
to think of a better value ... especially not as a one-size-fits-
all-relations GUC setting.

I did have an idea that I think is better than my previous one:
rather than lying about the value of relpages, let's represent the
case where we don't know the tuple density by setting reltuples = -1
initially.  This leads to a patch that's a good bit more invasive than
the quick-hack solution, but I think it's a lot cleaner on the whole.

A possible objection is that this changes the FDW API slightly, as
GetForeignRelSize callbacks now need to deal with rel->tuples possibly
being -1.  We could avoid an API break if we made plancat.c clamp
that value to zero; but then FDWs still couldn't tell the difference
between the "empty" and "never analyzed" cases, and I think this is
just as much of an issue for them as for the core code.

I'll add this to the upcoming CF.

                        regards, tom lane

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index fbcf7ca9c9..072a6dc1c1 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -996,7 +996,7 @@ estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 	/*
 	 * Estimate the number of tuples in the file.
 	 */
-	if (baserel->pages > 0)
+	if (baserel->tuples >= 0 && baserel->pages > 0)
 	{
 		/*
 		 * We have # of pages and # of tuples from pg_class (that is, from a
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 3a99333d44..23306e11a7 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -195,6 +195,9 @@ statapprox_heap(Relation rel, output_type *stat)
 	stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
 											   stat->tuple_count);
 
+	/* It's not clear if we could get -1 here, but be safe. */
+	stat->tuple_count = Max(stat->tuple_count, 0);
+
 	/*
 	 * Calculate percentages if the relation has one or more pages.
 	 */
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 9fc53cad68..a31abce7c9 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -692,15 +692,14 @@ postgresGetForeignRelSize(PlannerInfo *root,
 	else
 	{
 		/*
-		 * If the foreign table has never been ANALYZEd, it will have relpages
-		 * and reltuples equal to zero, which most likely has nothing to do
-		 * with reality.  We can't do a whole lot about that if we're not
+		 * If the foreign table has never been ANALYZEd, it will have
+		 * reltuples < 0, meaning "unknown".  We can't do much if we're not
 		 * allowed to consult the remote server, but we can use a hack similar
 		 * to plancat.c's treatment of empty relations: use a minimum size
 		 * estimate of 10 pages, and divide by the column-datatype-based width
 		 * estimate to get the corresponding number of tuples.
 		 */
-		if (baserel->pages == 0 && baserel->tuples == 0)
+		if (baserel->tuples < 0)
 		{
 			baserel->pages = 10;
 			baserel->tuples =
diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 1232b24e74..7861379d97 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -1977,6 +1977,10 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
        the planner.  It is updated by <command>VACUUM</command>,
        <command>ANALYZE</command>, and a few DDL commands such as
        <command>CREATE INDEX</command>.
+       If the table has never yet been vacuumed or
+       analyzed, <structfield>reltuples</structfield>
+       contains <literal>-1</literal> indicating that the row count is
+       unknown.
       </para></entry>
      </row>
 
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index 74793035d7..72fa127212 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -130,7 +130,8 @@ GetForeignRelSize(PlannerInfo *root,
      (The initial value is
      from <structname>pg_class</structname>.<structfield>reltuples</structfield>
      which represents the total row count seen by the
-     last <command>ANALYZE</command>.)
+     last <command>ANALYZE</command>; it will be <literal>-1</literal> if
+     no <command>ANALYZE</command> has been done on this foreign table.)
     </para>
 
     <para>
diff --git a/src/backend/access/gin/ginvacuum.c b/src/backend/access/gin/ginvacuum.c
index 9cd6638df6..0935a6d9e5 100644
--- a/src/backend/access/gin/ginvacuum.c
+++ b/src/backend/access/gin/ginvacuum.c
@@ -727,7 +727,7 @@ ginvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
 	 * entries.  This is bogus if the index is partial, but it's real hard to
 	 * tell how many distinct heap entries are referenced by a GIN index.
 	 */
-	stats->num_index_tuples = info->num_heap_tuples;
+	stats->num_index_tuples = Max(info->num_heap_tuples, 0);
 	stats->estimated_count = info->estimated_count;
 
 	/*
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 44e2224dd5..52dd0c59f0 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -208,7 +208,8 @@ typedef struct LVShared
 	 * live tuples in the index vacuum case or the new live tuples in the
 	 * index cleanup case.
 	 *
-	 * estimated_count is true if reltuples is an estimated value.
+	 * estimated_count is true if reltuples is an estimated value.  (Note that
+	 * reltuples could be -1 in this case, indicating we have no idea.)
 	 */
 	double		reltuples;
 	bool		estimated_count;
@@ -561,31 +562,19 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 	/*
 	 * Update statistics in pg_class.
 	 *
-	 * A corner case here is that if we scanned no pages at all because every
-	 * page is all-visible, we should not update relpages/reltuples, because
-	 * we have no new information to contribute.  In particular this keeps us
-	 * from replacing relpages=reltuples=0 (which means "unknown tuple
-	 * density") with nonzero relpages and reltuples=0 (which means "zero
-	 * tuple density") unless there's some actual evidence for the latter.
+	 * In principle new_live_tuples could be -1 indicating that we (still)
+	 * don't know the tuple count.  In practice that probably can't happen,
+	 * since we'd surely have scanned some pages if the table is new and
+	 * nonempty.
 	 *
-	 * It's important that we use tupcount_pages and not scanned_pages for the
-	 * check described above; scanned_pages counts pages where we could not
-	 * get cleanup lock, and which were processed only for frozenxid purposes.
-	 *
-	 * We do update relallvisible even in the corner case, since if the table
-	 * is all-visible we'd definitely like to know that.  But clamp the value
-	 * to be not more than what we're setting relpages to.
+	 * For safety, clamp relallvisible to be not more than what we're setting
+	 * relpages to.
 	 *
 	 * Also, don't change relfrozenxid/relminmxid if we skipped any pages,
 	 * since then we don't know for certain that all tuples have a newer xmin.
 	 */
 	new_rel_pages = vacrelstats->rel_pages;
 	new_live_tuples = vacrelstats->new_live_tuples;
-	if (vacrelstats->tupcount_pages == 0 && new_rel_pages > 0)
-	{
-		new_rel_pages = vacrelstats->old_rel_pages;
-		new_live_tuples = vacrelstats->old_live_tuples;
-	}
 
 	visibilitymap_count(onerel, &new_rel_allvisible, NULL);
 	if (new_rel_allvisible > new_rel_pages)
@@ -606,7 +595,7 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 	/* report results to the stats collector, too */
 	pgstat_report_vacuum(RelationGetRelid(onerel),
 						 onerel->rd_rel->relisshared,
-						 new_live_tuples,
+						 Max(new_live_tuples, 0),
 						 vacrelstats->new_dead_tuples);
 	pgstat_progress_end_command();
 
@@ -1674,9 +1663,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 														  vacrelstats->tupcount_pages,
 														  live_tuples);
 
-	/* also compute total number of surviving heap entries */
+	/*
+	 * Also compute the total number of surviving heap entries.  In the
+	 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
+	 */
 	vacrelstats->new_rel_tuples =
-		vacrelstats->new_live_tuples + vacrelstats->new_dead_tuples;
+		Max(vacrelstats->new_live_tuples, 0) + vacrelstats->new_dead_tuples;
 
 	/*
 	 * Release any remaining pin on visibility map page.
@@ -2401,7 +2393,7 @@ lazy_cleanup_all_indexes(Relation *Irel, IndexBulkDeleteResult **stats,
  *		dead_tuples, and update running statistics.
  *
  *		reltuples is the number of heap tuples to be passed to the
- *		bulkdelete callback.
+ *		bulkdelete callback.  It's always assumed to be estimated.
  */
 static void
 lazy_vacuum_index(Relation indrel, IndexBulkDeleteResult **stats,
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 8fa6ac7296..c822b49a71 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -853,6 +853,7 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
 		prev_num_heap_tuples = metad->btm_last_cleanup_num_heap_tuples;
 
 		if (cleanup_scale_factor <= 0 ||
+			info->num_heap_tuples < 0 ||
 			prev_num_heap_tuples <= 0 ||
 			(info->num_heap_tuples - prev_num_heap_tuples) /
 			prev_num_heap_tuples >= cleanup_scale_factor)
diff --git a/src/backend/access/table/tableam.c b/src/backend/access/table/tableam.c
index c638319765..6438c45716 100644
--- a/src/backend/access/table/tableam.c
+++ b/src/backend/access/table/tableam.c
@@ -701,18 +701,14 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
 	 * doesn't happen instantaneously, and it won't happen at all for cases
 	 * such as temporary tables.)
 	 *
-	 * We approximate "never vacuumed" by "has relpages = 0", which means this
-	 * will also fire on genuinely empty relations.  Not great, but
-	 * fortunately that's a seldom-seen case in the real world, and it
-	 * shouldn't degrade the quality of the plan too much anyway to err in
-	 * this direction.
+	 * We test "never vacuumed" by seeing whether reltuples < 0.
 	 *
 	 * If the table has inheritance children, we don't apply this heuristic.
 	 * Totally empty parent tables are quite common, so we should be willing
 	 * to believe that they are empty.
 	 */
 	if (curpages < 10 &&
-		relpages == 0 &&
+		reltuples < 0 &&
 		!rel->rd_rel->relhassubclass)
 		curpages = 10;
 
@@ -727,17 +723,17 @@ table_block_relation_estimate_size(Relation rel, int32 *attr_widths,
 	}
 
 	/* estimate number of tuples from previous tuple density */
-	if (relpages > 0)
+	if (reltuples >= 0 && relpages > 0)
 		density = reltuples / (double) relpages;
 	else
 	{
 		/*
-		 * When we have no data because the relation was truncated, estimate
-		 * tuple width from attribute datatypes.  We assume here that the
-		 * pages are completely full, which is OK for tables (since they've
-		 * presumably not been VACUUMed yet) but is probably an overestimate
-		 * for indexes.  Fortunately get_relation_info() can clamp the
-		 * overestimate to the parent table's size.
+		 * When we have no data because the relation was never yet vacuumed,
+		 * estimate tuple width from attribute datatypes.  We assume here that
+		 * the pages are completely full, which is OK for tables but is
+		 * probably an overestimate for indexes.  Fortunately
+		 * get_relation_info() can clamp the overestimate to the parent
+		 * table's size.
 		 *
 		 * Note: this code intentionally disregards alignment considerations,
 		 * because (a) that would be gilding the lily considering how crude
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index f2ca686397..abd5bdb866 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -1015,7 +1015,7 @@ AddNewRelationTuple(Relation pg_class_desc,
 		case RELKIND_TOASTVALUE:
 			/* The relation is real, but as yet empty */
 			new_rel_reltup->relpages = 0;
-			new_rel_reltup->reltuples = 0;
+			new_rel_reltup->reltuples = -1;
 			new_rel_reltup->relallvisible = 0;
 			break;
 		case RELKIND_SEQUENCE:
@@ -1027,7 +1027,7 @@ AddNewRelationTuple(Relation pg_class_desc,
 		default:
 			/* Views, etc, have no disk storage */
 			new_rel_reltup->relpages = 0;
-			new_rel_reltup->reltuples = 0;
+			new_rel_reltup->reltuples = -1;
 			new_rel_reltup->relallvisible = 0;
 			break;
 	}
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1be27eec52..daeac6b5a5 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2722,6 +2722,15 @@ index_update_stats(Relation rel,
 	/* Should this be a more comprehensive test? */
 	Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
 
+	/*
+	 * As a special hack, if we are dealing with an empty table and the
+	 * existing reltuples is -1, we leave that alone.  This ensures that
+	 * creating an index as part of CREATE TABLE doesn't cause the table to
+	 * prematurely look like it's been vacuumed.
+	 */
+	if (reltuples == 0 && rd_rel->reltuples < 0)
+		reltuples = -1;
+
 	/* Apply required updates, if any, to copied tuple */
 
 	dirty = false;
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 23eb605d4c..308a51d95d 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1128,8 +1128,8 @@ vacuum_set_xid_limits(Relation rel,
  *		live tuples seen; but if we did not, we should not blindly extrapolate
  *		from that number, since VACUUM may have scanned a quite nonrandom
  *		subset of the table.  When we have only partial information, we take
- *		the old value of pg_class.reltuples as a measurement of the
- *		tuple density in the unscanned pages.
+ *		the old value of pg_class.reltuples/pg_class.relpages as a measurement
+ *		of the tuple density in the unscanned pages.
  *
  *		Note: scanned_tuples should count only *live* tuples, since
  *		pg_class.reltuples is defined that way.
@@ -1152,18 +1152,16 @@ vac_estimate_reltuples(Relation relation,
 
 	/*
 	 * If scanned_pages is zero but total_pages isn't, keep the existing value
-	 * of reltuples.  (Note: callers should avoid updating the pg_class
-	 * statistics in this situation, since no new information has been
-	 * provided.)
+	 * of reltuples.  (Note: we might be returning -1 in this case.)
 	 */
 	if (scanned_pages == 0)
 		return old_rel_tuples;
 
 	/*
-	 * If old value of relpages is zero, old density is indeterminate; we
-	 * can't do much except scale up scanned_tuples to match total_pages.
+	 * If old density is unknown, we can't do much except scale up
+	 * scanned_tuples to match total_pages.
 	 */
-	if (old_rel_pages == 0)
+	if (old_rel_tuples < 0 || old_rel_pages == 0)
 		return floor((scanned_tuples / scanned_pages) * total_pages + 0.5);
 
 	/*
diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 0eeff804bc..b399592ff8 100644
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -912,7 +912,11 @@ set_foreign_size(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
 	/* ... but do not let it set the rows estimate to zero */
 	rel->rows = clamp_row_est(rel->rows);
 
-	/* also, make sure rel->tuples is not insane relative to rel->rows */
+	/*
+	 * Also, make sure rel->tuples is not insane relative to rel->rows.
+	 * Notably, this ensures sanity if pg_class.reltuples contains -1 and the
+	 * FDW doesn't do anything to replace that.
+	 */
 	rel->tuples = Max(rel->tuples, rel->rows);
 }
 
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 25545029d7..f9d0d67aa7 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -974,11 +974,6 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 			/* it has storage, ok to call the smgr */
 			curpages = RelationGetNumberOfBlocks(rel);
 
-			/* coerce values in pg_class to more desirable types */
-			relpages = (BlockNumber) rel->rd_rel->relpages;
-			reltuples = (double) rel->rd_rel->reltuples;
-			relallvisible = (BlockNumber) rel->rd_rel->relallvisible;
-
 			/* report estimated # pages */
 			*pages = curpages;
 			/* quick exit if rel is clearly empty */
@@ -988,6 +983,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 				*allvisfrac = 0;
 				break;
 			}
+
 			/* coerce values in pg_class to more desirable types */
 			relpages = (BlockNumber) rel->rd_rel->relpages;
 			reltuples = (double) rel->rd_rel->reltuples;
@@ -1006,12 +1002,12 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 			}
 
 			/* estimate number of tuples from previous tuple density */
-			if (relpages > 0)
+			if (reltuples >= 0 && relpages > 0)
 				density = reltuples / (double) relpages;
 			else
 			{
 				/*
-				 * When we have no data because the relation was truncated,
+				 * If we have no data because the relation was never vacuumed,
 				 * estimate tuple width from attribute datatypes.  We assume
 				 * here that the pages are completely full, which is OK for
 				 * tables (since they've presumably not been VACUUMed yet) but
@@ -1059,6 +1055,7 @@ estimate_rel_size(Relation rel, int32 *attr_widths,
 			break;
 		case RELKIND_FOREIGN_TABLE:
 			/* Just use whatever's in pg_class */
+			/* Note that FDW must cope if reltuples is -1! */
 			*pages = rel->rd_rel->relpages;
 			*tuples = rel->rd_rel->reltuples;
 			*allvisfrac = 0;
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index c6ec657a93..1b8cd7bacd 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -3080,6 +3080,10 @@ relation_needs_vacanalyze(Oid relid,
 		instuples = tabentry->inserts_since_vacuum;
 		anltuples = tabentry->changes_since_analyze;
 
+		/* If the table hasn't yet been vacuumed, take reltuples as zero */
+		if (reltuples < 0)
+			reltuples = 0;
+
 		vacthresh = (float4) vac_base_thresh + vac_scale_factor * reltuples;
 		vacinsthresh = (float4) vac_ins_base_thresh + vac_ins_scale_factor * reltuples;
 		anlthresh = (float4) anl_base_thresh + anl_scale_factor * reltuples;
diff --git a/src/backend/rewrite/rewriteDefine.c b/src/backend/rewrite/rewriteDefine.c
index 9989df1107..8ef0917021 100644
--- a/src/backend/rewrite/rewriteDefine.c
+++ b/src/backend/rewrite/rewriteDefine.c
@@ -621,7 +621,7 @@ DefineQueryRewrite(const char *rulename,
 		classForm->relam = InvalidOid;
 		classForm->reltablespace = InvalidOid;
 		classForm->relpages = 0;
-		classForm->reltuples = 0;
+		classForm->reltuples = -1;
 		classForm->relallvisible = 0;
 		classForm->reltoastrelid = InvalidOid;
 		classForm->relhasindex = false;
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index a2453cf1f4..96ecad02dd 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -1870,7 +1870,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 
 	relation->rd_rel->relreplident = REPLICA_IDENTITY_NOTHING;
 	relation->rd_rel->relpages = 0;
-	relation->rd_rel->reltuples = 0;
+	relation->rd_rel->reltuples = -1;
 	relation->rd_rel->relallvisible = 0;
 	relation->rd_rel->relkind = RELKIND_RELATION;
 	relation->rd_rel->relnatts = (int16) natts;
@@ -3692,7 +3692,7 @@ RelationSetNewRelfilenode(Relation relation, char persistence)
 		if (relation->rd_rel->relkind != RELKIND_SEQUENCE)
 		{
 			classform->relpages = 0;	/* it's empty until further notice */
-			classform->reltuples = 0;
+			classform->reltuples = -1;
 			classform->relallvisible = 0;
 		}
 		classform->relfrozenxid = freezeXid;
diff --git a/src/include/access/genam.h b/src/include/access/genam.h
index 931257bd81..68d90f5141 100644
--- a/src/include/access/genam.h
+++ b/src/include/access/genam.h
@@ -38,8 +38,8 @@ typedef struct IndexBuildResult
  *
  * num_heap_tuples is accurate only when estimated_count is false;
  * otherwise it's just an estimate (currently, the estimate is the
- * prior value of the relation's pg_class.reltuples field).  It will
- * always just be an estimate during ambulkdelete.
+ * prior value of the relation's pg_class.reltuples field, so it could
+ * even be -1).  It will always just be an estimate during ambulkdelete.
  */
 typedef struct IndexVacuumInfo
 {
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 78b33b2a7f..679eec3443 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -62,8 +62,8 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
 	/* # of blocks (not always up-to-date) */
 	int32		relpages BKI_DEFAULT(0);
 
-	/* # of tuples (not always up-to-date) */
-	float4		reltuples BKI_DEFAULT(0);
+	/* # of tuples (not always up-to-date; -1 means "unknown") */
+	float4		reltuples BKI_DEFAULT(-1);
 
 	/* # of all-visible blocks (not always up-to-date) */
 	int32		relallvisible BKI_DEFAULT(0);

Reply via email to