On Mon, 5 Sep 2016 14:54:05 +0300 Grigory Smolkin <g.smol...@postgrespro.ru> wrote:
> Hello, hackers! > > We were testing how well some application works with PostgreSQL and > stumbled upon an autovacuum behavior which I fail to understand. > Application in question have a habit to heavily use temporary tables > in funny ways. > For example it creates A LOT of them. > Which is ok. > Funny part is that it never drops them. So when backend is finally > terminated, it tries to drop them and fails with error: > > FATAL: out of shared memory > HINT: You might need to increase max_locks_per_transaction > > If I understand that rigth, we are trying to drop all these temp > tables in one transaction and running out of locks to do so. > After that postgresql.log is flooded at the rate 1k/s with messages > like that: > > LOG: autovacuum: found orphan temp table "pg_temp_15"."tt38147" in > database "DB_TEST" > > It produces a noticeable load on the system and it`s getting worst > with every terminated backend or restart. > I did some RTFS and it appears that autovacuum has no intention of > cleaning that orphan tables unless > it`s wraparound time: > > src/backend/postmaster/autovacuum.c > /* We just ignore it if the owning backend is still > active */ 2037 if (backendID == MyBackendId || > BackendIdGetProc(backendID) == NULL) > 2038 { > 2039 /* > 2040 * We found an orphan temp table (which was > probably left > 2041 * behind by a crashed backend). If it's so > old as to need > 2042 * vacuum for wraparound, forcibly drop it. > Otherwise just > 2043 * log a complaint. > 2044 */ > 2045 if (wraparound) > 2046 { > 2047 ObjectAddress object; > 2048 > 2049 ereport(LOG, > 2050 (errmsg("autovacuum: dropping > orphan temp table \"%s\".\"%s\" in database \"%s\"", > 2051 get_namespace_name(classForm->relnamespace), > 2052 NameStr(classForm->relname), > 2053 get_database_name(MyDatabaseId)))); > 2054 object.classId = RelationRelationId; > 2055 object.objectId = relid; > 2056 object.objectSubId = 0; > 2057 performDeletion(&object, DROP_CASCADE, > PERFORM_DELETION_INTERNAL); > 2058 } > 2059 else > 2060 { > 2061 ereport(LOG, > 2062 (errmsg("autovacuum: found orphan > temp table \"%s\".\"%s\" in database \"%s\"", > 2063 get_namespace_name(classForm->relnamespace), > 2064 NameStr(classForm->relname), > 2065 get_database_name(MyDatabaseId)))); > 2066 } > 2067 } > 2068 } > > > What is more troubling is that pg_statistic is starting to bloat > badly. > > LOG: automatic vacuum of table "DB_TEST.pg_catalog.pg_statistic": > index scans: 0 > pages: 0 removed, 68225 remain, 0 skipped due to pins > tuples: 0 removed, 2458382 remain, 2408081 are dead but not > yet removable > buffer usage: 146450 hits, 31 misses, 0 dirtied > avg read rate: 0.010 MB/s, avg write rate: 0.000 MB/s > system usage: CPU 3.27s/6.92u sec elapsed 23.87 sec > > What is the purpose of keeping orphan tables around and not dropping > them on the spot? > > Hey Hackers, I tried to fix the problem with a new backend not being able to reuse a temporary namespace when it contains thousands of temporary tables. I disabled locking of objects during namespace clearing process. See the patch attached. Please tell me if there are any reasons why this is wrong. I also added a GUC variable and changed the condition in autovacuum to let it nuke orphan tables on its way. See another patch attached. Regards, Constantin Pan
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 5c8db97..d7707ac 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5665,6 +5665,20 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv; </listitem> </varlistentry> + <varlistentry id="guc-autovacuum-wipe-orphan-temp-tables" xreflabel="autovacuum_wipe_orphan_temp_tables"> + <term><varname>autovacuum_wipe_orphan_temp_tables</varname> (<type>boolean</type>) + <indexterm> + <primary><varname>autovacuum_wipe_orphan_temp_tables</> configuration parameter</primary> + </indexterm> + </term> + <listitem> + <para> + Controls whether the server should drop orphan temporary tables during + autovacuum. This is on by default. + </para> + </listitem> + </varlistentry> + <varlistentry id="guc-log-autovacuum-min-duration" xreflabel="log_autovacuum_min_duration"> <term><varname>log_autovacuum_min_duration</varname> (<type>integer</type>) <indexterm> diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c index 3768f50..e5318f4 100644 --- a/src/backend/postmaster/autovacuum.c +++ b/src/backend/postmaster/autovacuum.c @@ -123,6 +123,8 @@ int autovacuum_vac_cost_limit; int Log_autovacuum_min_duration = -1; +bool autovacuum_wipe_orphan_temp_tables = true; + /* how long to keep pgstat data in the launcher, in milliseconds */ #define STATS_READ_DELAY 1000 @@ -2052,7 +2054,7 @@ do_autovacuum(void) * vacuum for wraparound, forcibly drop it. Otherwise just * log a complaint. */ - if (wraparound) + if (wraparound || autovacuum_wipe_orphan_temp_tables) { ObjectAddress object; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index c5178f7..d0695ba 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1227,6 +1227,15 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"autovacuum_wipe_orphan_temp_tables", PGC_SIGHUP, AUTOVACUUM, + gettext_noop("Forces autovacuum to wipe orphan temp tables on sight."), + NULL + }, + &autovacuum_wipe_orphan_temp_tables, + true, + NULL, NULL, NULL + }, { {"trace_notify", PGC_USERSET, DEVELOPER_OPTIONS, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 6d0666c..21b65b7 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -519,6 +519,7 @@ #autovacuum_vacuum_cost_limit = -1 # default vacuum cost limit for # autovacuum, -1 means use # vacuum_cost_limit +#autovacuum_wipe_orphan_temp_tables = on # Drop orphan temp tables during autovacuum? 'on' #------------------------------------------------------------------------------ diff --git a/src/include/postmaster/autovacuum.h b/src/include/postmaster/autovacuum.h index b5000b0..05b7a71 100644 --- a/src/include/postmaster/autovacuum.h +++ b/src/include/postmaster/autovacuum.h @@ -34,6 +34,8 @@ extern int AutovacuumLauncherPid; extern int Log_autovacuum_min_duration; +extern bool autovacuum_wipe_orphan_temp_tables; + /* Status inquiry functions */ extern bool AutoVacuumingActive(void); extern bool IsAutoVacuumLauncherProcess(void);
diff --git a/src/backend/catalog/dependency.c b/src/backend/catalog/dependency.c index 04d7840..7148c48 100644 --- a/src/backend/catalog/dependency.c +++ b/src/backend/catalog/dependency.c @@ -76,6 +76,7 @@ #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/syscache.h" +#include "miscadmin.h" #include "utils/tqual.h" @@ -172,7 +173,8 @@ static void findDependentObjects(const ObjectAddress *object, ObjectAddressStack *stack, ObjectAddresses *targetObjects, const ObjectAddresses *pendingObjects, - Relation *depRel); + Relation *depRel, + bool lockObjects); static void reportDependentObjects(const ObjectAddresses *targetObjects, DropBehavior behavior, int msglevel, @@ -182,6 +184,7 @@ static void deleteOneObject(const ObjectAddress *object, static void doDeletion(const ObjectAddress *object, int flags); static void AcquireDeletionLock(const ObjectAddress *object, int flags); static void ReleaseDeletionLock(const ObjectAddress *object); +static void ReleaseDeletionLockCompletely(const ObjectAddress *object); static bool find_expr_references_walker(Node *node, find_expr_references_context *context); static void eliminate_duplicate_dependencies(ObjectAddresses *addrs); @@ -296,7 +299,8 @@ performDeletion(const ObjectAddress *object, NULL, /* empty stack */ targetObjects, NULL, /* no pendingObjects */ - &depRel); + &depRel, + true /* lock objects during the search */); /* * Check if deletion is allowed, and report about cascaded deletes. @@ -367,7 +371,8 @@ performMultipleDeletions(const ObjectAddresses *objects, NULL, /* empty stack */ targetObjects, objects, - &depRel); + &depRel, + true /* lock objects during the search */); } /* @@ -434,7 +439,9 @@ deleteWhatDependsOn(const ObjectAddress *object, NULL, /* empty stack */ targetObjects, NULL, /* no pendingObjects */ - &depRel); + &depRel, + false /* without locking */); + /* * Check if deletion is allowed, and report about cascaded deletes. @@ -464,8 +471,9 @@ deleteWhatDependsOn(const ObjectAddress *object, * purposes, we might need to revisit this. */ deleteOneObject(thisobj, &depRel, PERFORM_DELETION_INTERNAL); + ReleaseDeletionLockCompletely(thisobj); } /* And clean up */ free_object_addresses(targetObjects); @@ -506,7 +514,8 @@ findDependentObjects(const ObjectAddress *object, ObjectAddressStack *stack, ObjectAddresses *targetObjects, const ObjectAddresses *pendingObjects, - Relation *depRel) + Relation *depRel, + bool lockObjects) { ScanKeyData key[3]; int nkeys; @@ -622,7 +631,8 @@ findDependentObjects(const ObjectAddress *object, { systable_endscan(scan); /* need to release caller's lock; see notes below */ - ReleaseDeletionLock(object); + if (lockObjects) + ReleaseDeletionLock(object); return; } @@ -671,8 +681,11 @@ findDependentObjects(const ObjectAddress *object, * caller's lock to avoid deadlock against a concurrent * deletion of the owning object.) */ - ReleaseDeletionLock(object); - AcquireDeletionLock(&otherObject, 0); + if (lockObjects) + { + ReleaseDeletionLock(object); + AcquireDeletionLock(&otherObject, 0); + } /* * The owning object might have been deleted while we waited @@ -683,7 +696,8 @@ findDependentObjects(const ObjectAddress *object, if (!systable_recheck_tuple(scan, tup)) { systable_endscan(scan); - ReleaseDeletionLock(&otherObject); + if (lockObjects) + ReleaseDeletionLock(&otherObject); return; } @@ -703,7 +717,8 @@ findDependentObjects(const ObjectAddress *object, stack, targetObjects, pendingObjects, - depRel); + depRel, + lockObjects); /* And we're done here. */ systable_endscan(scan); return; @@ -764,10 +779,11 @@ findDependentObjects(const ObjectAddress *object, otherObject.objectId = foundDep->objid; otherObject.objectSubId = foundDep->objsubid; /* * Must lock the dependent object before recursing to it. */ - AcquireDeletionLock(&otherObject, 0); + if (lockObjects) + AcquireDeletionLock(&otherObject, 0); /* * The dependent object might have been deleted while we waited to @@ -779,7 +795,8 @@ findDependentObjects(const ObjectAddress *object, if (!systable_recheck_tuple(scan, tup)) { /* release the now-useless lock */ - ReleaseDeletionLock(&otherObject); + if (lockObjects) + ReleaseDeletionLock(&otherObject); /* and continue scanning for dependencies */ continue; } @@ -824,7 +841,8 @@ findDependentObjects(const ObjectAddress *object, &mystack, targetObjects, pendingObjects, - depRel); + depRel, + lockObjects); } systable_endscan(scan); @@ -1335,6 +1353,20 @@ ReleaseDeletionLock(const ObjectAddress *object) } /* + * ReleaseDeletionLock - release an object deletion lock + */ +static void +ReleaseDeletionLockCompletely(const ObjectAddress *object) +{ + LOCKTAG tag; + if (object->classId == RelationRelationId) + SET_LOCKTAG_RELATION(tag, MyDatabaseId, object->objectId); + else + SET_LOCKTAG_OBJECT(tag, MyDatabaseId, object->classId, object->objectId, 0); + LockReleaseCompletely(&tag, AccessExclusiveLock, false); +} + +/* * recordDependencyOnExpr - find expression dependencies * * This is used to find the dependencies of rules, constraint expressions, diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c --- a/src/backend/storage/lmgr/lock.c +++ b/src/backend/storage/lmgr/lock.c @@ -47,6 +47,7 @@ #include "storage/standby.h" #include "utils/memutils.h" #include "utils/ps_status.h" +#include "utils/rel.h" #include "utils/resowner_private.h" @@ -1806,6 +1807,48 @@ RemoveFromWaitQueue(PGPROC *proc, uint32 hashcode) true); } +void +LockReleaseCompletely(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock) +{ + LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid; + LockMethod lockMethodTable; + LOCALLOCKTAG localtag; + LOCALLOCK *locallock; + int num; + + if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods)) + elog(ERROR, "unrecognized lock method: %d", lockmethodid); + lockMethodTable = LockMethods[lockmethodid]; + if (lockmode <= 0 || lockmode > lockMethodTable->numLockModes) + elog(ERROR, "unrecognized lock mode: %d", lockmode); + +#ifdef LOCK_DEBUG + if (LOCK_DEBUG_ENABLED(locktag)) + elog(LOG, "LockReleaseCompletely: lock [%u,%u] %s", + locktag->locktag_field1, locktag->locktag_field2, + lockMethodTable->lockModeNames[lockmode]); +#endif + + /* + * Find the LOCALLOCK entry for this lock and lockmode + */ + MemSet(&localtag, 0, sizeof(localtag)); /* must clear padding */ + localtag.lock = *locktag; + localtag.mode = lockmode; + + locallock = (LOCALLOCK *) hash_search(LockMethodLocalHash, + (void *) &localtag, + HASH_FIND, NULL); + + num = 0; + if (!locallock) return; + num = locallock->nLocks; + if (num <= 0) return; + + while (num--) + LockRelease(locktag, lockmode, sessionLock); +} + /* * LockRelease -- look up 'locktag' and release one 'lockmode' lock on it. * Release a session lock if 'sessionLock' is true, else release a diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index efa75ec..0091d59 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -533,6 +533,8 @@ extern LockAcquireResult LockAcquireExtended(const LOCKTAG *locktag, extern void AbortStrongLockAcquire(void); extern bool LockRelease(const LOCKTAG *locktag, LOCKMODE lockmode, bool sessionLock); +extern void LockReleaseCompletely(const LOCKTAG *locktag, + LOCKMODE lockmode, bool sessionLock); extern void LockReleaseAll(LOCKMETHODID lockmethodid, bool allLocks); extern void LockReleaseSession(LOCKMETHODID lockmethodid); extern void LockReleaseCurrentOwner(LOCALLOCK **locallocks, int nlocks);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers