Well the problem with spurious waits for deadlocks turned out to be quite
mundane. The deadlocks were not so spurious after all... there were real
deadlocks which were being hidden by a plpgsql function that caught the
exception and didn't report the error.

I think this is still a minor problem though. There's an asymmetry here in
that log_lock_waits meant the transactions that *didn't* detect a deadlock
reported log messages which couldn't be intercepted but the transactions which
*did* detect a deadlock reported nothing, even though they had waited on a
lock for just as long.

I think we should go ahead and print a LOG message in the case where a
deadlock is detected. It looks redundant since we'll also throw an error but
in fact it does two things differently. a) it prints the time spent waiting
before the deadlock was detected and b) it will print something even if the
exception is caught ensuring that all lock waits longer than deadlock_timeout
are logged similarly.

I also removed the switch statement in part because I'm not certain it's
actually safe to test a volatile variable in a switch expression and also
because I found it confusing and unnecessary given the changes that have
happened to the surrounding code since. Also, I added the process id and tried
to normalize the messages to all fit the same pattern more or less.

Index: src/backend/storage/lmgr/proc.c
===================================================================
RCS file: /home/stark/src/REPOSITORY/pgsql/src/backend/storage/lmgr/proc.c,v
retrieving revision 1.191
diff -c -r1.191 proc.c
*** src/backend/storage/lmgr/proc.c	16 Jul 2007 21:09:50 -0000	1.191
--- src/backend/storage/lmgr/proc.c	27 Aug 2007 12:45:05 -0000
***************
*** 882,936 ****
  		 * If awoken after the deadlock check interrupt has run, and
  		 * log_lock_waits is on, then report about the wait.
  		 */
! 		if (log_lock_waits)
  		{
! 			switch (deadlock_state)
! 			{
! 				case DS_NOT_YET_CHECKED:
! 					/* Lock granted, or spurious wakeup as described above */
! 					break; 
! 				case DS_NO_DEADLOCK:
! 				case DS_SOFT_DEADLOCK:
! 				{
! 					StringInfoData buf;
! 					const char *modename;
! 					long		secs;
! 					int			usecs;
! 					long		msecs;
! 
! 					initStringInfo(&buf);
! 					DescribeLockTag(&buf, &locallock->tag.lock);
! 					modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
! 											   lockmode);
! 					TimestampDifference(timeout_start_time,
! 										GetCurrentTimestamp(),
! 										&secs, &usecs);
! 					msecs = secs * 1000 + usecs / 1000;
! 					usecs = usecs % 1000;
! 
! 					if (deadlock_state == DS_SOFT_DEADLOCK)
! 						ereport(LOG,
! 								(errmsg("deadlock for %s on %s avoided by rearranging queue order after %ld.%03d ms",
! 										modename, buf.data,
! 										msecs, usecs)));
! 					else if (MyProc->waitStatus == STATUS_WAITING)
! 						ereport(LOG,
! 								(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
! 										MyProcPid, modename, buf.data,
! 										msecs, usecs)));
! 					else if (MyProc->waitStatus == STATUS_OK)
! 						ereport(LOG,
! 								(errmsg("process %d acquired %s on %s after %ld.%03d ms",
! 										MyProcPid, modename, buf.data,
! 										msecs, usecs)));
! 					/* ERROR will be reported later, so no message here */
! 					pfree(buf.data);
! 					break;
! 				}
! 				case DS_HARD_DEADLOCK:
! 					/* ERROR will be reported later, so no message here */
! 					break; 
! 			}
  		}
  	} while (MyProc->waitStatus == STATUS_WAITING);
  
--- 882,930 ----
  		 * If awoken after the deadlock check interrupt has run, and
  		 * log_lock_waits is on, then report about the wait.
  		 */
! 		if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED)
  		{
! 			StringInfoData buf;
! 			const char *modename;
! 			long		secs;
! 			int			usecs;
! 			long		msecs;
! 			
! 			initStringInfo(&buf);
! 			DescribeLockTag(&buf, &locallock->tag.lock);
! 			modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid,
! 									   lockmode);
! 			TimestampDifference(timeout_start_time,
! 								GetCurrentTimestamp(),
! 								&secs, &usecs);
! 			msecs = secs * 1000 + usecs / 1000;
! 			usecs = usecs % 1000;
! 
! 			if (deadlock_state == DS_SOFT_DEADLOCK)
! 				ereport(LOG,
! 						(errmsg("process %d avoided deadlock waiting for %s on %s by rearranging queue order after %ld.%03d ms",
! 								MyProcPid, modename, buf.data,
! 								msecs, usecs)));
! 			else if (deadlock_state == DS_HARD_DEADLOCK)
! 				/* this is a bit redundant with the error which will be
! 				 * reported subsequently however it adds the time elapsed and
! 				 * it will print even if the error is caught by an exception
! 				 * handler. */
! 				ereport(LOG,
! 						(errmsg("process %d detected deadlock waiting for %s on %s after %ld.%03d ms",
! 								MyProcPid, modename, buf.data,
! 								msecs, usecs)));
! 			else if (MyProc->waitStatus == STATUS_WAITING)
! 				ereport(LOG,
! 						(errmsg("process %d still waiting for %s on %s after %ld.%03d ms",
! 								MyProcPid, modename, buf.data,
! 								msecs, usecs)));
! 			else if (MyProc->waitStatus == STATUS_OK)
! 				ereport(LOG,
! 						(errmsg("process %d acquired %s on %s after %ld.%03d ms",
! 								MyProcPid, modename, buf.data,
! 								msecs, usecs)));
! 			pfree(buf.data);
  		}
  	} while (MyProc->waitStatus == STATUS_WAITING);
  

-- 
  Gregory Stark
  EnterpriseDB          http://www.enterprisedb.com
---------------------------(end of broadcast)---------------------------
TIP 7: You can help support the PostgreSQL project by donating at

                http://www.postgresql.org/about/donate

Reply via email to