I've completed my test of the changes to the Windows implementation of pgkill. I ran two clients continuously issuing a pair of notifications, with one client subscribing to those events. I ran the test for 90 hours. I found 760 cases where the CallNamedPipe function initially failed but succeeded when retried. In all but one of those cases, only a single retry was needed; the other case required two retries. As Tom has pointed out, there is no principled way to determine the correct number of times to retry. Therefore, I have set the maximum number of retries to 2 (three attempts in total), which is the worst case found in a fairly long stress test. This strikes me as a reasonable balance between failing quickly in truly erroneous cases and avoiding transient pseudo-failures.

A patch with this change is attached. It should be applied to the 8.3.5 version of src/port/kill.c.

Steve


*** kill.c      2009-02-03 14:28:21.753474644 -0500
--- kill.c.mod  2009-02-03 14:28:04.465829331 -0500
***************
*** 25,30 ****
--- 25,33 ----
        BYTE            sigData = sig;
        BYTE            sigRet = 0;
        DWORD           bytes;
+       DWORD           timeout = 1000;     /* in ms; forever = NMPWAIT_WAIT_FOREVER */
+       const int       max_pipe_tries = 3; 
+       int                     pipe_tries = 0;
  
        /* we allow signal 0 here, but it will be ignored in pg_queue_signal */
        if (sig >= PG_SIGNAL_COUNT || sig < 0)
***************
*** 39,45 ****
                return -1;
        }
        snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
!       if (!CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, 1000))
        {
                if (GetLastError() == ERROR_FILE_NOT_FOUND)
                        errno = ESRCH;
--- 42,63 ----
                return -1;
        }
        snprintf(pipename, sizeof(pipename), "\\\\.\\pipe\\pgsignal_%u", pid);
! 
!       /*
!        *  Writing data to the named pipe can fail for transient reasons.
!        *  Therefore, it is useful to retry if it fails.  The maximum number of 
!        *  calls to make was empirically determined from a 90-hour notification 
!        *  stress test. 
!        */
!       while (pipe_tries < max_pipe_tries &&
!                       !CallNamedPipe(pipename, &sigData, 1, &sigRet, 1, &bytes, timeout))
!       {
!               pipe_tries++;
!               timeout = 250;
!               pg_usleep(10000);
!       }
! 
!       if (pipe_tries >= max_pipe_tries)
        {
                if (GetLastError() == ERROR_FILE_NOT_FOUND)
                        errno = ESRCH;
-- 
Sent via pgsql-bugs mailing list (pgsql-bugs@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-bugs

Reply via email to