On Thu, Nov 29, 2018 at 10:51:40PM -0800, Noah Misch wrote:
> On Thu, Nov 29, 2018 at 01:10:57PM +0100, Dmitry Dolgov wrote:
> > As a side note, with this patch recovery tests are failing now on 016_shm.pl
> > 
> > #   Failed test 'detected live backend via shared memory'
> > #   at t/016_shm.pl line 87.
> > #                   '2018-11-28 13:08:08.504 UTC [21924] LOG:
> > listening on Unix socket "/tmp/yV2oDNcG8e/gnat/.s.PGSQL.512"
> > # 2018-11-28 13:08:08.512 UTC [21925] LOG:  database system was
> > interrupted; last known up at 2018-11-28 13:08:08 UTC
> > # 2018-11-28 13:08:08.512 UTC [21925] LOG:  database system was not
> > properly shut down; automatic recovery in progress
> > # 2018-11-28 13:08:08.512 UTC [21925] LOG:  invalid record length at
> > 0/165FEF8: wanted 24, got 0
> > # 2018-11-28 13:08:08.512 UTC [21925] LOG:  redo is not required
> > # 2018-11-28 13:08:08.516 UTC [21924] LOG:  database system is ready
> > to accept connections
> > # '
> > #     doesn't match '(?^:pre-existing shared memory block)'
> 
> Thanks for the report.  Since commit cfdf4dc made pg_sleep() react to
> postmaster death, the test will need a different way to stall a backend.  This
> doesn't affect non-test code, and the test still passes against cfdf4dc^ and
> against REL_11_STABLE.  I've queued a task to update the test code, but review
> can proceed in parallel.

Here's a version fixing that test for post-cfdf4dc backends.
--- a/src/backend/utils/init/miscinit.c
+++ b/src/backend/utils/init/miscinit.c
@@ -1070,14 +1070,10 @@ CreateLockFile(const char *filename, bool amPostmaster,
                                if (PGSharedMemoryIsInUse(id1, id2))
                                        ereport(FATAL,
                                                        
(errcode(ERRCODE_LOCK_FILE_EXISTS),
-                                                        errmsg("pre-existing 
shared memory block "
-                                                                       "(key 
%lu, ID %lu) is still in use",
+                                                        errmsg("pre-existing 
shared memory block (key %lu, ID %lu) is still in use",
                                                                        id1, 
id2),
-                                                        errhint("If you're 
sure there are no old "
-                                                                        
"server processes still running, remove "
-                                                                        "the 
shared memory block "
-                                                                        "or 
just delete the file \"%s\".",
-                                                                        
filename)));
+                                                        errhint("Terminate any 
old server processes associated with data directory \"%s\".",
+                                                                        
refName)));
                        }
                }
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 956fd27..a67455b 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -421,13 +421,13 @@ ifeq ($(enable_tap_tests),yes)
 define prove_installcheck
 rm -rf '$(CURDIR)'/tmp_check
 $(MKDIR_P) '$(CURDIR)'/tmp_check
-cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" 
PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' 
PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) 
$(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
+cd $(srcdir) && TESTDIR='$(CURDIR)' PATH="$(bindir):$$PATH" 
PGPORT='6$(DEF_PGPORT)' top_builddir='$(CURDIR)/$(top_builddir)' 
PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' 
REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' 
$(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if 
$(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
 endef
 
 define prove_check
 rm -rf '$(CURDIR)'/tmp_check
 $(MKDIR_P) '$(CURDIR)'/tmp_check
-cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) 
PGPORT='6$(DEF_PGPORT)' 
PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' $(PROVE) 
$(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if $(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
+cd $(srcdir) && TESTDIR='$(CURDIR)' $(with_temp_install) 
PGPORT='6$(DEF_PGPORT)' 
PG_REGRESS='$(CURDIR)/$(top_builddir)/src/test/regress/pg_regress' 
REGRESS_SHLIB='$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)' 
$(PROVE) $(PG_PROVE_FLAGS) $(PROVE_FLAGS) $(if 
$(PROVE_TESTS),$(PROVE_TESTS),t/*.pl)
 endef
 
 else
diff --git a/src/backend/port/sysv_shmem.c b/src/backend/port/sysv_shmem.c
index 741c455..7a9a68e 100644
--- a/src/backend/port/sysv_shmem.c
+++ b/src/backend/port/sysv_shmem.c
@@ -71,6 +71,26 @@
 typedef key_t IpcMemoryKey;            /* shared memory key passed to 
shmget(2) */
 typedef int IpcMemoryId;               /* shared memory ID returned by 
shmget(2) */
 
+/*
+ * How does a given IpcMemoryId relate to this PostgreSQL process?
+ *
+ * One could recycle unattached segments of different data directories if we
+ * distinguished that case from other SHMSTATE_FOREIGN cases.  Doing so would
+ * cause us to visit less of the key space, making us less likely to detect a
+ * SHMSTATE_ATTACHED key.  It would also complicate the concurrency analysis,
+ * in that postmasters of different data directories could simultaneously
+ * attempt to recycle a given key.  We'll waste keys longer in some cases, but
+ * avoiding the problems of the alternative justifies that loss.
+ */
+enum IpcMemoryState
+{
+       SHMSTATE_ANALYSIS_FAILURE,      /* unexpected failure to analyze the ID 
*/
+       SHMSTATE_ATTACHED,                      /* pertinent to DataDir, has 
attached PIDs */
+       SHMSTATE_EEXISTS,                       /* no segment of that ID */
+       SHMSTATE_FOREIGN,                       /* exists, but not pertinent to 
DataDir */
+       SHMSTATE_UNATTACHED                     /* pertinent to DataDir, no 
attached PIDs */
+};
+
 
 unsigned long UsedShmemSegID = 0;
 void      *UsedShmemSegAddr = NULL;
@@ -83,6 +103,7 @@ static void *AnonymousShmem = NULL;
 static void *InternalIpcMemoryCreate(IpcMemoryKey memKey, Size size);
 static void IpcMemoryDetach(int status, Datum shmaddr);
 static void IpcMemoryDelete(int status, Datum shmId);
+static enum IpcMemoryState IpcMemoryAnalyze(IpcMemoryId shmId);
 static PGShmemHeader *PGSharedMemoryAttach(IpcMemoryKey key,
                                         IpcMemoryId *shmid);
 
@@ -288,7 +309,23 @@ IpcMemoryDelete(int status, Datum shmId)
 bool
 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
 {
-       IpcMemoryId shmId = (IpcMemoryId) id2;
+       switch (IpcMemoryAnalyze((IpcMemoryId) id2))
+       {
+               case SHMSTATE_EEXISTS:
+               case SHMSTATE_FOREIGN:
+               case SHMSTATE_UNATTACHED:
+                       return false;
+               case SHMSTATE_ANALYSIS_FAILURE:
+               case SHMSTATE_ATTACHED:
+                       return true;
+       }
+       return true;
+}
+
+/* See comment at enum IpcMemoryState. */
+static enum IpcMemoryState
+IpcMemoryAnalyze(IpcMemoryId shmId)
+{
        struct shmid_ds shmStat;
        struct stat statbuf;
        PGShmemHeader *hdr;
@@ -305,7 +342,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 * exists.
                 */
                if (errno == EINVAL)
-                       return false;
+                       return SHMSTATE_EEXISTS;
 
                /*
                 * EACCES implies that the segment belongs to some other 
userid, which
@@ -313,7 +350,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 * is relevant to our data directory).
                 */
                if (errno == EACCES)
-                       return false;
+                       return SHMSTATE_FOREIGN;
 
                /*
                 * Some Linux kernel versions (in fact, all of them as of July 
2007)
@@ -324,7 +361,7 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
                 */
 #ifdef HAVE_LINUX_EIDRM_BUG
                if (errno == EIDRM)
-                       return false;
+                       return SHMSTATE_EEXISTS;
 #endif
 
                /*
@@ -332,25 +369,26 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long 
id2)
                 * only likely case is EIDRM, which implies that the segment 
has been
                 * IPC_RMID'd but there are still processes attached to it.
                 */
-               return true;
+               return SHMSTATE_ANALYSIS_FAILURE;
        }
 
-       /* If it has no attached processes, it's not in use */
-       if (shmStat.shm_nattch == 0)
-               return false;
-
        /*
         * Try to attach to the segment and see if it matches our data 
directory.
         * This avoids shmid-conflict problems on machines that are running
         * several postmasters under the same userid.
         */
        if (stat(DataDir, &statbuf) < 0)
-               return true;                    /* if can't stat, be 
conservative */
+               return SHMSTATE_ANALYSIS_FAILURE;       /* can't stat; be 
conservative */
 
+       /*
+        * If we can't attach, be conservative.  This may fail if postmaster.pid
+        * furnished the shmId and another user created a world-readable segment
+        * of the same shmId.  It shouldn't fail if PGSharedMemoryAttach()
+        * furnished the shmId, because that function tests shmat().
+        */
        hdr = (PGShmemHeader *) shmat(shmId, NULL, PG_SHMAT_FLAGS);
-
        if (hdr == (PGShmemHeader *) -1)
-               return true;                    /* if can't attach, be 
conservative */
+               return SHMSTATE_ANALYSIS_FAILURE;
 
        if (hdr->magic != PGShmemMagic ||
                hdr->device != statbuf.st_dev ||
@@ -358,16 +396,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long 
id2)
        {
                /*
                 * It's either not a Postgres segment, or not one for my data
-                * directory.  In either case it poses no threat.
+                * directory.
                 */
                shmdt((void *) hdr);
-               return false;
+               return SHMSTATE_FOREIGN;
        }
 
-       /* Trouble --- looks a lot like there's still live backends */
        shmdt((void *) hdr);
 
-       return true;
+       return shmStat.shm_nattch == 0 ? SHMSTATE_UNATTACHED : 
SHMSTATE_ATTACHED;
 }
 
 #ifdef USE_ANONYMOUS_SHMEM
@@ -543,19 +580,16 @@ AnonymousShmemDetach(int status, Datum arg)
  * standard header.  Also, register an on_shmem_exit callback to release
  * the storage.
  *
- * Dead Postgres segments are recycled if found, but we do not fail upon
- * collision with non-Postgres shmem segments.  The idea here is to detect and
- * re-use keys that may have been assigned by a crashed postmaster or backend.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment.
+ * Dead Postgres segments pertinent to this DataDir are recycled if found, but
+ * we do not fail upon collision with foreign shmem segments.  The idea here
+ * is to detect and re-use keys that may have been assigned by a crashed
+ * postmaster or backend.
  *
  * The port number is passed for possible use as a key (for SysV, we use
- * it to generate the starting shmem key).  In a standalone backend,
- * zero will be passed.
+ * it to generate the starting shmem key).
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
                                         PGShmemHeader **shim)
 {
        IpcMemoryKey NextShmemSegID;
@@ -592,11 +626,18 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int 
port,
        /* Make sure PGSharedMemoryAttach doesn't fail without need */
        UsedShmemSegAddr = NULL;
 
-       /* Loop till we find a free IPC key */
-       NextShmemSegID = port * 1000;
+       /*
+        * Loop till we find a free IPC key.  Trust CreateDataDirLockFile() to
+        * ensure no more than one postmaster per data directory can enter this
+        * loop simultaneously.  (CreateDataDirLockFile() does not ensure that,
+        * but prefer fixing it over coping here.)
+        */
+       NextShmemSegID = 1 + port * 1000;
 
-       for (NextShmemSegID++;; NextShmemSegID++)
+       for (;;)
        {
+               dsm_handle      hdr_dsm;
+
                /* Try to create new segment */
                memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
                if (memAddress)
@@ -604,58 +645,62 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int 
port,
 
                /* Check shared memory and possibly remove and recreate */
 
-               if (makePrivate)                /* a standalone backend 
shouldn't do this */
-                       continue;
-
                if ((memAddress = PGSharedMemoryAttach(NextShmemSegID, &shmid)) 
== NULL)
-                       continue;                       /* can't attach, not 
one of mine */
-
-               /*
-                * If I am not the creator and it belongs to an extant process,
-                * continue.
-                */
-               hdr = (PGShmemHeader *) memAddress;
-               if (hdr->creatorPID != getpid())
                {
-                       if (kill(hdr->creatorPID, 0) == 0 || errno != ESRCH)
-                       {
-                               shmdt(memAddress);
-                               continue;               /* segment belongs to a 
live process */
-                       }
+                       NextShmemSegID++;
+                       continue;
                }
-
-               /*
-                * The segment appears to be from a dead Postgres process, or 
from a
-                * previous cycle of life in this same process.  Zap it, if 
possible,
-                * and any associated dynamic shared memory segments, as well. 
This
-                * probably shouldn't fail, but if it does, assume the segment 
belongs
-                * to someone else after all, and continue quietly.
-                */
-               if (hdr->dsm_control != 0)
-                       dsm_cleanup_using_control_segment(hdr->dsm_control);
+               hdr_dsm = ((PGShmemHeader *) memAddress)->dsm_control;
                shmdt(memAddress);
-               if (shmctl(shmid, IPC_RMID, NULL) < 0)
-                       continue;
+               memAddress = NULL;
 
-               /*
-                * Now try again to create the segment.
-                */
-               memAddress = InternalIpcMemoryCreate(NextShmemSegID, sysvsize);
-               if (memAddress)
-                       break;                          /* successful create 
and attach */
+               switch (IpcMemoryAnalyze(shmid))
+               {
+                       case SHMSTATE_ANALYSIS_FAILURE:
+                       case SHMSTATE_ATTACHED:
+                               ereport(FATAL,
+                                               
(errcode(ERRCODE_LOCK_FILE_EXISTS),
+                                                errmsg("pre-existing shared 
memory block (key %lu, ID %lu) is still in use",
+                                                               (unsigned long) 
NextShmemSegID,
+                                                               (unsigned long) 
shmid),
+                                                errhint("Terminate any old 
server processes associated with data directory \"%s\".",
+                                                                DataDir)));
+                       case SHMSTATE_EEXISTS:
 
-               /*
-                * Can only get here if some other process managed to create 
the same
-                * shmem key before we did.  Let him have that one, loop around 
to try
-                * next key.
-                */
+                               /*
+                                * To our surprise, some other process deleted 
the segment
+                                * between InternalIpcMemoryCreate() and 
IpcMemoryAnalyze().
+                                * Moments earlier, we would have seen 
SHMSTATE_FOREIGN.  Try
+                                * that same ID again.
+                                */
+                               elog(LOG,
+                                        "shared memory block (key %lu, ID %lu) 
deleted during startup",
+                                        (unsigned long) NextShmemSegID,
+                                        (unsigned long) shmid);
+                               break;
+                       case SHMSTATE_FOREIGN:
+                               NextShmemSegID++;
+                               break;
+                       case SHMSTATE_UNATTACHED:
+
+                               /*
+                                * The segment pertains to DataDir, and every 
process that had
+                                * used it has died or detached.  Zap it, if 
possible, and any
+                                * associated dynamic shared memory segments, 
as well.  This
+                                * shouldn't fail, but if it does, assume the 
segment belongs
+                                * to someone else after all, and try the next 
candidate.
+                                * Otherwise, try again to create the segment.  
That may fail
+                                * if some other process creates the same shmem 
key before we
+                                * do, in which case we'll try the next key.
+                                */
+                               if (hdr_dsm != 0)
+                                       
dsm_cleanup_using_control_segment(hdr_dsm);
+                               if (shmctl(shmid, IPC_RMID, NULL) < 0)
+                                       NextShmemSegID++;
+               }
        }
 
-       /*
-        * OK, we created a new segment.  Mark it as created by this process. 
The
-        * order of assignments here is critical so that another Postgres 
process
-        * can't see the header as valid but belonging to an invalid PID!
-        */
+       /* Initialize new segment. */
        hdr = (PGShmemHeader *) memAddress;
        hdr->creatorPID = getpid();
        hdr->magic = PGShmemMagic;
@@ -816,7 +861,10 @@ PGSharedMemoryDetach(void)
 /*
  * Attach to shared memory and make sure it has a Postgres header
  *
- * Returns attach address if OK, else NULL
+ * Returns attach address if OK, else NULL.  Treat a NULL return value like
+ * SHMSTATE_FOREIGN.  (EACCES is common.  ENOENT, a narrow possibility,
+ * implies SHMSTATE_EEXISTS, but one can safely treat SHMSTATE_EEXISTS like
+ * SHMSTATE_FOREIGN.)
  */
 static PGShmemHeader *
 PGSharedMemoryAttach(IpcMemoryKey key, IpcMemoryId *shmid)
diff --git a/src/backend/port/win32_shmem.c b/src/backend/port/win32_shmem.c
index f8ca52e..3888c26 100644
--- a/src/backend/port/win32_shmem.c
+++ b/src/backend/port/win32_shmem.c
@@ -170,14 +170,9 @@ EnableLockPagesPrivilege(int elevel)
  *
  * Create a shared memory segment of the given size and initialize its
  * standard header.
- *
- * makePrivate means to always create a new segment, rather than attach to
- * or recycle any existing segment. On win32, we always create a new segment,
- * since there is no need for recycling (segments go away automatically
- * when the last backend exits)
  */
 PGShmemHeader *
-PGSharedMemoryCreate(Size size, bool makePrivate, int port,
+PGSharedMemoryCreate(Size size, int port,
                                         PGShmemHeader **shim)
 {
        void       *memAddress;
diff --git a/src/backend/postmaster/postmaster.c 
b/src/backend/postmaster/postmaster.c
index a33a131..3db9f34 100644
--- a/src/backend/postmaster/postmaster.c
+++ b/src/backend/postmaster/postmaster.c
@@ -2565,7 +2565,7 @@ reset_shared(int port)
         * determine IPC keys.  This helps ensure that we will clean up dead IPC
         * objects if the postmaster crashes and is restarted.
         */
-       CreateSharedMemoryAndSemaphores(false, port);
+       CreateSharedMemoryAndSemaphores(port);
 }
 
 
@@ -4914,7 +4914,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                /* And run the backend */
                BackendRun(&port);              /* does not return */
@@ -4928,7 +4928,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitAuxiliaryProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AuxiliaryProcessMain(argc - 2, argv + 2);       /* does not 
return */
        }
@@ -4941,7 +4941,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AutoVacLauncherMain(argc - 2, argv + 2);        /* does not 
return */
        }
@@ -4954,7 +4954,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                AutoVacWorkerMain(argc - 2, argv + 2);  /* does not return */
        }
@@ -4972,7 +4972,7 @@ SubPostmasterMain(int argc, char *argv[])
                InitProcess();
 
                /* Attach process to shared data structures */
-               CreateSharedMemoryAndSemaphores(false, 0);
+               CreateSharedMemoryAndSemaphores(0);
 
                /* Fetch MyBgworkerEntry from shared memory */
                shmem_slot = atoi(argv[1] + 15);
diff --git a/src/backend/storage/ipc/ipci.c b/src/backend/storage/ipc/ipci.c
index 0c86a58..091244d 100644
--- a/src/backend/storage/ipc/ipci.c
+++ b/src/backend/storage/ipc/ipci.c
@@ -88,12 +88,9 @@ RequestAddinShmemSpace(Size size)
  * through the same code as before.  (Note that the called routines mostly
  * check IsUnderPostmaster, rather than EXEC_BACKEND, to detect this case.
  * This is a bit code-wasteful and could be cleaned up.)
- *
- * If "makePrivate" is true then we only need private memory, not shared
- * memory.  This is true for a standalone backend, false for a postmaster.
  */
 void
-CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
+CreateSharedMemoryAndSemaphores(int port)
 {
        PGShmemHeader *shim = NULL;
 
@@ -166,7 +163,7 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
                /*
                 * Create the shmem segment
                 */
-               seghdr = PGSharedMemoryCreate(size, makePrivate, port, &shim);
+               seghdr = PGSharedMemoryCreate(size, port, &shim);
 
                InitShmemAccess(seghdr);
 
@@ -187,12 +184,9 @@ CreateSharedMemoryAndSemaphores(bool makePrivate, int port)
        {
                /*
                 * We are reattaching to an existing shared memory segment. This
-                * should only be reached in the EXEC_BACKEND case, and even 
then only
-                * with makePrivate == false.
+                * should only be reached in the EXEC_BACKEND case.
                 */
-#ifdef EXEC_BACKEND
-               Assert(!makePrivate);
-#else
+#ifndef EXEC_BACKEND
                elog(PANIC, "should be attached to shared memory already");
 #endif
        }
diff --git a/src/backend/utils/init/miscinit.c 
b/src/backend/utils/init/miscinit.c
index 3d10aa5..9b2ba8c 100644
diff --git a/src/backend/utils/init/postinit.c 
b/src/backend/utils/init/postinit.c
index b636b1e..ab31ee3e 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -444,9 +444,11 @@ InitCommunication(void)
        {
                /*
                 * We're running a postgres bootstrap process or a standalone 
backend.
-                * Create private "shmem" and semaphores.
+                * Though we won't listen on PostPortNumber, use it to select a 
shmem
+                * key.  This increases the chance of detecting a leftover live
+                * backend of this DataDir.
                 */
-               CreateSharedMemoryAndSemaphores(true, 0);
+               CreateSharedMemoryAndSemaphores(PostPortNumber);
        }
 }
 
diff --git a/src/include/storage/ipc.h b/src/include/storage/ipc.h
index 6a05a89..02f4b1b 100644
--- a/src/include/storage/ipc.h
+++ b/src/include/storage/ipc.h
@@ -76,6 +76,6 @@ extern void on_exit_reset(void);
 /* ipci.c */
 extern PGDLLIMPORT shmem_startup_hook_type shmem_startup_hook;
 
-extern void CreateSharedMemoryAndSemaphores(bool makePrivate, int port);
+extern void CreateSharedMemoryAndSemaphores(int port);
 
 #endif                                                 /* IPC_H */
diff --git a/src/include/storage/pg_shmem.h b/src/include/storage/pg_shmem.h
index 6b1e040..0a65196 100644
--- a/src/include/storage/pg_shmem.h
+++ b/src/include/storage/pg_shmem.h
@@ -30,7 +30,7 @@ typedef struct PGShmemHeader  /* standard header for all 
Postgres shmem */
 {
        int32           magic;                  /* magic # to identify Postgres 
segments */
 #define PGShmemMagic  679834894
-       pid_t           creatorPID;             /* PID of creating process */
+       pid_t           creatorPID;             /* PID of creating process (set 
but unread) */
        Size            totalsize;              /* total size of segment */
        Size            freeoffset;             /* offset to first free space */
        dsm_handle      dsm_control;    /* ID of dynamic shared memory control 
seg */
@@ -64,8 +64,8 @@ extern void PGSharedMemoryReAttach(void);
 extern void PGSharedMemoryNoReAttach(void);
 #endif
 
-extern PGShmemHeader *PGSharedMemoryCreate(Size size, bool makePrivate,
-                                        int port, PGShmemHeader **shim);
+extern PGShmemHeader *PGSharedMemoryCreate(Size size, int port,
+                                        PGShmemHeader **shim);
 extern bool PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2);
 extern void PGSharedMemoryDetach(void);
 
diff --git a/src/test/perl/PostgresNode.pm b/src/test/perl/PostgresNode.pm
index 8a2c6fc..5c5f90f 100644
--- a/src/test/perl/PostgresNode.pm
+++ b/src/test/perl/PostgresNode.pm
@@ -104,7 +104,8 @@ our @EXPORT = qw(
   get_new_node
 );
 
-our ($test_localhost, $test_pghost, $last_port_assigned, @all_nodes, $died);
+our ($use_tcp, $test_localhost, $test_pghost, $last_host_assigned,
+       $last_port_assigned, @all_nodes, $died);
 
 # Windows path to virtual file system root
 
@@ -118,13 +119,14 @@ if ($Config{osname} eq 'msys')
 INIT
 {
 
-       # PGHOST is set once and for all through a single series of tests when
-       # this module is loaded.
-       $test_localhost = "127.0.0.1";
-       $test_pghost =
-         $TestLib::windows_os ? $test_localhost : TestLib::tempdir_short;
-       $ENV{PGHOST}     = $test_pghost;
-       $ENV{PGDATABASE} = 'postgres';
+       # Set PGHOST for backward compatibility.  This doesn't work for own_host
+       # nodes, so prefer to not rely on this when writing new tests.
+       $use_tcp            = $TestLib::windows_os;
+       $test_localhost     = "127.0.0.1";
+       $last_host_assigned = 1;
+       $test_pghost        = $use_tcp ? $test_localhost : 
TestLib::tempdir_short;
+       $ENV{PGHOST}        = $test_pghost;
+       $ENV{PGDATABASE}    = 'postgres';
 
        # Tracking of last port value assigned to accelerate free port lookup.
        $last_port_assigned = int(rand() * 16384) + 49152;
@@ -155,7 +157,9 @@ sub new
                _host    => $pghost,
                _basedir => "$TestLib::tmp_check/t_${testname}_${name}_data",
                _name    => $name,
-               _logfile => "$TestLib::log_path/${testname}_${name}.log"
+               _logfile_generation => 0,
+               _logfile_base       => "$TestLib::log_path/${testname}_${name}",
+               _logfile            => 
"$TestLib::log_path/${testname}_${name}.log"
        };
 
        bless $self, $class;
@@ -473,8 +477,9 @@ sub init
                print $conf "max_wal_senders = 0\n";
        }
 
-       if ($TestLib::windows_os)
+       if ($use_tcp)
        {
+               print $conf "unix_socket_directories = ''\n";
                print $conf "listen_addresses = '$host'\n";
        }
        else
@@ -536,12 +541,11 @@ sub backup
 {
        my ($self, $backup_name) = @_;
        my $backup_path = $self->backup_dir . '/' . $backup_name;
-       my $port        = $self->port;
        my $name        = $self->name;
 
        print "# Taking pg_basebackup $backup_name from node \"$name\"\n";
-       TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-p', 
$port,
-               '--no-sync');
+       TestLib::system_or_bail('pg_basebackup', '-D', $backup_path, '-h',
+               $self->host, '-p', $self->port, '--no-sync');
        print "# Backup finished\n";
        return;
 }
@@ -651,6 +655,7 @@ sub init_from_backup
 {
        my ($self, $root_node, $backup_name, %params) = @_;
        my $backup_path = $root_node->backup_dir . '/' . $backup_name;
+       my $host        = $self->host;
        my $port        = $self->port;
        my $node_name   = $self->name;
        my $root_name   = $root_node->name;
@@ -677,6 +682,15 @@ sub init_from_backup
                qq(
 port = $port
 ));
+       if ($use_tcp)
+       {
+               $self->append_conf('postgresql.conf', "listen_addresses = 
'$host'");
+       }
+       else
+       {
+               $self->append_conf('postgresql.conf',
+                       "unix_socket_directories = '$host'");
+       }
        $self->enable_streaming($root_node) if $params{has_streaming};
        $self->enable_restoring($root_node) if $params{has_restoring};
        return;
@@ -684,17 +698,45 @@ port = $port
 
 =pod
 
-=item $node->start()
+=item $node->rotate_logfile()
+
+Switch to a new PostgreSQL log file.  This does not alter any running
+PostgreSQL process.  Subsequent method calls, including pg_ctl invocations,
+will use the new name.  Return the new name.
+
+=cut
+
+sub rotate_logfile
+{
+       my ($self) = @_;
+       $self->{_logfile} = sprintf('%s_%d.log',
+               $self->{_logfile_base},
+               ++$self->{_logfile_generation});
+       return $self->{_logfile};
+}
+
+=pod
+
+=item $node->start(%params) => success_or_failure
 
 Wrapper for pg_ctl start
 
 Start the node and wait until it is ready to accept connections.
 
+=over
+
+=item fail_ok => 1
+
+By default, failure terminates the entire F<prove> invocation.  If given,
+instead return a true or false value to indicate success or failure.
+
+=back
+
 =cut
 
 sub start
 {
-       my ($self) = @_;
+       my ($self, %params) = @_;
        my $port   = $self->port;
        my $pgdata = $self->data_dir;
        my $name   = $self->name;
@@ -707,10 +749,34 @@ sub start
        {
                print "# pg_ctl start failed; logfile:\n";
                print TestLib::slurp_file($self->logfile);
-               BAIL_OUT("pg_ctl start failed");
+               BAIL_OUT("pg_ctl start failed") unless $params{fail_ok};
+               return 0;
        }
 
        $self->_update_pid(1);
+       return 1;
+}
+
+=pod
+
+=item $node->kill9()
+
+Send SIGKILL (signal 9) to the postmaster.
+
+Note: if the node is already known stopped, this does nothing.
+However, if we think it's running and it's not, it's important for
+this to fail.  Otherwise, tests might fail to detect server crashes.
+
+=cut
+
+sub kill9
+{
+       my ($self) = @_;
+       my $name = $self->name;
+       return unless defined $self->{_pid};
+       print "### Killing node \"$name\" using signal 9\n";
+       kill(9, $self->{_pid}) or BAIL_OUT("kill(9, $self->{_pid}) failed");
+       $self->{_pid} = undef;
        return;
 }
 
@@ -943,7 +1009,7 @@ sub _update_pid
 
 =pod
 
-=item PostgresNode->get_new_node(node_name)
+=item PostgresNode->get_new_node(node_name, %params)
 
 Build a new object of class C<PostgresNode> (or of a subclass, if you have
 one), assigning a free port number.  Remembers the node, to prevent its port
@@ -952,6 +1018,22 @@ shut down when the test script exits.
 
 You should generally use this instead of C<PostgresNode::new(...)>.
 
+=over
+
+=item port => [1,65535]
+
+By default, this function assigns a port number to each node.  Specify this to
+force a particular port number.  The caller is responsible for evaluating
+potential conflicts and privilege requirements.
+
+=item own_host => 1
+
+By default, all nodes use the same PGHOST value.  If specified, generate a
+PGHOST specific to this node.  This allows multiple nodes to use the same
+port.
+
+=back
+
 For backwards compatibility, it is also exported as a standalone function,
 which can only create objects of class C<PostgresNode>.
 
@@ -960,10 +1042,11 @@ which can only create objects of class C<PostgresNode>.
 sub get_new_node
 {
        my $class = 'PostgresNode';
-       $class = shift if 1 < scalar @_;
-       my $name  = shift;
-       my $found = 0;
-       my $port  = $last_port_assigned;
+       $class = shift if scalar(@_) % 2 != 1;
+       my ($name, %params) = @_;
+       my $port_is_forced = defined $params{port};
+       my $found          = $port_is_forced;
+       my $port = $port_is_forced ? $params{port} : $last_port_assigned;
 
        while ($found == 0)
        {
@@ -980,13 +1063,15 @@ sub get_new_node
                        $found = 0 if ($node->port == $port);
                }
 
-               # Check to see if anything else is listening on this TCP port.
-               # This is *necessary* on Windows, and seems like a good idea
-               # on Unixen as well, even though we don't ask the postmaster
-               # to open a TCP port on Unix.
+               # Check to see if anything else is listening on this TCP port.  
Accept
+               # only ports available for all possible listen_addresses 
values, so
+               # the caller can harness this port for the widest range of 
purposes.
+               # This is *necessary* on Windows, and seems like a good idea on 
Unixen
+               # as well, even though we don't ask the postmaster to open a 
TCP port
+               # on Unix.
                if ($found == 1)
                {
-                       my $iaddr = inet_aton($test_localhost);
+                       my $iaddr = inet_aton('0.0.0.0');
                        my $paddr = sockaddr_in($port, $iaddr);
                        my $proto = getprotobyname("tcp");
 
@@ -1002,16 +1087,35 @@ sub get_new_node
                }
        }
 
-       print "# Found free port $port\n";
+       print "# Found port $port\n";
+
+       # Select a host.
+       my $host = $test_pghost;
+       if ($params{own_host})
+       {
+               if ($use_tcp)
+               {
+                       # This assumes $use_tcp platforms treat every address in
+                       # 127.0.0.1/24, not just 127.0.0.1, as a usable 
loopback.
+                       $last_host_assigned++;
+                       $last_host_assigned > 254 and BAIL_OUT("too many 
own_host nodes");
+                       $host = '127.0.0.' . $last_host_assigned;
+               }
+               else
+               {
+                       $host = "$test_pghost/$name"; # Assume $name =~ 
/^[-_a-zA-Z0-9]+$/
+                       mkdir $host;
+               }
+       }
 
        # Lock port number found by creating a new node
-       my $node = $class->new($name, $test_pghost, $port);
+       my $node = $class->new($name, $host, $port);
 
        # Add node to list of nodes
        push(@all_nodes, $node);
 
        # And update port for next time
-       $last_port_assigned = $port;
+       $port_is_forced or $last_port_assigned = $port;
 
        return $node;
 }
@@ -1402,9 +1506,8 @@ $stderr);
 
 =item $node->command_ok(...)
 
-Runs a shell command like TestLib::command_ok, but with PGPORT
-set so that the command will default to connecting to this
-PostgresNode.
+Runs a shell command like TestLib::command_ok, but with PGHOST and PGPORT set
+so that the command will default to connecting to this PostgresNode.
 
 =cut
 
@@ -1414,6 +1517,7 @@ sub command_ok
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_ok(@_);
@@ -1424,7 +1528,7 @@ sub command_ok
 
 =item $node->command_fails(...)
 
-TestLib::command_fails with our PGPORT. See command_ok(...)
+TestLib::command_fails with our connection parameters. See command_ok(...)
 
 =cut
 
@@ -1434,6 +1538,7 @@ sub command_fails
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_fails(@_);
@@ -1444,7 +1549,7 @@ sub command_fails
 
 =item $node->command_like(...)
 
-TestLib::command_like with our PGPORT. See command_ok(...)
+TestLib::command_like with our connection parameters. See command_ok(...)
 
 =cut
 
@@ -1454,6 +1559,7 @@ sub command_like
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_like(@_);
@@ -1464,7 +1570,8 @@ sub command_like
 
 =item $node->command_checks_all(...)
 
-TestLib::command_checks_all with our PGPORT. See command_ok(...)
+TestLib::command_checks_all with our connection parameters. See
+command_ok(...)
 
 =cut
 
@@ -1474,6 +1581,7 @@ sub command_checks_all
 
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::command_checks_all(@_);
@@ -1498,6 +1606,7 @@ sub issues_sql_like
 
        my ($self, $cmd, $expected_sql, $test_name) = @_;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        truncate $self->logfile, 0;
@@ -1512,8 +1621,8 @@ sub issues_sql_like
 
 =item $node->run_log(...)
 
-Runs a shell command like TestLib::run_log, but with PGPORT set so
-that the command will default to connecting to this PostgresNode.
+Runs a shell command like TestLib::run_log, but with connection parameters set
+so that the command will default to connecting to this PostgresNode.
 
 =cut
 
@@ -1521,6 +1630,7 @@ sub run_log
 {
        my $self = shift;
 
+       local $ENV{PGHOST} = $self->host;
        local $ENV{PGPORT} = $self->port;
 
        TestLib::run_log(@_);
diff --git a/src/test/recovery/t/016_shm.pl b/src/test/recovery/t/016_shm.pl
new file mode 100644
index 0000000..9021562
--- /dev/null
+++ b/src/test/recovery/t/016_shm.pl
@@ -0,0 +1,154 @@
+#
+# Tests of pg_shmem.h functions
+#
+use strict;
+use warnings;
+use IPC::Run 'run';
+use PostgresNode;
+use Test::More;
+use TestLib;
+
+plan tests => 6;
+
+my $tempdir = TestLib::tempdir;
+my $port;
+
+# When using Unix sockets, we can dictate the port number.  In the absence of
+# collisions from other shmget() activity, gnat starts with key 0x7d001
+# (512001), and flea starts with key 0x7d002 (512002).
+$port = 512 unless $PostgresNode::use_tcp;
+
+# Log "ipcs" diffs on a best-effort basis, swallowing any error.
+my $ipcs_before = "$tempdir/ipcs_before";
+eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };
+
+sub log_ipcs
+{
+       eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
+       return;
+}
+
+# Node setup.
+sub init_start
+{
+       my $name = shift;
+       my $ret = PostgresNode->get_new_node($name, port => $port, own_host => 
1);
+       defined($port) or $port = $ret->port;    # same port for all nodes
+       $ret->init;
+       $ret->start;
+       log_ipcs();
+       return $ret;
+}
+my $gnat = init_start 'gnat';
+my $flea = init_start 'flea';
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+$flea->restart;       # flea ignores the shm key gnat abandoned.
+log_ipcs();
+poll_start($gnat);    # gnat recycles its former shm key.
+log_ipcs();
+
+# After clean shutdown, the nodes swap shm keys.
+$gnat->stop;
+$flea->restart;
+log_ipcs();
+$gnat->start;
+log_ipcs();
+
+# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
+# Use a regress.c function to emulate the responsiveness of a backend working
+# through a CPU-intensive task.
+$gnat->safe_psql('postgres', <<EOSQL);
+CREATE FUNCTION wait_pid(int)
+   RETURNS void
+   AS '$ENV{REGRESS_SHLIB}'
+   LANGUAGE C STRICT;
+EOSQL
+my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
+my ($stdout, $stderr);
+my $slow_client = IPC::Run::start(
+       [
+               'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
+               '-c', $slow_query
+       ],
+       '<',
+       \undef,
+       '>',
+       \$stdout,
+       '2>',
+       \$stderr,
+       IPC::Run::timeout(900));    # five times the poll_query_until timeout
+ok( $gnat->poll_query_until(
+               'postgres',
+               "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", 
'1'),
+       'slow query started');
+my $slow_pid = $gnat->safe_psql('postgres',
+       "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
+$gnat->kill9;
+unlink($gnat->data_dir . '/postmaster.pid');
+$gnat->rotate_logfile;    # on Windows, can't open old log for writing
+log_ipcs();
+# Reject ordinary startup.
+ok(!$gnat->start(fail_ok => 1), 'live query blocks restart');
+like(
+       slurp_file($gnat->logfile),
+       qr/pre-existing shared memory block/,
+       'detected live backend via shared memory');
+# Reject single-user startup.
+my $single_stderr;
+ok( !run_log(
+               [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
+               '<', \('SELECT 1 + 1'), '2>', \$single_stderr),
+       'live query blocks --single');
+print STDERR $single_stderr;
+like(
+       $single_stderr,
+       qr/pre-existing shared memory block/,
+       'single-user mode detected live backend via shared memory');
+log_ipcs();
+# Fail to reject startup if shm key N has become available and we crash while
+# using key N+1.  This is unwanted, but expected.  Windows is immune, because
+# its GetSharedMemName() use DataDir strings, not numeric keys.
+$flea->stop;    # release first key
+is( $gnat->start(fail_ok => 1),
+       $TestLib::windows_os ? 0 : 1,
+       'key turnover fools only sysv_shmem.c');
+$gnat->stop;     # release first key (no-op on $TestLib::windows_os)
+$flea->start;    # grab first key
+# cleanup
+TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
+$slow_client->finish;    # client has detected backend termination
+log_ipcs();
+poll_start($gnat);       # recycle second key
+
+$gnat->stop;
+$flea->stop;
+log_ipcs();
+
+
+# When postmaster children are slow to exit after postmaster death, we may
+# need retries to start a new postmaster.
+sub poll_start
+{
+       my ($node) = @_;
+
+       my $max_attempts = 180 * 10;
+       my $attempts     = 0;
+
+       while ($attempts < $max_attempts)
+       {
+               $node->start(fail_ok => 1) && return 1;
+
+               # Wait 0.1 second before retrying.
+               usleep(100_000);
+
+               $attempts++;
+       }
+
+       # No success within 180 seconds.  Try one last time without fail_ok, 
which
+       # will BAIL_OUT unless it succeeds.
+       $node->start && return 1;
+       return 0;
+}
diff --git a/src/tools/msvc/vcregress.pl b/src/tools/msvc/vcregress.pl
index 4018313..4a1e6e3 100644
--- a/src/tools/msvc/vcregress.pl
+++ b/src/tools/msvc/vcregress.pl
@@ -205,6 +205,7 @@ sub tap_check
        local %ENV = %ENV;
        $ENV{PERL5LIB}   = "$topdir/src/test/perl;$ENV{PERL5LIB}";
        $ENV{PG_REGRESS} = "$topdir/$Config/pg_regress/pg_regress";
+       $ENV{REGRESS_SHLIB} = "$topdir/src/test/regress/regress.dll";
 
        $ENV{TESTDIR} = "$dir";
 

Reply via email to