Alvaro Herrera wrote:
> Robert Haas wrote:
> > On Thu, Mar 29, 2018 at 5:51 PM, Andres Freund <and...@anarazel.de> wrote:
> > >> > 2018-03-06 13:20:24.391 GMT [14869] ERROR:  epoll_ctl() failed: Bad 
> > >> > file
> > >> > descriptor
> > >>
> > >> I can confirm this bug exists in single-user mode.
> > >
> > > I'm not sure we need to do anything about this, personally. This seems
> > > like a fairly rare thing to do in a mode that's definitely not intended
> > > to be general purpose.
> > 
> > Mmmph.  I don't really think it's possible to view a user-visible
> > EBADF as anything other than a coding error.
> 
> IMO the problem is not the user-visible EBADF -- the problem is that the
> user might be attempting to clean up from some previous mistake by
> removing a replication slot.  Using single-user mode might be a strange
> tool, but it's not completely unreasonable.  Is it really all that
> difficult to fix slot acquisition for it?

Here's a patch (against pg10) to fix this problem.

-- 
Álvaro Herrera                https://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 59eb9253797776f21267d35b608582b5fd3ff67c Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Tue, 8 May 2018 11:48:56 -0300
Subject: [PATCH 1/2] don't try cond variables if single user mode

---
 src/backend/replication/slot.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index a8a16f55e9..a43f402214 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -351,20 +351,28 @@ retry:
                if (s->in_use && strcmp(name, NameStr(s->data.name)) == 0)
                {
                        /*
-                        * This is the slot we want.  We don't know yet if it's 
active, so
-                        * get ready to sleep on it in case it is.  (We may end 
up not
-                        * sleeping, but we don't want to do this while holding 
the
-                        * spinlock.)
+                        * This is the slot we want; check if it's active under 
some other
+                        * process.  In single user mode, we don't need this 
check.
                         */
-                       ConditionVariablePrepareToSleep(&s->active_cv);
+                       if (IsUnderPostmaster)
+                       {
+                               /*
+                                * Get ready to sleep on it in case it is 
active.  (We may end
+                                * up not sleeping, but we don't want to do 
this while holding
+                                * the spinlock.)
+                                */
+                               ConditionVariablePrepareToSleep(&s->active_cv);
 
-                       SpinLockAcquire(&s->mutex);
+                               SpinLockAcquire(&s->mutex);
 
-                       active_pid = s->active_pid;
-                       if (active_pid == 0)
-                               active_pid = s->active_pid = MyProcPid;
+                               active_pid = s->active_pid;
+                               if (active_pid == 0)
+                                       active_pid = s->active_pid = MyProcPid;
 
-                       SpinLockRelease(&s->mutex);
+                               SpinLockRelease(&s->mutex);
+                       }
+                       else
+                               active_pid = MyProcPid;
                        slot = s;
 
                        break;
-- 
2.11.0

>From 008ec88995b2ae1d851033222bfbd8dfebd7a368 Mon Sep 17 00:00:00 2001
From: Alvaro Herrera <alvhe...@alvh.no-ip.org>
Date: Tue, 8 May 2018 11:49:08 -0300
Subject: [PATCH 2/2] simplify baroque coding

---
 src/backend/replication/slot.c | 34 +++++++++++-----------------------
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c
index a43f402214..664bf2e3ad 100644
--- a/src/backend/replication/slot.c
+++ b/src/backend/replication/slot.c
@@ -99,7 +99,6 @@ ReplicationSlot *MyReplicationSlot = NULL;
 int                    max_replication_slots = 0;      /* the maximum number 
of replication
                                                                                
 * slots */
 
-static void ReplicationSlotDropAcquired(void);
 static void ReplicationSlotDropPtr(ReplicationSlot *slot);
 
 /* internal persistency functions */
@@ -434,7 +433,8 @@ ReplicationSlotRelease(void)
                 * fail, all that may happen is an incomplete cleanup of the 
on-disk
                 * data.
                 */
-               ReplicationSlotDropAcquired();
+               ReplicationSlotDropPtr(MyReplicationSlot);
+               MyReplicationSlot = NULL;
        }
 
        /*
@@ -520,23 +520,10 @@ ReplicationSlotDrop(const char *name, bool nowait)
 
        ReplicationSlotAcquire(name, nowait);
 
-       ReplicationSlotDropAcquired();
-}
+       ReplicationSlotDropPtr(MyReplicationSlot);
 
-/*
- * Permanently drop the currently acquired replication slot.
- */
-static void
-ReplicationSlotDropAcquired(void)
-{
-       ReplicationSlot *slot = MyReplicationSlot;
-
-       Assert(MyReplicationSlot != NULL);
-
-       /* slot isn't acquired anymore */
+       /* no longer my slot */
        MyReplicationSlot = NULL;
-
-       ReplicationSlotDropPtr(slot);
 }
 
 /*
@@ -923,7 +910,7 @@ restart:
                if (s->data.database != dboid)
                        continue;
 
-               /* acquire slot, so ReplicationSlotDropAcquired can be reused  
*/
+               /* acquire slot */
                SpinLockAcquire(&s->mutex);
                /* can't change while ReplicationSlotControlLock is held */
                slotname = NameStr(s->data.name);
@@ -949,16 +936,17 @@ restart:
                                                        slotname, active_pid)));
 
                /*
-                * To avoid duplicating ReplicationSlotDropAcquired() and to 
avoid
-                * holding ReplicationSlotControlLock over filesystem 
operations,
-                * release ReplicationSlotControlLock and use
-                * ReplicationSlotDropAcquired.
+                * To avoid holding ReplicationSlotControlLock over filesystem
+                * operations, release ReplicationSlotControlLock while we drop
+                * the slot.
                 *
                 * As that means the set of slots could change, restart scan 
from the
                 * beginning each time we release the lock.
                 */
                LWLockRelease(ReplicationSlotControlLock);
-               ReplicationSlotDropAcquired();
+               ReplicationSlotDropPtr(MyReplicationSlot);
+               /* no longer my slot */
+               MyReplicationSlot = NULL;
                goto restart;
        }
        LWLockRelease(ReplicationSlotControlLock);
-- 
2.11.0

Reply via email to