The pty driver writes output directly to the 'other' pty's flip
buffers; ie., the pty's write buffer is the other pty's flip buffer.
Flushing the write buffer for a pty was a FIXME because flushing the
other pty's flip buffer allows for a potential deadlock:

CPU 0                                        CPU 1
flush_to_ldisc
  mutex_lock(buf->lock)  <-----------+
  receive_buf                        |
    .                                |  flush_to_ldisc
    .                             +--|--> mutex_lock(buf->lock)
    tty_driver_flush_buffer       |  |    receive_buf
      pty_flush_buffer            |  |      .
        tty_buffer_flush          |  |      .
          mutex_lock(buf->lock) --+  |      tty_buffer_flush_buffer
                                     |        pty_flush_buffer
                                     |          tty_buffer_flush
                                     +--------    mutex_lock(buf->lock)

To resolve the lock inversion deadlock:
- Add a flush_nested() method to the pty driver (which is invoked by
  the line discipline when flushing the driver write buffer in the
  receive_buf() handling).
- Add tty_buffer_flush_nested(), which relies on the caller holding
  its own buf->lock and a shared mutex to so the state of the 'other'
  CPU can be inferred where a deadlock might otherwise have occurred.

Use the port->buf_mutex (which is otherwise unused by the pty driver)
of the lowest-address tty to ensure only a single mutex is used,
regardless of which pty is performing the flush.

Annotate the mutex lock as a nested lock class for lockdep: note the
buf->lock claimed here is the 'other' pty's buf->lock.

Signed-off-by: Peter Hurley <[email protected]>
---
 drivers/tty/pty.c        | 32 ++++++++++++++++++++-
 drivers/tty/tty_buffer.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/tty_flip.h |  2 ++
 3 files changed, 108 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 25c9bc7..54a2d6a 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -227,7 +227,33 @@ static void pty_flush_buffer(struct tty_struct *tty)
 
        if (!to)
                return;
-       /* tty_buffer_flush(to); FIXME */
+
+       tty_buffer_flush(to);
+
+       if (to->packet) {
+               spin_lock_irqsave(&tty->ctrl_lock, flags);
+               tty->ctrl_status |= TIOCPKT_FLUSHWRITE;
+               wake_up_interruptible(&to->read_wait);
+               spin_unlock_irqrestore(&tty->ctrl_lock, flags);
+       }
+}
+
+static void pty_flush_buffer_nested(struct tty_struct *tty)
+{
+       struct tty_struct *to = tty->link;
+       unsigned long flags;
+       struct mutex *mutex;
+
+       if (!to)
+               return;
+
+       /* Use a single mutex for the pty pair */
+       if (tty < to)
+               mutex = &tty->port->buf_mutex;
+       else
+               mutex = &to->port->buf_mutex;
+       tty_buffer_flush_nested(to, mutex);
+
        if (to->packet) {
                spin_lock_irqsave(&tty->ctrl_lock, flags);
                tty->ctrl_status |= TIOCPKT_FLUSHWRITE;
@@ -451,6 +477,7 @@ static const struct tty_operations master_pty_ops_bsd = {
        .write = pty_write,
        .write_room = pty_write_room,
        .flush_buffer = pty_flush_buffer,
+       .flush_nested = pty_flush_buffer_nested,
        .chars_in_buffer = pty_chars_in_buffer,
        .unthrottle = pty_unthrottle,
        .set_termios = pty_set_termios,
@@ -467,6 +494,7 @@ static const struct tty_operations slave_pty_ops_bsd = {
        .write = pty_write,
        .write_room = pty_write_room,
        .flush_buffer = pty_flush_buffer,
+       .flush_nested = pty_flush_buffer_nested,
        .chars_in_buffer = pty_chars_in_buffer,
        .unthrottle = pty_unthrottle,
        .set_termios = pty_set_termios,
@@ -626,6 +654,7 @@ static const struct tty_operations ptm_unix98_ops = {
        .write = pty_write,
        .write_room = pty_write_room,
        .flush_buffer = pty_flush_buffer,
+       .flush_nested = pty_flush_buffer_nested,
        .chars_in_buffer = pty_chars_in_buffer,
        .unthrottle = pty_unthrottle,
        .set_termios = pty_set_termios,
@@ -644,6 +673,7 @@ static const struct tty_operations pty_unix98_ops = {
        .write = pty_write,
        .write_room = pty_write_room,
        .flush_buffer = pty_flush_buffer,
+       .flush_nested = pty_flush_buffer_nested,
        .chars_in_buffer = pty_chars_in_buffer,
        .unthrottle = pty_unthrottle,
        .set_termios = pty_set_termios,
diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 2cea1ab..4d0d2ef 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -233,6 +233,81 @@ void tty_buffer_flush(struct tty_struct *tty)
 }
 
 /**
+ *     tty_buffer_flush_nested         -       flush full tty buffers
+ *     @tty: tty to flush
+ *     @mutex: lock to distinguish 1st from subsequent caller
+ *
+ *     Special-purpose buffer flush for ptys receiving data (in ldisc's
+ *     receive_buf() path). Resolves lock inversion deadlock when both
+ *     paired ptys concurrently attempt flushing the write buffer (the
+ *     other's input tty_buffers).
+ *
+ *     Locking: Caller already holds its buf->lock (@tty is the 'other' pty)
+ *
+ *     Possible lock scenarios:
+ *     1) Caller successfully claims @mutex and @tty->port->buf->lock;
+ *        no contention, flush buffer
+ *     2) Caller successfully claims @mutex but not @tty->port->buf->lock;
+ *        wait for @tty->port->buf->lock;
+ *        2a) @tty->port->buf->lock owner releases lock; caller claims lock
+ *            and completes
+ *        2b) while caller waits, @tty->port->buf->lock owner attempts buffer
+ *            flush of caller's pty; @tty->port->buf->lock owner executes
+ *            scenario 3 below, and releases lock; caller claims lock and
+ *            completes.
+ *     3) Caller cannot successfully claim @mutex
+ *        By inference, @tty->port->buf->lock owner is waiting for caller's
+ *        buf->lock (since caller must hold its own buf->lock to call this
+ *        this function); because the @tty->port->buf->lock owner is waiting,
+ *        its buffers can be flushed without danger of concurrent changes.
+ *
+ *     The buffer flush in scenario 3 cannot free the current head buffer and
+ *     cannot alter its read index: processing the head buffer is in-progress
+ *     by the @tty->port->buf->lock owner (waiting in scenario 2b).
+ *
+ *     The release order of @mutex first and @tty->port->buf->lock next is
+ *     essential; if the release order is reversed, the 1st caller creates a
+ *     race window which could allow another thread to claim the
+ *     @tty->port->buf->lock and yet observe the locked @mutex and erroneously
+ *     concluded the 1st caller is stuck waiting.
+ */
+
+void tty_buffer_flush_nested(struct tty_struct *tty, struct mutex *mutex)
+{
+       struct tty_port *port = tty->port;
+       struct tty_bufhead *buf = &port->buf;
+       struct tty_buffer *next;
+
+       if (mutex_trylock(mutex)) {
+               atomic_inc(&buf->priority);
+               mutex_lock_nested(&buf->lock, 1);
+               while ((next = buf->head->next) != NULL) {
+                       tty_buffer_free(port, buf->head);
+                       buf->head = next;
+               }
+               buf->head->read = buf->head->commit;
+               atomic_dec(&buf->priority);
+               mutex_unlock(mutex);
+               mutex_unlock(&buf->lock);
+       } else {
+               /* Because the buf->lock owner is in-progress processing the
+                * head buffer, free only the buffers after head. Also,
+                * the tail buffer may be in concurrent use by a parallel
+                * pty write, so don't free the last buffer; only truncate.
+                */
+               next = buf->head->next;
+               while (next != NULL) {
+                       struct tty_buffer *tmp = next->next;
+                       if (tmp != NULL)
+                               tty_buffer_free(port, next);
+                       else
+                               tmp->read = tmp->commit;
+                       next = tmp;
+               }
+       }
+}
+
+/**
  *     tty_buffer_request_room         -       grow tty buffer if needed
  *     @tty: tty structure
  *     @size: size desired
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index c28dd52..803e67d 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -38,4 +38,6 @@ static inline int tty_insert_flip_string(struct tty_port 
*port,
 extern void tty_buffer_lock_exclusive(struct tty_port *port);
 extern void tty_buffer_unlock_exclusive(struct tty_port *port);
 
+extern void tty_buffer_flush_nested(struct tty_struct *tty, struct mutex *);
+
 #endif /* _LINUX_TTY_FLIP_H */
-- 
1.8.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to