Relax the required locking so that, instead of disabling irqs, it disables tasklets (bottom halves).
Allow the caller to perform the locking using utility functions, which
allows the caller to combine a number of register accesses within the
same critical section. Implement this for capability reads and command
buffer submission.

Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
Reviewed-by: Sinclair Yeh <syeh at vmware.com>
---
 drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c |   8 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c    |   9 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.h    | 130 ++++++++++++++++++++++++++++-----
 drivers/gpu/drm/vmwgfx/vmwgfx_fence.c  |   6 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c   |  40 +++++++---
 drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c  |  20 ++---
 drivers/gpu/drm/vmwgfx/vmwgfx_irq.c    |   2 +-
 7 files changed, 163 insertions(+), 52 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index 8a76821..b8d2436 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -290,18 +290,20 @@ void vmw_cmdbuf_header_free(struct vmw_cmdbuf_header 
*header)
  */
 static int vmw_cmdbuf_header_submit(struct vmw_cmdbuf_header *header)
 {
-       struct vmw_cmdbuf_man *man = header->man;
+       struct vmw_private *dev_priv = header->man->dev_priv;
        u32 val;

        if (sizeof(header->handle) > 4)
                val = (header->handle >> 32);
        else
                val = 0;
-       vmw_write(man->dev_priv, SVGA_REG_COMMAND_HIGH, val);
+       vmw_hw_lock(dev_priv, true);
+       __vmw_write(dev_priv, SVGA_REG_COMMAND_HIGH, val);

        val = (header->handle & 0xFFFFFFFFULL);
        val |= header->cb_context & SVGA_CB_CONTEXT_MASK;
-       vmw_write(man->dev_priv, SVGA_REG_COMMAND_LOW, val);
+       __vmw_write(dev_priv, SVGA_REG_COMMAND_LOW, val);
+       vmw_hw_unlock(dev_priv, true);

        return header->cb_header->status;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index a09cf85..2a5496a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -631,7 +631,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned 
long chipset)
        ttm_lock_init(&dev_priv->reservation_sem);
        spin_lock_init(&dev_priv->hw_lock);
        spin_lock_init(&dev_priv->waiter_lock);
-       spin_lock_init(&dev_priv->cap_lock);
        spin_lock_init(&dev_priv->svga_lock);

        for (i = vmw_res_context; i < vmw_res_max; ++i) {
@@ -854,10 +853,10 @@ static int vmw_driver_load(struct drm_device *dev, 
unsigned long chipset)
        }

        if (dev_priv->has_mob) {
-               spin_lock(&dev_priv->cap_lock);
-               vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
-               dev_priv->has_dx = !!vmw_read(dev_priv, SVGA_REG_DEV_CAP);
-               spin_unlock(&dev_priv->cap_lock);
+               vmw_hw_lock(dev_priv, true);
+               __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_DX);
+               dev_priv->has_dx = !!__vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+               vmw_hw_unlock(dev_priv, true);
        }


diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h 
b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index a8ae9df..f7dcbcc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -385,7 +385,6 @@ struct vmw_private {
        bool has_gmr;
        bool has_mob;
        spinlock_t hw_lock;
-       spinlock_t cap_lock;
        bool has_dx;

        /*
@@ -546,34 +545,127 @@ static inline struct vmw_master *vmw_master(struct 
drm_master *master)
        return (struct vmw_master *) master->driver_priv;
 }

-/*
- * The locking here is fine-grained, so that it is performed once
- * for every read- and write operation. This is of course costly, but we
- * don't perform much register access in the timing critical paths anyway.
- * Instead we have the extra benefit of being sure that we don't forget
- * the hw lock around register accesses.
+/**
+ * vmw_hw_lock - Perform necessary locking for register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @lock_bh: Disable bottom halves. Always set to true unless irqs are 
disabled.
  */
-static inline void vmw_write(struct vmw_private *dev_priv,
-                            unsigned int offset, uint32_t value)
+static inline void vmw_hw_lock(struct vmw_private *dev_priv,
+                              bool lock_bh)
 {
-       unsigned long irq_flags;
+       if (lock_bh)
+               spin_lock_bh(&dev_priv->hw_lock);
+       else
+               spin_lock(&dev_priv->hw_lock);
+}

-       spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+/**
+ * vmw_hw_unlock - Perform necessary unlocking after register access
+ *
+ * @dev_priv: Pointer to device private structure
+ * @unlock_bh: Re-enable bottom halves. Always set to true if it was set to
+ * true in the corresponding lock.
+ */
+static inline void vmw_hw_unlock(struct vmw_private *dev_priv,
+                                bool unlock_bh)
+{
+       if (unlock_bh)
+               spin_unlock_bh(&dev_priv->hw_lock);
+       else
+               spin_unlock(&dev_priv->hw_lock);
+}
+
+/**
+ * __vmw_write - Write a value to a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock must be held and that we're safe against racing
+ * against tasklets.
+ */
+static inline void __vmw_write(struct vmw_private *dev_priv,
+                              unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+       lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+       WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
        outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
        outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT);
-       spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
 }

-static inline uint32_t vmw_read(struct vmw_private *dev_priv,
-                               unsigned int offset)
+/**
+ * __vmw_read - Read a value from a device register with external locking.
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * Returns: The value read from the register.
+ *
+ * Note that the caller must ensure that when calling this function,
+ * @dev_priv::hw_lock is held and that we're safe against racing
+ * against tasklets.
+ */
+static inline uint32_t __vmw_read(struct vmw_private *dev_priv,
+                                 unsigned int offset)
 {
-       unsigned long irq_flags;
        u32 val;

-       spin_lock_irqsave(&dev_priv->hw_lock, irq_flags);
+#ifdef CONFIG_LOCKDEP
+       lockdep_assert_held_once(&dev_priv->hw_lock.rlock);
+       WARN_ON_ONCE(!(in_softirq() || irqs_disabled()));
+#endif
        outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT);
        val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT);
-       spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags);
+
+       return val;
+}
+
+/**
+ * vmw_write - Write a value to a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * @value: Value to write
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable bottom halves.
+ */
+static inline void vmw_write(struct vmw_private *dev_priv,
+                            unsigned int offset, uint32_t value)
+{
+#ifdef CONFIG_LOCKDEP
+       WARN_ON_ONCE(irqs_disabled());
+#endif
+       vmw_hw_lock(dev_priv, true);
+       __vmw_write(dev_priv, offset, value);
+       vmw_hw_unlock(dev_priv, true);
+}
+
+/**
+ * vmw_read - Read a value from a device register
+ *
+ * @dev_priv: Pointer to the device private structure
+ * @offset: Device register offset
+ * Returns: The value read from the register.
+ *
+ * This function may not be called when irqs are disabled, since
+ * its call to vmw_hw_unlock will re-enable bottom halves.
+ */
+static inline uint32_t vmw_read(struct vmw_private *dev_priv,
+                               unsigned int offset)
+{
+       u32 val;
+
+#ifdef CONFIG_LOCKDEP
+       WARN_ON_ONCE(irqs_disabled());
+#endif
+       vmw_hw_lock(dev_priv, true);
+       val = __vmw_read(dev_priv, offset);
+       vmw_hw_unlock(dev_priv, true);

        return val;
 }
@@ -722,8 +814,8 @@ extern void vmw_fifo_commit(struct vmw_private *dev_priv, 
uint32_t bytes);
 extern void vmw_fifo_commit_flush(struct vmw_private *dev_priv, uint32_t 
bytes);
 extern int vmw_fifo_send_fence(struct vmw_private *dev_priv,
                               uint32_t *seqno);
-extern void vmw_fifo_ping_host_locked(struct vmw_private *, uint32_t reason);
-extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason);
+extern void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+                              bool lock_bh);
 extern bool vmw_fifo_have_3d(struct vmw_private *dev_priv);
 extern bool vmw_fifo_have_pitchlock(struct vmw_private *dev_priv);
 extern int vmw_fifo_emit_dummy_query(struct vmw_private *dev_priv,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index f40c36e..ac66aad 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -149,7 +149,7 @@ static bool vmw_fence_enable_signaling(struct fence *f)
        if (seqno - fence->base.seqno < VMW_FENCE_WRAP)
                return false;

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, false);

        return true;
 }
@@ -183,7 +183,7 @@ static long vmw_fence_wait(struct fence *f, bool intr, 
signed long timeout)
        if (likely(vmw_fence_obj_signaled(fence)))
                return timeout;

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
        vmw_seqno_waiter_add(dev_priv);

        spin_lock_bh(f->lock);
@@ -525,7 +525,7 @@ void vmw_fence_obj_flush(struct vmw_fence_obj *fence)
 {
        struct vmw_private *dev_priv = fman_from_fence(fence)->dev_priv;

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
 }

 static void vmw_fence_destroy(struct vmw_fence_obj *fence)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
index a8baf5f..a70cc88 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
@@ -49,10 +49,10 @@ bool vmw_fifo_have_3d(struct vmw_private *dev_priv)
                if (!dev_priv->has_mob)
                        return false;

-               spin_lock(&dev_priv->cap_lock);
-               vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
-               result = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
-               spin_unlock(&dev_priv->cap_lock);
+               vmw_hw_lock(dev_priv, true);
+               __vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D);
+               result = __vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+               vmw_hw_unlock(dev_priv, true);

                return (result != 0);
        }
@@ -163,14 +163,32 @@ int vmw_fifo_init(struct vmw_private *dev_priv, struct 
vmw_fifo_state *fifo)
        return 0;
 }

-void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason)
+/**
+ * vmw_fifo_ping_host - Notify the device that there are device commands
+ * to process
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @reason: A valid reason to ping the host. See device headers.
+ * @lock_bh: Always set to true unless irqs are disabled in which case
+ * it must be set to false.
+ *
+ * Aka the device "doorbell". The device has an optimization that allows
+ * avoiding calling the relatively expensive register write to
+ * SVGA_REG_SYNC if the SVGA_FIFO_BUSY mmio register is set to 1, meaning
+ * that the doorbell has already been called. The SVGA_FIFO_BUSY mmio register
+ * will be cleared by the device when a new write to SVGA_REG_SYNC is needed
+ * to notify the device of pending commands.
+ */
+void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason,
+                       bool lock_bh)
 {
        u32 *fifo_mem = dev_priv->mmio_virt;

-       preempt_disable();
-       if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0)
-               vmw_write(dev_priv, SVGA_REG_SYNC, reason);
-       preempt_enable();
+       if (cmpxchg(fifo_mem + SVGA_FIFO_BUSY, 0, 1) == 0) {
+               vmw_hw_lock(dev_priv, lock_bh);
+               __vmw_write(dev_priv, SVGA_REG_SYNC, reason);
+               vmw_hw_unlock(dev_priv, lock_bh);
+       }
 }

 void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state 
*fifo)
@@ -256,7 +274,7 @@ static int vmw_fifo_wait(struct vmw_private *dev_priv,
        if (likely(!vmw_fifo_is_full(dev_priv, bytes)))
                return 0;

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_FIFOFULL, true);
        if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK))
                return vmw_fifo_wait_noirq(dev_priv, bytes,
                                           interruptible, timeout);
@@ -490,7 +508,7 @@ static void vmw_local_fifo_commit(struct vmw_private 
*dev_priv, uint32_t bytes)
                vmw_mmio_write(0, fifo_mem + SVGA_FIFO_RESERVED);
        mb();
        up_write(&fifo_state->rwsem);
-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
        mutex_unlock(&fifo_state->fifo_mutex);
 }

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03..8ce0886 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -159,14 +159,14 @@ static int vmw_fill_compat_cap(struct vmw_private 
*dev_priv, void *bounce,
                (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
        compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;

-       spin_lock(&dev_priv->cap_lock);
+       vmw_hw_lock(dev_priv, true);
        for (i = 0; i < max_size; ++i) {
-               vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+               __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
                compat_cap->pairs[i][0] = i;
                compat_cap->pairs[i][1] = vmw_mask_multisample
-                       (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+                       (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
        }
-       spin_unlock(&dev_priv->cap_lock);
+       vmw_hw_unlock(dev_priv, true);

        return 0;
 }
@@ -216,13 +216,13 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void 
*data,
                if (num > SVGA3D_DEVCAP_MAX)
                        num = SVGA3D_DEVCAP_MAX;

-               spin_lock(&dev_priv->cap_lock);
+               vmw_hw_lock(dev_priv, true);
                for (i = 0; i < num; ++i) {
-                       vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+                       __vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
                        *bounce32++ = vmw_mask_multisample
-                               (i, vmw_read(dev_priv, SVGA_REG_DEV_CAP));
+                               (i, __vmw_read(dev_priv, SVGA_REG_DEV_CAP));
                }
-               spin_unlock(&dev_priv->cap_lock);
+               vmw_hw_unlock(dev_priv, true);
        } else if (gb_objects) {
                ret = vmw_fill_compat_cap(dev_priv, bounce, size);
                if (unlikely(ret != 0))
@@ -420,7 +420,7 @@ unsigned int vmw_fops_poll(struct file *filp, struct 
poll_table_struct *wait)
        struct vmw_private *dev_priv =
                vmw_priv(file_priv->minor->dev);

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
        return drm_poll(filp, wait);
 }

@@ -443,6 +443,6 @@ ssize_t vmw_fops_read(struct file *filp, char __user 
*buffer,
        struct vmw_private *dev_priv =
                vmw_priv(file_priv->minor->dev);

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);
        return drm_read(filp, buffer, count, offset);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c 
b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
index 12aaed7..af6970b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
@@ -246,7 +246,7 @@ int vmw_wait_seqno(struct vmw_private *dev_priv,
        if (likely(vmw_seqno_passed(dev_priv, seqno)))
                return 0;

-       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC);
+       vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC, true);

        if (!(fifo->capabilities & SVGA_FIFO_CAP_FENCE))
                return vmw_fallback_wait(dev_priv, lazy, true, seqno,
-- 
2.4.3

Reply via email to