On Wed, Sep 13, 2017 at 07:18:44PM +0100, Chris Wilson wrote:
> The goal here is to trim an excess posting read and keep the predicates
> tight (reusing the same predicate throughout for GT ack/handling).
> 
> add/remove: 0/0 grow/shrink: 2/1 up/down: 26/-30 (-4)
> function                                     old     new   delta
> gen8_gt_irq_handler                          282     301     +19
> cherryview_irq_handler                       450     457      +7
> gen8_irq_handler                            1653    1623     -30
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_irq.c | 54 
> +++++++++++++++++++++++------------------
>  1 file changed, 30 insertions(+), 24 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index 91a2c5dbf2da..e12321cb7403 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -1375,31 +1375,34 @@ static irqreturn_t gen8_gt_irq_ack(struct 
> drm_i915_private *dev_priv,
>  }
>  
>  static void gen8_gt_irq_handler(struct drm_i915_private *dev_priv,
> -                             u32 gt_iir[4])
> +                             u32 master_ctl, u32 gt_iir[4])
>  {
> -     if (gt_iir[0]) {
> +     if (master_ctl & (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ)) {
>               gen8_cs_irq_handler(dev_priv->engine[RCS],
>                                   gt_iir[0], GEN8_RCS_IRQ_SHIFT);
>               gen8_cs_irq_handler(dev_priv->engine[BCS],
>                                   gt_iir[0], GEN8_BCS_IRQ_SHIFT);
>       }
>  
> -     if (gt_iir[1]) {
> +     if (master_ctl & (GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ)) {
>               gen8_cs_irq_handler(dev_priv->engine[VCS],
>                                   gt_iir[1], GEN8_VCS1_IRQ_SHIFT);
>               gen8_cs_irq_handler(dev_priv->engine[VCS2],
>                                   gt_iir[1], GEN8_VCS2_IRQ_SHIFT);
>       }
>  
> -     if (gt_iir[3])
> +     if (master_ctl & GEN8_GT_VECS_IRQ) {
>               gen8_cs_irq_handler(dev_priv->engine[VECS],
>                                   gt_iir[3], GEN8_VECS_IRQ_SHIFT);
> +     }
>  
> -     if (gt_iir[2] & dev_priv->pm_rps_events)
> -             gen6_rps_irq_handler(dev_priv, gt_iir[2]);
> +     if (master_ctl & (GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)) {
> +             if (gt_iir[2] & dev_priv->pm_rps_events)
> +                     gen6_rps_irq_handler(dev_priv, gt_iir[2]);
>  
> -     if (gt_iir[2] & dev_priv->pm_guc_events)
> -             gen9_guc_irq_handler(dev_priv, gt_iir[2]);
> +             if (gt_iir[2] & dev_priv->pm_guc_events)
> +                     gen9_guc_irq_handler(dev_priv, gt_iir[2]);
> +     }
>  }
>  
>  static bool bxt_port_hotplug_long_detect(enum port port, u32 val)
> @@ -1984,7 +1987,7 @@ static irqreturn_t cherryview_irq_handler(int irq, void 
> *arg)
>               I915_WRITE(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
>               POSTING_READ(GEN8_MASTER_IRQ);
>  
> -             gen8_gt_irq_handler(dev_priv, gt_iir);
> +             gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
>  
>               if (hotplug_status)
>                       i9xx_hpd_irq_handler(dev_priv, hotplug_status);
> @@ -2518,36 +2521,39 @@ gen8_de_irq_handler(struct drm_i915_private 
> *dev_priv, u32 master_ctl)
>       return ret;
>  }
>  
> +#define GEN8_GT_IRQ_BITS (GEN8_GT_RCS_IRQ | GEN8_GT_BCS_IRQ | \
> +                       GEN8_GT_VCS1_IRQ | GEN8_GT_VCS2_IRQ | \
> +                       GEN8_GT_VECS_IRQ | GEN8_GT_PM_IRQ | GEN8_GT_GUC_IRQ)
> +
>  static irqreturn_t gen8_irq_handler(int irq, void *arg)
>  {
> -     struct drm_device *dev = arg;
> -     struct drm_i915_private *dev_priv = to_i915(dev);
> -     u32 master_ctl;
> -     u32 gt_iir[4] = {};
> -     irqreturn_t ret;
> +     struct drm_i915_private *dev_priv = arg;
> +     u32 master_ctl, gt_iir[4];
> +     irqreturn_t ret = IRQ_NONE;
>  
>       if (!intel_irqs_enabled(dev_priv))
>               return IRQ_NONE;
>  
> -     master_ctl = I915_READ_FW(GEN8_MASTER_IRQ);
> -     master_ctl &= ~GEN8_MASTER_IRQ_CONTROL;
> +     master_ctl = I915_READ_FW(GEN8_MASTER_IRQ) &  ~GEN8_MASTER_IRQ_CONTROL;
>       if (!master_ctl)
>               return IRQ_NONE;
>  
>       I915_WRITE_FW(GEN8_MASTER_IRQ, 0);
>  
> -     /* IRQs are synced during runtime_suspend, we don't require a wakeref */
> -     disable_rpm_wakeref_asserts(dev_priv);
> -
>       /* Find, clear, then process each source of interrupt */
> -     ret = gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
> -     gen8_gt_irq_handler(dev_priv, gt_iir);
> -     ret |= gen8_de_irq_handler(dev_priv, master_ctl);
> +     if (master_ctl & GEN8_GT_IRQ_BITS)
> +             ret |= gen8_gt_irq_ack(dev_priv, master_ctl, gt_iir);
> +
> +     if (master_ctl & ~GEN8_GT_IRQ_BITS) {
> +             disable_rpm_wakeref_asserts(dev_priv);

Hmm. Why is this needed for DE interrupts but not for GT interrupts? Is
it just the _FW() vs. non-_FW() register accesses in the two codepaths?
If I'm reading things right, we still have some non-_FW() accesses in
the RPS handler at least.

Does BDW+ no longer suffer from the "hang when accessing the same
cacheline from multiple CPUs" issue?

> +             ret |= gen8_de_irq_handler(dev_priv, master_ctl);
> +             enable_rpm_wakeref_asserts(dev_priv);
> +     }

This reminds me that I'd still like to split the DE handling into
separate ack/handle steps as well.

>  
>       I915_WRITE_FW(GEN8_MASTER_IRQ, GEN8_MASTER_IRQ_CONTROL);
> -     POSTING_READ_FW(GEN8_MASTER_IRQ);
>  
> -     enable_rpm_wakeref_asserts(dev_priv);
> +     if (master_ctl & GEN8_GT_IRQ_BITS)
> +             gen8_gt_irq_handler(dev_priv, master_ctl, gt_iir);
>  
>       return ret;
>  }
> -- 
> 2.14.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to