On Mon, Mar 21, 2016 at 04:26:57PM -0300, Paulo Zanoni wrote:
> FBC and the frontbuffer tracking infrastructure were designed assuming
> that user space applications would follow a specific set of rules
> regarding frontbuffer management and mmapping. I recently discovered
> that current user space is not exactly following these rules: my
> investigation led me to the conclusion that the generic backend from
> SNA - used by SKL and the other new platforms without a specific
> backend - is not issuing sw_finish/dirty_fb IOCTLs when using the CPU
> and WC mmaps. I discovered this when running lightdm: I would type the
> password and nothing would appear on the screen unless I moved the
> mouse over the place where the letters were supposed to appear.
> 
> Since we can't detect whether the current DRM master is a well-behaved
> application or not, let's use the sledgehammer approach and assume
> that every user space client on every gen is bad by disabling FBC when
> the frontbuffer is CPU/WC mmapped.  This will allow us to enable FBC
> on SKL, moving its FBC-related power savings from "always 0%" to "> 0%
> in some cases".
> 
> There's also some additional code to disable the workaround for
> frontbuffers that ever received a sw_finish or dirty_fb IOCTL, since
> the current bad user space never issues those calls. This should help
> for some specific cases and our IGT tests, but won't be enough for a
> new xf86-video-intel using TearFree.
> 
> Notice that we may need an equivalent commit for PSR too. We also need
> to investigate whether PSR needs to be disabled on GTT mmaps: if
> that's the case, we'll have problems since we seem to always have GTT
> mmaps on our current user space, so we would end up keeping PSR
> disabled forever.
> 
> v2:
>   - Rename some things.
>   - Disable the WA on sw_finish/dirty_fb (Daniel).
>   - Fix NULL obj checking.
>   - Restric to Gen 9.
> 
> Cc: Rodrigo Vivi <rodrigo.v...@intel.com>
> Cc: Daniel Vetter <daniel.vet...@ffwll.ch>
> Cc: Chris Wilson <ch...@chris-wilson.co.uk>
> Signed-off-by: Paulo Zanoni <paulo.r.zan...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.h          |  9 +++++++++
>  drivers/gpu/drm/i915/i915_gem.c          | 19 +++++++++++-------
>  drivers/gpu/drm/i915/intel_display.c     |  1 +
>  drivers/gpu/drm/i915/intel_drv.h         |  3 +++
>  drivers/gpu/drm/i915/intel_fbc.c         | 33 
> ++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_frontbuffer.c | 31 ++++++++++++++++++++++++++++++
>  6 files changed, 89 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index efca534..45e31d2 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -873,6 +873,13 @@ enum fb_op_origin {
>       ORIGIN_DIRTYFB,
>  };
>  
> +enum fb_mmap_wa_flags {
> +     FB_MMAP_WA_CPU =        1 << 0,
> +     FB_MMAP_WA_GTT =        1 << 1,
> +     FB_MMAP_WA_DISABLE =    1 << 2,
> +     FB_MMAP_WA_FLAG_COUNT = 3,
> +};
> +
>  struct intel_fbc {
>       /* This is always the inner lock when overlapping with struct_mutex and
>        * it's the outer lock when overlapping with stolen_lock. */
> @@ -910,6 +917,7 @@ struct intel_fbc {
>                       unsigned int stride;
>                       int fence_reg;
>                       unsigned int tiling_mode;
> +                     unsigned int mmap_wa_flags;
>               } fb;
>       } state_cache;
>  
> @@ -2143,6 +2151,7 @@ struct drm_i915_gem_object {
>       unsigned int cache_dirty:1;
>  
>       unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
> +     unsigned int fb_mmap_wa_flags:FB_MMAP_WA_FLAG_COUNT;
>  
>       unsigned int pin_display;
>  
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8588c83..d44f27e 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1692,6 +1692,8 @@ i915_gem_sw_finish_ioctl(struct drm_device *dev, void 
> *data,
>               goto unlock;
>       }
>  
> +     intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_DISABLE);
> +
>       /* Pinned buffers may be scanout, so flush the cache */
>       if (obj->pin_display)
>               i915_gem_object_flush_cpu_write_domain(obj);
> @@ -1724,7 +1726,7 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
>                   struct drm_file *file)
>  {
>       struct drm_i915_gem_mmap *args = data;
> -     struct drm_gem_object *obj;
> +     struct drm_i915_gem_object *obj;
>       unsigned long addr;
>  
>       if (args->flags & ~(I915_MMAP_WC))
> @@ -1733,19 +1735,19 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void 
> *data,
>       if (args->flags & I915_MMAP_WC && !cpu_has_pat)
>               return -ENODEV;
>  
> -     obj = drm_gem_object_lookup(dev, file, args->handle);
> -     if (obj == NULL)
> +     obj = to_intel_bo(drm_gem_object_lookup(dev, file, args->handle));
> +     if (&obj->base == NULL)
>               return -ENOENT;
>  
>       /* prime objects have no backing filp to GEM mmap
>        * pages from.
>        */
> -     if (!obj->filp) {
> -             drm_gem_object_unreference_unlocked(obj);
> +     if (!obj->base.filp) {
> +             drm_gem_object_unreference_unlocked(&obj->base);
>               return -EINVAL;
>       }
>  
> -     addr = vm_mmap(obj->filp, 0, args->size,
> +     addr = vm_mmap(obj->base.filp, 0, args->size,
>                      PROT_READ | PROT_WRITE, MAP_SHARED,
>                      args->offset);
>       if (args->flags & I915_MMAP_WC) {
> @@ -1761,10 +1763,12 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void 
> *data,
>                       addr = -ENOMEM;
>               up_write(&mm->mmap_sem);
>       }
> -     drm_gem_object_unreference_unlocked(obj);
> +     drm_gem_object_unreference_unlocked(&obj->base);
>       if (IS_ERR((void *)addr))
>               return addr;
>  
> +     intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_CPU);
> +
>       args->addr_ptr = (uint64_t) addr;
>  
>       return 0;
> @@ -2099,6 +2103,7 @@ i915_gem_mmap_gtt(struct drm_file *file,
>               goto out;
>  
>       *offset = drm_vma_node_offset_addr(&obj->base.vma_node);
> +     intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_GTT);
>  
>  out:
>       drm_gem_object_unreference(&obj->base);
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index 28ead66..6e7aa9e 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -14705,6 +14705,7 @@ static int intel_user_framebuffer_dirty(struct 
> drm_framebuffer *fb,
>       struct drm_i915_gem_object *obj = intel_fb->obj;
>  
>       mutex_lock(&dev->struct_mutex);
> +     intel_fb_obj_mmap_wa(obj, FB_MMAP_WA_DISABLE);
>       intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
>       mutex_unlock(&dev->struct_mutex);
>  
> diff --git a/drivers/gpu/drm/i915/intel_drv.h 
> b/drivers/gpu/drm/i915/intel_drv.h
> index ba45245..bbea7c4 100644
> --- a/drivers/gpu/drm/i915/intel_drv.h
> +++ b/drivers/gpu/drm/i915/intel_drv.h
> @@ -1082,6 +1082,7 @@ void intel_frontbuffer_flip_complete(struct drm_device 
> *dev,
>                                    unsigned frontbuffer_bits);
>  void intel_frontbuffer_flip(struct drm_device *dev,
>                           unsigned frontbuffer_bits);
> +void intel_fb_obj_mmap_wa(struct drm_i915_gem_object *obj, unsigned int 
> flags);
>  unsigned int intel_fb_align_height(struct drm_device *dev,
>                                  unsigned int height,
>                                  uint32_t pixel_format,
> @@ -1371,6 +1372,8 @@ void intel_fbc_invalidate(struct drm_i915_private 
> *dev_priv,
>                         enum fb_op_origin origin);
>  void intel_fbc_flush(struct drm_i915_private *dev_priv,
>                    unsigned int frontbuffer_bits, enum fb_op_origin origin);
> +void intel_fbc_mmap_wa(struct drm_i915_private *dev_priv,
> +                    unsigned int frontbuffer_bits, unsigned int flags);
>  void intel_fbc_cleanup_cfb(struct drm_i915_private *dev_priv);
>  
>  /* intel_hdmi.c */
> diff --git a/drivers/gpu/drm/i915/intel_fbc.c 
> b/drivers/gpu/drm/i915/intel_fbc.c
> index 7101880..718ac38 100644
> --- a/drivers/gpu/drm/i915/intel_fbc.c
> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> @@ -745,6 +745,14 @@ static void intel_fbc_update_state_cache(struct 
> intel_crtc *crtc)
>       cache->fb.stride = fb->pitches[0];
>       cache->fb.fence_reg = obj->fence_reg;
>       cache->fb.tiling_mode = obj->tiling_mode;
> +     cache->fb.mmap_wa_flags = obj->fb_mmap_wa_flags;
> +}
> +
> +static bool need_mmap_disable_workaround(struct intel_fbc *fbc)
> +{
> +     unsigned int flags = fbc->state_cache.fb.mmap_wa_flags;
> +
> +     return (flags & FB_MMAP_WA_CPU) && !(flags & FB_MMAP_WA_DISABLE);
>  }
>  
>  static bool intel_fbc_can_activate(struct intel_crtc *crtc)
> @@ -816,6 +824,11 @@ static bool intel_fbc_can_activate(struct intel_crtc 
> *crtc)
>               return false;
>       }
>  
> +     if (need_mmap_disable_workaround(fbc)) {
> +             fbc->no_fbc_reason = "FB is CPU or WC mmapped";
> +             return false;
> +     }
> +
>       return true;
>  }
>  
> @@ -1008,6 +1021,26 @@ out:
>       mutex_unlock(&fbc->lock);
>  }
>  
> +void intel_fbc_mmap_wa(struct drm_i915_private *dev_priv,
> +                    unsigned int frontbuffer_bits, unsigned int flags)
> +{
> +     struct intel_fbc *fbc = &dev_priv->fbc;
> +
> +     if (!fbc_supported(dev_priv))
> +             return;
> +
> +     mutex_lock(&fbc->lock);
> +
> +     if (fbc->enabled &&
> +         (intel_fbc_get_frontbuffer_bit(fbc) & frontbuffer_bits)) {
> +             fbc->state_cache.fb.mmap_wa_flags = flags;
> +             if (need_mmap_disable_workaround(fbc))
> +                     intel_fbc_deactivate(dev_priv);
> +     }
> +
> +     mutex_unlock(&fbc->lock);
> +}
> +
>  /**
>   * intel_fbc_choose_crtc - select a CRTC to enable FBC on
>   * @dev_priv: i915 device instance
> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c 
> b/drivers/gpu/drm/i915/intel_frontbuffer.c
> index ac85357..8d9b327 100644
> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> @@ -241,3 +241,34 @@ void intel_frontbuffer_flip(struct drm_device *dev,
>  
>       intel_frontbuffer_flush(dev, frontbuffer_bits, ORIGIN_FLIP);
>  }
> +
> +/**
> + * intel_fb_obj_mmap_wa - notifies about objects being mmapped
> + * @obj: GEM object being mmapped
> + *
> + * This function gets called whenever an object gets mmapped. Not every user
> + * space application follows the protocol assumed by the frontbuffer tracking
> + * subsystem when it was created, so this mmap notify callback can be used to
> + * completely disable frontbuffer features such as FBC and PSR. Even if at 
> some
> + * point we fix ever user space application, there's still the possibility 
> that
> + * the user may have a new Kernel with the old user space.
> + *
> + * Also notice that there's no munmap API because user space calls munmap()
> + * directly. Even if we had, it probably wouldn't help since munmap() calls 
> are
> + * not common.
> + */
> +void intel_fb_obj_mmap_wa(struct drm_i915_gem_object *obj, unsigned int 
> flags)
> +{
> +     struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> +
> +     if (!IS_GEN9(dev_priv))

Please only IS_SKYLAKE, I don't want to extend this to either bxt or kbl,
and both are still under prelim_hw_support and not yet shipping, i.e. we
can break them ;-)

Wrt the implementation: I think it would be great to get PSR on board to
avoid duplicating the logic. One option that might would be to create an
invlidate on skl with ORIGIN_MMAP_WA, and as long as that's in effect
(until the first sw_finish or dirty_fb) the frontbuffer tracking code
itself would filter all flushes. fbc could then use ORIGIN_MMAP_WA
invalidate to permanently disable (well, not re-enable after flip) until
it sees a flush. And PSR would Just Work (I hope).

Cheers, Daniel

> +             return;
> +
> +     obj->fb_mmap_wa_flags |= flags;
> +
> +     if (!obj->frontbuffer_bits)
> +             return;
> +
> +     intel_fbc_mmap_wa(dev_priv, obj->frontbuffer_bits,
> +                       obj->fb_mmap_wa_flags);
> +}


> -- 
> 2.7.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to