On Wed, Nov 11, 2015 at 04:06:13PM +0530, ankitprasad.r.sha...@intel.com wrote:
> From: Chris Wilson <ch...@chris-wilson.co.uk>
> 
> Ville reminded us that stolen memory is not preserved across
> hibernation, and a result of this was that context objects now being
> allocated from stolen were being corrupted on S4 and promptly hanging
> the GPU on resume.

BTW we had a bug that says that the firmware can make a mess of
stolen even during S3 when rabidstart is enabled :(

https://bugs.freedesktop.org/show_bug.cgi?id=91295

> 
> We want to utilise stolen for as much as possible (nothing else will use
> that wasted memory otherwise), so we need a strategy for handling
> general objects allocated from stolen and hibernation. A simple solution
> is to do a CPU copy through the GTT of the stolen object into a fresh
> shmemfs backing store and thenceforth treat it as a normal objects. This
> can be refined in future to either use a GPU copy to avoid the slow
> uncached reads (though it's hibernation!) and recreate stolen objects
> upon resume/first-use. For now, a simple approach should suffice for
> testing the object migration.
> 
> v2:
> Swap PTE for pinned bindings over to the shmemfs. This adds a
> complicated dance, but is required as many stolen objects are likely to
> be pinned for use by the hardware. Swapping the PTEs should not result
> in externally visible behaviour, as each PTE update should be atomic and
> the two pages identical. (danvet)
> 
> safe-by-default, or the principle of least surprise. We need a new flag
> to mark objects that we can wilfully discard and recreate across
> hibernation. (danvet)
> 
> Just use the global_list rather than invent a new stolen_list. This is
> the slowpath hibernate and so adding a new list and the associated
> complexity isn't worth it.
> 
> v3: Rebased on drm-intel-nightly (Ankit)
> 
> v4: Use insert_page to map stolen memory backed pages for migration to
> shmem (Chris)
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sha...@intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c         |  17 ++-
>  drivers/gpu/drm/i915/i915_drv.h         |   7 +
>  drivers/gpu/drm/i915/i915_gem.c         | 235 
> ++++++++++++++++++++++++++++++--
>  drivers/gpu/drm/i915/intel_display.c    |   3 +
>  drivers/gpu/drm/i915/intel_fbdev.c      |   6 +
>  drivers/gpu/drm/i915/intel_pm.c         |   2 +
>  drivers/gpu/drm/i915/intel_ringbuffer.c |   6 +
>  7 files changed, 264 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 9f55209..2bb9e9e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -1036,6 +1036,21 @@ static int i915_pm_suspend(struct device *dev)
>       return i915_drm_suspend(drm_dev);
>  }
>  
> +static int i915_pm_freeze(struct device *dev)
> +{
> +     int ret;
> +
> +     ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev)));
> +     if (ret)
> +             return ret;
> +
> +     ret = i915_pm_suspend(dev);
> +     if (ret)
> +             return ret;
> +
> +     return 0;
> +}
> +
>  static int i915_pm_suspend_late(struct device *dev)
>  {
>       struct drm_device *drm_dev = dev_to_i915(dev)->dev;
> @@ -1700,7 +1715,7 @@ static const struct dev_pm_ops i915_pm_ops = {
>        * @restore, @restore_early : called after rebooting and restoring the
>        *                            hibernation image [PMSG_RESTORE]
>        */
> -     .freeze = i915_pm_suspend,
> +     .freeze = i915_pm_freeze,
>       .freeze_late = i915_pm_suspend_late,
>       .thaw_early = i915_pm_resume_early,
>       .thaw = i915_pm_resume,
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 2c75c32..a4e1bf7 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2079,6 +2079,12 @@ struct drm_i915_gem_object {
>        * Advice: are the backing pages purgeable?
>        */
>       unsigned int madv:2;
> +     /**
> +      * Whereas madv is for userspace, there are certain situations
> +      * where we want I915_MADV_DONTNEED behaviour on internal objects
> +      * without conflating the userspace setting.
> +      */
> +     unsigned int internal_volatile:1;
>  
>       /**
>        * Current tiling mode for the object.
> @@ -3006,6 +3012,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, 
> int slice);
>  void i915_gem_init_swizzling(struct drm_device *dev);
>  void i915_gem_cleanup_ringbuffer(struct drm_device *dev);
>  int __must_check i915_gpu_idle(struct drm_device *dev);
> +int __must_check i915_gem_freeze(struct drm_device *dev);
>  int __must_check i915_gem_suspend(struct drm_device *dev);
>  void __i915_add_request(struct drm_i915_gem_request *req,
>                       struct drm_i915_gem_object *batch_obj,
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e0b9502..dbe1173 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -4506,12 +4506,27 @@ static const struct drm_i915_gem_object_ops 
> i915_gem_object_ops = {
>       .put_pages = i915_gem_object_put_pages_gtt,
>  };
>  
> +static struct address_space *
> +i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file)
> +{
> +     struct address_space *mapping = file_inode(file)->i_mapping;
> +     gfp_t mask;
> +
> +     mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> +     if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> +             /* 965gm cannot relocate objects above 4GiB. */
> +             mask &= ~__GFP_HIGHMEM;
> +             mask |= __GFP_DMA32;
> +     }
> +     mapping_set_gfp_mask(mapping, mask);
> +
> +     return mapping;
> +}
> +
>  struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
>                                                 size_t size)
>  {
>       struct drm_i915_gem_object *obj;
> -     struct address_space *mapping;
> -     gfp_t mask;
>       int ret = 0;
>  
>       obj = i915_gem_object_alloc(dev);
> @@ -4523,15 +4538,7 @@ struct drm_i915_gem_object 
> *i915_gem_alloc_object(struct drm_device *dev,
>               return ERR_PTR(ret);
>       }
>  
> -     mask = GFP_HIGHUSER | __GFP_RECLAIMABLE;
> -     if (IS_CRESTLINE(dev) || IS_BROADWATER(dev)) {
> -             /* 965gm cannot relocate objects above 4GiB. */
> -             mask &= ~__GFP_HIGHMEM;
> -             mask |= __GFP_DMA32;
> -     }
> -
> -     mapping = file_inode(obj->base.filp)->i_mapping;
> -     mapping_set_gfp_mask(mapping, mask);
> +     i915_gem_set_inode_gfp(dev, obj->base.filp);
>  
>       i915_gem_object_init(obj, &i915_gem_object_ops);
>  
> @@ -4708,6 +4715,212 @@ i915_gem_stop_ringbuffers(struct drm_device *dev)
>               dev_priv->gt.stop_ring(ring);
>  }
>  
> +static int
> +i915_gem_object_migrate_stolen_to_shmemfs(struct drm_i915_gem_object *obj)
> +{
> +     struct drm_i915_private *i915 = to_i915(obj->base.dev);
> +     struct i915_vma *vma, *vn;
> +     struct drm_mm_node node;
> +     struct file *file;
> +     struct address_space *mapping;
> +     struct sg_table *stolen_pages, *shmemfs_pages;
> +     int ret, i;
> +
> +     if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +             return -EINVAL;
> +
> +     ret = i915_gem_object_set_to_gtt_domain(obj, false);
> +     if (ret)
> +             return ret;
> +
> +     file = shmem_file_setup("drm mm object", obj->base.size, VM_NORESERVE);
> +     if (IS_ERR(file))
> +             return PTR_ERR(file);
> +     mapping = i915_gem_set_inode_gfp(obj->base.dev, file);
> +
> +     list_for_each_entry_safe(vma, vn, &obj->vma_list, vma_link)
> +             if (i915_vma_unbind(vma))
> +                     continue;
> +
> +     if (obj->madv != I915_MADV_WILLNEED && list_empty(&obj->vma_list)) {
> +             /* Discard the stolen reservation, and replace with
> +              * an unpopulated shmemfs object.
> +              */
> +             obj->madv = __I915_MADV_PURGED;
> +             goto swap_pages;
> +     }
> +
> +     /* stolen objects are already pinned to prevent shrinkage */
> +     memset(&node, 0, sizeof(node));
> +     ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm,
> +                                               &node,
> +                                               4096, 0, I915_CACHE_NONE,
> +                                               0, i915->gtt.mappable_end,
> +                                               DRM_MM_SEARCH_DEFAULT,
> +                                               DRM_MM_CREATE_DEFAULT);
> +     if (ret)
> +             return ret;
> +
> +     for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +             struct page *page;
> +             void *__iomem src;
> +             void *dst;
> +
> +             wmb();
> +             i915->gtt.base.insert_page(&i915->gtt.base,
> +                                        i915_gem_object_get_dma_address(obj, 
> i),
> +                                        node.start,
> +                                        I915_CACHE_NONE,
> +                                        0);
> +             wmb();
> +
> +             page = shmem_read_mapping_page(mapping, i);
> +             if (IS_ERR(page)) {
> +                     ret = PTR_ERR(page);
> +                     goto err_node;
> +             }
> +
> +             src = io_mapping_map_atomic_wc(i915->gtt.mappable, node.start + 
> PAGE_SIZE * i);
> +             dst = kmap_atomic(page);
> +             memcpy_fromio(dst, src, PAGE_SIZE);
> +             kunmap_atomic(dst);
> +             io_mapping_unmap_atomic(src);
> +
> +             page_cache_release(page);
> +     }
> +
> +     wmb();
> +     i915->gtt.base.clear_range(&i915->gtt.base,
> +                                node.start, node.size,
> +                                true);
> +     drm_mm_remove_node(&node);
> +
> +swap_pages:
> +     stolen_pages = obj->pages;
> +     obj->pages = NULL;
> +
> +     obj->base.filp = file;
> +     obj->base.read_domains = I915_GEM_DOMAIN_CPU;
> +     obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +
> +     /* Recreate any pinned binding with pointers to the new storage */
> +     if (!list_empty(&obj->vma_list)) {
> +             ret = i915_gem_object_get_pages_gtt(obj);
> +             if (ret) {
> +                     obj->pages = stolen_pages;
> +                     goto err_file;
> +             }
> +
> +             ret = i915_gem_gtt_prepare_object(obj);
> +             if (ret) {
> +                     i915_gem_object_put_pages_gtt(obj);
> +                     obj->pages = stolen_pages;
> +                     goto err_file;
> +             }
> +
> +             ret = i915_gem_object_set_to_gtt_domain(obj, true);
> +             if (ret) {
> +                     i915_gem_gtt_finish_object(obj);
> +                     i915_gem_object_put_pages_gtt(obj);
> +                     obj->pages = stolen_pages;
> +                     goto err_file;
> +             }
> +
> +             obj->get_page.sg = obj->pages->sgl;
> +             obj->get_page.last = 0;
> +
> +             list_for_each_entry(vma, &obj->vma_list, vma_link) {
> +                     if (!drm_mm_node_allocated(&vma->node))
> +                             continue;
> +
> +                     WARN_ON(i915_vma_bind(vma,
> +                                           obj->cache_level,
> +                                           PIN_UPDATE));
> +             }
> +     } else
> +             list_del(&obj->global_list);
> +
> +     /* drop the stolen pin and backing */
> +     shmemfs_pages = obj->pages;
> +     obj->pages = stolen_pages;
> +
> +     i915_gem_object_unpin_pages(obj);
> +     obj->ops->put_pages(obj);
> +     if (obj->ops->release)
> +             obj->ops->release(obj);
> +
> +     obj->ops = &i915_gem_object_ops;
> +     obj->pages = shmemfs_pages;
> +
> +     return 0;
> +
> +err_node:
> +     wmb();
> +     i915->gtt.base.clear_range(&i915->gtt.base,
> +                                node.start, node.size,
> +                                true);
> +     drm_mm_remove_node(&node);
> +err_file:
> +     fput(file);
> +     obj->base.filp = NULL;
> +     return ret;
> +}
> +
> +int
> +i915_gem_freeze(struct drm_device *dev)
> +{
> +     /* Called before i915_gem_suspend() when hibernating */
> +     struct drm_i915_private *i915 = to_i915(dev);
> +     struct drm_i915_gem_object *obj, *tmp;
> +     struct list_head *phase[] = {
> +             &i915->mm.unbound_list, &i915->mm.bound_list, NULL
> +     }, **p;
> +
> +     /* Across hibernation, the stolen area is not preserved.
> +      * Anything inside stolen must copied back to normal
> +      * memory if we wish to preserve it.
> +      */
> +     for (p = phase; *p; p++) {
> +             struct list_head migrate;
> +             int ret;
> +
> +             INIT_LIST_HEAD(&migrate);
> +             list_for_each_entry_safe(obj, tmp, *p, global_list) {
> +                     if (obj->stolen == NULL)
> +                             continue;
> +
> +                     if (obj->internal_volatile)
> +                             continue;
> +
> +                     /* In the general case, this object may only be alive
> +                      * due to an active reference, and that may disappear
> +                      * when we unbind any of the objects (and so wait upon
> +                      * the GPU and retire requests). To prevent one of the
> +                      * objects from disappearing beneath us, we need to
> +                      * take a reference to each as we build the migration
> +                      * list.
> +                      *
> +                      * This is similar to the strategy required whilst
> +                      * shrinking or evicting objects (for the same reason).
> +                      */
> +                     drm_gem_object_reference(&obj->base);
> +                     list_move(&obj->global_list, &migrate);
> +             }
> +
> +             ret = 0;
> +             list_for_each_entry_safe(obj, tmp, &migrate, global_list) {
> +                     if (ret == 0)
> +                             ret = 
> i915_gem_object_migrate_stolen_to_shmemfs(obj);
> +                     drm_gem_object_unreference(&obj->base);
> +             }
> +             list_splice(&migrate, *p);
> +             if (ret)
> +                     return ret;
> +     }
> +
> +     return 0;
> +}
> +
>  int
>  i915_gem_suspend(struct drm_device *dev)
>  {
> diff --git a/drivers/gpu/drm/i915/intel_display.c 
> b/drivers/gpu/drm/i915/intel_display.c
> index f281e0b..0803922 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -2549,6 +2549,9 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
>       if (IS_ERR(obj))
>               return false;
>  
> +     /* Not to be preserved across hibernation */
> +     obj->internal_volatile = true;
> +
>       obj->tiling_mode = plane_config->tiling;
>       if (obj->tiling_mode == I915_TILING_X)
>               obj->stride = fb->pitches[0];
> diff --git a/drivers/gpu/drm/i915/intel_fbdev.c 
> b/drivers/gpu/drm/i915/intel_fbdev.c
> index f43681e..1d89253 100644
> --- a/drivers/gpu/drm/i915/intel_fbdev.c
> +++ b/drivers/gpu/drm/i915/intel_fbdev.c
> @@ -154,6 +154,12 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>               goto out;
>       }
>  
> +     /* Discard the contents of the BIOS fb across hibernation.
> +      * We really want to completely throwaway the earlier fbdev
> +      * and reconfigure it anyway.
> +      */
> +     obj->internal_volatile = true;
> +
>       fb = __intel_framebuffer_create(dev, &mode_cmd, obj);
>       if (IS_ERR(fb)) {
>               ret = PTR_ERR(fb);
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 03ad276..6ddc20a 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -5181,6 +5181,8 @@ static void valleyview_setup_pctx(struct drm_device 
> *dev)
>       I915_WRITE(VLV_PCBR, pctx_paddr);
>  
>  out:
> +     /* The power context need not be preserved across hibernation */
> +     pctx->internal_volatile = true;
>       DRM_DEBUG_DRIVER("PCBR: 0x%08x\n", I915_READ(VLV_PCBR));
>       dev_priv->vlv_pctx = pctx;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
> b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 5eabaf6..370d96a 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -2090,6 +2090,12 @@ static int intel_alloc_ringbuffer_obj(struct 
> drm_device *dev,
>       if (IS_ERR(obj))
>               return PTR_ERR(obj);
>  
> +     /* Ringbuffer objects are by definition volatile - only the commands
> +      * between HEAD and TAIL need to be preserved and whilst there are
> +      * any commands there, the ringbuffer is pinned by activity.
> +      */
> +     obj->internal_volatile = true;
> +
>       /* mark ring buffers as read-only from GPU side by default */
>       obj->gt_ro = 1;
>  
> -- 
> 1.9.1
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Ville Syrjälä
Intel OTC
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to