On Thu, Jul 10, 2014 at 09:53:43PM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated shadow backing storage to satisfy the constraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object either via a GTT mmapping of, or by
> rendering into, the GEM bo. However, keeping the CPU mmap of the shmemfs
> backing storage coherent with the contiguous shadow is not yet possible.
> Fortuitously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
> 
> This makes the physical constraint of the GEM object transparent to
> userspace, which can then efficiently render into or update such objects
> via the GTT and GPU.
> 
> v2: Fix leak of pci handle spotted by Ville
> v3: Remove the now duplicate call to detach_phys_object during free.
> v4: Wait for rendering before pwrite. As this patch makes it possible to
> render into the phys object, we should make it correct as well!
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> Cc: Ville Syrjälä <ville.syrj...@linux.intel.com>
> Reviewed-by: Ville Syrjälä <ville.syrj...@linux.intel.com>

Ok, I guess you can make a case that this fixes a bug, but then the commit
message should talk about that instead of only mentioning phys object
coherency (which is kinda just a side-effect of the rework to track phys
objects in the sg tables like everything else).

Plus the commit message should mention that kms_cursor_crc provokes this
if people bother to plug in a VGA connector or something.

With that I'll pull it into -fixes with a cc: stable, without requiring a
testcase for the coherency part.
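
For userspace, the new getparam gives a clean detection point. A minimal
probe could look like this (a sketch against the uapi added by the patch,
not part of it; the helper name is made up and drmIoctl-style EINTR
handling is elided):

  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h>

  /* Returns 1 if the kernel keeps phys objects coherent with GTT/GPU
   * access, 0 on older kernels that reject the parameter. */
  static int has_coherent_phys_gtt(int drm_fd)
  {
          struct drm_i915_getparam gp;
          int value = 0;

          memset(&gp, 0, sizeof(gp));
          gp.param = I915_PARAM_HAS_COHERENT_PHYS_GTT;
          gp.value = &value;

          if (ioctl(drm_fd, DRM_IOCTL_I915_GETPARAM, &gp))
                  return 0;

          return value;
  }
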
-Daniel


> ---
>  drivers/gpu/drm/i915/i915_dma.c |   3 +
>  drivers/gpu/drm/i915/i915_drv.h |   6 +-
>  drivers/gpu/drm/i915/i915_gem.c | 207 +++++++++++++++++++++++++++-------------
>  include/uapi/drm/i915_drm.h     |   1 +
>  4 files changed, 150 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 5ae608121f03..1b9798503b77 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1025,6 +1025,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>       case I915_PARAM_CMD_PARSER_VERSION:
>               value = i915_cmd_parser_get_version();
>               break;
> +     case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> +             value = 1;
> +             break;
>       default:
>               DRM_DEBUG("Unknown parameter %d\n", param->param);
>               return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 211db0653831..f81d787d6b47 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1787,10 +1787,10 @@ struct drm_i915_gem_object {
>       unsigned long user_pin_count;
>       struct drm_file *pin_filp;
>  
> -     /** for phy allocated objects */
> -     drm_dma_handle_t *phys_handle;
> -
>       union {
> +             /** for phy allocated objects */
> +             drm_dma_handle_t *phys_handle;
> +
>               struct i915_gem_userptr {
>                       uintptr_t ptr;
>                       unsigned read_only :1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 83ccbabdcacf..d083cf5bdbbd 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -209,40 +209,137 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
>       return 0;
>  }
>  
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
>  {
> -     drm_dma_handle_t *phys = obj->phys_handle;
> +     struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> +     char *vaddr = obj->phys_handle->vaddr;
> +     struct sg_table *st;
> +     struct scatterlist *sg;
> +     int i;
>  
> -     if (!phys)
> -             return;
> +     if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> +             return -EINVAL;
> +
> +     for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> +             struct page *page;
> +             char *src;
> +
> +             page = shmem_read_mapping_page(mapping, i);
> +             if (IS_ERR(page))
> +                     return PTR_ERR(page);
> +
> +             src = kmap_atomic(page);
> +             memcpy(vaddr, src, PAGE_SIZE);
> +             drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +             kunmap_atomic(src);
> +
> +             page_cache_release(page);
> +             vaddr += PAGE_SIZE;
> +     }
> +
> +     i915_gem_chipset_flush(obj->base.dev);
> +
> +     st = kmalloc(sizeof(*st), GFP_KERNEL);
> +     if (st == NULL)
> +             return -ENOMEM;
> +
> +     if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> +             kfree(st);
> +             return -ENOMEM;
> +     }
> +
> +     sg = st->sgl;
> +     sg->offset = 0;
> +     sg->length = obj->base.size;
>  
> -     if (obj->madv == I915_MADV_WILLNEED) {
> +     sg_dma_address(sg) = obj->phys_handle->busaddr;
> +     sg_dma_len(sg) = obj->base.size;
> +
> +     obj->pages = st;
> +     obj->has_dma_mapping = true;
> +     return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> +     int ret;
> +
> +     BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> +     ret = i915_gem_object_set_to_cpu_domain(obj, true);
> +     if (ret) {
> +             /* In the event of a disaster, abandon all caches and
> +              * hope for the best.
> +              */
> +             WARN_ON(ret != -EIO);
> +             obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> +     }
> +
> +     if (obj->madv == I915_MADV_DONTNEED)
> +             obj->dirty = 0;
> +
> +     if (obj->dirty) {
>               struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> -             char *vaddr = phys->vaddr;
> +             char *vaddr = obj->phys_handle->vaddr;
>               int i;
>  
>               for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -                     struct page *page = shmem_read_mapping_page(mapping, i);
> -                     if (!IS_ERR(page)) {
> -                             char *dst = kmap_atomic(page);
> -                             memcpy(dst, vaddr, PAGE_SIZE);
> -                             drm_clflush_virt_range(dst, PAGE_SIZE);
> -                             kunmap_atomic(dst);
> -
> -                             set_page_dirty(page);
> +                     struct page *page;
> +                     char *dst;
> +
> +                     page = shmem_read_mapping_page(mapping, i);
> +                     if (IS_ERR(page))
> +                             continue;
> +
> +                     dst = kmap_atomic(page);
> +                     drm_clflush_virt_range(vaddr, PAGE_SIZE);
> +                     memcpy(dst, vaddr, PAGE_SIZE);
> +                     kunmap_atomic(dst);
> +
> +                     set_page_dirty(page);
> +                     if (obj->madv == I915_MADV_WILLNEED)
>                               mark_page_accessed(page);
> -                             page_cache_release(page);
> -                     }
> +                     page_cache_release(page);
>                       vaddr += PAGE_SIZE;
>               }
> -             i915_gem_chipset_flush(obj->base.dev);
> +             obj->dirty = 0;
>       }
>  
> -#ifdef CONFIG_X86
> -     set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -     drm_pci_free(obj->base.dev, phys);
> -     obj->phys_handle = NULL;
> +     sg_free_table(obj->pages);
> +     kfree(obj->pages);
> +
> +     obj->has_dma_mapping = false;
> +}
> +
> +static void
> +i915_gem_object_release_phys(struct drm_i915_gem_object *obj)
> +{
> +     drm_pci_free(obj->base.dev, obj->phys_handle);
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> +     .get_pages = i915_gem_object_get_pages_phys,
> +     .put_pages = i915_gem_object_put_pages_phys,
> +     .release = i915_gem_object_release_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> +     struct i915_vma *vma, *next;
> +     int ret;
> +
> +     drm_gem_object_reference(&obj->base);
> +     list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> +             if (i915_vma_unbind(vma))
> +                     break;
> +
> +     ret = i915_gem_object_put_pages(obj);
> +     drm_gem_object_unreference(&obj->base);
> +
> +     return ret;
>  }
>  
>  int
> @@ -250,9 +347,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>                           int align)
>  {
>       drm_dma_handle_t *phys;
> -     struct address_space *mapping;
> -     char *vaddr;
> -     int i;
> +     int ret;
>  
>       if (obj->phys_handle) {
>               if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -267,41 +362,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
>       if (obj->base.filp == NULL)
>               return -EINVAL;
>  
> +     ret = drop_pages(obj);
> +     if (ret)
> +             return ret;
> +
>       /* create a new object */
>       phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
>       if (!phys)
>               return -ENOMEM;
>  
> -     vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> -     set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> -     mapping = file_inode(obj->base.filp)->i_mapping;
> -     for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> -             struct page *page;
> -             char *src;
> -
> -             page = shmem_read_mapping_page(mapping, i);
> -             if (IS_ERR(page)) {
> -#ifdef CONFIG_X86
> -                     set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> -                     drm_pci_free(obj->base.dev, phys);
> -                     return PTR_ERR(page);
> -             }
> -
> -             src = kmap_atomic(page);
> -             memcpy(vaddr, src, PAGE_SIZE);
> -             kunmap_atomic(src);
> -
> -             mark_page_accessed(page);
> -             page_cache_release(page);
> -
> -             vaddr += PAGE_SIZE;
> -     }
> -
>       obj->phys_handle = phys;
> -     return 0;
> +     obj->ops = &i915_gem_phys_ops;
> +
> +     return i915_gem_object_get_pages(obj);
>  }
>  
>  static int
> @@ -312,6 +385,14 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>       struct drm_device *dev = obj->base.dev;
>       void *vaddr = obj->phys_handle->vaddr + args->offset;
>       char __user *user_data = to_user_ptr(args->data_ptr);
> +     int ret;
> +
> +     /* We manually control the domain here and pretend that it
> +      * remains coherent i.e. in the GTT domain, like shmem_pwrite.
> +      */
> +     ret = i915_gem_object_wait_rendering(obj, false);
> +     if (ret)
> +             return ret;
>  
>       if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) {
>               unsigned long unwritten;
> @@ -327,6 +408,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
>                       return -EFAULT;
>       }
>  
> +     drm_clflush_virt_range(vaddr, args->size);
>       i915_gem_chipset_flush(dev);
>       return 0;
>  }
> @@ -1047,11 +1129,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>        * pread/pwrite currently are reading and writing from the CPU
>        * perspective, requiring manual detiling by the client.
>        */
> -     if (obj->phys_handle) {
> -             ret = i915_gem_phys_pwrite(obj, args, file);
> -             goto out;
> -     }
> -
>       if (obj->tiling_mode == I915_TILING_NONE &&
>           obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
>           cpu_write_needs_clflush(obj)) {
> @@ -1061,8 +1138,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>                * textures). Fallback to the shmem path in that case. */
>       }
>  
> -     if (ret == -EFAULT || ret == -ENOSPC)
> -             ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +     if (ret == -EFAULT || ret == -ENOSPC) {
> +             if (obj->phys_handle)
> +                     ret = i915_gem_phys_pwrite(obj, args, file);
> +             else
> +                     ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> +     }
>  
>  out:
>       drm_gem_object_unreference(&obj->base);
> @@ -3499,7 +3580,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
>        * Stolen memory is always coherent with the GPU as it is explicitly
>        * marked as wc by the system, or the system is cache-coherent.
>        */
> -     if (obj->stolen)
> +     if (obj->stolen || obj->phys_handle)
>               return false;
>  
>       /* If the GPU is snooping the contents of the CPU cache,
> @@ -4442,8 +4523,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
>               }
>       }
>  
> -     i915_gem_object_detach_phys(obj);
> -
>       /* Stolen objects don't hold a ref, but do hold pin count. Fix that up
>        * before progressing. */
>       if (obj->stolen)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index ff57f07c3249..c6b229fe4a11 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
>  #define I915_PARAM_HAS_EXEC_HANDLE_LUT   26
>  #define I915_PARAM_HAS_WT                     27
>  #define I915_PARAM_CMD_PARSER_VERSION         28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>  
>  typedef struct drm_i915_getparam {
>       int param;
> -- 
> 2.0.1
> 
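
For reference, here is roughly what the newly-working access path looks
like from userspace: ask the kernel for the fake mmap offset that selects
the GTT view of the object, then map it through the drm fd. This is a
sketch, not part of the patch; handle is assumed to name a GEM bo that
the kernel has attached to a phys object (e.g. a cursor), and the caller
must check for MAP_FAILED:

  #include <stdint.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <drm/i915_drm.h>

  static void *map_bo_gtt(int drm_fd, uint32_t handle, size_t size)
  {
          struct drm_i915_gem_mmap_gtt arg;

          memset(&arg, 0, sizeof(arg));
          arg.handle = handle;

          /* The kernel fills in a fake offset that selects the GTT
           * view of this object in a subsequent mmap of the drm fd. */
          if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &arg))
                  return MAP_FAILED;

          return mmap(NULL, size, PROT_READ | PROT_WRITE,
                      MAP_SHARED, drm_fd, arg.offset);
  }

Writes through such a mapping should now be coherent with the physically
contiguous storage, and the same object can also be updated through the
regular pwrite ioctl, since i915_gem_phys_pwrite now waits for rendering
and clflushes the written range.
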

-- 
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch