On Sat, Aug 11, 2012 at 03:41:21PM +0100, Chris Wilson wrote:
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>

What about putting kmap/unmap abstractions into obj->ops (like the dma_buf
interface already has)? Since the pwrite/pread code is already rather
branch-heavy, I hope we wouldn't see the overhead of the indirect call even
in microbenchmarks (haven't checked). And this way we would also neatly
wrap up dma_bufs for pwrite (if anyone ever really wants that ...).
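
Something like this is what I have in mind - just a sketch, the kmap hooks
and their signatures are invented here (modelled on dma_buf_ops), sitting on
top of what I assume are the existing get_pages/put_pages hooks:

struct drm_i915_gem_object_ops {
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj);

	/* hypothetical per-page cpu mapping hooks, dma_buf style */
	void *(*kmap_atomic)(struct drm_i915_gem_object *obj,
			     unsigned long page_num);
	void (*kunmap_atomic)(struct drm_i915_gem_object *obj,
			      unsigned long page_num, void *vaddr);
	void *(*kmap)(struct drm_i915_gem_object *obj,
		      unsigned long page_num);
	void (*kunmap)(struct drm_i915_gem_object *obj,
		       unsigned long page_num, void *vaddr);
};

pwrite/pread would then call obj->ops->kmap(_atomic) instead of special-casing
obj->stolen in the copy loops.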

The kmap(_atomic) for stolen-mem backed objects would boil down to just the
pointer arithmetic, and kunmap would be a no-op.
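
Roughly (again only a sketch, names invented, error handling elided):

static void *i915_gem_stolen_kmap_atomic(struct drm_i915_gem_object *obj,
					 unsigned long page_num)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;

	/* stolen memory is contiguous, so "mapping" a page is just
	 * computing its address within the linear stolen mapping */
	return (char *)(dev_priv->mm.stolen_base + obj->stolen->start +
			(page_num << PAGE_SHIFT));
}

static void i915_gem_stolen_kunmap_atomic(struct drm_i915_gem_object *obj,
					  unsigned long page_num, void *vaddr)
{
	/* nothing to tear down for the linear stolen mapping */
}

The shmem variants would simply wrap kmap_atomic(sg_page(...)) and
kunmap_atomic(), i.e. exactly what the copy loops in this patch do inline.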

Cheers, Daniel
> ---
>  drivers/gpu/drm/i915/i915_gem.c |  169 +++++++++++++++++++++++++--------------
>  1 file changed, 111 insertions(+), 58 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 552f95b..a2fb2aa 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -664,19 +664,17 @@ out:
>   * needs_clflush_before is set and flushes out any written cachelines after
>   * writing if needs_clflush is set. */
>  static int
> -shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_fast(char *vaddr, int shmem_page_offset, int page_length,
>                 char __user *user_data,
>                 bool page_do_bit17_swizzling,
>                 bool needs_clflush_before,
>                 bool needs_clflush_after)
>  {
> -     char *vaddr;
>       int ret;
>  
>       if (unlikely(page_do_bit17_swizzling))
>               return -EINVAL;
>  
> -     vaddr = kmap_atomic(page);
>       if (needs_clflush_before)
>               drm_clflush_virt_range(vaddr + shmem_page_offset,
>                                      page_length);
> @@ -686,7 +684,6 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>       if (needs_clflush_after)
>               drm_clflush_virt_range(vaddr + shmem_page_offset,
>                                      page_length);
> -     kunmap_atomic(vaddr);
>  
>       return ret ? -EFAULT : 0;
>  }
> @@ -694,16 +691,14 @@ shmem_pwrite_fast(struct page *page, int shmem_page_offset, int page_length,
>  /* Only difference to the fast-path function is that this can handle bit17
>   * and uses non-atomic copy and kmap functions. */
>  static int
> -shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
> +shmem_pwrite_slow(char *vaddr, int shmem_page_offset, int page_length,
>                 char __user *user_data,
>                 bool page_do_bit17_swizzling,
>                 bool needs_clflush_before,
>                 bool needs_clflush_after)
>  {
> -     char *vaddr;
>       int ret;
>  
> -     vaddr = kmap(page);
>       if (unlikely(needs_clflush_before || page_do_bit17_swizzling))
>               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>                                            page_length,
> @@ -720,7 +715,6 @@ shmem_pwrite_slow(struct page *page, int shmem_page_offset, int page_length,
>               shmem_clflush_swizzled_range(vaddr + shmem_page_offset,
>                                            page_length,
>                                            page_do_bit17_swizzling);
> -     kunmap(page);
>  
>       return ret ? -EFAULT : 0;
>  }
> @@ -731,6 +725,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>                     struct drm_i915_gem_pwrite *args,
>                     struct drm_file *file)
>  {
> +     struct drm_i915_private *dev_priv = dev->dev_private;
>       ssize_t remain;
>       loff_t offset;
>       char __user *user_data;
> @@ -770,74 +765,132 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
>       if (ret)
>               return ret;
>  
> -     i915_gem_object_pin_pages(obj);
> -
>       offset = args->offset;
>       obj->dirty = 1;
>  
> -     for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> -             struct page *page;
> -             int partial_cacheline_write;
> +     if (obj->stolen) {
> +             while (remain > 0) {
> +                     char *vaddr;
> +                     int partial_cacheline_write;
>  
> -             if (i < offset >> PAGE_SHIFT)
> -                     continue;
> +                     /* Operation in this page
> +                      *
> +                      * shmem_page_offset = offset within page in shmem file
> +                      * page_length = bytes to copy for this page
> +                      */
> +                     shmem_page_offset = offset_in_page(offset);
>  
> -             if (remain <= 0)
> -                     break;
> +                     page_length = remain;
> +                     if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +                             page_length = PAGE_SIZE - shmem_page_offset;
>  
> -             /* Operation in this page
> -              *
> -              * shmem_page_offset = offset within page in shmem file
> -              * page_length = bytes to copy for this page
> -              */
> -             shmem_page_offset = offset_in_page(offset);
> +                     /* If we don't overwrite a cacheline completely we need to be
> +                      * careful to have up-to-date data by first clflushing. Don't
> +                      * overcomplicate things and flush the entire patch. */
> +                     partial_cacheline_write = needs_clflush_before &&
> +                             ((shmem_page_offset | page_length)
> +                              & (boot_cpu_data.x86_clflush_size - 1));
>  
> -             page_length = remain;
> -             if ((shmem_page_offset + page_length) > PAGE_SIZE)
> -                     page_length = PAGE_SIZE - shmem_page_offset;
> +                     vaddr = (char *)(dev_priv->mm.stolen_base + obj->stolen->start + offset);
> +                     page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +                             ((uintptr_t)vaddr & (1 << 17)) != 0;
>  
> -             /* If we don't overwrite a cacheline completely we need to be
> -              * careful to have up-to-date data by first clflushing. Don't
> -              * overcomplicate things and flush the entire patch. */
> -             partial_cacheline_write = needs_clflush_before &&
> -                     ((shmem_page_offset | page_length)
> -                             & (boot_cpu_data.x86_clflush_size - 1));
> +                     ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +                                             user_data, page_do_bit17_swizzling,
> +                                             partial_cacheline_write,
> +                                             needs_clflush_after);
>  
> -             page = sg_page(sg);
> -             page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> -                     (page_to_phys(page) & (1 << 17)) != 0;
> +                     if (ret == 0)
> +                             goto next_stolen;
>  
> -             ret = shmem_pwrite_fast(page, shmem_page_offset, page_length,
> -                                     user_data, page_do_bit17_swizzling,
> -                                     partial_cacheline_write,
> -                                     needs_clflush_after);
> -             if (ret == 0)
> -                     goto next_page;
> +                     hit_slowpath = 1;
> +                     mutex_unlock(&dev->struct_mutex);
>  
> -             hit_slowpath = 1;
> -             mutex_unlock(&dev->struct_mutex);
> -             ret = shmem_pwrite_slow(page, shmem_page_offset, page_length,
> -                                     user_data, page_do_bit17_swizzling,
> -                                     partial_cacheline_write,
> -                                     needs_clflush_after);
> +                     ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +                                             user_data, page_do_bit17_swizzling,
> +                                             partial_cacheline_write,
> +                                             needs_clflush_after);
>  
> -             mutex_lock(&dev->struct_mutex);
> +                     mutex_lock(&dev->struct_mutex);
> +                     if (ret)
> +                             goto out;
>  
> -next_page:
> -             set_page_dirty(page);
> -             mark_page_accessed(page);
> +next_stolen:
> +                     remain -= page_length;
> +                     user_data += page_length;
> +                     offset += page_length;
> +             }
> +     } else {
> +             i915_gem_object_pin_pages(obj);
>  
> -             if (ret)
> -                     goto out;
> +             for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> +                     struct page *page;
> +                     char *vaddr;
> +                     int partial_cacheline_write;
>  
> -             remain -= page_length;
> -             user_data += page_length;
> -             offset += page_length;
> +                     if (i < offset >> PAGE_SHIFT)
> +                             continue;
> +
> +                     if (remain <= 0)
> +                             break;
> +
> +                     /* Operation in this page
> +                      *
> +                      * shmem_page_offset = offset within page in shmem file
> +                      * page_length = bytes to copy for this page
> +                      */
> +                     shmem_page_offset = offset_in_page(offset);
> +
> +                     page_length = remain;
> +                     if ((shmem_page_offset + page_length) > PAGE_SIZE)
> +                             page_length = PAGE_SIZE - shmem_page_offset;
> +
> +                     /* If we don't overwrite a cacheline completely we need to be
> +                      * careful to have up-to-date data by first clflushing. Don't
> +                      * overcomplicate things and flush the entire patch. */
> +                     partial_cacheline_write = needs_clflush_before &&
> +                             ((shmem_page_offset | page_length)
> +                              & (boot_cpu_data.x86_clflush_size - 1));
> +
> +                     page = sg_page(sg);
> +                     page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> +                             (page_to_phys(page) & (1 << 17)) != 0;
> +
> +                     vaddr = kmap_atomic(page);
> +                     ret = shmem_pwrite_fast(vaddr, shmem_page_offset, page_length,
> +                                             user_data, page_do_bit17_swizzling,
> +                                             partial_cacheline_write,
> +                                             needs_clflush_after);
> +
> +                     kunmap_atomic(vaddr);
> +
> +                     if (ret == 0)
> +                             goto next_page;
> +
> +                     hit_slowpath = 1;
> +                     mutex_unlock(&dev->struct_mutex);
> +
> +                     vaddr = kmap(page);
> +                     ret = shmem_pwrite_slow(vaddr, shmem_page_offset, page_length,
> +                                             user_data, page_do_bit17_swizzling,
> +                                             partial_cacheline_write,
> +                                             needs_clflush_after);
> +                     kunmap(page);
> +
> +                     mutex_lock(&dev->struct_mutex);
> +                     if (ret)
> +                             goto out_unpin;
> +
> +next_page:
> +                     remain -= page_length;
> +                     user_data += page_length;
> +                     offset += page_length;
> +             }
> +out_unpin:
> +             i915_gem_object_unpin_pages(obj);
>       }
>  
>  out:
> -     i915_gem_object_unpin_pages(obj);
> -
>       if (hit_slowpath) {
>               /* Fixup: Kill any reinstated backing storage pages */
>               if (obj->madv == __I915_MADV_PURGED)
> -- 
> 1.7.10.4
> 

-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
