On Sat, Mar 24, 2012 at 12:20:24AM +0000, Chris Wilson wrote:
> Originally the code tried to allocate a large enough array to perform
> the copy using vmalloc, performance wasn't great and throughput was
> improved by processing each individual relocation entry separately.
> This too is not as efficient as one would desire. A compromise is then
> to allocate a single page (since that is relatively cheap) and process
> the relocations in page size batches.
> 
> x11perf -copywinwin10:        n450/pnv        i3-330m         i5-2520m (cpu)
>                Before:          249000         785000          1280000 (80%)
>                 After:          264000         896000          1280000 (65%)
> 
> Signed-off-by: Chris Wilson <ch...@chris-wilson.co.uk>
> ---
>  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   92 +++++++++++++++++++++++-----
>  1 files changed, 77 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 2d5d41b..1da04db 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -407,28 +407,90 @@ i915_gem_execbuffer_relocate_object(struct drm_i915_gem_object *obj,
>  {
>       struct drm_i915_gem_relocation_entry __user *user_relocs;
>       struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> -     int i, ret;
> +     int i, ret = 0;
> +
> +     if (entry->relocation_count == 0)
> +             return 0;
> +
> +#define N_RELOC_PER_PAGE \
> +     (PAGE_SIZE / sizeof(struct drm_i915_gem_relocation_entry))
>  
>       user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;
> -     for (i = 0; i < entry->relocation_count; i++) {
> -             struct drm_i915_gem_relocation_entry reloc;
>  
> -             if (__copy_from_user_inatomic(&reloc,
> -                                           user_relocs+i,
> -                                           sizeof(reloc)))
> -                     return -EFAULT;
> +     if (entry->relocation_count < N_RELOC_PER_PAGE / 2) {
> +fallback:

Why don't we go slightly more overboard with optimizing this?
drm_i915_gem_relocation_entry is only 32 bytes, so we can fit a few onto
the stack (512 bytes buys us 16 entries). Hence what about this before
the loop?

struct drm_i915_gem_relocation_entry stack_arr[512 / sizeof(struct drm_i915_gem_relocation_entry)];

if (relocation_count > N_RELOC_PER_PAGE) {
        tmp_stor = (void *)__get_free_page(GFP_TEMPORARY);
        if (!tmp_stor)
                goto fallback;
        tmp_stor_size = N_RELOC_PER_PAGE;
} else {
fallback:
        tmp_stor = stack_arr;
        tmp_stor_size = ARRAY_SIZE(stack_arr);
}

Then we also wouldn't have the not-so-beautiful code duplication. Maybe
the added complexity of the page_alloc path isn't even worth it anymore.
Can I bother you to play around with this?
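
Concretely I'm thinking of something along these lines. Completely
untested sketch, so take the details with a grain of salt; the
copy-back and error handling are just lifted from your page path:

	struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
	struct drm_i915_gem_relocation_entry __user *user_relocs;
	/* 512 bytes of stack gives us 16 relocation entries */
	struct drm_i915_gem_relocation_entry stack_arr[512 / sizeof(struct drm_i915_gem_relocation_entry)];
	struct drm_i915_gem_relocation_entry *tmp_stor = stack_arr;
	int tmp_stor_size = ARRAY_SIZE(stack_arr);
	unsigned long page = 0;
	int i = 0, remain, ret = 0;

	if (entry->relocation_count == 0)
		return 0;

	user_relocs = (void __user *)(uintptr_t)entry->relocs_ptr;

	/* Upgrade to a whole page only when the batch is big enough to
	 * amortize the allocation; on failure silently keep the stack
	 * array and just make more round trips. */
	if (entry->relocation_count > tmp_stor_size) {
		page = __get_free_page(GFP_TEMPORARY);
		if (page) {
			tmp_stor = (struct drm_i915_gem_relocation_entry *)page;
			tmp_stor_size = PAGE_SIZE / sizeof(*tmp_stor);
		}
	}

	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = tmp_stor;
		int count = remain < tmp_stor_size ? remain : tmp_stor_size;

		if (__copy_from_user_inatomic(r, user_relocs + i,
					      count * sizeof(r[0]))) {
			ret = -EFAULT;
			goto out;
		}
		remain -= count;

		do {
			u64 offset = r->presumed_offset;

			ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
			if (ret)
				goto out;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				ret = -EFAULT;
				goto out;
			}

			i++;
			r++;
		} while (--count);
	}

out:
	if (page)
		free_page(page);
	return ret;

That way the small-batch case never touches the allocator at all, and
the page allocation becomes a pure (and optional) throughput
optimization.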

Cheers, Daniel

> +             for (i = 0; i < entry->relocation_count; i++) {
> +                     struct drm_i915_gem_relocation_entry reloc;
> +                     u64 offset;
>  
> -             ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> -             if (ret)
> -                     return ret;
> +                     if (__copy_from_user_inatomic(&reloc,
> +                                                   user_relocs+i,
> +                                                   sizeof(reloc)))
> +                             return -EFAULT;
>  
> -             if (__copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> -                                         &reloc.presumed_offset,
> -                                         sizeof(reloc.presumed_offset)))
> -                     return -EFAULT;
> +                     offset = reloc.presumed_offset;
> +
> +                     ret = i915_gem_execbuffer_relocate_entry(obj, eb, &reloc);
> +                     if (ret)
> +                             return ret;
> +
> +                     if (reloc.presumed_offset != offset &&
> +                         __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> +                                                 &reloc.presumed_offset,
> +                                                 sizeof(reloc.presumed_offset)))
> +                             return -EFAULT;
> +             }
> +     } else {
> +             unsigned long page;
> +             int remain;
> +
> +             page = __get_free_page(GFP_TEMPORARY);
> +             if (unlikely(page == 0))
> +                     goto fallback;
> +
> +             i = 0;
> +             remain = entry->relocation_count;
> +             do {
> +                     struct drm_i915_gem_relocation_entry *r =
> +                             (struct drm_i915_gem_relocation_entry *)page;
> +                     int count = remain;
> +                     if (count > N_RELOC_PER_PAGE)
> +                             count = N_RELOC_PER_PAGE;
> +                     remain -= count;
> +
> +                     if (__copy_from_user_inatomic(r, user_relocs+i,
> +                                                   count*sizeof(r[0]))) {
> +                             ret = -EFAULT;
> +                             goto err;
> +                     }
> +
> +                     do {
> +                             u64 offset = r->presumed_offset;
> +
> +                             ret = i915_gem_execbuffer_relocate_entry(obj, eb, r);
> +                             if (ret)
> +                                     goto err;
> +
> +                             if (r->presumed_offset != offset &&
> +                                 __copy_to_user_inatomic(&user_relocs[i].presumed_offset,
> +                                                         &r->presumed_offset,
> +                                                         sizeof(r->presumed_offset))) {
> +                                     ret = -EFAULT;
> +                                     goto err;
> +                             }
> +
> +                             i++;
> +                             r++;
> +                     } while (--count);
> +             } while (remain);
> +
> +             ret = 0;
> +err:
> +             free_page(page);
>       }
>  
> -     return 0;
> +     return ret;
> +#undef N_RELOC_PER_PAGE
>  }
>  
>  static int
> -- 
> 1.7.9.1

-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
