Signed-off-by: Matthew Auld <matthew.a...@intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h |   3 +
 drivers/gpu/drm/i915/i915_gem.c | 187 +++++++++++++++++++++++++++++++++-------
 drivers/gpu/drm/i915/i915_vma.c |   8 ++
 3 files changed, 166 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 838ce22a0a40..07dd4d24b93e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2672,6 +2672,9 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg)
  * @__pp:      page pointer (output)
  * @__iter:    'struct sgt_iter' (iterator state, internal)
  * @__sgt:     sg_table to iterate over (input)
+ *
+ * Be warned: when huge-pages are in use, @__pp may be part of a compound
+ * page, so care must be taken. Too thorny?
  */
 #define for_each_sgt_page(__pp, __iter, __sgt)                         \
        for ((__iter) = __sgt_iter((__sgt)->sgl, false);                \
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5362f4d18689..1dde01676d37 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -171,7 +171,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
        struct sg_table *st;
        struct scatterlist *sg;
        char *vaddr;
-       int i;
+       int i, j;
 
        if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
                return ERR_PTR(-EINVAL);
@@ -187,7 +187,7 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
                return ERR_PTR(-ENOMEM);
 
        vaddr = phys->vaddr;
-       for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+       for (i = 0; i < obj->base.size / PAGE_SIZE; ) {
                struct page *page;
                char *src;
 
@@ -197,13 +197,15 @@ i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
                        goto err_phys;
                }
 
-               src = kmap_atomic(page);
-               memcpy(vaddr, src, PAGE_SIZE);
-               drm_clflush_virt_range(vaddr, PAGE_SIZE);
-               kunmap_atomic(src);
+               for (j = 0; j < hpage_nr_pages(page); ++j, ++i) {
+                       src = kmap_atomic(page + j);
+                       memcpy(vaddr, src, PAGE_SIZE);
+                       drm_clflush_virt_range(vaddr, PAGE_SIZE);
+                       kunmap_atomic(src);
+                       vaddr += PAGE_SIZE;
+               }
 
                put_page(page);
-               vaddr += PAGE_SIZE;
        }
 
        i915_gem_chipset_flush(to_i915(obj->base.dev));
@@ -263,9 +265,9 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
        if (obj->mm.dirty) {
                struct address_space *mapping = obj->base.filp->f_mapping;
                char *vaddr = obj->phys_handle->vaddr;
-               int i;
+               int i, j;
 
-               for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
+               for (i = 0; i < obj->base.size / PAGE_SIZE; ) {
                        struct page *page;
                        char *dst;
 
@@ -273,16 +275,18 @@ i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj,
                        if (IS_ERR(page))
                                continue;
 
-                       dst = kmap_atomic(page);
-                       drm_clflush_virt_range(vaddr, PAGE_SIZE);
-                       memcpy(dst, vaddr, PAGE_SIZE);
-                       kunmap_atomic(dst);
+                       for (j = 0; j < hpage_nr_pages(page); ++j, ++i) {
+                               dst = kmap_atomic(page + j);
+                               drm_clflush_virt_range(vaddr, PAGE_SIZE);
+                               memcpy(dst, vaddr, PAGE_SIZE);
+                               kunmap_atomic(dst);
+                               vaddr += PAGE_SIZE;
+                       }
 
                        set_page_dirty(page);
                        if (obj->mm.madv == I915_MADV_WILLNEED)
                                mark_page_accessed(page);
                        put_page(page);
-                       vaddr += PAGE_SIZE;
                }
                obj->mm.dirty = false;
        }
@@ -2179,6 +2183,8 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
                i915_gem_object_save_bit_17_swizzle(obj, pages);
 
        for_each_sgt_page(page, sgt_iter, pages) {
+               if (PageTail(page))
+                       continue;
                if (obj->mm.dirty)
                        set_page_dirty(page);
 
@@ -2272,6 +2278,15 @@ static bool i915_sg_trim(struct sg_table *orig_st)
        return true;
 }
 
+static inline unsigned int i915_shmem_page_size(struct page *page)
+{
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+       return PageTransHuge(page) ? HPAGE_PMD_SIZE : PAGE_SIZE;
+#else
+       return PAGE_SIZE;
+#endif
+}
+
 static struct sg_table *
 i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 {
@@ -2287,6 +2302,14 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        unsigned int max_segment;
        int ret;
        gfp_t gfp;
+       const unsigned int gtt_page_sizes[] = {
+               I915_GTT_PAGE_SIZE_1G,
+               I915_GTT_PAGE_SIZE_2M,
+               I915_GTT_PAGE_SIZE_64K,
+               I915_GTT_PAGE_SIZE_4K,
+       };
+       unsigned int page_size;
+       int j;
 
        /* Assert that the object is not currently in any GPU domain. As it
         * wasn't in the GTT, there shouldn't be any way it could have been in
@@ -2299,6 +2322,25 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        if (!max_segment)
                max_segment = rounddown(UINT_MAX, PAGE_SIZE);
 
+       /* max_segment is the maximum number of contiguous PAGE_SIZE pages we
+        * can have in the bounce buffer, assuming swiotlb. So optimistically
+        * select the largest supported gtt page size that fits within
+        * max_segment. Also take care to align max_segment to said page size
+        * to avoid any huge pages spilling across sg entries.
+        */
+       for (j = 0; j < ARRAY_SIZE(gtt_page_sizes); ++j) {
+               unsigned int page_size = gtt_page_sizes[j];
+               unsigned int nr_pages = page_size >> PAGE_SHIFT;
+
+               if (SUPPORTS_PAGE_SIZE(dev_priv, page_size) &&
+                   page_size <= obj->page_size &&
+                   nr_pages <= max_segment) {
+                       max_segment = rounddown(max_segment, nr_pages);
+                       obj->gtt_page_size = page_size;
+                       break;
+               }
+       }
+
        st = kmalloc(sizeof(*st), GFP_KERNEL);
        if (st == NULL)
                return ERR_PTR(-ENOMEM);
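
For illustration, a minimal userspace sketch of the page-size selection
above; SUPPORTS_PAGE_SIZE() is stood in by a hard-coded mask and everything
is kept in bytes, both simplifications for the sketch rather than the
driver's real interfaces:

#include <stdio.h>

#define SZ_4K  (4096u)
#define SZ_64K (64u * 1024)
#define SZ_2M  (2u * 1024 * 1024)
#define SZ_1G  (1024u * 1024 * 1024)

/* Pick the largest supported page size that fits both the object and the
 * segment limit, then trim the segment limit so whole huge pages never
 * straddle an sg entry.
 */
static unsigned int pick_gtt_page_size(unsigned int supported,
                                       unsigned int obj_page_size,
                                       unsigned int *max_segment)
{
        const unsigned int sizes[] = { SZ_1G, SZ_2M, SZ_64K, SZ_4K };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned int ps = sizes[i];

                if ((supported & ps) && ps <= obj_page_size &&
                    ps <= *max_segment) {
                        *max_segment -= *max_segment % ps; /* rounddown */
                        return ps;
                }
        }
        return SZ_4K;
}

int main(void)
{
        unsigned int max_segment = 3u * 1024 * 1024; /* e.g. swiotlb limit */
        unsigned int ps = pick_gtt_page_size(SZ_2M | SZ_64K | SZ_4K,
                                             SZ_2M, &max_segment);

        printf("gtt page size %u, max_segment %u\n", ps, max_segment);
        /* prints: gtt page size 2097152, max_segment 2097152 */
        return 0;
}
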
@@ -2309,6 +2351,9 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
                return ERR_PTR(-ENOMEM);
        }
 
+       GEM_BUG_ON(!SUPPORTS_PAGE_SIZE(dev_priv, obj->gtt_page_size));
+       GEM_BUG_ON(!IS_ALIGNED(max_segment << PAGE_SHIFT, obj->gtt_page_size));
+
        /* Get the list of pages out of our struct file.  They'll be pinned
         * at this point until we release them.
         *
@@ -2319,7 +2364,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
        gfp |= __GFP_NORETRY | __GFP_NOWARN;
        sg = st->sgl;
        st->nents = 0;
-       for (i = 0; i < page_count; i++) {
+       for (i = 0; i < page_count; i += hpage_nr_pages(page)) {
                page = shmem_read_mapping_page_gfp(mapping, i, gfp);
                if (unlikely(IS_ERR(page))) {
                        i915_gem_shrink(dev_priv,
@@ -2349,17 +2394,36 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
                                goto err_sg;
                        }
                }
+
+               /* If we don't have enough huge pages in the pool, fall back
+                * to the minimum page size. We can still allocate huge-pages
+                * but now obj->page_size and obj->gtt_page_size will reflect
+                * the minimum page size in the mapping.
+                */
+               page_size = i915_shmem_page_size(page);
+               if (page_size < obj->page_size) {
+                       obj->page_size = PAGE_SIZE;
+                       obj->gtt_page_size = I915_GTT_PAGE_SIZE;
+               }
+
+               /* TODO: if we don't use huge-pages, or the object is small,
+                * we can probably do something clever with contiguous pages
+                * here, if we have enough of them and they fit nicely into a
+                * gtt page size and max_segment. Imagine a 64K object where
+                * we get 16 contiguous 4K pages: we could get away with a
+                * single 64K pte.
+                */
                if (!i ||
                    sg->length >= max_segment ||
                    page_to_pfn(page) != last_pfn + 1) {
                        if (i)
                                sg = sg_next(sg);
                        st->nents++;
-                       sg_set_page(sg, page, PAGE_SIZE, 0);
+                       sg_set_page(sg, page, page_size, 0);
                } else {
-                       sg->length += PAGE_SIZE;
+                       sg->length += page_size;
                }
-               last_pfn = page_to_pfn(page);
+               last_pfn = page_to_pfn(page) + hpage_nr_pages(page) - 1;
 
                /* Check that the i965g/gm workaround works. */
                WARN_ON((gfp & __GFP_DMA32) && (last_pfn >= 0x00100000UL));
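
As a rough sketch of the idea in the TODO above, with pfns modelled as plain
integers rather than struct pages (the helper name is made up for the
example):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT      12
#define SZ_64K          (64u * 1024)
#define PAGES_PER_64K   (SZ_64K >> PAGE_SHIFT)  /* 16 */

/* A run of 4K pages could be covered by a single 64K pte iff there are at
 * least 16 of them, the pfns are contiguous, and the first pfn is 64K
 * aligned (i.e. a multiple of 16).
 */
static bool fits_single_64k_pte(const unsigned long *pfns, unsigned int n)
{
        unsigned int i;

        if (n < PAGES_PER_64K || pfns[0] % PAGES_PER_64K)
                return false;

        for (i = 1; i < PAGES_PER_64K; i++)
                if (pfns[i] != pfns[0] + i)
                        return false;

        return true;
}

int main(void)
{
        unsigned long pfns[16];
        unsigned int i;

        for (i = 0; i < 16; i++)
                pfns[i] = 0x1000 + i; /* contiguous and 64K aligned */

        printf("%d\n", fits_single_64k_pte(pfns, 16)); /* prints 1 */
        return 0;
}
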
@@ -2372,25 +2436,43 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 
        ret = i915_gem_gtt_prepare_pages(obj, st);
        if (ret) {
-               /* DMA remapping failed? One possible cause is that
-                * it could not reserve enough large entries, asking
-                * for PAGE_SIZE chunks instead may be helpful.
-                */
-               if (max_segment > PAGE_SIZE) {
-                       for_each_sgt_page(page, sgt_iter, st)
-                               put_page(page);
-                       sg_free_table(st);
-
-                       max_segment = PAGE_SIZE;
-                       goto rebuild_st;
-               } else {
+               if (max_segment == PAGE_SIZE) {
                        dev_warn(&dev_priv->drm.pdev->dev,
                                 "Failed to DMA remap %lu pages\n",
                                 page_count);
                        goto err_pages;
                }
+
+               for_each_sgt_page(page, sgt_iter, st) {
+                       if (!PageTail(page))
+                               put_page(page);
+               }
+               sg_free_table(st);
+
+               /* DMA remapping failed? One possible cause is that
+                * it could not reserve enough large entries; trying
+                * smaller page size chunks instead may be helpful.
+                *
+                * We really don't know what max_segment should be, so
+                * just go with the simple premise that the next
+                * smallest segment will be at least half the size of
+                * the previous.
+                */
+               for (; j < ARRAY_SIZE(gtt_page_sizes); ++j) {
+                       unsigned int page_size = gtt_page_sizes[j];
+
+                       if (SUPPORTS_PAGE_SIZE(dev_priv, page_size) &&
+                           page_size < max_segment) {
+                               obj->gtt_page_size = max_segment = page_size;
+                               break;
+                       }
+               }
+
+               goto rebuild_st;
        }
 
+       GEM_BUG_ON(obj->gtt_page_size > obj->page_size);
+
        if (i915_gem_object_needs_bit17_swizzle(obj))
                i915_gem_object_do_bit_17_swizzle(obj, st);
 
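
A minimal sketch of the step-down retry described above; the supported-size
mask is again just a stand-in for SUPPORTS_PAGE_SIZE():

#include <stdio.h>

#define SZ_4K  (4096u)
#define SZ_64K (64u * 1024)
#define SZ_2M  (2u * 1024 * 1024)
#define SZ_1G  (1024u * 1024 * 1024)

/* On remap failure, drop to the next supported page size that is strictly
 * smaller than the current segment limit; 4K is the floor.
 */
static unsigned int next_segment_size(unsigned int supported,
                                      unsigned int max_segment)
{
        const unsigned int sizes[] = { SZ_1G, SZ_2M, SZ_64K, SZ_4K };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                if ((supported & sizes[i]) && sizes[i] < max_segment)
                        return sizes[i];

        return SZ_4K;
}

int main(void)
{
        /* e.g. remapping with 2M segments failed: step down to 64K */
        printf("%u\n", next_segment_size(SZ_2M | SZ_64K | SZ_4K, SZ_2M));
        return 0;
}
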
@@ -2399,8 +2481,10 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 err_sg:
        sg_mark_end(sg);
 err_pages:
-       for_each_sgt_page(page, sgt_iter, st)
-               put_page(page);
+       for_each_sgt_page(page, sgt_iter, st) {
+               if (!PageTail(page))
+                       put_page(page);
+       }
        sg_free_table(st);
        kfree(st);
 
@@ -4192,10 +4276,36 @@ struct drm_i915_gem_object *
 i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 {
        struct drm_i915_gem_object *obj;
+       unsigned int page_size = PAGE_SIZE;
        struct address_space *mapping;
        gfp_t mask;
        int ret;
 
+       /* If configured, *attempt* to use THP through shmemfs. HPAGE_PMD_SIZE
+        * will be either 2M or 1G depending on the default hugepage_sz. This
+        * is best effort and will of course depend on how many huge-pages we
+        * have available in the pool. We determine the gtt page size when we
+        * actually try pinning the backing storage, where gtt_page_size <=
+        * page_size.
+        *
+        * XXX Some musings:
+        *
+        * - We don't know if the object will be inserted into the ppgtt where
+        *   it will be most beneficial to have huge-pages, or the ggtt where
+        *   the object will always be treated like a 4K object.
+        *
+        * - Similarly, should we care if the gtt doesn't support page sizes
+        *   > 4K? If it does, great; if it doesn't, we at least see the
+        *   benefit of reduced fragmentation, so it's not a complete
+        *   waste...thoughts?
+        */
+#ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE
+       if (has_transparent_hugepage() && size >= HPAGE_PMD_SIZE) {
+               page_size = HPAGE_PMD_SIZE;
+               size = round_up(size, page_size);
+       }
+#endif
+
        /* There is a prevalence of the assumption that we fit the object's
         * page count inside a 32bit _signed_ variable. Let's document this and
         * catch if we ever need to fix it. In the meantime, if you do spot
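
A small sketch of the size rounding above, assuming a 2M HPAGE_PMD_SIZE (the
1G case is analogous):

#include <stdio.h>

#define HPAGE_PMD_SIZE  (2ull * 1024 * 1024)   /* assume 2M huge pages */

/* Round the object size up to the next huge page multiple, as done once the
 * object is big enough to bother with THP at all.
 */
static unsigned long long round_up_size(unsigned long long size)
{
        return (size + HPAGE_PMD_SIZE - 1) & ~(HPAGE_PMD_SIZE - 1);
}

int main(void)
{
        /* a 3M object gets 4M of backing store */
        printf("%llu\n", round_up_size(3ull * 1024 * 1024)); /* 4194304 */
        return 0;
}
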
@@ -4227,6 +4337,19 @@ i915_gem_object_create(struct drm_i915_private *dev_priv, u64 size)
 
        i915_gem_object_init(obj, &i915_gem_object_ops);
 
+       /* In a few places we interact with shmemfs implicitly by writing
+        * through the page_cache prior to pinning the backing storage; this
+        * is an optimisation and prevents shmemfs from needlessly clearing
+        * pages. So in order to control the use of huge-pages, for both the
+        * pinning of the backing store and any implicit interaction which
+        * may end up allocating pages, we need more control than the
+        * read_mapping or getpage interfaces provided by shmem offer. This
+        * should effectively default to huge-page allocations in shmem for
+        * this mapping.
+        */
+       SHMEM_I(mapping->host)->huge = page_size > PAGE_SIZE;
+       obj->page_size = page_size;
+
        obj->base.write_domain = I915_GEM_DOMAIN_CPU;
        obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 4043145b4310..af295aa3b49c 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -469,6 +469,14 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 
alignment, u64 flags)
        if (ret)
                return ret;
 
+       /* We don't know the final gtt page size until *after* we pin the
+        * backing store, at least for the shmem backend. Therefore re-adjust
+        * the alignment if needed. This is only relevant for huge-pages
+        * being inserted into the ppgtt.
+        */
+       if (!i915_is_ggtt(vma->vm) && alignment < obj->gtt_page_size)
+               alignment = obj->gtt_page_size;
+
        if (i915_vm_has_cache_coloring(vma->vm))
                color = obj->cache_level;
        else if (i915_vm_has_page_coloring(vma->vm))
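
And a tiny sketch of the alignment fixup above, with hypothetical values:
whatever alignment the caller asked for is bumped to the gtt page size once
that is known.

#include <stdio.h>

#define SZ_2M (2u * 1024 * 1024)

int main(void)
{
        unsigned int alignment = 4096;          /* caller asked for 4K */
        unsigned int gtt_page_size = SZ_2M;     /* known only after pinning */

        if (alignment < gtt_page_size)
                alignment = gtt_page_size;

        printf("%u\n", alignment); /* 2097152 */
        return 0;
}
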
-- 
2.9.3
