All of the page tracking structures contain a bitmap holding currently
used entries.
It's redundant, since we can just inspect the pointer.
The only actual place where we're taking advantage of bitmaps is during
ppgtt cleanup - since we're able to reduce the number of iterations.
Still, we can improve that in the following commits, and iterate over
the full range here.

Let's replace the bitmaps with a simple counter.

Cc: Arkadiusz Hiler <arkadiusz.hi...@intel.com>
Cc: Chris Wilson <ch...@chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
Cc: Michel Thierry <michel.thie...@intel.com>
Cc: Mika Kuoppala <mika.kuopp...@intel.com>
Signed-off-by: Michał Winiarski <michal.winiar...@intel.com>
---
 drivers/gpu/drm/i915/i915_gem_gtt.c | 138 +++++++++++++-----------------------
 drivers/gpu/drm/i915/i915_gem_gtt.h |  10 +--
 2 files changed, 54 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c 
b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a7d6f78..50d4861 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -426,38 +426,26 @@ static void cleanup_scratch_page(struct drm_i915_private 
*dev_priv,
 static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
 {
        struct i915_page_table *pt;
-       const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
        int ret = -ENOMEM;
 
        pt = kzalloc(sizeof(*pt), GFP_KERNEL);
        if (!pt)
                return ERR_PTR(-ENOMEM);
 
-       pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
-                               GFP_KERNEL);
-
-       if (!pt->used_ptes)
-               goto fail_bitmap;
-
        ret = setup_px(dev_priv, pt);
-       if (ret)
-               goto fail_page_m;
+       if (ret) {
+               kfree(pt);
+               return ERR_PTR(ret);
+       }
 
        return pt;
 
-fail_page_m:
-       kfree(pt->used_ptes);
-fail_bitmap:
-       kfree(pt);
-
-       return ERR_PTR(ret);
 }
 
 static void free_pt(struct drm_i915_private *dev_priv,
                    struct i915_page_table *pt)
 {
        cleanup_px(dev_priv, pt);
-       kfree(pt->used_ptes);
        kfree(pt);
 }
 
@@ -494,23 +482,13 @@ static struct i915_page_directory *alloc_pd(struct 
drm_i915_private *dev_priv)
        if (!pd)
                return ERR_PTR(-ENOMEM);
 
-       pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
-                               sizeof(*pd->used_pdes), GFP_KERNEL);
-       if (!pd->used_pdes)
-               goto fail_bitmap;
-
        ret = setup_px(dev_priv, pd);
-       if (ret)
-               goto fail_page_m;
+       if (ret) {
+               kfree(pd);
+               return ERR_PTR(ret);
+       }
 
        return pd;
-
-fail_page_m:
-       kfree(pd->used_pdes);
-fail_bitmap:
-       kfree(pd);
-
-       return ERR_PTR(ret);
 }
 
 static void free_pd(struct drm_i915_private *dev_priv,
@@ -518,7 +496,6 @@ static void free_pd(struct drm_i915_private *dev_priv,
 {
        if (px_page(pd)) {
                cleanup_px(dev_priv, pd);
-               kfree(pd->used_pdes);
                kfree(pd);
        }
 }
@@ -538,28 +515,16 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
 {
        size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
 
-       pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
-                                 sizeof(unsigned long),
-                                 GFP_KERNEL);
-       if (!pdp->used_pdpes)
-               return -ENOMEM;
-
        pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
                                      GFP_KERNEL);
-       if (!pdp->page_directory) {
-               kfree(pdp->used_pdpes);
-               /* the PDP might be the statically allocated top level. Keep it
-                * as clean as possible */
-               pdp->used_pdpes = NULL;
+       if (!pdp->page_directory)
                return -ENOMEM;
-       }
 
        return 0;
 }
 
 static void __pdp_fini(struct i915_page_directory_pointer *pdp)
 {
-       kfree(pdp->used_pdpes);
        kfree(pdp->page_directory);
        pdp->page_directory = NULL;
 }
@@ -733,9 +698,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space 
*vm,
 
        GEM_BUG_ON(pte_end > GEN8_PTES);
 
-       bitmap_clear(pt->used_ptes, pte, num_entries);
+       pt->num_ptes -= num_entries;
 
-       if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+       if (pt->num_ptes == 0)
                return true;
 
        pt_vaddr = kmap_px(pt);
@@ -768,15 +733,16 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space 
*vm,
                        break;
 
                if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
-                       __clear_bit(pde, pd->used_pdes);
+                       pd->num_pdes--;
                        pde_vaddr = kmap_px(pd);
                        pde_vaddr[pde] = scratch_pde;
                        kunmap_px(ppgtt, pde_vaddr);
                        free_pt(vm->i915, pt);
+                       pd->page_table[pde] = NULL;
                }
        }
 
-       if (bitmap_empty(pd->used_pdes, I915_PDES))
+       if (pd->num_pdes == 0)
                return true;
 
        return false;
@@ -802,19 +768,20 @@ static bool gen8_ppgtt_clear_pdp(struct 
i915_address_space *vm,
                        break;
 
                if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
-                       __clear_bit(pdpe, pdp->used_pdpes);
+                       pdp->num_pdpes--;
                        if (USES_FULL_48BIT_PPGTT(dev_priv)) {
                                pdpe_vaddr = kmap_px(pdp);
                                pdpe_vaddr[pdpe] = scratch_pdpe;
                                kunmap_px(ppgtt, pdpe_vaddr);
                        }
                        free_pd(vm->i915, pd);
+                       pdp->page_directory[pdpe] = NULL;
                }
        }
 
        mark_tlbs_dirty(ppgtt);
 
-       if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
+       if (pdp->num_pdpes == 0)
                return true;
 
        return false;
@@ -843,11 +810,12 @@ static void gen8_ppgtt_clear_pml4(struct 
i915_address_space *vm,
                        break;
 
                if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
-                       __clear_bit(pml4e, pml4->used_pml4es);
+                       pml4->num_pml4es--;
                        pml4e_vaddr = kmap_px(pml4);
                        pml4e_vaddr[pml4e] = scratch_pml4e;
                        kunmap_px(ppgtt, pml4e_vaddr);
                        free_pdp(vm->i915, pdp);
+                       pml4->pdps[pml4e] = NULL;
                }
        }
 }
@@ -938,12 +906,11 @@ static void gen8_free_page_tables(struct drm_i915_private 
*dev_priv,
        if (!px_page(pd))
                return;
 
-       for_each_set_bit(i, pd->used_pdes, I915_PDES) {
-               if (WARN_ON(!pd->page_table[i]))
-                       continue;
-
-               free_pt(dev_priv, pd->page_table[i]);
-               pd->page_table[i] = NULL;
+       for (i = 0; i < I915_PDES; i++) {
+               if (pd->page_table[i]) {
+                       free_pt(dev_priv, pd->page_table[i]);
+                       pd->page_table[i] = NULL;
+               }
        }
 }
 
@@ -1040,12 +1007,11 @@ static void gen8_ppgtt_cleanup_3lvl(struct 
drm_i915_private *dev_priv,
 {
        int i;
 
-       for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
-               if (WARN_ON(!pdp->page_directory[i]))
-                       continue;
-
-               gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
-               free_pd(dev_priv, pdp->page_directory[i]);
+       for (i = 0; i < I915_PDPES_PER_PDP(dev_priv); i++) {
+               if (pdp->page_directory[i]) {
+                       gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
+                       pdp->page_directory[i] = NULL;
+               }
        }
 
        free_pdp(dev_priv, pdp);
@@ -1056,11 +1022,11 @@ static void gen8_ppgtt_cleanup_4lvl(struct 
drm_i915_private *dev_priv,
 {
        int i;
 
-       for_each_set_bit(i, pml4->used_pml4es, GEN8_PML4ES_PER_PML4) {
-               if (WARN_ON(!pml4->pdps[i]))
-                       continue;
-
-               gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+       for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+               if (pml4->pdps[i]) {
+                       gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+                       pml4->pdps[i] = NULL;
+               }
        }
 
        cleanup_px(dev_priv, pml4);
@@ -1109,7 +1075,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
i915_address_space *vm,
        const uint64_t start_save = start;
 
        gen8_for_each_pde(pt, pd, start, length, pde) {
-               if (test_bit(pde, pd->used_pdes))
+               if (pd->page_table[pde])
                        continue;
 
                pt = alloc_pt(dev_priv);
@@ -1121,7 +1087,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct 
i915_address_space *vm,
 
                gen8_initialize_pt(vm, pt);
                pd->page_table[pde] = pt;
-               __set_bit(pde, pd->used_pdes);
+               pd->num_pdes++;
                trace_i915_page_table_entry_alloc(vm, pde, start, 
GEN8_PDE_SHIFT);
        }
 
@@ -1160,7 +1126,7 @@ gen8_ppgtt_alloc_page_directories(struct 
i915_address_space *vm,
        const uint64_t start_save = start;
 
        gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
-               if (test_bit(pdpe, pdp->used_pdpes))
+               if (pdp->page_directory[pdpe])
                        continue;
 
                pd = alloc_pd(dev_priv);
@@ -1172,7 +1138,7 @@ gen8_ppgtt_alloc_page_directories(struct 
i915_address_space *vm,
 
                gen8_initialize_pd(vm, pd);
                pdp->page_directory[pdpe] = pd;
-               __set_bit(pdpe, pdp->used_pdpes);
+               pdp->num_pdpes++;
                trace_i915_page_directory_entry_alloc(vm, pdpe, start, 
GEN8_PDPE_SHIFT);
        }
 
@@ -1205,7 +1171,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
i915_address_space *vm,
        const uint64_t start_save = start;
 
        gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-               if (test_bit(pml4e, pml4->used_pml4es))
+               if (pml4->pdps[pml4e])
                        continue;
 
                pdp = alloc_pdp(dev_priv);
@@ -1217,7 +1183,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct 
i915_address_space *vm,
 
                gen8_initialize_pdp(vm, pdp);
                pml4->pdps[pml4e] = pdp;
-               __set_bit(pml4e, pml4->used_pml4es);
+               pml4->num_pml4es++;
                trace_i915_page_directory_pointer_entry_alloc(vm,
                                                              pml4e,
                                                              start,
@@ -1286,9 +1252,7 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
                        WARN_ON(!gen8_pte_count(pd_start, pd_len));
 
                        /* Set our used ptes within the page table */
-                       bitmap_set(pt->used_ptes,
-                                  gen8_pte_index(pd_start),
-                                  gen8_pte_count(pd_start, pd_len));
+                       pt->num_ptes += gen8_pte_count(pd_start, pd_len);
 
                        /* Map the PDE to the page table */
                        page_directory[pde] = gen8_pde_encode(px_dma(pt),
@@ -1296,7 +1260,7 @@ static int gen8_alloc_va_range_3lvl(struct 
i915_address_space *vm,
                        trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
                                                        gen8_pte_index(start),
                                                        gen8_pte_count(start, 
length),
-                                                       
bitmap_weight(pt->used_ptes, GEN8_PTES));
+                                                       pt->num_ptes);
 
                        /* NB: We haven't yet mapped ptes to pages. At this
                         * point we're still relying on insert_entries() */
@@ -1373,7 +1337,7 @@ static void gen8_dump_pdp(struct 
i915_page_directory_pointer *pdp,
                uint64_t pd_start = start;
                uint32_t pde;
 
-               if (!test_bit(pdpe, pdp->used_pdpes))
+               if (!pdp->page_directory[pdpe])
                        continue;
 
                seq_printf(m, "\tPDPE #%d\n", pdpe);
@@ -1381,7 +1345,7 @@ static void gen8_dump_pdp(struct 
i915_page_directory_pointer *pdp,
                        uint32_t  pte;
                        gen8_pte_t *pt_vaddr;
 
-                       if (!test_bit(pde, pd->used_pdes))
+                       if (!pd->page_table[pde])
                                continue;
 
                        pt_vaddr = kmap_px(pt);
@@ -1432,7 +1396,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, 
struct seq_file *m)
                struct i915_page_directory_pointer *pdp;
 
                gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
-                       if (!test_bit(pml4e, pml4->used_pml4es))
+                       if (!pml4->pdps[pml4e])
                                continue;
 
                        seq_printf(m, "    PML4E #%llu\n", pml4e);
@@ -1452,9 +1416,6 @@ static int gen8_preallocate_top_level_pdps(struct 
i915_hw_ppgtt *ppgtt)
         */
        ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
                                                start, length);
-       if (!ret)
-               bitmap_set(ppgtt->pdp.used_pdpes, gen8_pdpe_index(start),
-                          i915_pte_count(start, length, GEN8_PDPE_SHIFT));
 
        return ret;
 }
@@ -1848,12 +1809,12 @@ static int gen6_alloc_va_range(struct 
i915_address_space *vm,
         */
        gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
                if (pt != vm->scratch_pt) {
-                       WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
+                       WARN_ON(pt->num_ptes == 0);
                        continue;
                }
 
                /* We've already allocated a page table */
-               WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
+               WARN_ON(pt->num_ptes != 0);
 
                pt = alloc_pt(dev_priv);
                if (IS_ERR(pt)) {
@@ -1866,8 +1827,7 @@ static int gen6_alloc_va_range(struct i915_address_space 
*vm,
                trace_i915_page_table_entry_alloc(vm, pde, start, 
GEN6_PDE_SHIFT);
 
                gen6_initialize_pt(vm, pt);
-               bitmap_set(pt->used_ptes, gen6_pte_index(start),
-                          gen6_pte_count(start, length));
+               pt->num_ptes += gen6_pte_count(start, length);
 
                ppgtt->pd.page_table[pde] = pt;
                gen6_write_pde(&ppgtt->pd, pde, pt);
@@ -1875,7 +1835,7 @@ static int gen6_alloc_va_range(struct i915_address_space 
*vm,
                trace_i915_page_table_entry_map(vm, pde, pt,
                                         gen6_pte_index(start),
                                         gen6_pte_count(start, length),
-                                        bitmap_weight(pt->used_ptes, 
GEN6_PTES));
+                                        pt->num_ptes);
        }
 
        /* Make sure write is complete before other code can use this page
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h 
b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8965bbb..1fc65ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -193,27 +193,27 @@ struct i915_page_dma {
 struct i915_page_table {
        struct i915_page_dma base;
 
-       unsigned long *used_ptes;
+       unsigned int num_ptes;
 };
 
 struct i915_page_directory {
        struct i915_page_dma base;
 
-       unsigned long *used_pdes;
+       unsigned int num_pdes;
        struct i915_page_table *page_table[I915_PDES]; /* PDEs */
 };
 
 struct i915_page_directory_pointer {
        struct i915_page_dma base;
 
-       unsigned long *used_pdpes;
+       unsigned int num_pdpes;
        struct i915_page_directory **page_directory;
 };
 
 struct i915_pml4 {
        struct i915_page_dma base;
 
-       DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
+       unsigned int num_pml4es;
        struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
 };
 
@@ -477,7 +477,7 @@ static inline size_t gen8_pte_count(uint64_t address, 
uint64_t length)
 static inline dma_addr_t
 i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
 {
-       return test_bit(n, ppgtt->pdp.used_pdpes) ?
+       return ppgtt->pdp.page_directory[n] ?
                px_dma(ppgtt->pdp.page_directory[n]) :
                px_dma(ppgtt->base.scratch_pd);
 }
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to