[Intel-gfx] [PATCH 02/11] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e4c8e34..2a09ccf 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3107,6 +3107,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 01/11] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm specfic callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page in to the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) v3: Added POSTING_READ post register write (Tvrtko) v4: Rebase (Ankit) v5: Removed wmb() and FLUSH_CTL from insert_page, caller to take care of it (Chris) v6: insert_page not working correctly without FLSH_CNTL write, added the write again. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/char/agp/intel-gtt.c| 8 + drivers/gpu/drm/i915/i915_gem_gtt.c | 66 - drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index aef87fd..4431129 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -840,6 +840,14 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4668477..7a139a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2355,6 +2355,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + 
(offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2424,6 +2446,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2543,6 +2587,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct
[Intel-gfx] [PATCH 05/11] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) v7: Use first 8 bits of gem_create flags for placement (Chris), Add helper function for object allocation from stolen region (Ankit) v8: Added comment explaining STOLEN placement flag (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 49 ++ drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-- include/uapi/drm/i915_drm.h| 41 5 files changed, 91 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 07edaed..83ae436 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -231,6 +231,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 73643ec..0e8fe13 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3521,7 +3521,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int 
i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 982f632..0224969 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -384,10 +384,36 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) kmem_cache_free(dev_priv->objects, obj); } +static struct drm_i915_gem_object * +i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) +{ + struct drm_i915_gem_object *obj; + int ret; + + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + mutex_unlock(&dev->struct_mutex); + return obj; +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -398,10 +424,23 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_object_create(dev, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + switch (flags & I915_CREATE_PLACEMENT_MASK) { + case I915_CREATE_PLACEMENT_NORMAL: + obj = i915_gem_object_create(dev, size); + break; + case I915_CREATE_PLACEMENT_STOLEN: + obj = i915_gem_alloc_object_stolen(dev, size); + break; + default: + return -EINVAL; + } + + if (IS_ERR_OR_NULL(obj)) + return 
-ENOMEM; ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -422,7 +461,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args-
[Intel-gfx] [PATCH v20 00/11] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kind to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure by adding a new flag through which the user can specify a preference for allocating the object from stolen memory; if the flag is set, an attempt will be made to allocate the object from stolen memory, subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from stolen region, but can also be used for other shmem allocated objects. It is currently used for buffers allocated in the stolen region. It also adds support for stealing purgeable stolen pages if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" v13: Addressed comments, Added 2 patches to detect Intel RST and disable stolen for persistent data if RST device found 1. acpi: Export acpi_bus_type 2. drm/i915: Disable use of stolen area by User when Intel RST is present v14: Addressed comments, Added 2 base patches to the series 1. drm/i915: Add support for mapping an object page by page 2. drm/i915: Introduce i915_gem_object_get_dma_address() v15: Addressed comments, Disabled stolen memory by default v16: Addressed comments, Added low level rpm assertions, Enabled stolen memory v17: Addressed comments v18: Rebased and fixed issue v19: Rebased and added 2 more patches to report mappable and stolen size numbers 1. 
drm/i915: Extend GET_APERTURE ioctl to report available map space 2. drm/i915: Extend GET_APERTURE ioctl to report size of the stolen region v20: Rebased and squashed last 2 patches into one. This can be verified using IGT tests: igt/gem_stolen, igt/gem_create, igt/gem_pread, igt/gem_pwrite Ankitprasad Sharma (7): drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Disable use of stolen area by User when Intel RST is present drm/i915: Extend GET_APERTURE ioctl to report available map space Chris Wilson (4): drm/i915: Add support for mapping an object page by page drm/i915: Introduce i915_gem_object_get_dma_address() drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/char/agp/intel-gtt.c
[Intel-gfx] [PATCH 08/11] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) v14: Corrected datatypes/initializations (Tvrtko) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 218 ++-- 1 file changed, 188 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 980624a..fca3f6d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -54,6 +54,9 @@ static bool cpu_cache_is_coherent(struct 
drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -641,6 +644,142 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline unsigned long +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = data_offset; + + mutex_unlock(&dev->struct_mutex); + if 
(likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + mutex_lock(&dev->struct_mutex); + goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offset w
[Intel-gfx] [PATCH 07/11] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning v12: Added lockdep_assert before starting stolen-backed object eviction (Chris) v13: Rebased Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 171 + 
drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ac7e569..aeae600 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -182,7 +182,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -254,9 +254,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0e8fe13..d15c018 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -831,6 +831,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1279,6 +1285,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. 
+*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2132,7 +2145,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head engine_list[I915_NUM_ENGINES]; @@ -2140,6 +2153,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i9
[Intel-gfx] [PATCH 09/11] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and as a result of this context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Splitted object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected use of set_to_cpu_domain() (Chris) v9: Replaced WARN_ON by BUG_ON and added a comment explaining it (Daniel/Tvrtko) v10: Document use of barriers (Chris) v11: Resolved list corruption due to not removing obj from global_list if no reference to pages is held, Rebased (Ankit) v12: Rebase (Ankit) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 22 +++- drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 204 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 49 drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 8 files changed, 284 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 943d7b2..827a3fe 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1066,6 +1066,22 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +/* freeze: before creating the hibernation_image */ +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int 
i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1115,12 +1131,6 @@ static int i915_pm_resume(struct device *dev) return i915_drm_resume(drm_dev); } -/* freeze: before creating the hibernation_image */ -static int i915_pm_freeze(struct device *dev) -{ - return i915_pm_suspend(dev); -} - static int i915_pm_freeze_late(struct device *dev) { int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d15c018..6b7e790 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2180,6 +2180,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3317,6 +3323,9 @@ int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __mu
[Intel-gfx] [PATCH 10/11] drm/i915: Disable use of stolen area by User when Intel RST is present
From: Ankitprasad Sharma The BIOS RapidStartTechnology may corrupt the stolen memory across S3 suspend due to unalarmed hibernation, in which case we will not be able to preserve the User data stored in the stolen region. Hence this patch tries to identify presence of the RST device on the ACPI bus, and disables use of stolen memory (for persistent data) if found. v2: Updated comment, updated/corrected new functions private to driver (Chris/Tvrtko) v3: Disabling stolen by default, wait till required acpi changes to detect device presence are pulled in (Ankit) v4: Enabled stolen by default as required acpi changes are merged (Ankit) v5: renamed variable, is IS_ENABLED() in place of #ifdef, use char* instead of structures (Lukas) Signed-off-by: Ankitprasad Sharma Cc: Lukas Wunner Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 8 drivers/gpu/drm/i915/i915_gem_stolen.c | 12 drivers/gpu/drm/i915/intel_acpi.c | 7 +++ 4 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6b7e790..f2dbb26 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1338,6 +1338,16 @@ struct i915_gem_mm { */ bool busy; + /** +* Stolen will be lost upon hibernate (as the memory is unpowered). +* Across resume, we expect stolen to be intact - however, it may +* also be utililised by third parties (e.g. Intel RapidStart +* Technology) and if so we have to assume that any data stored in +* stolen across resume is lost and we set this flag to indicate that +* the stolen memory is volatile. 
+*/ + bool volatile_stolen; + /* the indicator for dispatch video commands on two BSD rings */ unsigned int bsd_ring_dispatch_index; @@ -3701,6 +3711,7 @@ static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) #endif /* intel_acpi.c */ +bool intel_detect_acpi_rst(void); #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); extern void intel_unregister_dsm_handler(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e0adb2a..8bdcc55 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -391,8 +391,16 @@ static struct drm_i915_gem_object * i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; + if (dev_priv->mm.volatile_stolen) { + /* Stolen may be overwritten by external parties +* so unsuitable for persistent user data. +*/ + return ERR_PTR(-ENODEV); + } + mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 2518ebb..0e6203c 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -492,6 +492,18 @@ int i915_gem_init_stolen(struct drm_device *dev) */ drm_mm_init(&dev_priv->mm.stolen, 0, ggtt->stolen_usable_size); + /* If the stolen region can be modified behind our backs upon suspend, +* then we cannot use it to store nonvolatile contents (i.e user data) +* as it will be corrupted upon resume. +*/ + dev_priv->mm.volatile_stolen = false; + if (IS_ENABLED(CONFIG_SUSPEND)) { + /* BIOSes using RapidStart Technology have been reported +* to overwrite stolen across S3, not just S4. 
+*/ + dev_priv->mm.volatile_stolen = intel_detect_acpi_rst(); + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1..05fd67f 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -23,6 +23,8 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; +static const char *irst_id = "INT3392"; + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -162,3 +164,8 @@ void intel_register_dsm_handler(void) void intel_unregister_dsm_handler(void) { } + +bool intel_detect_acpi_rst(void) +{ + return acpi_dev_present(irst_id); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 11/11] drm/i915: Extend GET_APERTURE ioctl to report available map space
From: Ankitprasad Sharma When constructing a batchbuffer, it is sometimes crucial to know the largest hole into which we can fit a fenceable buffer (for example when handling very large objects on gen2 and gen3). This depends on the fragmentation of pinned buffers inside the aperture, a question only the kernel can easily answer. This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to include a couple of new fields in its reply to userspace - the total amount of space available in the mappable region of the aperture and also the single largest block available. This is not quite what userspace wants to answer the question of whether this batch will fit as fences are also required to meet severe alignment constraints within the batch. For this purpose, a third conservative estimate of largest fence available is also provided. For when userspace needs more than one batch, we also provide the cumulative space available for fences such that it has some additional guidance as to how much space it could allocate to fences. Conservatism still wins. This patch extends the GET_APERTURE ioctl to add support for getting total size and available size of the stolen region as well as the single largest block available in the stolen region. The patch also adds a debugfs file for convenient testing and reporting. v2: The first object cannot end at offset 0, so we can use last==0 to detect the empty list. v3: Expand all values to 64bit, just in case. Report total mappable aperture size for userspace that cannot easily determine it by inspecting the PCI device. v4: (Rodrigo) Fixed rebase conflicts. 
v5: Rebased to the latest drm-intel-nightly (Ankit) v6: Keeping limits to get_aperture ioctl, and moved changing numbers to debugfs, Addressed comments (Chris/Tvrtko) v7: Squashed stolen memory size patch to this one, Added a new version field to validate the map_size and stolen size values, Changed Author to me (Ankit) due to significant changes in the logic used to get size values Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_debugfs.c| 143 + drivers/gpu/drm/i915/i915_drv.h| 3 + drivers/gpu/drm/i915/i915_gem.c| 4 + drivers/gpu/drm/i915/i915_gem_stolen.c | 27 +++ include/uapi/drm/i915_drm.h| 17 5 files changed, 194 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aeae600..625e3cc 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -586,6 +586,148 @@ static int i915_gem_object_info(struct seq_file *m, void* data) return 0; } +static int vma_rank_by_ggtt(void *priv, + struct list_head *A, + struct list_head *B) +{ + struct i915_vma *a = list_entry(A, typeof(*a), exec_list); + struct i915_vma *b = list_entry(B, typeof(*b), exec_list); + + return a->node.start - b->node.start; +} + +static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end) +{ + u32 size = end - start; + u32 fence_size; + + if (INTEL_INFO(dev_priv)->gen < 4) { + u32 fence_max; + u32 fence_next; + + if (IS_GEN3(dev_priv)) { + fence_max = I830_FENCE_MAX_SIZE_VAL << 20; + fence_next = 1024*1024; + } else { + fence_max = I830_FENCE_MAX_SIZE_VAL << 19; + fence_next = 512*1024; + } + + fence_max = min(fence_max, size); + fence_size = 0; + /* Find fence_size less than fence_max and power of 2 */ + while (fence_next <= fence_max) { + u32 base = ALIGN(start, fence_next); + if (base + fence_next > end) + break; + + fence_size = fence_next; + fence_next <<= 1; + } + } else { + fence_size = size; + } + + return fence_size; 
+} + +static int i915_gem_aperture_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_gem_get_aperture arg; + struct i915_vma *vma; + struct list_head map_list; + const uint64_t map_limit = ggtt->mappable_end; + uint64_t map_space, map_largest, fence_space, fence_largest; + uint64_t last, hole_size, stolen_free, stolen_largest; + int ret; + + INIT_LIST_HEAD(&map_list); + + map_space = map_largest = 0; + fence_space = fence_largest = 0; + + ret = i915_gem_get_aperture_ioctl(node->minor->dev, &arg, NULL); +
[Intel-gfx] [PATCH 04/11] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) v8: Rebase (Ankit) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 45 + 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 2a09ccf..73643ec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3098,6 +3098,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 74346cf..982f632 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5373,3 +5373,48 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * 
Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(ggtt->mappable, node.start, PAGE_SIZE); + + for (i = 0; i < size/PAGE_SIZE; i++) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, i), + node.start, I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + ggtt->base.clear_range(&ggtt->base, node.start, node.size, true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 06/11] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) v7: Resolved rebasing conflicts (Ankit) v8: Removed redundant code (Chris) v9: Rebase v10: Rebase, resolve merge conflicts Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 15 drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 50 -- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 3 +- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +-- 9 files changed, 83 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0224969..4d7a135 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -392,19 +392,18 @@ i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { - mutex_unlock(&dev->struct_mutex); - return NULL; - } + if (IS_ERR(obj)) + goto out; /* Always clear fresh buffers before handing to userspace */ ret = 
i915_gem_object_clear(obj); if (ret) { drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return NULL; + obj = ERR_PTR(ret); + goto out; } +out: mutex_unlock(&dev->struct_mutex); return obj; } @@ -439,8 +438,8 @@ i915_gem_create(struct drm_file *file, return -EINVAL; } - if (IS_ERR_OR_NULL(obj)) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 7c93327..84d91c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -59,8 +59,11 @@ static int render_state_init(struct render_state *so, return -EINVAL; so->obj = i915_gem_object_create(dev_priv->dev, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); + if (IS_ERR(so->obj)) { + ret = PTR_ERR(so->obj); + so->obj = NULL; + return ret; + } ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 81d5b6b..dcb70c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -503,6 +503,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct sg_table *st; struct scatterlist *sg; + int ret; DRM_DEBUG_DRIVER("offset=0x%x, size=%d\n", offset, size); BUG_ON(offset > ggtt->stolen_size - size); @@ -514,11 +515,12 @@ i915_pages_create_for_stolen(struct drm_device *dev, st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (sg_alloc_table(st, 1, GFP_KERNEL)) { + ret = sg_alloc_table(st, 1, GFP_KERNEL); + if (ret) { kfree(st); - return NULL; + return ERR_PTR(ret); } sg = st->sgl; @@ -567,18 +569,23 @@ _i915_gem_object_create_stolen(struct drm_device *dev, struct drm_mm_node *stolen) { 
struct drm_i915_gem_object *obj; + struct sg_table *pages; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); drm_gem_private_
[Intel-gfx] [PATCH 03/11] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) v9: Rebase (Ankit) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 90 +++-- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 89967f9..74346cf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -60,6 +60,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, + size, 0, 0, 0, + i915->ggtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void 
i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -755,21 +773,34 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - ssize_t remain; - loff_t offset, page_base; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -779,26 +810,32 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); +
[Intel-gfx] [PATCH 1/3] igt/gem_stolen: Verify contents of stolen-backed objects across hibernation
From: Ankitprasad Sharma This patch verifies if the contents of the stolen backed object were preserved across hibernation. This is to validate kernel changes related to moving stolen-backed objects to shmem on hibernation. v2: Added comment, Use igt_assert_eq() instead of igt_assert(), Made loops more readable (Tvrtko) v3: Corrected assertion (Tvrtko) Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- tests/gem_stolen.c | 95 ++ 1 file changed, 95 insertions(+) diff --git a/tests/gem_stolen.c b/tests/gem_stolen.c index 07fdd39..3a3cf81 100644 --- a/tests/gem_stolen.c +++ b/tests/gem_stolen.c @@ -290,6 +290,98 @@ static void stolen_fill_purge_test(int fd) gem_close(fd, handle[i]); } +static void stolen_hibernate(int fd) +{ + drm_intel_bo *bo; + drm_intel_bo *src, *dest; + int obj_count = 0, i = 0; + int ret, j; + uint32_t handle[MAX_OBJECTS], src_handle; + uint32_t *virt; + + gem_require_stolen_support(fd); + + src_handle = gem_create(fd, SIZE); + src = gem_handle_to_libdrm_bo(bufmgr, fd, +"bo", src_handle); + igt_assert(src != NULL); + + ret = drm_intel_gem_bo_map_gtt(src); + igt_assert_eq(ret, 0); + + virt = src->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) { + igt_assert_eq(virt[j], 0); + virt[j] = j; + } + + drm_intel_bo_unmap(src); + /* Exhaust Stolen space */ + for (i = 0; i < MAX_OBJECTS; i++) { + handle[i] = __gem_create_stolen(fd, SIZE); + if (!handle[i]) + break; + + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert_eq(ret, 0); + + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert_eq(virt[j], 0); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + + obj_count++; + } + + /* Assert if atleast one object is allocated from stolen, that +* is good enough to verify the content preservation across +* hibernation. 
+*/ + igt_assert(obj_count > 0); + + /* Copy data to all stolen backed objects */ + for (i = 0; i < obj_count; i++) { + dest = gem_handle_to_libdrm_bo(bufmgr, fd, + "dst_bo", handle[i]); + igt_assert(dest != NULL); + /* Copy contents to stolen backed objects via blt and +* verify post-hibernation, this also helps in identifying +* that the operation was completed before going to +* hibernation. +*/ + intel_copy_bo(batch, dest, src, SIZE); + } + + drm_intel_bo_unreference(src); + + igt_system_hibernate_autoresume(); + /* Check if the object's memory contents are intact +* across hibernation. +*/ + for (i = 0; i < obj_count; i++) { + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert_eq(ret, 0); + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert_eq(virt[j], j); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + } + + gem_close(fd, src_handle); + for (i = 0; i < obj_count; i++) + gem_close(fd, handle[i]); +} + static void stolen_no_mmap(int fd) { @@ -353,6 +445,9 @@ igt_main igt_subtest("stolen-fill-purge") stolen_fill_purge_test(fd); + igt_subtest("stolen-hibernate") + stolen_hibernate(fd); + igt_fixture { intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 3/3] igt/gem_stolen: Check for available stolen memory size
From: Ankitprasad Sharma Check for available stolen memory size before attempting to run the stolen memory tests. This way we make sure that we do not create objects from stolen memory without knowing the available size. This checks if the kernel supports creation of stolen backed objects before doing any operation on stolen backed objects. Also correcting the CREATE_VERSION ioctl number in getparam ioctl, due to kernel changes added in between. Signed-off-by: Ankitprasad Sharma --- lib/ioctl_wrappers.c | 48 +++- lib/ioctl_wrappers.h | 7 +-- tests/gem_create.c | 2 +- tests/gem_pread.c| 3 +++ tests/gem_pwrite.c | 2 ++ tests/gem_stolen.c | 16 6 files changed, 66 insertions(+), 12 deletions(-) diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c index f224091..e6120bb 100644 --- a/lib/ioctl_wrappers.c +++ b/lib/ioctl_wrappers.c @@ -455,7 +455,7 @@ bool gem_create__has_stolen_support(int fd) if (has_stolen_support < 0) { memset(&gp, 0, sizeof(gp)); - gp.param = 36; /* CREATE_VERSION */ + gp.param = 38; /* CREATE_VERSION */ gp.value = &val; /* Do we have the extended gem_create_ioctl? */ @@ -1230,6 +1230,52 @@ bool gem_has_bsd2(int fd) has_bsd2 = has_param(fd, LOCAL_I915_PARAM_HAS_BSD2); return has_bsd2; } + +struct local_i915_gem_get_aperture { + __u64 aper_size; + __u64 aper_available_size; + __u64 version; + __u64 map_total_size; + __u64 stolen_total_size; +}; +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define LOCAL_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct local_i915_gem_get_aperture) +/** + * gem_total_mappable_size: + * @fd: open i915 drm file descriptor + * + * Feature test macro to query the kernel for the total mappable size. + * + * Returns: Total mappable address space size. 
+ */ +uint64_t gem_total_mappable_size(int fd) +{ + struct local_i915_gem_get_aperture aperture; + + memset(&aperture, 0, sizeof(aperture)); + do_ioctl(fd, LOCAL_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + return aperture.map_total_size; +} + +/** + * gem_total_stolen_size: + * @fd: open i915 drm file descriptor + * + * Feature test macro to query the kernel for the total stolen size. + * + * Returns: Total stolen memory. + */ +uint64_t gem_total_stolen_size(int fd) +{ + struct local_i915_gem_get_aperture aperture; + + memset(&aperture, 0, sizeof(aperture)); + do_ioctl(fd, LOCAL_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + return aperture.stolen_total_size; +} + /** * gem_available_aperture_size: * @fd: open i915 drm file descriptor diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h index f3bd23f..ae04b35 100644 --- a/lib/ioctl_wrappers.h +++ b/lib/ioctl_wrappers.h @@ -93,8 +93,9 @@ void *__gem_mmap__wc(int fd, uint32_t handle, uint64_t offset, uint64_t size, un * Test macro to query whether support for allocating objects from stolen * memory is available. Automatically skips through igt_require() if not. 
*/ -#define gem_require_stolen_support(fd) \ - igt_require(gem_create__has_stolen_support(fd)) +#define gem_require_stolen_support(fd, size) \ + igt_require(gem_create__has_stolen_support(fd) && \ + (gem_total_stolen_size(fd) > size)) /** * gem_require_mmap_wc: @@ -153,6 +154,8 @@ int gem_gtt_type(int fd); bool gem_uses_ppgtt(int fd); bool gem_uses_full_ppgtt(int fd); int gem_available_fences(int fd); +uint64_t gem_total_mappable_size(int fd); +uint64_t gem_total_stolen_size(int fd); uint64_t gem_available_aperture_size(int fd); uint64_t gem_aperture_size(int fd); uint64_t gem_global_aperture_size(int fd); diff --git a/tests/gem_create.c b/tests/gem_create.c index 25f75d4..b251216 100644 --- a/tests/gem_create.c +++ b/tests/gem_create.c @@ -78,7 +78,7 @@ static void invalid_flag_test(int fd) { int ret; - gem_require_stolen_support(fd); + gem_require_stolen_support(fd, PAGE_SIZE); create.handle = 0; create.size = PAGE_SIZE; diff --git a/tests/gem_pread.c b/tests/gem_pread.c index afa072d..3da8bed 100644 --- a/tests/gem_pread.c +++ b/tests/gem_pread.c @@ -152,6 +152,7 @@ int main(int argc, char **argv) } igt_subtest("stolen-normal") { + gem_require_stolen_support(fd, OBJECT_SIZE); for (count = 1; count <= 1<<17; count <<= 1) { struct timeval start, end; @@ -167,6 +168,7 @@ int main(int argc, char **argv) } for (c = cache; c->level != -1; c++) { igt_subtest_f("stolen-%s", c->name) { + gem_require_stolen_support(fd, OBJECT_SIZE); gem_set_caching(fd, src_stolen, c->level); for (
[Intel-gfx] [PATCH 2/3] igt/gem_stolen: Fix for no_mmap subtest
From: Ankitprasad Sharma The CPU mmap in the no_mmap subtest is expected to fail, but gem_mmap__cpu asserts on the returned value itself, which makes the test fail. Replace gem_mmap__cpu with __gem_mmap__cpu and check the returned value in the test. Signed-off-by: Ankitprasad Sharma --- tests/gem_stolen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/gem_stolen.c b/tests/gem_stolen.c index 3a3cf81..7d329dd 100644 --- a/tests/gem_stolen.c +++ b/tests/gem_stolen.c @@ -392,7 +392,7 @@ stolen_no_mmap(int fd) handle = gem_create_stolen(fd, SIZE); - addr = gem_mmap__cpu(fd, handle, 0, SIZE, PROT_READ | PROT_WRITE); + addr = __gem_mmap__cpu(fd, handle, 0, SIZE, PROT_READ | PROT_WRITE); igt_assert(addr == NULL); gem_close(fd, handle); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/3] igt/gem_stolen: Verify contents of stolen-backed objects across hibernation
From: Ankitprasad Sharma This patch verifies if the contents of the stolen backed object were preserved across hibernation. This is to validate kernel changes related to moving stolen-backed objects to shmem on hibernation. v2: Added comment, Use igt_assert_eq() instead of igt_assert(), Made loops more readable (Tvrtko) v3: Corrected assertion (Tvrtko) Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- tests/gem_stolen.c | 95 ++ 1 file changed, 95 insertions(+) diff --git a/tests/gem_stolen.c b/tests/gem_stolen.c index 07fdd39..3a3cf81 100644 --- a/tests/gem_stolen.c +++ b/tests/gem_stolen.c @@ -290,6 +290,98 @@ static void stolen_fill_purge_test(int fd) gem_close(fd, handle[i]); } +static void stolen_hibernate(int fd) +{ + drm_intel_bo *bo; + drm_intel_bo *src, *dest; + int obj_count = 0, i = 0; + int ret, j; + uint32_t handle[MAX_OBJECTS], src_handle; + uint32_t *virt; + + gem_require_stolen_support(fd); + + src_handle = gem_create(fd, SIZE); + src = gem_handle_to_libdrm_bo(bufmgr, fd, +"bo", src_handle); + igt_assert(src != NULL); + + ret = drm_intel_gem_bo_map_gtt(src); + igt_assert_eq(ret, 0); + + virt = src->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) { + igt_assert_eq(virt[j], 0); + virt[j] = j; + } + + drm_intel_bo_unmap(src); + /* Exhaust Stolen space */ + for (i = 0; i < MAX_OBJECTS; i++) { + handle[i] = __gem_create_stolen(fd, SIZE); + if (!handle[i]) + break; + + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert_eq(ret, 0); + + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert_eq(virt[j], 0); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + + obj_count++; + } + + /* Assert if atleast one object is allocated from stolen, that +* is good enough to verify the content preservation across +* hibernation. 
+*/ + igt_assert(obj_count > 0); + + /* Copy data to all stolen backed objects */ + for (i = 0; i < obj_count; i++) { + dest = gem_handle_to_libdrm_bo(bufmgr, fd, + "dst_bo", handle[i]); + igt_assert(dest != NULL); + /* Copy contents to stolen backed objects via blt and +* verify post-hibernation, this also helps in identifying +* that the operation was completed before going to +* hibernation. +*/ + intel_copy_bo(batch, dest, src, SIZE); + } + + drm_intel_bo_unreference(src); + + igt_system_hibernate_autoresume(); + /* Check if the object's memory contents are intact +* across hibernation. +*/ + for (i = 0; i < obj_count; i++) { + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert_eq(ret, 0); + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert_eq(virt[j], j); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + } + + gem_close(fd, src_handle); + for (i = 0; i < obj_count; i++) + gem_close(fd, handle[i]); +} + static void stolen_no_mmap(int fd) { @@ -353,6 +445,9 @@ igt_main igt_subtest("stolen-fill-purge") stolen_fill_purge_test(fd); + igt_subtest("stolen-hibernate") + stolen_hibernate(fd); + igt_fixture { intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH] igt/gem_stolen: Check for available stolen memory size
From: Ankitprasad Sharma Check for available stolen memory size before attempting to run the stolen memory tests. This way we make sure that we do not create objects from stolen memory without knowing the available size. This checks if the kernel supports creation of stolen backed objects before doing any operation on stolen backed objects. Also correcting the CREATE_VERSION ioctl number in getparam ioctl, due to kernel changes added in between. v2: Removed size argument for checking stolen memory availability (Tvrtko) Signed-off-by: Ankitprasad Sharma --- lib/ioctl_wrappers.c | 48 +++- lib/ioctl_wrappers.h | 5 - tests/gem_pread.c| 3 +++ tests/gem_pwrite.c | 2 ++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c index f224091..818853e 100644 --- a/lib/ioctl_wrappers.c +++ b/lib/ioctl_wrappers.c @@ -455,7 +455,7 @@ bool gem_create__has_stolen_support(int fd) if (has_stolen_support < 0) { memset(&gp, 0, sizeof(gp)); - gp.param = 36; /* CREATE_VERSION */ + gp.param = 38; /* CREATE_VERSION */ gp.value = &val; /* Do we have the extended gem_create_ioctl? */ @@ -1230,6 +1230,52 @@ bool gem_has_bsd2(int fd) has_bsd2 = has_param(fd, LOCAL_I915_PARAM_HAS_BSD2); return has_bsd2; } + +struct local_i915_gem_get_aperture { + __u64 aper_size; + __u64 aper_available_size; + __u64 version; + __u64 map_total_size; + __u64 stolen_total_size; +}; +#define DRM_I915_GEM_GET_APERTURE 0x23 +#define LOCAL_IOCTL_I915_GEM_GET_APERTURE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_APERTURE, struct local_i915_gem_get_aperture) +/** + * gem_total_mappable_size: + * @fd: open i915 drm file descriptor + * + * Feature test macro to query the kernel for the total mappable size. + * + * Returns: Total mappable address space size. 
+ */ +uint64_t gem_total_mappable_size(int fd) +{ + struct local_i915_gem_get_aperture aperture; + + memset(&aperture, 0, sizeof(aperture)); + do_ioctl(fd, LOCAL_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + return aperture.map_total_size; +} + +/** + * gem_total_stolen_size: + * @fd: open i915 drm file descriptor + * + * Feature test macro to query the kernel for the total stolen size. + * + * Returns: Total stolen memory. + */ +uint64_t gem_total_stolen_size(int fd) +{ + struct local_i915_gem_get_aperture aperture; + + memset(&aperture, 0, sizeof(aperture)); + do_ioctl(fd, LOCAL_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + return aperture.stolen_total_size; +} + /** * gem_available_aperture_size: * @fd: open i915 drm file descriptor diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h index f3bd23f..0d42ba9 100644 --- a/lib/ioctl_wrappers.h +++ b/lib/ioctl_wrappers.h @@ -94,7 +94,8 @@ void *__gem_mmap__wc(int fd, uint32_t handle, uint64_t offset, uint64_t size, un * memory is available. Automatically skips through igt_require() if not. 
*/ #define gem_require_stolen_support(fd) \ - igt_require(gem_create__has_stolen_support(fd)) + igt_require(gem_create__has_stolen_support(fd) && \ + (gem_total_stolen_size(fd) > 0)) /** * gem_require_mmap_wc: @@ -153,6 +154,8 @@ int gem_gtt_type(int fd); bool gem_uses_ppgtt(int fd); bool gem_uses_full_ppgtt(int fd); int gem_available_fences(int fd); +uint64_t gem_total_mappable_size(int fd); +uint64_t gem_total_stolen_size(int fd); uint64_t gem_available_aperture_size(int fd); uint64_t gem_aperture_size(int fd); uint64_t gem_global_aperture_size(int fd); diff --git a/tests/gem_pread.c b/tests/gem_pread.c index afa072d..f4cf472 100644 --- a/tests/gem_pread.c +++ b/tests/gem_pread.c @@ -152,6 +152,7 @@ int main(int argc, char **argv) } igt_subtest("stolen-normal") { + gem_require_stolen_support(fd); for (count = 1; count <= 1<<17; count <<= 1) { struct timeval start, end; @@ -167,6 +168,7 @@ int main(int argc, char **argv) } for (c = cache; c->level != -1; c++) { igt_subtest_f("stolen-%s", c->name) { + gem_require_stolen_support(fd); gem_set_caching(fd, src_stolen, c->level); for (count = 1; count <= 1<<17; count <<= 1) { @@ -190,6 +192,7 @@ int main(int argc, char **argv) * user space buffer */ igt_subtest("pagefault-pread") { + gem_require_stolen_support(fd); large_stolen = gem_create_stolen(fd, LARGE_OBJECT_SIZE); stolen_nopf_user = (uint32_t *) mmap(NULL, LARGE_OBJECT_SIZE, PROT_WRITE, diff --git a/tests/gem_pwrite.c b/tests/gem_pwrite.c index a322f91..8db5454 100644
[Intel-gfx] [PATCH v21 00/11] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes wasted. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure, by adding a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from stolen region, but can also be used for other shmem allocated objects. Currently being used for buffers allocated in the stolen region. Also adding support for stealing purgable stolen pages, if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" v13: Addressed comments, Added 2 patches to detect Intel RST and disable stolen for persistent data if RST device found 1. acpi: Export acpi_bus_type 2. drm/i915: Disable use of stolen area by User when Intel RST is present v14: Addressed comments, Added 2 base patches to the series 1. drm/i915: Add support for mapping an object page by page 2. drm/i915: Introduce i915_gem_object_get_dma_address() v15: Addressed comments, Disabled stolen memory by default v16: Addressed comments, Added low level rpm assertions, Enabled stolen memory v17: Addressed comments v18: Rebased and fixed issue v19: Rebased and added 2 more patches to report mappable and stolen size numbers 1. 
drm/i915: Extend GET_APERTURE ioctl to report available map space 2. drm/i915: Extend GET_APERTURE ioctl to report size of the stolen region v20: Rebased and squashed last 2 patches into one. v21: Rebased and resolved conflicts. This can be verified using IGT tests: igt/gem_stolen, igt/gem_create, igt/gem_pread, igt/gem_pwrite Ankitprasad Sharma (7): drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Disable use of stolen area by User when Intel RST is present drm/i915: Extend GET_APERTURE ioctl to report available map space Chris Wilson (4): drm/i915: Add support for mapping an object page by page drm/i915: Introduce i915_gem_object_get_dma_address() drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hiberna
[Intel-gfx] [PATCH 01/11] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm specific callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page into the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) v3: Added POSTING_READ post register write (Tvrtko) v4: Rebase (Ankit) v5: Removed wmb() and FLUSH_CTL from insert_page, caller to take care of it (Chris) v6: insert_page not working correctly without FLSH_CNTL write, added the write again. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/char/agp/intel-gtt.c| 8 + drivers/gpu/drm/i915/i915_gem_gtt.c | 66 - drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index aef87fd..4431129 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -840,6 +840,14 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4668477..7a139a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2355,6 +2355,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + 
(offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2424,6 +2446,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2543,6 +2587,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct
[Intel-gfx] [PATCH 09/11] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) or recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Splitted object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected use of set_to_cpu_domain() (Chris) v9: Replaced WARN_ON by BUG_ON and added a comment explaining it (Daniel/Tvrtko) v10: Document use of barriers (Chris) v11: Resolved list corruption due to not removing obj from global_list if no reference to pages is held, Rebased (Ankit) v12: Rebase (Ankit) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 22 +++- drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 204 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 49 drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 8 files changed, 284 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 872c6060..dc9e06d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1058,6 +1058,22 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +/* freeze: before creating the hibernation_image */ +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int 
i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1107,12 +1123,6 @@ static int i915_pm_resume(struct device *dev) return i915_drm_resume(drm_dev); } -/* freeze: before creating the hibernation_image */ -static int i915_pm_freeze(struct device *dev) -{ - return i915_pm_suspend(dev); -} - static int i915_pm_freeze_late(struct device *dev) { int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 94bd79f..6323e12 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2182,6 +2182,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3319,6 +3325,9 @@ int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __m
[Intel-gfx] [PATCH 10/11] drm/i915: Disable use of stolen area by User when Intel RST is present
From: Ankitprasad Sharma The BIOS RapidStartTechnology may corrupt the stolen memory across S3 suspend due to unalarmed hibernation, in which case we will not be able to preserve the User data stored in the stolen region. Hence this patch tries to identify presence of the RST device on the ACPI bus, and disables use of stolen memory (for persistent data) if found. v2: Updated comment, updated/corrected new functions private to driver (Chris/Tvrtko) v3: Disabling stolen by default, wait till required acpi changes to detect device presence are pulled in (Ankit) v4: Enabled stolen by default as required acpi changes are merged (Ankit) v5: renamed variable, is IS_ENABLED() in place of #ifdef, use char* instead of structures (Lukas) Signed-off-by: Ankitprasad Sharma Cc: Lukas Wunner Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 8 drivers/gpu/drm/i915/i915_gem_stolen.c | 12 drivers/gpu/drm/i915/intel_acpi.c | 7 +++ 4 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 6323e12..aac4b5b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1340,6 +1340,16 @@ struct i915_gem_mm { */ bool busy; + /** +* Stolen will be lost upon hibernate (as the memory is unpowered). +* Across resume, we expect stolen to be intact - however, it may +* also be utililised by third parties (e.g. Intel RapidStart +* Technology) and if so we have to assume that any data stored in +* stolen across resume is lost and we set this flag to indicate that +* the stolen memory is volatile. 
+*/ + bool volatile_stolen; + /* the indicator for dispatch video commands on two BSD rings */ unsigned int bsd_ring_dispatch_index; @@ -3703,6 +3713,7 @@ static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) #endif /* intel_acpi.c */ +bool intel_detect_acpi_rst(void); #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); extern void intel_unregister_dsm_handler(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ccd1fa2..ba2d482 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -391,8 +391,16 @@ static struct drm_i915_gem_object * i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; + if (dev_priv->mm.volatile_stolen) { + /* Stolen may be overwritten by external parties +* so unsuitable for persistent user data. +*/ + return ERR_PTR(-ENODEV); + } + mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 2518ebb..0e6203c 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -492,6 +492,18 @@ int i915_gem_init_stolen(struct drm_device *dev) */ drm_mm_init(&dev_priv->mm.stolen, 0, ggtt->stolen_usable_size); + /* If the stolen region can be modified behind our backs upon suspend, +* then we cannot use it to store nonvolatile contents (i.e user data) +* as it will be corrupted upon resume. +*/ + dev_priv->mm.volatile_stolen = false; + if (IS_ENABLED(CONFIG_SUSPEND)) { + /* BIOSes using RapidStart Technology have been reported +* to overwrite stolen across S3, not just S4. 
+*/ + dev_priv->mm.volatile_stolen = intel_detect_acpi_rst(); + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1..05fd67f 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -23,6 +23,8 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; +static const char *irst_id = "INT3392"; + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -162,3 +164,8 @@ void intel_register_dsm_handler(void) void intel_unregister_dsm_handler(void) { } + +bool intel_detect_acpi_rst(void) +{ + return acpi_dev_found(irst_id); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/11] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0113207..1c7786a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3109,6 +3109,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 05/11] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) v7: Use first 8 bits of gem_create flags for placement (Chris), Add helper function for object allocation from stolen region (Ankit) v8: Added comment explaining STOLEN placement flag (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 49 ++ drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-- include/uapi/drm/i915_drm.h| 41 5 files changed, 91 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 07edaed..83ae436 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -231,6 +231,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 05df730..5245a65 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3523,7 +3523,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int 
i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 6d8ad9b..1a52e0f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -384,10 +384,36 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) kmem_cache_free(dev_priv->objects, obj); } +static struct drm_i915_gem_object * +i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) +{ + struct drm_i915_gem_object *obj; + int ret; + + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + mutex_unlock(&dev->struct_mutex); + return obj; +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -398,10 +424,23 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_object_create(dev, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + switch (flags & I915_CREATE_PLACEMENT_MASK) { + case I915_CREATE_PLACEMENT_NORMAL: + obj = i915_gem_object_create(dev, size); + break; + case I915_CREATE_PLACEMENT_STOLEN: + obj = i915_gem_alloc_object_stolen(dev, size); + break; + default: + return -EINVAL; + } + + if (IS_ERR_OR_NULL(obj)) + return 
-ENOMEM; ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -422,7 +461,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args-
[Intel-gfx] [PATCH 03/11] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) v9: Rebase (Ankit) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 90 +++-- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1bfc260..165a970 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -60,6 +60,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, + size, 0, 0, 0, + i915->ggtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void 
i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -765,21 +783,34 @@ fast_user_write(struct io_mapping *mapping, * @file: drm file pointer */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - ssize_t remain; - loff_t offset, page_base; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -789,26 +820,32 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = u64_to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); +
[Intel-gfx] [PATCH 04/11] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) v8: Rebase (Ankit) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 45 + 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1c7786a..05df730 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3100,6 +3100,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 165a970..6d8ad9b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5420,3 +5420,48 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * 
Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(ggtt->mappable, node.start, PAGE_SIZE); + + for (i = 0; i < size/PAGE_SIZE; i++) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, i), + node.start, I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + ggtt->base.clear_range(&ggtt->base, node.start, node.size, true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/11] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning v12: Added lockdep_assert before starting stolen-backed object eviction (Chris) v13: Rebased Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 171 + 
drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e4f2c55..e5b4274 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -182,7 +182,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -254,9 +254,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 5245a65..94bd79f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -831,6 +831,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1281,6 +1287,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. 
+*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2134,7 +2147,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head engine_list[I915_NUM_ENGINES]; @@ -2142,6 +2155,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i9
[Intel-gfx] [PATCH 11/11] drm/i915: Extend GET_APERTURE ioctl to report available map space
From: Ankitprasad Sharma When constructing a batchbuffer, it is sometimes crucial to know the largest hole into which we can fit a fenceable buffer (for example when handling very large objects on gen2 and gen3). This depends on the fragmentation of pinned buffers inside the aperture, a question only the kernel can easily answer. This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to include a couple of new fields in its reply to userspace - the total amount of space available in the mappable region of the aperture and also the single largest block available. This is not quite what userspace wants to answer the question of whether this batch will fit as fences are also required to meet severe alignment constraints within the batch. For this purpose, a third conservative estimate of largest fence available is also provided. For when userspace needs more than one batch, we also provide the cumulative space available for fences such that it has some additional guidance to how much space it could allocate to fences. Conservatism still wins. This patch extends the GET_APERTURE ioctl to add support for getting total size and available size of the stolen region as well as single largest block available in the stolen region too. The patch also adds a debugfs file for convenient testing and reporting. v2: The first object cannot end at offset 0, so we can use last==0 to detect the empty list. v3: Expand all values to 64bit, just in case. Report total mappable aperture size for userspace that cannot easily determine it by inspecting the PCI device. v4: (Rodrigo) Fixed rebase conflicts. 
v5: Rebased to the latest drm-intel-nightly (Ankit) v6: Keeping limits to get_aperture ioctl, and moved changing numbers to debugfs, Addressed comments (Chris/Tvrtko) v7: Squashed stolen memory size patch to this one, Added a new version field to validate the map_size and stolen size values, Changed Author to me (Ankit) due to significant changes in the logic used to get size values Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 143 + drivers/gpu/drm/i915/i915_drv.h| 3 + drivers/gpu/drm/i915/i915_gem.c| 4 + drivers/gpu/drm/i915/i915_gem_stolen.c | 27 +++ include/uapi/drm/i915_drm.h| 17 5 files changed, 194 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e5b4274..c052174 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -586,6 +586,148 @@ static int i915_gem_object_info(struct seq_file *m, void* data) return 0; } +static int vma_rank_by_ggtt(void *priv, + struct list_head *A, + struct list_head *B) +{ + struct i915_vma *a = list_entry(A, typeof(*a), exec_list); + struct i915_vma *b = list_entry(B, typeof(*b), exec_list); + + return a->node.start - b->node.start; +} + +static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end) +{ + u32 size = end - start; + u32 fence_size; + + if (INTEL_INFO(dev_priv)->gen < 4) { + u32 fence_max; + u32 fence_next; + + if (IS_GEN3(dev_priv)) { + fence_max = I830_FENCE_MAX_SIZE_VAL << 20; + fence_next = 1024*1024; + } else { + fence_max = I830_FENCE_MAX_SIZE_VAL << 19; + fence_next = 512*1024; + } + + fence_max = min(fence_max, size); + fence_size = 0; + /* Find fence_size less than fence_max and power of 2 */ + while (fence_next <= fence_max) { + u32 base = ALIGN(start, fence_next); + if (base + fence_next > end) + break; + + fence_size = fence_next; + fence_next <<= 1; + } + } else { + fence_size = 
size; + } + + return fence_size; +} + +static int i915_gem_aperture_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_gem_get_aperture arg; + struct i915_vma *vma; + struct list_head map_list; + const uint64_t map_limit = ggtt->mappable_end; + uint64_t map_space, map_largest, fence_space, fence_largest; + uint64_t last, hole_size, stolen_free, stolen_largest; + int ret; + + INIT_LIST_HEAD(&map_list); + + map_space = map_largest = 0; + fence_space = fence_largest = 0; + + ret = i915_gem_get_aperture_ioctl(node
[Intel-gfx] [PATCH 08/11] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) v14: Corrected datatypes/initializations (Tvrtko) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 218 ++-- 1 file changed, 188 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5f5a81b..90ae7ae 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -54,6 +54,9 @@ static bool cpu_cache_is_coherent(struct 
drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -644,6 +647,142 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline unsigned long +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = u64_to_user_ptr(data_ptr); + remain = size; + offset = data_offset; + + mutex_unlock(&dev->struct_mutex); + if 
(likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + mutex_lock(&dev->struct_mutex); + goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offs
[Intel-gfx] [PATCH 06/11] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) v7: Resolved rebasing conflicts (Ankit) v8: Removed redundant code (Chris) v9: Rebase v10: Rebase, resolve merge conflicts Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 15 drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 50 -- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 3 +- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +-- 9 files changed, 83 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1a52e0f..52181fe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -392,19 +392,18 @@ i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { - mutex_unlock(&dev->struct_mutex); - return NULL; - } + if (IS_ERR(obj)) + goto out; /* Always clear fresh buffers before handing to userspace */ ret = 
i915_gem_object_clear(obj); if (ret) { drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return NULL; + obj = ERR_PTR(ret); + goto out; } +out: mutex_unlock(&dev->struct_mutex); return obj; } @@ -439,8 +438,8 @@ i915_gem_create(struct drm_file *file, return -EINVAL; } - if (IS_ERR_OR_NULL(obj)) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 7c93327..84d91c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -59,8 +59,11 @@ static int render_state_init(struct render_state *so, return -EINVAL; so->obj = i915_gem_object_create(dev_priv->dev, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); + if (IS_ERR(so->obj)) { + ret = PTR_ERR(so->obj); + so->obj = NULL; + return ret; + } ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 81d5b6b..dcb70c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -503,6 +503,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct sg_table *st; struct scatterlist *sg; + int ret; DRM_DEBUG_DRIVER("offset=0x%x, size=%d\n", offset, size); BUG_ON(offset > ggtt->stolen_size - size); @@ -514,11 +515,12 @@ i915_pages_create_for_stolen(struct drm_device *dev, st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (sg_alloc_table(st, 1, GFP_KERNEL)) { + ret = sg_alloc_table(st, 1, GFP_KERNEL); + if (ret) { kfree(st); - return NULL; + return ERR_PTR(ret); } sg = st->sgl; @@ -567,18 +569,23 @@ _i915_gem_object_create_stolen(struct drm_device *dev, struct drm_mm_node *stolen) { 
struct drm_i915_gem_object *obj; + struct sg_table *pages; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); drm_gem_private_
[Intel-gfx] [PATCH 03/11] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) v9: Rebase (Ankit) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 90 +++-- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eae8d7a..452178c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -60,6 +60,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, + size, 0, 0, 0, + i915->ggtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void 
i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -765,21 +783,34 @@ fast_user_write(struct io_mapping *mapping, * @file: drm file pointer */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - ssize_t remain; - loff_t offset, page_base; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -789,26 +820,32 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = u64_to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); +
[Intel-gfx] [PATCH 01/11] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm specific callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page into the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) v3: Added POSTING_READ post register write (Tvrtko) v4: Rebase (Ankit) v5: Removed wmb() and FLUSH_CTL from insert_page, caller to take care of it (Chris) v6: insert_page not working correctly without FLSH_CNTL write, added the write again. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/char/agp/intel-gtt.c| 8 + drivers/gpu/drm/i915/i915_gem_gtt.c | 66 - drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index aef87fd..4431129 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -840,6 +840,14 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4668477..7a139a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2355,6 +2355,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + 
(offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2424,6 +2446,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2543,6 +2587,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct
[Intel-gfx] [PATCH v22 00/11] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure by adding a new flag through which the user can specify the preference to allocate the object from stolen memory; if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from stolen region, but can also be used for other shmem allocated objects. Currently being used for buffers allocated in the stolen region. Also adding support for stealing purgeable stolen pages, if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" v13: Addressed comments, Added 2 patches to detect Intel RST and disable stolen for persistent data if RST device found 1. acpi: Export acpi_bus_type 2. drm/i915: Disable use of stolen area by User when Intel RST is present v14: Addressed comments, Added 2 base patches to the series 1. drm/i915: Add support for mapping an object page by page 2. drm/i915: Introduce i915_gem_object_get_dma_address() v15: Addressed comments, Disabled stolen memory by default v16: Addressed comments, Added low level rpm assertions, Enabled stolen memory v17: Addressed comments v18: Rebased and fixed issue v19: Rebased and added 2 more patches to report mappable and stolen size numbers 1. 
drm/i915: Extend GET_APERTURE ioctl to report available map space 2. drm/i915: Extend GET_APERTURE ioctl to report size of the stolen region v20: Rebased and squashed last 2 patches into one. v21: Rebased and resolved conflicts. v22: Rebased again. This can be verified using IGT tests: igt/gem_stolen, igt/gem_create, igt/gem_pread, igt/gem_pwrite Ankitprasad Sharma (7): drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Disable use of stolen area by User when Intel RST is present drm/i915: Extend GET_APERTURE ioctl to report available map space Chris Wilson (4): drm/i915: Add support for mapping an object page by page drm/i915: Introduce i915_gem_object_get_dma_address() drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen o
[Intel-gfx] [PATCH 07/11] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning v12: Added lockdep_assert before starting stolen-backed object eviction (Chris) v13: Rebased Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 171 + 
drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e4f2c55..e5b4274 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -182,7 +182,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -254,9 +254,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c92cb60..a4caff4 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -831,6 +831,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1281,6 +1287,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. 
+*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2134,7 +2147,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head engine_list[I915_NUM_ENGINES]; @@ -2142,6 +2155,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i9
[Intel-gfx] [PATCH 05/11] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) v7: Use first 8 bits of gem_create flags for placement (Chris), Add helper function for object allocation from stolen region (Ankit) v8: Added comment explaining STOLEN placement flag (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 49 ++ drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-- include/uapi/drm/i915_drm.h| 41 5 files changed, 91 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 07edaed..83ae436 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -231,6 +231,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 96eea3d..c92cb60 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3523,7 +3523,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int 
i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d658d46..28c7e28 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -384,10 +384,36 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) kmem_cache_free(dev_priv->objects, obj); } +static struct drm_i915_gem_object * +i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) +{ + struct drm_i915_gem_object *obj; + int ret; + + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + mutex_unlock(&dev->struct_mutex); + return obj; +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -398,10 +424,23 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_object_create(dev, size); - if (IS_ERR(obj)) - return PTR_ERR(obj); + switch (flags & I915_CREATE_PLACEMENT_MASK) { + case I915_CREATE_PLACEMENT_NORMAL: + obj = i915_gem_object_create(dev, size); + break; + case I915_CREATE_PLACEMENT_STOLEN: + obj = i915_gem_alloc_object_stolen(dev, size); + break; + default: + return -EINVAL; + } + + if (IS_ERR_OR_NULL(obj)) + return 
-ENOMEM; ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -422,7 +461,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args-
[Intel-gfx] [PATCH 06/11] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) v7: Resolved rebasing conflicts (Ankit) v8: Removed redundant code (Chris) v9: Rebase v10: Rebase, resolve merge conflicts Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 15 drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 50 -- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 2 +- drivers/gpu/drm/i915/intel_overlay.c | 3 +- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 4 +-- 9 files changed, 83 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 28c7e28..22e39b1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -392,19 +392,18 @@ i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { - mutex_unlock(&dev->struct_mutex); - return NULL; - } + if (IS_ERR(obj)) + goto out; /* Always clear fresh buffers before handing to userspace */ ret = 
i915_gem_object_clear(obj); if (ret) { drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return NULL; + obj = ERR_PTR(ret); + goto out; } +out: mutex_unlock(&dev->struct_mutex); return obj; } @@ -439,8 +438,8 @@ i915_gem_create(struct drm_file *file, return -EINVAL; } - if (IS_ERR_OR_NULL(obj)) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 7c93327..84d91c9 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -59,8 +59,11 @@ static int render_state_init(struct render_state *so, return -EINVAL; so->obj = i915_gem_object_create(dev_priv->dev, 4096); - if (IS_ERR(so->obj)) - return PTR_ERR(so->obj); + if (IS_ERR(so->obj)) { + ret = PTR_ERR(so->obj); + so->obj = NULL; + return ret; + } ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 81d5b6b..dcb70c1 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -503,6 +503,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, struct i915_ggtt *ggtt = &dev_priv->ggtt; struct sg_table *st; struct scatterlist *sg; + int ret; DRM_DEBUG_DRIVER("offset=0x%x, size=%d\n", offset, size); BUG_ON(offset > ggtt->stolen_size - size); @@ -514,11 +515,12 @@ i915_pages_create_for_stolen(struct drm_device *dev, st = kmalloc(sizeof(*st), GFP_KERNEL); if (st == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (sg_alloc_table(st, 1, GFP_KERNEL)) { + ret = sg_alloc_table(st, 1, GFP_KERNEL); + if (ret) { kfree(st); - return NULL; + return ERR_PTR(ret); } sg = st->sgl; @@ -567,18 +569,23 @@ _i915_gem_object_create_stolen(struct drm_device *dev, struct drm_mm_node *stolen) { 
struct drm_i915_gem_object *obj; + struct sg_table *pages; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); drm_gem_private_
[Intel-gfx] [PATCH 04/11] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) v8: Rebase (Ankit) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 45 + 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8228e8a..96eea3d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3100,6 +3100,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 452178c..d658d46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5418,3 +5418,48 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * 
Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(ggtt->mappable, node.start, PAGE_SIZE); + + for (i = 0; i < size/PAGE_SIZE; i++) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, i), + node.start, I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + ggtt->base.clear_range(&ggtt->base, node.start, node.size, true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 02/11] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index adc5e7d..8228e8a 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3109,6 +3109,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 08/11] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) v14: Corrected datatypes/initializations (Tvrtko) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 218 ++-- 1 file changed, 188 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 58c83d8..30069c0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -54,6 +54,9 @@ static bool cpu_cache_is_coherent(struct 
drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -644,6 +647,142 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline unsigned long +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = u64_to_user_ptr(data_ptr); + remain = size; + offset = data_offset; + + mutex_unlock(&dev->struct_mutex); + if 
(likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + mutex_lock(&dev->struct_mutex); + goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offs
[Intel-gfx] [PATCH 09/11] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Split object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected use of set_to_cpu_domain() (Chris) v9: Replaced WARN_ON by BUG_ON and added a comment explaining it (Daniel/Tvrtko) v10: Document use of barriers (Chris) v11: Resolved list corruption due to not removing obj from global_list if no reference to pages is held, Rebased (Ankit) v12: Rebase (Ankit) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 22 +++- drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 204 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 49 drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 8 files changed, 284 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 872c6060..dc9e06d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1058,6 +1058,22 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +/* freeze: before creating the hibernation_image */ +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int 
i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1107,12 +1123,6 @@ static int i915_pm_resume(struct device *dev) return i915_drm_resume(drm_dev); } -/* freeze: before creating the hibernation_image */ -static int i915_pm_freeze(struct device *dev) -{ - return i915_pm_suspend(dev); -} - static int i915_pm_freeze_late(struct device *dev) { int ret; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a4caff4..81e0551 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2182,6 +2182,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3319,6 +3325,9 @@ int __must_check i915_gem_init_hw(struct drm_device *dev); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __m
[Intel-gfx] [PATCH 10/11] drm/i915: Disable use of stolen area by User when Intel RST is present
From: Ankitprasad Sharma The BIOS RapidStartTechnology may corrupt the stolen memory across S3 suspend due to unalarmed hibernation, in which case we will not be able to preserve the User data stored in the stolen region. Hence this patch tries to identify presence of the RST device on the ACPI bus, and disables use of stolen memory (for persistent data) if found. v2: Updated comment, updated/corrected new functions private to driver (Chris/Tvrtko) v3: Disabling stolen by default, wait till required acpi changes to detect device presence are pulled in (Ankit) v4: Enabled stolen by default as required acpi changes are merged (Ankit) v5: renamed variable, use IS_ENABLED() in place of #ifdef, use char* instead of structures (Lukas) Signed-off-by: Ankitprasad Sharma Cc: Lukas Wunner Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 8 drivers/gpu/drm/i915/i915_gem_stolen.c | 12 drivers/gpu/drm/i915/intel_acpi.c | 7 +++ 4 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 81e0551..5ac1996 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1340,6 +1340,16 @@ struct i915_gem_mm { */ bool busy; + /** +* Stolen will be lost upon hibernate (as the memory is unpowered). +* Across resume, we expect stolen to be intact - however, it may +* also be utilised by third parties (e.g. Intel RapidStart +* Technology) and if so we have to assume that any data stored in +* stolen across resume is lost and we set this flag to indicate that +* the stolen memory is volatile. 
+*/ + bool volatile_stolen; + /* the indicator for dispatch video commands on two BSD rings */ unsigned int bsd_ring_dispatch_index; @@ -3704,6 +3714,7 @@ static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) #endif /* intel_acpi.c */ +bool intel_detect_acpi_rst(void); #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); extern void intel_unregister_dsm_handler(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b5a2604..3b66b68 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -391,8 +391,16 @@ static struct drm_i915_gem_object * i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; + if (dev_priv->mm.volatile_stolen) { + /* Stolen may be overwritten by external parties +* so unsuitable for persistent user data. +*/ + return ERR_PTR(-ENODEV); + } + mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 2518ebb..0e6203c 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -492,6 +492,18 @@ int i915_gem_init_stolen(struct drm_device *dev) */ drm_mm_init(&dev_priv->mm.stolen, 0, ggtt->stolen_usable_size); + /* If the stolen region can be modified behind our backs upon suspend, +* then we cannot use it to store nonvolatile contents (i.e user data) +* as it will be corrupted upon resume. +*/ + dev_priv->mm.volatile_stolen = false; + if (IS_ENABLED(CONFIG_SUSPEND)) { + /* BIOSes using RapidStart Technology have been reported +* to overwrite stolen across S3, not just S4. 
+*/ + dev_priv->mm.volatile_stolen = intel_detect_acpi_rst(); + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1..60ccb39 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -23,6 +23,8 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; +static const char *irst_id = "INT3392"; + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -162,3 +164,8 @@ void intel_register_dsm_handler(void) void intel_unregister_dsm_handler(void) { } + +bool intel_detect_acpi_rst(void) +{ + return acpi_dev_found(irst_id); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 11/11] drm/i915: Extend GET_APERTURE ioctl to report available map space
From: Ankitprasad Sharma When constructing a batchbuffer, it is sometimes crucial to know the largest hole into which we can fit a fenceable buffer (for example when handling very large objects on gen2 and gen3). This depends on the fragmentation of pinned buffers inside the aperture, a question only the kernel can easily answer. This patch extends the current DRM_I915_GEM_GET_APERTURE ioctl to include a couple of new fields in its reply to userspace - the total amount of space available in the mappable region of the aperture and also the single largest block available. This is not quite what userspace wants to answer the question of whether this batch will fit as fences are also required to meet severe alignment constraints within the batch. For this purpose, a third conservative estimate of largest fence available is also provided. For when userspace needs more than one batch, we also provide the cumulative space available for fences such that it has some additional guidance to how much space it could allocate to fences. Conservatism still wins. This patch extends the GET_APERTURE ioctl to add support for getting total size and available size of the stolen region as well as single largest block available in the stolen region too. The patch also adds a debugfs file for convenient testing and reporting. v2: The first object cannot end at offset 0, so we can use last==0 to detect the empty list. v3: Expand all values to 64bit, just in case. Report total mappable aperture size for userspace that cannot easily determine it by inspecting the PCI device. v4: (Rodrigo) Fixed rebase conflicts. 
v5: Rebased to the latest drm-intel-nightly (Ankit) v6: Keeping limits to get_aperture ioctl, and moved changing numbers to debugfs, Addressed comments (Chris/Tvrtko) v7: Squashed stolen memory size patch to this one, Added a new version field to validate the map_size and stolen size values, Changed Author to me (Ankit) due to significant changes in the logic used to get size values Signed-off-by: Chris Wilson Signed-off-by: Rodrigo Vivi Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 143 + drivers/gpu/drm/i915/i915_drv.h| 3 + drivers/gpu/drm/i915/i915_gem.c| 4 + drivers/gpu/drm/i915/i915_gem_stolen.c | 27 +++ include/uapi/drm/i915_drm.h| 17 5 files changed, 194 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index e5b4274..c052174 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -586,6 +586,148 @@ static int i915_gem_object_info(struct seq_file *m, void* data) return 0; } +static int vma_rank_by_ggtt(void *priv, + struct list_head *A, + struct list_head *B) +{ + struct i915_vma *a = list_entry(A, typeof(*a), exec_list); + struct i915_vma *b = list_entry(B, typeof(*b), exec_list); + + return a->node.start - b->node.start; +} + +static u32 __fence_size(struct drm_i915_private *dev_priv, u32 start, u32 end) +{ + u32 size = end - start; + u32 fence_size; + + if (INTEL_INFO(dev_priv)->gen < 4) { + u32 fence_max; + u32 fence_next; + + if (IS_GEN3(dev_priv)) { + fence_max = I830_FENCE_MAX_SIZE_VAL << 20; + fence_next = 1024*1024; + } else { + fence_max = I830_FENCE_MAX_SIZE_VAL << 19; + fence_next = 512*1024; + } + + fence_max = min(fence_max, size); + fence_size = 0; + /* Find fence_size less than fence_max and power of 2 */ + while (fence_next <= fence_max) { + u32 base = ALIGN(start, fence_next); + if (base + fence_next > end) + break; + + fence_size = fence_next; + fence_next <<= 1; + } + } else { + fence_size = 
size; + } + + return fence_size; +} + +static int i915_gem_aperture_info(struct seq_file *m, void *data) +{ + struct drm_info_node *node = m->private; + struct drm_device *dev = node->minor->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_i915_gem_get_aperture arg; + struct i915_vma *vma; + struct list_head map_list; + const uint64_t map_limit = ggtt->mappable_end; + uint64_t map_space, map_largest, fence_space, fence_largest; + uint64_t last, hole_size, stolen_free, stolen_largest; + int ret; + + INIT_LIST_HEAD(&map_list); + + map_space = map_largest = 0; + fence_space = fence_largest = 0; + + ret = i915_gem_get_aperture_ioctl(node
[Intel-gfx] [PATCH 1/5] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm-specific callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page into the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) v3: Added POSTING_READ post register write (Tvrtko) v4: Rebase (Ankit) v5: Removed wmb() and FLUSH_CTL from insert_page, caller to take care of it (Chris) v6: insert_page not working correctly without FLSH_CNTL write, added the write again. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/char/agp/intel-gtt.c| 8 + drivers/gpu/drm/i915/i915_gem_gtt.c | 66 - drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 81 insertions(+), 1 deletion(-) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index aef87fd..4431129 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -840,6 +840,14 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 4668477..7a139a6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2355,6 +2355,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + 
(offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2424,6 +2446,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->ggtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2543,6 +2587,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? 
+ AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct
[Intel-gfx] [PATCH 3/5] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) v9: Rebase (Ankit) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 90 +++-- 1 file changed, 68 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index eae8d7a..452178c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -60,6 +60,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->ggtt.base.mm, node, + size, 0, 0, 0, + i915->ggtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void 
i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -765,21 +783,34 @@ fast_user_write(struct io_mapping *mapping, * @file: drm file pointer */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; - ssize_t remain; - loff_t offset, page_base; + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -789,26 +820,32 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = u64_to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = u64_to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); +
[Intel-gfx] [PATCH 2/5] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 53d9e3f..0349c5f 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3118,6 +3118,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/5] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) v8: Rebase (Ankit) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 45 + 2 files changed, 46 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 0349c5f..e72e6af 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3109,6 +3109,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 452178c..d658d46 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5418,3 +5418,48 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * 
Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(ggtt->mappable, node.start, PAGE_SIZE); + + for (i = 0; i < size/PAGE_SIZE; i++) { + ggtt->base.insert_page(&ggtt->base, + i915_gem_object_get_dma_address(obj, i), + node.start, I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + ggtt->base.clear_range(&ggtt->base, node.start, node.size, true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/5] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) v14: Corrected datatypes/initializations (Tvrtko) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 218 ++-- 1 file changed, 188 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d658d46..1777202 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -54,6 +54,9 @@ static bool cpu_cache_is_coherent(struct 
drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -606,6 +609,142 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline unsigned long +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base, PAGE_SIZE); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct i915_ggtt *ggtt = &dev_priv->ggtt; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = u64_to_user_ptr(data_ptr); + remain = size; + offset = data_offset; + + mutex_unlock(&dev->struct_mutex); + if 
(likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + mutex_lock(&dev->struct_mutex); + goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offs
[Intel-gfx] [PATCH v4 0/3] Tests for verifying the old and extended GEM_CREATE ioctl
From: Ankitprasad Sharma This new set of tests verifies the old and the new extended GEM_CREATE ioctl. gem_stolen tries to verify the new extended GEM_CREATE ioctl, which tries to create an object backed by stolen memory and performs basic operations on it. It verifies the creation as well as the purging of an object when it is no longer needed, copying contents of a stolen backed object to another, doing a pread/pwrite on the object and verifying its contents. We also try to pread/pwrite a stolen backed object multiple times to get the pread/pwrite speed, as well as pread speed when there is a page fault while accessing userspace buffer. There is a new test gem_create to do a sanity check for creating both stolen/shmem backed objects with valid and invalid parameters. v2: Rebased to the latest and added IGT_TEST_DESCRIPTION v3: Addressed comments by Tvrtko and Dave, added one more testcase for verifying extended gem_create ioctl v4: Addressed comments by Tvrtko, removed unused variables, addressed compiler warnings v5: Added one more patch to verify contents of stolen backed object across hibernation Ankitprasad Sharma (4): igt/gem_stolen: Verifying extended gem_create ioctl igt/gem_pread: Support to verify pread/pwrite for non-shmem backed obj igt/gem_create: Test to validate parameters for GEM_CREATE ioctl igt/gem_stolen: Verify contents of stolen-backed objects across hibernation lib/ioctl_wrappers.c | 72 lib/ioctl_wrappers.h | 13 ++ tests/Makefile.sources | 2 + tests/gem_create.c | 166 ++ tests/gem_pread.c | 108 +++- tests/gem_pwrite.c | 55 +- tests/gem_stolen.c | 446 + 7 files changed, 849 insertions(+), 13 deletions(-) create mode 100644 tests/gem_create.c create mode 100644 tests/gem_stolen.c -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/4] igt/gem_pread: Support to verify pread/pwrite for non-shmem backed obj
From: Ankitprasad Sharma This patch adds support to verify pread/pwrite for non-shmem backed objects. It also shows the pread/pwrite speed. It also tests speeds for pread with and without user side page faults v2: Fixed Rebase conflicts (Ankit) v3: Precalculating values to avoid redundant function calls (Dave) Replaced igt_subtest by igt_subtest_f, added asserts for mmap, corrected indentation (Tvrtko) v4: Updated data types to avoid redundant type conversions (Tvrtko) Corrected pagefault-pread time calculation (Ankit) Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- tests/gem_pread.c | 108 + tests/gem_pwrite.c | 55 --- 2 files changed, 150 insertions(+), 13 deletions(-) diff --git a/tests/gem_pread.c b/tests/gem_pread.c index cc83948..41553b8 100644 --- a/tests/gem_pread.c +++ b/tests/gem_pread.c @@ -41,6 +41,10 @@ #include "drmtest.h" #define OBJECT_SIZE 16384 +#define LARGE_OBJECT_SIZE 1024 * 1024 +#define KGRN "\x1B[32m" +#define KRED "\x1B[31m" +#define KNRM "\x1B[0m" static void do_gem_read(int fd, uint32_t handle, void *buf, int len, int loops) { @@ -76,12 +80,16 @@ static const char *bytes_per_sec(char *buf, double v) uint32_t *src, dst; +uint32_t *dst_user, src_stolen, large_stolen; +uint32_t *stolen_pf_user, *stolen_nopf_user; int fd, count; int main(int argc, char **argv) { int object_size = 0; - uint32_t buf[20]; + double usecs; + char buf[100]; + char* bps; const struct { int level; const char *name; @@ -106,6 +114,8 @@ int main(int argc, char **argv) dst = gem_create(fd, object_size); src = malloc(object_size); + src_stolen = gem_create_stolen(fd, object_size); + dst_user = malloc(object_size); } igt_subtest("normal") { @@ -115,10 +125,10 @@ int main(int argc, char **argv) gettimeofday(&start, NULL); do_gem_read(fd, dst, src, object_size, count); gettimeofday(&end, NULL); + usecs = elapsed(&start, &end, count); + bps = bytes_per_sec(buf, object_size/usecs*1e6); igt_info("Time to pread %d bytes x %6d: %7.3fµs, %s\n", -object_size, 
count, -elapsed(&start, &end, count), -bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6)); +object_size, count, usecs, bps); fflush(stdout); } } @@ -133,18 +143,102 @@ int main(int argc, char **argv) gettimeofday(&start, NULL); do_gem_read(fd, dst, src, object_size, count); gettimeofday(&end, NULL); + usecs = elapsed(&start, &end, count); + bps = bytes_per_sec(buf, object_size/usecs*1e6); igt_info("Time to %s pread %d bytes x %6d: %7.3fµs, %s\n", -c->name, object_size, count, -elapsed(&start, &end, count), -bytes_per_sec((char *)buf, object_size/elapsed(&start, &end, count)*1e6)); +c->name, object_size, count, usecs, bps); fflush(stdout); } } } + igt_subtest("stolen-normal") { + for (count = 1; count <= 1<<17; count <<= 1) { + struct timeval start, end; + + gettimeofday(&start, NULL); + do_gem_read(fd, src_stolen, dst_user, object_size, count); + gettimeofday(&end, NULL); + usecs = elapsed(&start, &end, count); + bps = bytes_per_sec(buf, object_size/usecs*1e6); + igt_info("Time to pread %d bytes x %6d: %7.3fµs, %s\n", +object_size, count, usecs, bps); + fflush(stdout); + } + } + for (c = cache; c->level != -1; c++) { + igt_subtest_f("stolen-%s", c->name) { + gem_set_caching(fd, src_stolen, c->level); + + for (count = 1; count <= 1<<17; count <<= 1) { + struct timeval start, end; + + gettimeofday(&start, NULL); + do_gem_read(fd, src_stolen, dst_user, + object_size, count); + gettimeofday(&end, NULL); + usecs = elapsed(&start, &end, count)
[Intel-gfx] [PATCH 1/4] igt/gem_stolen: Verifying extended gem_create ioctl
From: Ankitprasad Sharma This patch adds the testcases for verifying the new extended gem_create ioctl. By means of this extended ioctl, memory placement of the GEM object can be specified, i.e. either shmem or stolen memory. These testcases include functional tests and interface tests for testing the gem_create ioctl call for stolen memory placement v2: Testing pread/pwrite functionality for stolen backed objects, added local struct for extended gem_create and gem_get_aperture, until headers catch up (Chris) v3: Removed get_aperture related functions, extended gem_pread to compare speeds for user pages with and without page faults, unexposed local_gem_create struct, changed gem_create_stolen usage (Chris) v4: Splitting patch to remove changes from gem_pread/gem_pwrite to another patch (Ankit) v5: Fixed Rebase conflicts (Ankit) Added IGT_TEST_DESCRIPTION (Thomas Wood) v6: Added __gem_create_stolen for user to handle error, updated gem_create_stolen to align with gem_create function, corrected fill_purge test (out of bound access), added testcase to validate allocating of more than 32 bit sized buffers (Tvrtko) v7: Removed unused variables, Corrected comments & formatting (Tvrtko) Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- lib/ioctl_wrappers.c | 72 ++ lib/ioctl_wrappers.h | 13 ++ tests/Makefile.sources | 1 + tests/gem_stolen.c | 360 + 4 files changed, 446 insertions(+) create mode 100644 tests/gem_stolen.c diff --git a/lib/ioctl_wrappers.c b/lib/ioctl_wrappers.c index a269d0f..0489272 100644 --- a/lib/ioctl_wrappers.c +++ b/lib/ioctl_wrappers.c @@ -389,6 +389,78 @@ void gem_sync(int fd, uint32_t handle) I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); } +bool gem_create__has_stolen_support(int fd) +{ + static int has_stolen_support = -1; + struct drm_i915_getparam gp; + int val = -1; + + if (has_stolen_support < 0) { + memset(&gp, 0, sizeof(gp)); + gp.param = 36; /* CREATE_VERSION */ + gp.value = &val; + + /* Do we have the extended 
gem_create_ioctl? */ + ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + has_stolen_support = val >= 2; + } + + return has_stolen_support; +} + +struct local_i915_gem_create_v2 { + uint64_t size; + uint32_t handle; + uint32_t pad; +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) + uint32_t flags; +}; + +#define LOCAL_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct local_i915_gem_create_v2) +uint32_t __gem_create_stolen(int fd, uint64_t size) +{ + struct local_i915_gem_create_v2 create; + int ret; + + memset(&create, 0, sizeof(create)); + create.handle = 0; + create.size = size; + create.flags = I915_CREATE_PLACEMENT_STOLEN; + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create); + + if (ret < 0) + return 0; + + errno = 0; + return create.handle; +} + +/** + * gem_create_stolen: + * @fd: open i915 drm file descriptor + * @size: desired size of the buffer + * + * This wraps the new GEM_CREATE ioctl, which allocates a new gem buffer + * object of @size and placement in stolen memory region. 
+ * + * Returns: The file-private handle of the created buffer object + */ + +uint32_t gem_create_stolen(int fd, uint64_t size) +{ + struct local_i915_gem_create_v2 create; + + memset(&create, 0, sizeof(create)); + create.handle = 0; + create.size = size; + create.flags = I915_CREATE_PLACEMENT_STOLEN; + do_ioctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create); + igt_assert(create.handle); + + return create.handle; +} + + uint32_t __gem_create(int fd, int size) { struct drm_i915_gem_create create; diff --git a/lib/ioctl_wrappers.h b/lib/ioctl_wrappers.h index bc5d4bd..ff1584a 100644 --- a/lib/ioctl_wrappers.h +++ b/lib/ioctl_wrappers.h @@ -56,6 +56,9 @@ void gem_read(int fd, uint32_t handle, uint64_t offset, void *buf, uint64_t leng void gem_set_domain(int fd, uint32_t handle, uint32_t read_domains, uint32_t write_domain); void gem_sync(int fd, uint32_t handle); +bool gem_create__has_stolen_support(int fd); +uint32_t __gem_create_stolen(int fd, uint64_t size); +uint32_t gem_create_stolen(int fd, uint64_t size); uint32_t __gem_create(int fd, int size); uint32_t gem_create(int fd, uint64_t size); void gem_execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf); @@ -67,6 +70,16 @@ bool gem_mmap__has_wc(int fd); void *gem_mmap__wc(int fd, uint32_t handle, uint64_t offset, uint64_t size, unsigned prot); /** + * gem_require_stolen_support: + * @fd: open i915 drm file descriptor + * + * Test macro to query whether support for allocating objects from stolen + * memory is available. Automatically skips through igt_require() if not. + */ +#define ge
[Intel-gfx] [PATCH 3/4] igt/gem_create: Test to validate parameters for GEM_CREATE ioctl
From: Ankitprasad Sharma This test validates the two parameters (size and flags) of the GEM_CREATE ioctl. v2: Added IGT_TEST_DESCRIPTION (Thomas Wood) v3: Removed use of hard coded values, updated comments (Tvrtko) v4: Removed over-use of macros, updated with multiples of PAGE_SIZE (Tvrtko) Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- tests/Makefile.sources | 1 + tests/gem_create.c | 166 + 2 files changed, 167 insertions(+) create mode 100644 tests/gem_create.c diff --git a/tests/Makefile.sources b/tests/Makefile.sources index 324cbb5..f5790df 100644 --- a/tests/Makefile.sources +++ b/tests/Makefile.sources @@ -15,6 +15,7 @@ TESTS_progs_M = \ gem_close_race \ gem_concurrent_blit \ gem_concurrent_all \ + gem_create \ gem_cs_tlb \ gem_ctx_param_basic \ gem_ctx_bad_exec \ diff --git a/tests/gem_create.c b/tests/gem_create.c new file mode 100644 index 000..8f32072 --- /dev/null +++ b/tests/gem_create.c @@ -0,0 +1,166 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *Ankitprasad Sharma + * + */ + +/** @file gem_create.c + * + * This is a test for the extended and old gem_create ioctl, that + * includes allocation of object from stolen memory and shmem. + * + * The goal is to simply ensure that basics work and invalid input + * combinations are rejected. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "ioctl_wrappers.h" +#include "intel_bufmgr.h" +#include "intel_batchbuffer.h" +#include "intel_io.h" +#include "intel_chipset.h" +#include "igt_aux.h" +#include "drmtest.h" +#include "drm.h" +#include "i915_drm.h" + +IGT_TEST_DESCRIPTION("This is a test for the extended & old gem_create ioctl," +" that includes allocation of object from stolen memory" +" and shmem."); + +#define CLEAR(s) memset(&s, 0, sizeof(s)) +#define PAGE_SIZE 4096 + +struct local_i915_gem_create_v2 { + uint64_t size; + uint32_t handle; + uint32_t pad; +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) + uint32_t flags; +} create; + +#define LOCAL_IOCTL_I915_GEM_CREATE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_CREATE, struct local_i915_gem_create_v2) + +static void invalid_flag_test(int fd) +{ + int ret; + + gem_require_stolen_support(fd); + + create.handle = 0; + create.size = PAGE_SIZE; + create.flags = ~I915_CREATE_PLACEMENT_STOLEN; + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create); + + igt_assert(ret <= 0); + + create.flags = ~0; + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_CREATE, &create); + + igt_assert(ret <= 0); +} + +static void invalid_size_test(int fd) +{ + int handle; + + handle = __gem_create(fd, 0); + igt_assert(!handle); +} + +/* + * Creating an object with non-aligned size 
and trying to access it with an + * offset, which is greater than the requested size but smaller than the + * object's last page boundary. pwrite here must be successful. + */ +static void valid_nonaligned_size(int fd) +{ + int handle; + char buf[PAGE_SIZE]; + + handle = gem_create(fd, PAGE_SIZE / 2); + + gem_write(fd, handle, PAGE_SIZE / 2, buf, PAGE_SIZE / 2); + + gem_close(fd, handle); +} + +/* + * Creating an object with non-aligned size and trying to access it with an + * offset, which is greater than the requested size and larger than the + * object's last page boundary. pwrite here must fail. + */ +static void invalid_nonaligned_size(int fd) +{ + int handle; + char buf[PAGE_SIZE]; + struct drm_i915_gem_pwrite gem_pwrite; + + handle = gem_create(fd, PAGE_SIZE / 2); + + CLEAR(gem_pwrite); +
[Intel-gfx] [PATCH 4/4] igt/gem_stolen: Verify contents of stolen-backed objects across hibernation
From: Ankitprasad Sharma This patch verifies if the contents of the stolen backed object were preserved across hibernation. This is to validate kernel changes related to moving stolen-backed objects to shmem on hibernation. Signed-off-by: Ankitprasad Sharma --- tests/gem_stolen.c | 86 ++ 1 file changed, 86 insertions(+) diff --git a/tests/gem_stolen.c b/tests/gem_stolen.c index 3374716..1f13fb0 100644 --- a/tests/gem_stolen.c +++ b/tests/gem_stolen.c @@ -290,6 +290,89 @@ static void stolen_fill_purge_test(int fd) gem_close(fd, handle[i]); } +static void stolen_hibernate(int fd) +{ + drm_intel_bo *bo; + drm_intel_bo *src, *dest; + int obj_count = 0, i = 0; + int _ret = 0, j = 0; + uint32_t handle[MAX_OBJECTS], src_handle; + uint32_t *virt; + + gem_require_stolen_support(fd); + + src_handle = gem_create(fd, SIZE); + igt_assert(!src_handle); + src = gem_handle_to_libdrm_bo(bufmgr, fd, +"bo", src_handle); + igt_assert(src != NULL); + + _ret = drm_intel_gem_bo_map_gtt(src); + igt_assert(!_ret); + + virt = src->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + virt[j] = DATA; + + drm_intel_bo_unmap(src); + /* Exhaust Stolen space */ + do { + handle[i] = __gem_create_stolen(fd, SIZE); + if (handle[i] != 0) { + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + _ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert(!_ret); + + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert(!virt[j]); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + + obj_count++; + } + + i++; + } while (handle[i-1] && i < MAX_OBJECTS); + + igt_assert(obj_count > 0); + + for (i = 0; i < obj_count; i++) { + dest = gem_handle_to_libdrm_bo(bufmgr, fd, + "dst_bo", handle[i]); + igt_assert(dest != NULL); + intel_copy_bo(batch, dest, src, SIZE); + drm_intel_bo_unreference(dest); + } + + drm_intel_bo_unreference(src); + + igt_system_hibernate_autoresume(); + /* Check if the object's memory contents are intact +* across hibernation. 
+*/ + for (i = 0; i < obj_count; i++) { + bo = gem_handle_to_libdrm_bo(bufmgr, fd, +"verify_bo", handle[i]); + igt_assert(bo != NULL); + _ret = drm_intel_gem_bo_map_gtt(bo); + igt_assert(!_ret); + virt = bo->virtual; + for (j = 0; j < SIZE/DWORD_SIZE; j++) + igt_assert_eq(virt[j], DATA); + + drm_intel_bo_unmap(bo); + drm_intel_bo_unreference(bo); + } + + gem_close(fd, src_handle); + for (i = 0; i < obj_count; i++) + gem_close(fd, handle[i]); +} + static void stolen_no_mmap(int fd) { @@ -353,6 +436,9 @@ igt_main igt_subtest("stolen-fill-purge") stolen_fill_purge_test(fd); + igt_subtest("stolen-hibernate") + stolen_hibernate(fd); + igt_fixture { intel_batchbuffer_free(batch); drm_intel_bufmgr_destroy(bufmgr); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/6] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. We currently intend to use this only for buffers allocated from the stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 79 + 2 files changed, 80 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 548a0eb..8e554d3 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2856,6 +2856,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9d2e6e3..d57e850 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5244,3 +5244,82 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + int ret, i; + char __iomem *base; + size_t size = obj->base.size; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_mm_node node; + + WARN_ON(!mutex_is_locked(&obj->base.dev->struct_mutex)); + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); + if (ret) { + memset(&node, 0, sizeof(node)); + ret = 
drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, + &node, 4096, 0, + I915_CACHE_NONE, 0, + i915->gtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); + if (ret) + goto out; + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } + + ret = i915_gem_object_put_fence(obj); + if (ret) + goto unpin; + + if (node.allocated) { + for (i = 0; i < size/PAGE_SIZE; i++) { + wmb(); + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, i), + node.start, + I915_CACHE_NONE, + 0); + wmb(); + base = ioremap_wc(i915->gtt.mappable_base + node.start, 4096); + memset_io(base, 0, 4096); + iounmap(base); + } + } else { + /* Get the CPU virtual address of the buffer */ + base = ioremap_wc(i915->gtt.mappable_base + + node.start, size); + if (base == NULL) { + DRM_ERROR("Mapping of gem object to CPU failed!\n"); + ret = -ENOSPC; + goto unpin; + } + + memset_io(base, 0, size); + iounmap(base); + } +unpin: + if (node.allocated) { + wmb(); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + drm_mm_remove_node(&node); + i915_gem_object_unpin_pages(obj); + } + else { + i915_gem_object_ggtt_unpin(obj); + } +out: + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 4/6] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a seperate list to maintain purgable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 16 drivers/gpu/drm/i915/i915_gem_stolen.c | 170 + drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 5659d4c..89b0fec 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,7 +174,7 @@ describe_obj(struct seq_file 
*m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -253,9 +253,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index d45274e..e0b09b0 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -841,6 +841,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1252,6 +1258,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. +*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2032,7 +2045,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. 
*/ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head ring_list[I915_NUM_RINGS]; @@ -2040,6 +2053,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used during stolen memory allocations to temporarily hold a ref */ + struct list_head stolen_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 5812748..ed97de6 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4359,6 +4359,20 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (obj->madv == I915_MADV_DONTNEED && obj->pages == NULL) i915_gem_objec
[Intel-gfx] [PATCH 3/6] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 16 +- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 +++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 43 ++ drivers/gpu/drm/i915/i915_guc_submission.c | 45 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 --- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/intel_pm.c | 2 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 ++--- 12 files changed, 95 insertions(+), 69 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 296e63f..5812748 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -393,9 +393,9 @@ i915_gem_create(struct drm_file *file, if (flags & I915_CREATE_PLACEMENT_STOLEN) { mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { + if (IS_ERR(obj)) { mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + return PTR_ERR(obj); } /* Always clear fresh buffers before handing to userspace */ @@ -411,8 +411,8 @@ i915_gem_create(struct drm_file *file, obj = i915_gem_alloc_object(dev, size); } - if (obj == NULL) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = 
drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -4399,14 +4399,16 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; + int ret; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (drm_gem_object_init(dev, &obj->base, size) != 0) { + ret = drm_gem_object_init(dev, &obj->base, size); + if (ret) { i915_gem_object_free(obj); - return NULL; + return ERR_PTR(ret); } mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 7bf2f3f..d79caa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -135,8 +135,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, int ret; obj = i915_gem_alloc_object(pool->dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; ret = i915_gem_object_get_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 204dc7c..4d24cfc 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -181,8 +181,8 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) int ret; obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..2bfdd49 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -58,8 +58,11 @@ static int render_state_init(struct render_state *so, struct drm_device *dev) return -EINVAL; so->obj = i915_gem_alloc_object(dev, 4096); - if (so->obj == NULL) - return -ENOMEM; + if (IS_ERR(so->obj)) { + ret = PTR_ERR(so->obj); + so->obj = NULL; + return ret; +
[Intel-gfx] [PATCH 2/6] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 30 +++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 ++-- include/uapi/drm/i915_drm.h| 16 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ffcb9c6..6927c7e 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_RESOURCE_STREAMER: value = HAS_RESOURCE_STREAMER(dev); break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 8e554d3..d45274e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3213,7 +3213,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device 
*dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index d57e850..296e63f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -375,6 +375,7 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint32_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -385,8 +386,31 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + if (flags & I915_CREATE_PLACEMENT_STOLEN) { + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return -ENOMEM; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return ret; + } + + mutex_unlock(&dev->struct_mutex); + } else { + obj = i915_gem_alloc_object(dev, size); + } + if (obj == NULL) return -ENOMEM; @@ -409,7 +433,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, 0, &args->handle); } /** @@ -422,7 +446,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, args->flags, &args->handle); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 598ed2f..b98a3bf 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ 
b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -583,7 +583,7 @@ cleanup: } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size) +i915_gem_ob
[Intel-gfx] [PATCH v10 0/6] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, more akin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure by adding a new flag through which the user can specify a preference for allocating the object from stolen memory. If the flag is set, an attempt will be made to allocate the object from stolen memory, subject to the availability of free space in the stolen region. This patch series also adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out memory from the stolen region, but can also be used for other shmem allocated objects. It is currently used for buffers allocated in the stolen region. The series also adds support for stealing purgeable stolen pages if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects.
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory buffer contents to not thrash gtt. This can be verified using IGT tests: igt/gem_stolen, igt/gem_create Ankitprasad Sharma (4): drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects Chris Wilson (2): drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 17 +- drivers/gpu/drm/i915/i915_drv.h | 27 +- drivers/gpu/drm/i915/i915_gem.c | 520 --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 211 +-- drivers/gpu/drm/i915/i915_guc_submission.c | 45 ++- drivers/gpu/drm/i915/intel_display.c | 5 +- drivers/gpu/drm/i915/intel_fbdev.c | 12 +- 
drivers/gpu/drm/i915/intel_lrc.c | 10 +- drivers/gpu/drm/i915/intel_overlay.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 8 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +- include/uapi/drm/i915_drm.h | 16 + 17 files changed, 795 insertions(+), 131 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 5/6] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 151 +--- 1 file changed, 127 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed97de6..68ed67a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -614,6 +614,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +int length, bool pwrite) +{ + void __iomem *vaddr_inatomic; + void *vaddr; + uint64_t unwritten; + + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. 
*/ + vaddr = (void __force *)vaddr_inatomic + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(vaddr_inatomic); + return unwritten; +} + +static int +i915_gem_gtt_copy(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + char __user *user_data; + uint64_t remain; + uint64_t offset, page_base; + int page_offset, page_length, ret = 0; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) + goto out; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) + ret = fault_in_multipages_writeable(user_data, remain); + + /* +* page_offset = offset within page +* page_base = page offset within aperture +*/ + page_offset = offset_in_page(offset); + page_base = offset & PAGE_MASK; + + while (remain > 0) { + /* page_length = bytes to copy for this page */ + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + /* This is a slow read/write as it tries to read from +* and write to user memory which may result into page +* faults +*/ + ret = slow_user_access(dev_priv->gtt.mappable, page_base, + page_offset, user_data, + page_length, false); + + if (ret) { + ret = -EFAULT; + break; + } + + remain -= page_length; + user_data += page_length; + page_base += page_length; + page_offset = 0; + } + + mutex_lock(&dev->struct_mutex); + +out_unpin: + i915_gem_object_ggtt_unpin(obj); +out: + return ret; +} + static int i915_gem_shmem_pread(struct drm_device *dev, struct drm_i915_gem_object *obj, @@ 
-737,17 +830,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, goto out; } - /* prime objects have no backing filp to GEM pread/pwrite -* pages from. -*/ - if (!obj->base.filp) { - ret = -EINVAL; - goto out; - } - trace_i915_gem_object_pread(obj, args->offset, ar
[Intel-gfx] [PATCH 6/6] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and as a result context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and to recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it.
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.c | 17 ++- drivers/gpu/drm/i915/i915_drv.h | 7 + drivers/gpu/drm/i915/i915_gem.c | 232 ++-- drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 7 files changed, 261 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 9f55209..2bb9e9e 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1036,6 +1036,21 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1700,7 +1715,7 @@ static const struct dev_pm_ops i915_pm_ops = { * @restore, @restore_early : called after rebooting and restoring the *hibernation image [PMSG_RESTORE] */ - .freeze = i915_pm_suspend, + .freeze = i915_pm_freeze, .freeze_late = i915_pm_suspend_late, .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e0b09b0..0d18b07 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2080,6 +2080,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? 
*/ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3006,6 +3012,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __must_check i915_gem_freeze(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, struct drm_i915_gem_object *batch_obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 68ed67a..1f134b0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4511,12 +4511,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .put_pages = i915_gem_object_put_pages_gtt, }; +static struct address_space * +i915_gem_set_inode_gfp(struct drm_device *dev, struct file *file) +{ + struct address_space *mapping = file_inode(file)->i_mapping; + gfp_t mask; + +
[Intel-gfx] [PATCH 5/9] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 16 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 46 drivers/gpu/drm/i915/i915_guc_submission.c | 52 ++-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 +++--- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +-- 12 files changed, 101 insertions(+), 78 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0a859b0..05505de 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -408,9 +408,9 @@ i915_gem_create(struct drm_file *file, if (flags & I915_CREATE_PLACEMENT_STOLEN) { mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { + if (IS_ERR(obj)) { mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + return PTR_ERR(obj); } /* Always clear fresh buffers before handing to userspace */ @@ -426,8 +426,8 @@ i915_gem_create(struct drm_file *file, obj = 
i915_gem_alloc_object(dev, size); } - if (obj == NULL) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -4451,14 +4451,16 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; + int ret; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (drm_gem_object_init(dev, &obj->base, size) != 0) { + ret = drm_gem_object_init(dev, &obj->base, size); + if (ret) { i915_gem_object_free(obj); - return NULL; + return ERR_PTR(ret); } mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 7bf2f3f..d79caa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -135,8 +135,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, int ret; obj = i915_gem_alloc_object(pool->dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; ret = i915_gem_object_get_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 43761c5..9754894 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -179,8 +179,8 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) int ret; obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..2bfdd49 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -58,8 +58,11 @@ static int render_state_init(struct render_state *so, struct drm_device *dev) return -EINVAL; so->obj = i915_gem_alloc_object(dev, 4096); - if (so->obj == NULL) - return -ENOMEM; + if (
[Intel-gfx] [PATCH 2/9] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measely 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 86 ++--- 1 file changed, 64 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf7f203..46c1e75 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,21 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +i915_gem_insert_node_in_range(struct drm_i915_private *i915, + struct drm_mm_node *node, u64 size, + unsigned alignment, u64 start, u64 end) +{ + int ret; + + ret = drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, + size, alignment, 0, start, + end, DRM_MM_SEARCH_DEFAULT, + DRM_MM_SEARCH_DEFAULT); + + return ret; +} + /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -760,20 +775,29 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. 
*/ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + memset(&node, 0, sizeof(node)); + ret = i915_gem_insert_node_in_range(i915, &node, 4096, 0, + 0, i915->gtt.mappable_end); + if (ret) + goto out; + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -783,31 +807,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? 
remain : page_length; + if (node.allocated) { + wmb(); + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, + I915_CACHE_NONE, + 0); + wmb(); + } else { + page_base += offset & PAGE_MASK; +
[Intel-gfx] [PATCH 8/9] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it.
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Splitted object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.c | 17 ++- drivers/gpu/drm/i915/i915_drv.h | 7 + drivers/gpu/drm/i915/i915_gem.c | 243 ++-- drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 7 files changed, 272 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e6935f1..8f675ae7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -979,6 +979,21 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1607,7 +1622,7 @@ static const struct dev_pm_ops i915_pm_ops = { * @restore, @restore_early : called after rebooting and restoring the *hibernation image [PMSG_RESTORE] */ - .freeze = i915_pm_suspend, + .freeze = i915_pm_freeze, .freeze_late = i915_pm_suspend_late, .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 479703b..b874292 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2079,6 +2079,12 @@ struct drm_i915_gem_object { * Advice: are the 
backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3047,6 +3053,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __must_check i915_gem_freeze(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, struct drm_i915_gem_object *batch_obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ad61783..ae3729f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4563,12 +4563,27 @@ static const struct drm_i915_gem_object_ops i915_gem_object_ops = { .put_pages = i915_gem_object_put_pages_gtt, }; +static struct
[Intel-gfx] [PATCH 9/9] drm/i915: Fail the execbuff using stolen objects as batchbuffers
From: Ankitprasad Sharma Using stolen backed objects as a batchbuffer may result in a kernel panic during relocation. Added a check to prevent the panic and fail the execbuffer call. It is not recommended to use a stolen object as a batchbuffer. Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 48ec484..d342f10 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -462,7 +462,9 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, if (obj->active && pagefault_disabled()) return -EFAULT; - if (use_cpu_reloc(obj)) + if (obj->stolen) + ret = -EINVAL; + else if (use_cpu_reloc(obj)) ret = relocate_entry_cpu(obj, reloc, target_offset); else if (obj->map_and_fenceable) ret = relocate_entry_gtt(obj, reloc, target_offset); -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 7/9] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 151 +--- 1 file changed, 127 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 8a508cd..ad61783 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -629,6 +629,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +int length, bool pwrite) +{ + void __iomem *vaddr_inatomic; + void *vaddr; + uint64_t unwritten; + + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. 
*/ + vaddr = (void __force *)vaddr_inatomic + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(vaddr_inatomic); + return unwritten; +} + +static int +i915_gem_gtt_copy(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + char __user *user_data; + uint64_t remain; + uint64_t offset, page_base; + int page_offset, page_length, ret = 0; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) + goto out; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) + ret = fault_in_multipages_writeable(user_data, remain); + + /* +* page_offset = offset within page +* page_base = page offset within aperture +*/ + page_offset = offset_in_page(offset); + page_base = offset & PAGE_MASK; + + while (remain > 0) { + /* page_length = bytes to copy for this page */ + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + /* This is a slow read/write as it tries to read from +* and write to user memory which may result into page +* faults +*/ + ret = slow_user_access(dev_priv->gtt.mappable, page_base, + page_offset, user_data, + page_length, false); + + if (ret) { + ret = -EFAULT; + break; + } + + remain -= page_length; + user_data += page_length; + page_base += page_length; + page_offset = 0; + } + + mutex_lock(&dev->struct_mutex); + +out_unpin: + i915_gem_object_ggtt_unpin(obj); +out: + return ret; +} + static int i915_gem_shmem_pread(struct drm_device *dev, struct drm_i915_gem_object *obj, @@ 
-752,17 +845,14 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, goto out; } - /* prime objects have no backing filp to GEM pread/pwrite -* pages from. -*/ - if (!obj->base.filp) { -
[Intel-gfx] [PATCH 4/9] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 30 +++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 ++-- include/uapi/drm/i915_drm.h| 16 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 52b8289..5d2189c 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -172,6 +172,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e195fee..dcdfb97 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3255,7 +3255,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct 
drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e50a91b..0a859b0 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -390,6 +390,7 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint32_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -400,8 +401,31 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + if (flags & I915_CREATE_PLACEMENT_STOLEN) { + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return -ENOMEM; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return ret; + } + + mutex_unlock(&dev->struct_mutex); + } else { + obj = i915_gem_alloc_object(dev, size); + } + if (obj == NULL) return -ENOMEM; @@ -424,7 +448,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, 0, &args->handle); } /** @@ -437,7 +461,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, args->flags, &args->handle); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 5384767..17d679e 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -586,7 +586,7 
@@ cleanup: } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size) +i915_gem_object_create_stolen(struct drm_
[Intel-gfx] [PATCH 3/9] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 44 + 2 files changed, 45 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a10b866..e195fee 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2897,6 +2897,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 46c1e75..e50a91b 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5293,3 +5293,47 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + int ret, i; + char __iomem *base; + size_t size = obj->base.size; + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_mm_node node; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + 
memset(&node, 0, sizeof(node)); + ret = i915_gem_insert_node_in_range(i915, &node, 4096, 0, + 0, i915->gtt.mappable_end); + if (ret) + goto out; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(i915->gtt.mappable, node.start); + for (i = 0; i < size/PAGE_SIZE; i++) { + wmb(); + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, i), + node.start, + I915_CACHE_NONE, 0); + wmb(); + memset_io(base, 0, 4096); + } + + wmb(); + io_mapping_unmap(base); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + drm_mm_remove_node(&node); + i915_gem_object_unpin_pages(obj); +out: + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 1/9] drm/i915: Allow use of get_dma_address for stolen backed objects
From: Ankitprasad Sharma i915_gem_object_get_dma_address function is used to retrieve the dma address of a particular page so as to map it in a given GTT entry for CPU access. This function would be used for stolen backed objects also for tasks like pwrite, clearing of the pages etc. So the obj->get_page.sg needs to be initialized for the stolen objects also. Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 598ed2f..5384767 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -569,6 +569,9 @@ _i915_gem_object_create_stolen(struct drm_device *dev, if (obj->pages == NULL) goto cleanup; + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + i915_gem_object_pin_pages(obj); obj->stolen = stolen; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v11 0/9] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure by adding a new flag through which the user can specify a preference to allocate the object from stolen memory. If the flag is set, an attempt will be made to allocate the object from stolen memory, subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from the stolen region, but can also be used for other shmem allocated objects. It is currently used for buffers allocated in the stolen region. The series also adds support for stealing purgeable stolen pages if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of another patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects.
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory. v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3.
Fail the execbuff using stolen objects as batchbuffers This can be verified using IGT tests: igt/gem_stolen, igt/gem_create Ankitprasad Sharma (7): drm/i915: Allow use of i915_gem_object_get_dma_address for stolen backed objects drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Fail the execbuff using stolen objects as batchbuffers Chris Wilson (2): drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 17 +- drivers/gpu/drm/i915/i915_drv.h | 27 +- drivers/gpu/drm/i915/i915_gem.c | 580 --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 215 -- drivers/gpu/drm/i915/i915_guc_submission.c | 52 ++- drivers/gpu/drm/i915/intel_display.c | 5 +- drivers/gpu/drm/i915/intel_fbdev.c | 12 +- drivers/gpu/drm/i915/intel_lrc.c | 10 +- drivers/gpu/drm/i915/intel_overlay.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 13 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +- include/uapi/drm/i915_drm.h | 16 + 18 files changed, 845 insertions(+), 161 deletions(-) -- 1.9.1 ___ I
[Intel-gfx] [PATCH 6/9] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from the stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 16 drivers/gpu/drm/i915/i915_gem_stolen.c | 170 + drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a8721fc..f0aa3d4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++
b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,7 +174,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -253,9 +253,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dcdfb97..479703b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -841,6 +841,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1251,6 +1257,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. +*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2031,7 +2044,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. 
*/ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head ring_list[I915_NUM_RINGS]; @@ -2039,6 +2052,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used during stolen memory allocations to temporarily hold a ref */ + struct list_head stolen_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 05505de..8a508cd 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4411,6 +4411,20 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, if (obj->madv == I915_MADV_
[Intel-gfx] [PATCH 1/8] drm/i915: Allow use of get_dma_address for stolen backed objects
From: Ankitprasad Sharma i915_gem_object_get_dma_address function is used to retrieve the dma address of a particular page so as to map it in a given GTT entry for CPU access. This function would be used for stolen backed objects also for tasks like pwrite, clearing of the pages etc. So the obj->get_page.sg needs to be initialized for the stolen objects also. Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 598ed2f..5384767 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -569,6 +569,9 @@ _i915_gem_object_create_stolen(struct drm_device *dev, if (obj->pages == NULL) goto cleanup; + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + i915_gem_object_pin_pages(obj); obj->stolen = stolen; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 6/8] drm/i915: Add support for stealing purgable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 170 + drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 187 insertions(+), 25 deletions(-) diff --git 
a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a8721fc..f0aa3d4 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,7 +174,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -253,9 +253,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index dcdfb97..2f21e71 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -841,6 +841,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1251,6 +1257,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. +*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2031,7 +2044,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. 
*/ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head ring_list[I915_NUM_RINGS]; @@ -2039,6 +2052,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 740a5a1..bdf39a7 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c
[Intel-gfx] [PATCH 3/8] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 42 + 2 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a10b866..e195fee 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2897,6 +2897,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index fe8b14d..b25f28c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5288,3 +5288,45 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * Return: 0 - success, non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + int ret, i; + char __iomem *base; + size_t size = obj->base.size; + struct drm_i915_private *i915 = 
to_i915(obj->base.dev); + struct drm_mm_node node; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + memset(&node, 0, sizeof(node)); + ret = insert_mappable_node(i915, &node); + if (ret) + goto out; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(i915->gtt.mappable, node.start); + for (i = 0; i < size/PAGE_SIZE; i++) { + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, i), + node.start, + I915_CACHE_NONE, 0); + wmb(); + memset_io(base, 0, 4096); + wmb(); + } + + io_mapping_unmap(base); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + drm_mm_remove_node(&node); + i915_gem_object_unpin_pages(obj); +out: + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 2/8] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 81 ++--- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bf7f203..fe8b14d 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,17 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node) +{ + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, + 4096, 0, 0, + 0, i915->gtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + /* some bookkeeping */ static void i915_gem_info_add_obj(struct drm_i915_private *dev_priv, size_t size) @@ -760,20 +771,28 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. 
*/ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + memset(&node, 0, sizeof(node)); + ret = insert_mappable_node(i915, &node); + if (ret) + goto out; + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -783,31 +802,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? 
remain : page_length; + if (node.allocated) { + wmb(); + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, offset >> PAGE_SHIFT), + node.start, + I915_CACHE_NONE, + 0); + wmb(); + } else { + page_base += offset & PAGE_MASK; + } /* If we get a fault while copying data, then (presumably) our *
[Intel-gfx] [PATCH 4/8] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 30 +++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 ++-- include/uapi/drm/i915_drm.h| 16 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 52b8289..5d2189c 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -172,6 +172,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e195fee..dcdfb97 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3255,7 +3255,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 
size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b25f28c..020bb29 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -386,6 +386,7 @@ static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -396,8 +397,31 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + if (flags & I915_CREATE_PLACEMENT_STOLEN) { + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return -ENOMEM; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return ret; + } + + mutex_unlock(&dev->struct_mutex); + } else { + obj = i915_gem_alloc_object(dev, size); + } + if (obj == NULL) return -ENOMEM; @@ -420,7 +444,7 @@ i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, 0, &args->handle); } /** @@ -433,7 +457,7 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, struct drm_i915_gem_create *args = data; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, args->flags, &args->handle); } static inline int diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 5384767..17d679e 100644 --- 
a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -586,7 +586,7 @@ cleanup: } struct drm_i915_gem_object * -i915_gem_object_cr
[Intel-gfx] [PATCH 8/8] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Splitted object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.c | 17 ++- drivers/gpu/drm/i915/i915_drv.h | 7 + drivers/gpu/drm/i915/i915_gem.c | 234 ++-- drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 7 files changed, 263 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index e6935f1..8f675ae7 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -979,6 +979,21 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1607,7 +1622,7 @@ static const struct dev_pm_ops i915_pm_ops = { * @restore, @restore_early : called after rebooting and restoring the *hibernation image [PMSG_RESTORE] */ - .freeze = i915_pm_suspend, + .freeze = i915_pm_freeze, .freeze_late = i915_pm_suspend_late, .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 
2f21e71..817ca59 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2079,6 +2079,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3047,6 +3053,7 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __must_check i915_gem_freeze(struct drm_device *dev); int __must_check i915_gem_suspend(struct drm_device *dev); void __i915_add_request(struct drm_i915_gem_request *req, struct drm_i915_gem_object *batch_obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index c22be9f..7555912 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_g
[Intel-gfx] [PATCH v12 0/8] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes wasted. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure, by adding a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from stolen region, but can also be used for other shmem allocated objects. Currently being used for buffers allocated in the stolen region. Also adding support for stealing purgeable stolen pages, if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. 
Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" This can be verified using IGT tests: igt/gem_stolen, igt/gem_create Ankitprasad Sharma (6): drm/i915: Allow use of get_dma_address for stolen backed objects drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects Chris Wilson (2): drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 17 +- drivers/gpu/drm/i915/i915_drv.h | 27 +- drivers/gpu/drm/i915/i915_gem.c | 574 +++ drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 +- drivers/gpu/drm/i915/i915_gem_stolen.c | 220 -- drivers/gpu/drm/i915/i915_guc_submission.c | 52 ++- drivers/gpu/drm/i915/intel_display.c | 5 +- drivers/gpu/drm/i915/intel_fbdev.c | 12 +- drivers/gpu/drm/i915/intel_lrc.c | 10 +- drivers/gpu/drm/i915/intel_overlay.c | 4 +- drivers/gpu/drm/i915/intel_pm.c | 13 +- drivers/gpu/drm/i915/intel_ringbuffer.c | 27 +- include/uapi/drm/i915_drm.h | 16 + 17 files changed, 836 insertions(+), 165 deletions(-) -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedes
[Intel-gfx] [PATCH 5/8] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 16 + drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 52 +-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 +++--- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +-- 12 files changed, 106 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 020bb29..740a5a1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -404,9 +404,9 @@ i915_gem_create(struct drm_file *file, if (flags & I915_CREATE_PLACEMENT_STOLEN) { mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { + if (IS_ERR(obj)) { mutex_unlock(&dev->struct_mutex); - return -ENOMEM; + return PTR_ERR(obj); } /* Always clear fresh buffers before 
handing to userspace */ @@ -422,8 +422,8 @@ i915_gem_create(struct drm_file *file, obj = i915_gem_alloc_object(dev, size); } - if (obj == NULL) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -4446,14 +4446,16 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; + int ret; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (drm_gem_object_init(dev, &obj->base, size) != 0) { + ret = drm_gem_object_init(dev, &obj->base, size); + if (ret) { i915_gem_object_free(obj); - return NULL; + return ERR_PTR(ret); } mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 7bf2f3f..d79caa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -135,8 +135,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, int ret; obj = i915_gem_alloc_object(pool->dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; ret = i915_gem_object_get_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 43761c5..9754894 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -179,8 +179,8 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) int ret; obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. 
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c index 5026a62..2bfdd49 100644 --- a/drivers/gpu/drm/i915/i915_gem_render_state.c +++ b/drivers/gpu/drm/i915/i915_gem_render_state.c @@ -58,8 +58,11 @@ static int render_state_init(struct render_state *so, struct drm_device *dev) return -EINVAL; so->obj = i915_gem
[Intel-gfx] [PATCH 7/8] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 162 +--- 1 file changed, 134 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index bdf39a7..c22be9f 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -55,6 +55,9 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -625,6 +628,99 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? 
- EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +int length, bool pwrite) +{ + void __iomem *vaddr_inatomic; + void *vaddr; + uint64_t unwritten; + + vaddr_inatomic = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)vaddr_inatomic + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(vaddr_inatomic); + return unwritten; +} + +static int +i915_gem_gtt_copy(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + char __user *user_data; + uint64_t remain; + uint64_t offset, page_base; + int page_offset, page_length, ret = 0; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) + goto out; + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) + ret = fault_in_multipages_writeable(user_data, remain); + + /* +* page_offset = offset within page +* page_base = page offset within aperture +*/ + page_offset = offset_in_page(offset); + page_base = offset & PAGE_MASK; + + while (remain > 0) { + /* page_length = bytes to copy for this page */ + page_length = remain; + if ((page_offset + remain) > PAGE_SIZE) + page_length = PAGE_SIZE - page_offset; + + /* This is a slow read/write as it tries to read from +* and write to user memory which may result into page +* faults +*/ + ret = slow_user_access(dev_priv->gtt.mappable, page_base, + page_offset, user_data, + 
page_length, false); + + if (ret) { + ret = -EFAULT; + break; + } + + remain -= page_length; + user_data += page_length; + page_base += page_length; + page_offset = 0; + } + + mutex_lock(&dev->struct_mutex); + +out_unpin: +
[Intel-gfx] [PATCH 10/10] drm/i915: Disable use of stolen area by User when Intel RST is present
From: Ankitprasad Sharma The BIOS RapidStart Technology may corrupt the stolen memory across S3 suspend due to unalarmed hibernation, in which case we will not be able to preserve the User data stored in the stolen region. Hence this patch tries to identify the presence of the RST device on the ACPI bus, and disables use of stolen memory (for persistent data) if found. v2: Updated comment, updated/corrected new functions private to driver (Chris/Tvrtko) v3: Disabling stolen by default, wait till required acpi changes to detect device presence are pulled in (Ankit) v4: Enabled stolen by default as required acpi changes are merged (Ankit) Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 8 drivers/gpu/drm/i915/i915_gem_stolen.c | 14 ++ drivers/gpu/drm/i915/intel_acpi.c | 10 ++ 4 files changed, 43 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 16f2f94..9d67097 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1349,6 +1349,16 @@ struct i915_gem_mm { */ bool busy; + /** +* Stolen will be lost upon hibernate (as the memory is unpowered). +* Across resume, we expect stolen to be intact - however, it may +* also be utilised by third parties (e.g. Intel RapidStart +* Technology) and if so we have to assume that any data stored in +* stolen across resume is lost and we set this flag to indicate that +* the stolen memory is volatile. 
+*/ + bool nonvolatile_stolen; + /* the indicator for dispatch video commands on two BSD rings */ unsigned int bsd_ring_dispatch_index; @@ -3465,6 +3475,7 @@ intel_opregion_notify_adapter(struct drm_device *dev, pci_power_t state) #endif /* intel_acpi.c */ +bool intel_detect_acpi_rst(void); #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); extern void intel_unregister_dsm_handler(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 0cd57d4..63dab63 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -396,8 +396,16 @@ static struct drm_i915_gem_object * i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; + if (!dev_priv->mm.nonvolatile_stolen) { + /* Stolen may be overwritten by external parties +* so unsuitable for persistent user data. +*/ + return ERR_PTR(-ENODEV); + } + mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 335a1ef..4f44531 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -482,6 +482,20 @@ int i915_gem_init_stolen(struct drm_device *dev) */ drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_usable_size); + /* If the stolen region can be modified behind our backs upon suspend, +* then we cannot use it to store nonvolatile contents (i.e user data) +* as it will be corrupted upon resume. +*/ + dev_priv->mm.nonvolatile_stolen = true; +#ifdef CONFIG_SUSPEND + if (intel_detect_acpi_rst()) { + /* BIOSes using RapidStart Technology have been reported +* to overwrite stolen across S3, not just S4. 
+*/ + dev_priv->mm.nonvolatile_stolen = false; + } +#endif + return 0; } diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1..67dc9b2 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -23,6 +23,11 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; +static const struct acpi_device_id irst_ids[] = { + {"INT3392", 0}, + {"", 0} +}; + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -162,3 +167,8 @@ void intel_register_dsm_handler(void) void intel_unregister_dsm_handler(void) { } + +bool intel_detect_acpi_rst(void) +{ + return acpi_dev_present(irst_ids[0].id); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 09/10] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen and hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to either use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Split object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected use of set_to_cpu_domain() (Chris) v9: Replaced WARN_ON by BUG_ON and added a comment explaining it (Daniel/Tvrtko) v10: Document use of barriers (Chris) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 17 ++- drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 198 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 49 drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 8 files changed, 279 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 11d8414..cfa44af 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -996,6 +996,21 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1643,7 +1658,7 @@ static const struct dev_pm_ops i915_pm_ops = { * @restore, @restore_early : called after 
rebooting and restoring the *hibernation image [PMSG_RESTORE] */ - .freeze = i915_pm_suspend, + .freeze = i915_pm_freeze, .freeze_late = i915_pm_suspend_late, .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 943b301..16f2f94 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2137,6 +2137,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3093,6 +3099,9 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __must_check i915_gem_freeze(struct drm_device *dev); +int __must_check +i915_gem_object_migrate_stol
[Intel-gfx] [PATCH 01/10] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm-specific callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page into the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/char/agp/intel-gtt.c| 9 + drivers/gpu/drm/i915/i915_gem_gtt.c | 65 + drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 82 insertions(+) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 1341a94..7c68576 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); + wmb(); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 715a771..a64018f 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2341,6 +2341,28 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->gtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); 
+} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2412,6 +2434,28 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->gtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2523,6 +2567,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -3054,6 +3116,7 @@ static int gen8_gmch_probe(struct drm_device *dev, ret = ggtt_probe_common(dev, gtt_size); dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range; + dev_priv->gtt.base.insert_page = gen8_ggtt_insert_page; d
[Intel-gfx] [PATCH 07/10] drm/i915: Add support for stealing purgeable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning v12: Added lockdep_assert before starting stolen-backed object eviction (Chris) Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 171 + 
drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ec0c2a05e..aa7c7a3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,7 +174,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -253,9 +253,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 55f2de9..943b301 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -840,6 +840,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1291,6 +1297,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. 
+*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2089,7 +2102,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head ring_list[I915_NUM_RINGS]; @@ -2097,6 +2110,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_
[Intel-gfx] [PATCH 08/10] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 211 ++-- 1 file changed, 179 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed8ae5d..40f2906 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -55,6 +55,9 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, static bool cpu_write_needs_clflush(struct drm_i915_gem_object 
*obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -646,6 +649,141 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +int length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret = 0; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = i915_gem_obj_ggtt_offset(obj) + data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + mutex_lock(&dev->struct_mutex); + 
goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offset within aperture +* page_offset = offset within page +* page_length = bytes to copy for th
[Intel-gfx] [PATCH 05/10] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) v7: Use first 8 bits of gem_create flags for placement (Chris), Add helper function for object allocation from stolen region (Ankit) v8: Added comment explaining STOLEN placement flag (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 45 +++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-- include/uapi/drm/i915_drm.h| 41 +++ 5 files changed, 89 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a42eb58..1aa2cb6 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -172,6 +172,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1122e1b..55f2de9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3301,7 +3301,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, 
int i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1aa4fc9..60d27fe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -389,10 +389,36 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) kmem_cache_free(dev_priv->objects, obj); } +static struct drm_i915_gem_object * +i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) +{ + struct drm_i915_gem_object *obj; + int ret; + + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + mutex_unlock(&dev->struct_mutex); + return obj; +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -403,8 +429,21 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + switch (flags & I915_CREATE_PLACEMENT_MASK) { + case I915_CREATE_PLACEMENT_NORMAL: + obj = i915_gem_alloc_object(dev, size); + break; + case I915_CREATE_PLACEMENT_STOLEN: + obj = i915_gem_alloc_object_stolen(dev, size); + break; + default: + return -EINVAL; + } + if (obj == NULL) return -ENOMEM; @@ -427,7 +466,7 @@ 
i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, 0, &args->handle);
[Intel-gfx] [PATCH 03/10] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measly 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 92 +++-- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a928823..49a03f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, + size, 0, 0, 0, + i915->gtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void i915_gem_info_add_obj(struct 
drm_i915_private *dev_priv, size_t size) @@ -760,20 +778,33 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -783,31 +814,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? rem
[Intel-gfx] [PATCH 02/10] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65a2cd0..e4c25c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2947,6 +2947,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 04/10] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 47 + 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e4c25c6..1122e1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2938,6 +2938,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 49a03f2..1aa4fc9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5405,3 +5405,50 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * Return: 0 - success, 
non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(i915->gtt.mappable, node.start); + + for (i = 0; i < size/PAGE_SIZE; i++) { + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, i), + node.start, + I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH v16 0/10] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure, by adding a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. This patch series adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the memory from stolen region, but can also be used for other shmem allocated objects. Currently being used for buffers allocated in the stolen region. Also adding support for stealing purgeable stolen pages, if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" v13: Addressed comments, Added 2 patches to detect Intel RST and disable stolen for persistent data if RST device found 1. acpi: Export acpi_bus_type 2. drm/i915: Disable use of stolen area by User when Intel RST is present v14: Addressed comments, Added 2 base patches to the series 1. drm/i915: Add support for mapping an object page by page 2. 
drm/i915: Introduce i915_gem_object_get_dma_address() v15: Addressed comments, Disabled stolen memory by default v16: Addressed comments, Added low level rpm assertions, Enabled stolen memory This can be verified using IGT tests: igt/gem_stolen, igt/gem_create Ankitprasad Sharma (6): drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Disable use of stolen area by User when Intel RST is present Chris Wilson (4): drm/i915: Add support for mapping an object page by page drm/i915: Introduce i915_gem_object_get_dma_address() drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/char/agp/intel-gtt.c | 9 + drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 17 +- drivers/gpu/drm/i915/i915_drv.h | 58 ++- drivers/gpu/drm/i915/i915_gem.c | 621 --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-
[Intel-gfx] [PATCH 06/10] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) v7: Resolved rebasing conflicts (Ankit) v8: Removed redundant code (Chris) Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 23 ++-- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 52 +-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 +++--- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +-- 12 files changed, 110 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 60d27fe..d63f18c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -397,19 +397,18 @@ i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { - mutex_unlock(&dev->struct_mutex); - return NULL; 
- } + if (IS_ERR(obj)) + goto out; /* Always clear fresh buffers before handing to userspace */ ret = i915_gem_object_clear(obj); if (ret) { drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return NULL; + obj = ERR_PTR(ret); + goto out; } +out: mutex_unlock(&dev->struct_mutex); return obj; } @@ -444,8 +443,8 @@ i915_gem_create(struct drm_file *file, return -EINVAL; } - if (obj == NULL) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -4562,14 +4561,16 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; + int ret; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (drm_gem_object_init(dev, &obj->base, size) != 0) { + ret = drm_gem_object_init(dev, &obj->base, size); + if (ret) { i915_gem_object_free(obj); - return NULL; + return ERR_PTR(ret); } mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 7bf2f3f..d79caa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -135,8 +135,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, int ret; obj = i915_gem_alloc_object(pool->dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; ret = i915_gem_object_get_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 83a097c..2dd5fed 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -179,8 +179,8 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) int ret; obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if 
(IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i91
[Intel-gfx] [PATCH] drm/i915: Add RPS debugfs disabling for gen6+ platforms
From: Ankitprasad Sharma This patch exposes a new debugfs interface 'i915_rps_disable'. The following 2 values can be echoed into this file: '0' - RPS explicitly enabled. '1' - RPS explicitly disabled. This interface provides the capability to enable/disable Turbo feature at runtime, which is needed for its validation. Signed-off-by: Deepak S Signed-off-by: Praveen Paneri Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_debugfs.c | 55 + drivers/gpu/drm/i915/i915_drv.h | 2 ++ 2 files changed, 57 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index aa7c7a3..1f58540 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -5023,6 +5023,60 @@ DEFINE_SIMPLE_ATTRIBUTE(i915_min_freq_fops, i915_min_freq_get, i915_min_freq_set, "%llu\n"); +static int i915_rps_disable_get(void *data, u64 *val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + + if(INTEL_INFO(dev)->gen < 6) + return -ENODEV; + + flush_delayed_work(&dev_priv->rps.delayed_resume_work); + + *val = dev_priv->rps.rps_disable; + + return 0; +} + +static int i915_rps_disable_set(void *data, u64 val) +{ + struct drm_device *dev = data; + struct drm_i915_private *dev_priv = dev->dev_private; + int ret; + + flush_delayed_work(&dev_priv->rps.delayed_resume_work); + + if(INTEL_INFO(dev)->gen < 6) + return -ENODEV; + + DRM_DEBUG_DRIVER("Setting RPS disable %s\n", +val ? 
"true" : "false"); + + ret = mutex_lock_interruptible(&dev_priv->rps.hw_lock); + if (ret) + return ret; + + dev_priv->rps.rps_disable = val; + + if (val) + I915_WRITE(GEN6_RP_CONTROL, 0); + else + I915_WRITE(GEN6_RP_CONTROL, GEN6_RP_MEDIA_TURBO | + GEN6_RP_MEDIA_HW_NORMAL_MODE | + GEN6_RP_MEDIA_IS_GFX | + GEN6_RP_ENABLE | + GEN6_RP_UP_BUSY_AVG | + GEN6_RP_DOWN_IDLE_AVG); + + mutex_unlock(&dev_priv->rps.hw_lock); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(i915_rps_disable_fops, + i915_rps_disable_get, i915_rps_disable_set, + "%llu\n"); + static int i915_cache_sharing_get(void *data, u64 *val) { @@ -5391,6 +5445,7 @@ static const struct i915_debugfs_files { {"i915_wedged", &i915_wedged_fops}, {"i915_max_freq", &i915_max_freq_fops}, {"i915_min_freq", &i915_min_freq_fops}, + {"i915_rps_disable", &i915_rps_disable_fops}, {"i915_cache_sharing", &i915_cache_sharing_fops}, {"i915_ring_stop", &i915_ring_stop_fops}, {"i915_ring_missed_irq", &i915_ring_missed_irq_fops}, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9d67097..28e3537 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1169,6 +1169,8 @@ struct intel_gen6_power_mgmt { u8 up_threshold; /* Current %busy required to uplock */ u8 down_threshold; /* Current %busy required to downclock */ + bool rps_disable; + int last_adj; enum { LOW_POWER, BETWEEN, HIGH_POWER } power; -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 07/10] drm/i915: Add support for stealing purgeable stolen pages
From: Chris Wilson If we run out of stolen memory when trying to allocate an object, see if we can reap enough purgeable objects to free up enough contiguous free space for the allocation. This is in principle very much like evicting objects to free up enough contiguous space in the vma when binding a new object - and you will be forgiven for thinking that the code looks very similar. At the moment, we do not allow userspace to allocate objects in stolen, so there is neither the memory pressure to trigger stolen eviction nor any purgeable objects inside the stolen arena. However, this will change in the near future, and so better management and defragmentation of stolen memory will become a real issue. v2: Remember to remove the drm_mm_node. v3: Rebased to the latest drm-intel-nightly (Ankit) v4: corrected if-else braces format (Tvrtko/kerneldoc) v5: Rebased to the latest drm-intel-nightly (Ankit) Added a separate list to maintain purgeable objects from stolen memory region (Chris/Daniel) v6: Compiler optimization (merging 2 single loops into one for() loop), corrected code for object eviction, retire_requests before starting object eviction (Chris) v7: Added kernel doc for i915_gem_object_create_stolen() v8: Check for struct_mutex lock before creating object from stolen region (Tvrtko) v9: Renamed variables to make usage clear, added comment, removed onetime used macro (Tvrtko) v10: Avoid masking of error when stolen_alloc fails (Tvrtko) v11: Renamed stolen_link to tmp_link, as it may be used for other purposes too (Chris) Used ERR_CAST to cast error pointers while returning v12: Added lockdep_assert before starting stolen-backed object eviction (Chris) Testcase: igt/gem_stolen Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_debugfs.c| 6 +- drivers/gpu/drm/i915/i915_drv.h| 17 +++- drivers/gpu/drm/i915/i915_gem.c| 15 +++ drivers/gpu/drm/i915/i915_gem_stolen.c | 171 + 
drivers/gpu/drm/i915/intel_pm.c| 4 +- 5 files changed, 188 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index ec0c2a05e..aa7c7a3 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -174,7 +174,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj) seq_puts(m, ")"); } if (obj->stolen) - seq_printf(m, " (stolen: %08llx)", obj->stolen->start); + seq_printf(m, " (stolen: %08llx)", obj->stolen->base.start); if (obj->pin_display || obj->fault_mappable) { char s[3], *t = s; if (obj->pin_display) @@ -253,9 +253,9 @@ static int obj_rank_by_stolen(void *priv, struct drm_i915_gem_object *b = container_of(B, struct drm_i915_gem_object, obj_exec_link); - if (a->stolen->start < b->stolen->start) + if (a->stolen->base.start < b->stolen->base.start) return -1; - if (a->stolen->start > b->stolen->start) + if (a->stolen->base.start > b->stolen->base.start) return 1; return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 55f2de9..943b301 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -840,6 +840,12 @@ struct i915_ctx_hang_stats { bool banned; }; +struct i915_stolen_node { + struct drm_mm_node base; + struct list_head mm_link; + struct drm_i915_gem_object *obj; +}; + /* This must match up with the value previously used for execbuf2.rsvd1. */ #define DEFAULT_CONTEXT_HANDLE 0 @@ -1291,6 +1297,13 @@ struct i915_gem_mm { */ struct list_head unbound_list; + /** +* List of stolen objects that have been marked as purgeable and +* thus available for reaping if we need more space for a new +* allocation. Ordered by time of marking purgeable. 
+*/ + struct list_head stolen_list; + /** Usable portion of the GTT for GEM */ unsigned long stolen_base; /* limited to low memory (32-bit) */ @@ -2089,7 +2102,7 @@ struct drm_i915_gem_object { struct list_head vma_list; /** Stolen memory for this object, instead of being backed by shmem. */ - struct drm_mm_node *stolen; + struct i915_stolen_node *stolen; struct list_head global_list; struct list_head ring_list[I915_NUM_RINGS]; @@ -2097,6 +2110,8 @@ struct drm_i915_gem_object { struct list_head obj_exec_link; struct list_head batch_pool_link; + /** Used to link an object to a list temporarily */ + struct list_head tmp_link; /** * This is set if the object is on the active lists (has pending diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_
[Intel-gfx] [PATCH 02/10] drm/i915: Introduce i915_gem_object_get_dma_address()
From: Chris Wilson This utility function is a companion to i915_gem_object_get_page() that uses the same cached iterator for the scatterlist to perform fast sequential lookup of the dma address associated with any page within the object. Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.h | 17 + 1 file changed, 17 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 65a2cd0..e4c25c6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2947,6 +2947,23 @@ static inline int __sg_page_count(struct scatterlist *sg) struct page * i915_gem_object_get_dirty_page(struct drm_i915_gem_object *obj, int n); +static inline dma_addr_t +i915_gem_object_get_dma_address(struct drm_i915_gem_object *obj, int n) +{ + if (n < obj->get_page.last) { + obj->get_page.sg = obj->pages->sgl; + obj->get_page.last = 0; + } + + while (obj->get_page.last + __sg_page_count(obj->get_page.sg) <= n) { + obj->get_page.last += __sg_page_count(obj->get_page.sg++); + if (unlikely(sg_is_chain(obj->get_page.sg))) + obj->get_page.sg = sg_chain_ptr(obj->get_page.sg); + } + + return sg_dma_address(obj->get_page.sg) + ((n - obj->get_page.last) << PAGE_SHIFT); +} + static inline struct page * i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n) { -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 01/10] drm/i915: Add support for mapping an object page by page
From: Chris Wilson Introduced a new vm specific callback insert_page() to program a single pte in ggtt or ppgtt. This allows us to map a single page into the mappable aperture space. This can be iterated over to access the whole object by using space as meagre as page size. v2: Added low level rpm assertions to insert_page routines (Chris) v3: Added POSTING_READ post register write (Tvrtko) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma --- drivers/char/agp/intel-gtt.c| 9 + drivers/gpu/drm/i915/i915_gem_gtt.c | 67 + drivers/gpu/drm/i915/i915_gem_gtt.h | 5 +++ include/drm/intel-gtt.h | 3 ++ 4 files changed, 84 insertions(+) diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c index 1341a94..7c68576 100644 --- a/drivers/char/agp/intel-gtt.c +++ b/drivers/char/agp/intel-gtt.c @@ -838,6 +838,15 @@ static bool i830_check_flags(unsigned int flags) return false; } +void intel_gtt_insert_page(dma_addr_t addr, + unsigned int pg, + unsigned int flags) +{ + intel_private.driver->write_entry(addr, pg, flags); + wmb(); +} +EXPORT_SYMBOL(intel_gtt_insert_page); + void intel_gtt_insert_sg_entries(struct sg_table *st, unsigned int pg_start, unsigned int flags) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 715a771..6586525 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2341,6 +2341,29 @@ static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) #endif } +static void gen8_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen8_pte_t __iomem *pte = + (gen8_pte_t __iomem *)dev_priv->gtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + gen8_set_pte(pte, gen8_pte_encode(addr, level, true)); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + 
POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void gen8_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *st, uint64_t start, @@ -2412,6 +2435,29 @@ static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm, stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL); } +static void gen6_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level level, + u32 flags) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + gen6_pte_t __iomem *pte = + (gen6_pte_t __iomem *)dev_priv->gtt.gsm + + (offset >> PAGE_SHIFT); + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + iowrite32(vm->pte_encode(addr, level, true, flags), pte); + wmb(); + + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN); + POSTING_READ(GFX_FLSH_CNTL_GEN6); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + /* * Binds an object into the global gtt with the specified cache level. The object * will be accessible to the GPU via commands whose operands reference offsets @@ -2523,6 +2569,24 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm, assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); } +static void i915_ggtt_insert_page(struct i915_address_space *vm, + dma_addr_t addr, + uint64_t offset, + enum i915_cache_level cache_level, + u32 unused) +{ + struct drm_i915_private *dev_priv = to_i915(vm->dev); + unsigned int flags = (cache_level == I915_CACHE_NONE) ? + AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY; + int rpm_atomic_seq; + + rpm_atomic_seq = assert_rpm_atomic_begin(dev_priv); + + intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags); + + assert_rpm_atomic_end(dev_priv, rpm_atomic_seq); +} + static void i915_ggtt_insert_entries(struct i915_address_space *vm, struct sg_table *pages, uint64_t start, @@ -3054,6 +3118,7 @@ static int gen8_gmch_probe(struct drm_device *dev, ret = ggtt_probe_common(dev, gtt_size);
[Intel-gfx] [PATCH 03/10] drm/i915: Use insert_page for pwrite_fast
From: Ankitprasad Sharma In pwrite_fast, map an object page by page if obj_ggtt_pin fails. First, we try a nonblocking pin for the whole object (since that is fastest if reused), then failing that we try to grab one page in the mappable aperture. It also allows us to handle objects larger than the mappable aperture (e.g. if we need to pwrite with vGPU restricting the aperture to a measely 8MiB or something like that). v2: Pin pages before starting pwrite, Combined duplicate loops (Chris) v3: Combined loops based on local patch by Chris (Chris) v4: Added i915 wrapper function for drm_mm_insert_node_in_range (Chris) v5: Renamed wrapper function for drm_mm_insert_node_in_range (Chris) v5: Added wrapper for drm_mm_remove_node() (Chris) v6: Added get_pages call before pinning the pages (Tvrtko) Added remove_mappable_node() wrapper for drm_mm_remove_node() (Chris) v7: Added size argument for insert_mappable_node (Tvrtko) v8: Do not put_pages after pwrite, do memset of node in the wrapper function (insert_mappable_node) (Chris) Signed-off-by: Ankitprasad Sharma Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_gem.c | 92 +++-- 1 file changed, 70 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index a928823..49a03f2 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -61,6 +61,24 @@ static bool cpu_write_needs_clflush(struct drm_i915_gem_object *obj) return obj->pin_display; } +static int +insert_mappable_node(struct drm_i915_private *i915, + struct drm_mm_node *node, u32 size) +{ + memset(node, 0, sizeof(*node)); + return drm_mm_insert_node_in_range_generic(&i915->gtt.base.mm, node, + size, 0, 0, 0, + i915->gtt.mappable_end, + DRM_MM_SEARCH_DEFAULT, + DRM_MM_CREATE_DEFAULT); +} + +static void +remove_mappable_node(struct drm_mm_node *node) +{ + drm_mm_remove_node(node); +} + /* some bookkeeping */ static void i915_gem_info_add_obj(struct 
drm_i915_private *dev_priv, size_t size) @@ -760,20 +778,33 @@ fast_user_write(struct io_mapping *mapping, * user into the GTT, uncached. */ static int -i915_gem_gtt_pwrite_fast(struct drm_device *dev, +i915_gem_gtt_pwrite_fast(struct drm_i915_private *i915, struct drm_i915_gem_object *obj, struct drm_i915_gem_pwrite *args, struct drm_file *file) { - struct drm_i915_private *dev_priv = dev->dev_private; - ssize_t remain; - loff_t offset, page_base; + struct drm_mm_node node; + uint64_t remain, offset; char __user *user_data; - int page_offset, page_length, ret; + int ret; ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK); - if (ret) - goto out; + if (ret) { + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + } ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -783,31 +814,39 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev, if (ret) goto out_unpin; - user_data = to_user_ptr(args->data_ptr); - remain = args->size; - - offset = i915_gem_obj_ggtt_offset(obj) + args->offset; - intel_fb_obj_invalidate(obj, ORIGIN_GTT); + obj->dirty = true; - while (remain > 0) { + user_data = to_user_ptr(args->data_ptr); + offset = args->offset; + remain = args->size; + while (remain) { /* Operation in this page * * page_base = page offset within aperture * page_offset = offset within page * page_length = bytes to copy for this page */ - page_base = offset & PAGE_MASK; - page_offset = offset_in_page(offset); - page_length = remain; - if ((page_offset + remain) > PAGE_SIZE) - page_length = PAGE_SIZE - page_offset; - + u32 page_base = node.start; + unsigned page_offset = offset_in_page(offset); + unsigned page_length = PAGE_SIZE - page_offset; + page_length = remain < page_length ? rem
[Intel-gfx] [PATCH v17 0/10] Support for creating/using Stolen memory backed objects
From: Ankitprasad Sharma This patch series adds support for creating/using Stolen memory backed objects. Despite being a unified memory architecture (UMA) some bits of memory are more equal than others. In particular we have the thorny issue of stolen memory, memory stolen from the system by the BIOS and reserved for igfx use. Stolen memory is required for some functions of the GPU and display engine, but in general it goes to waste. Whilst we cannot return it back to the system, we need to find some other method for utilising it. As we do not support direct access to the physical address in the stolen region, it behaves like a different class of memory, closer in kin to local GPU memory. This strongly suggests that we need a placement model like TTM if we are to fully utilize these discrete chunks of differing memory. To add support for creating Stolen memory backed objects, we extend the drm_i915_gem_create structure by adding a new flag through which the user can specify a preference to allocate the object from stolen memory; if the flag is set, an attempt will be made to allocate the object from stolen memory, subject to the availability of free space in the stolen region. This patch series also adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out memory from the stolen region, but can also be used for other shmem allocated objects. It is currently used only for buffers allocated in the stolen region. The series also adds support for stealing purgeable stolen pages if we run out of stolen memory when trying to allocate an object. v2: Added support for read/write from/to objects not backed by shmem using the pread/pwrite interface. Also extended the current get_aperture ioctl to retrieve the total and available size of the stolen region. v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as part of other patch series), addressed comments by Chris about pread/pwrite for non shmem backed objects. 
v4: Rebased to the latest drm-intel-nightly. v5: Addressed comments, replaced patch 1/4 "Clearing buffers via blitter engine" by "Clearing buffers via CPU/GTT". v6: Rebased to the latest drm-intel-nightly, Addressed comments, updated stolen memory purging logic by maintaining a list for purgeable stolen memory objects, enabled pread/pwrite for all non-shmem backed objects without tiling restrictions. v7: Addressed comments, compiler optimization, new patch added for correct error code propagation to the userspace. v8: Added a new patch to the series to Migrate stolen objects before hibernation, as stolen memory is not preserved across hibernation. Added correct error propagation for shmem as well as non-shmem backed object allocation. v9: Addressed comments, use of insert_page helper function to map object page by page which can be helpful in low aperture space availability. v10: Addressed comments, use insert_page for clearing out the stolen memory v11: Addressed comments, 3 new patches added to support allocation from Stolen memory 1. Allow use of i915_gem_object_get_dma_address for stolen backed objects 2. Use insert_page for pwrite_fast 3. Fail the execbuff using stolen objects as batchbuffers v12: Addressed comments, Removed patch "Fail the execbuff using stolen objects as batchbuffers" v13: Addressed comments, Added 2 patches to detect Intel RST and disable stolen for persistent data if RST device found 1. acpi: Export acpi_bus_type 2. drm/i915: Disable use of stolen area by User when Intel RST is present v14: Addressed comments, Added 2 base patches to the series 1. drm/i915: Add support for mapping an object page by page 2. 
drm/i915: Introduce i915_gem_object_get_dma_address() v15: Addressed comments, Disabled stolen memory by default v16: Addressed comments, Added low level rpm assertions, Enabled stolen memory v17: Addressed comments This can be verified using IGT tests: igt/gem_stolen, igt/gem_create, igt/gem_pread, igt/gem_pwrite Ankitprasad Sharma (6): drm/i915: Use insert_page for pwrite_fast drm/i915: Clearing buffer objects via CPU/GTT drm/i915: Support for creating Stolen memory backed objects drm/i915: Propagating correct error codes to the userspace drm/i915: Support for pread/pwrite from/to non shmem backed objects drm/i915: Disable use of stolen area by User when Intel RST is present Chris Wilson (4): drm/i915: Add support for mapping an object page by page drm/i915: Introduce i915_gem_object_get_dma_address() drm/i915: Add support for stealing purgable stolen pages drm/i915: Migrate stolen objects before hibernation drivers/char/agp/intel-gtt.c | 9 + drivers/gpu/drm/i915/i915_debugfs.c | 6 +- drivers/gpu/drm/i915/i915_dma.c | 3 + drivers/gpu/drm/i915/i915_drv.c | 17 +- drivers/gpu/drm/i915/i915_drv.h | 58 ++- drivers/gpu/drm/i915/i915_gem.c | 631 --- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-
[Intel-gfx] [PATCH 06/10] drm/i915: Propagating correct error codes to the userspace
From: Ankitprasad Sharma Propagating correct error codes to userspace by using ERR_PTR and PTR_ERR macros for stolen memory based object allocation. We generally return -ENOMEM to the user whenever there is a failure in object allocation. This patch helps user to identify the correct reason for the failure and not just -ENOMEM each time. v2: Moved the patch up in the series, added error propagation for i915_gem_alloc_object too (Chris) v3: Removed storing of error pointer inside structs, Corrected error propagation in caller functions (Chris) v4: Remove assignments inside the predicate (Chris) v5: Removed unnecessary initializations, updated kerneldoc for i915_guc_client, corrected missed error pointer handling (Tvrtko) v6: Use ERR_CAST/temporary variable to avoid storing invalid pointer in a common field (Chris) v7: Resolved rebasing conflicts (Ankit) v8: Removed redundant code (Chris) Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem.c | 23 ++-- drivers/gpu/drm/i915/i915_gem_batch_pool.c | 4 +-- drivers/gpu/drm/i915/i915_gem_context.c | 4 +-- drivers/gpu/drm/i915/i915_gem_render_state.c | 7 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 53 +++- drivers/gpu/drm/i915/i915_guc_submission.c | 52 +-- drivers/gpu/drm/i915/intel_display.c | 2 +- drivers/gpu/drm/i915/intel_fbdev.c | 6 ++-- drivers/gpu/drm/i915/intel_lrc.c | 10 +++--- drivers/gpu/drm/i915/intel_overlay.c | 4 +-- drivers/gpu/drm/i915/intel_pm.c | 7 ++-- drivers/gpu/drm/i915/intel_ringbuffer.c | 21 +-- 12 files changed, 110 insertions(+), 83 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 60d27fe..d63f18c 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -397,19 +397,18 @@ i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); - if (!obj) { - mutex_unlock(&dev->struct_mutex); - return NULL; 
- } + if (IS_ERR(obj)) + goto out; /* Always clear fresh buffers before handing to userspace */ ret = i915_gem_object_clear(obj); if (ret) { drm_gem_object_unreference(&obj->base); - mutex_unlock(&dev->struct_mutex); - return NULL; + obj = ERR_PTR(ret); + goto out; } +out: mutex_unlock(&dev->struct_mutex); return obj; } @@ -444,8 +443,8 @@ i915_gem_create(struct drm_file *file, return -EINVAL; } - if (obj == NULL) - return -ENOMEM; + if (IS_ERR(obj)) + return PTR_ERR(obj); ret = drm_gem_handle_create(file, &obj->base, &handle); /* drop reference from allocate - handle holds it now */ @@ -4562,14 +4561,16 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev, struct drm_i915_gem_object *obj; struct address_space *mapping; gfp_t mask; + int ret; obj = i915_gem_object_alloc(dev); if (obj == NULL) - return NULL; + return ERR_PTR(-ENOMEM); - if (drm_gem_object_init(dev, &obj->base, size) != 0) { + ret = drm_gem_object_init(dev, &obj->base, size); + if (ret) { i915_gem_object_free(obj); - return NULL; + return ERR_PTR(ret); } mask = GFP_HIGHUSER | __GFP_RECLAIMABLE; diff --git a/drivers/gpu/drm/i915/i915_gem_batch_pool.c b/drivers/gpu/drm/i915/i915_gem_batch_pool.c index 7bf2f3f..d79caa2 100644 --- a/drivers/gpu/drm/i915/i915_gem_batch_pool.c +++ b/drivers/gpu/drm/i915/i915_gem_batch_pool.c @@ -135,8 +135,8 @@ i915_gem_batch_pool_get(struct i915_gem_batch_pool *pool, int ret; obj = i915_gem_alloc_object(pool->dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if (IS_ERR(obj)) + return obj; ret = i915_gem_object_get_pages(obj); if (ret) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 83a097c..2dd5fed 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -179,8 +179,8 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) int ret; obj = i915_gem_alloc_object(dev, size); - if (obj == NULL) - return ERR_PTR(-ENOMEM); + if 
(IS_ERR(obj)) + return obj; /* * Try to make the context utilize L3 as well as LLC. diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i91
[Intel-gfx] [PATCH 08/10] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Ankitprasad Sharma This patch adds support for extending the pread/pwrite functionality for objects not backed by shmem. The access will be made through gtt interface. This will cover objects backed by stolen memory as well as other non-shmem backed objects. v2: Drop locks around slow_user_access, prefault the pages before access (Chris) v3: Rebased to the latest drm-intel-nightly (Ankit) v4: Moved page base & offset calculations outside the copy loop, corrected data types for size and offset variables, corrected if-else braces format (Tvrtko/kerneldocs) v5: Enabled pread/pwrite for all non-shmem backed objects including without tiling restrictions (Ankit) v6: Using pwrite_fast for non-shmem backed objects as well (Chris) v7: Updated commit message, Renamed i915_gem_gtt_read to i915_gem_gtt_copy, added pwrite slow path for non-shmem backed objects (Chris/Tvrtko) v8: Updated v7 commit message, mutex unlock around pwrite slow path for non-shmem backed objects (Tvrtko) v9: Corrected check during pread_ioctl, to avoid shmem_pread being called for non-shmem backed objects (Tvrtko) v10: Moved the write_domain check to needs_clflush and tiling mode check to pwrite_fast (Chris) v11: Use pwrite_fast fallback for all objects (shmem and non-shmem backed), call fast_user_write regardless of pagefault in previous iteration v12: Use page-by-page copy for slow user access too (Chris) v13: Handled EFAULT, Avoid use of WARN_ON, put_fence only if whole obj pinned (Chris) v14: Corrected datatypes/initializations (Tvrtko) Testcase: igt/gem_stolen, igt/gem_pread, igt/gem_pwrite Signed-off-by: Ankitprasad Sharma --- drivers/gpu/drm/i915/i915_gem.c | 221 ++-- 1 file changed, 189 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index ed8ae5d..0938ab1 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -55,6 +55,9 @@ static bool cpu_cache_is_coherent(struct drm_device *dev, static bool 
cpu_write_needs_clflush(struct drm_i915_gem_object *obj) { + if (obj->base.write_domain == I915_GEM_DOMAIN_CPU) + return false; + if (!cpu_cache_is_coherent(obj->base.dev, obj->cache_level)) return true; @@ -646,6 +649,141 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length, return ret ? - EFAULT : 0; } +static inline uint64_t +slow_user_access(struct io_mapping *mapping, +uint64_t page_base, int page_offset, +char __user *user_data, +unsigned long length, bool pwrite) +{ + void __iomem *ioaddr; + void *vaddr; + uint64_t unwritten; + + ioaddr = io_mapping_map_wc(mapping, page_base); + /* We can use the cpu mem copy function because this is X86. */ + vaddr = (void __force *)ioaddr + page_offset; + if (pwrite) + unwritten = __copy_from_user(vaddr, user_data, length); + else + unwritten = __copy_to_user(user_data, vaddr, length); + + io_mapping_unmap(ioaddr); + return unwritten; +} + +static int +i915_gem_gtt_pread(struct drm_device *dev, + struct drm_i915_gem_object *obj, uint64_t size, + uint64_t data_offset, uint64_t data_ptr) +{ + struct drm_i915_private *dev_priv = dev->dev_private; + struct drm_mm_node node; + char __user *user_data; + uint64_t remain; + uint64_t offset; + int ret; + + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE); + if (ret) { + ret = insert_mappable_node(dev_priv, &node, PAGE_SIZE); + if (ret) + goto out; + + ret = i915_gem_object_get_pages(obj); + if (ret) { + remove_mappable_node(&node); + goto out; + } + + i915_gem_object_pin_pages(obj); + } else { + node.start = i915_gem_obj_ggtt_offset(obj); + node.allocated = false; + ret = i915_gem_object_put_fence(obj); + if (ret) + goto out_unpin; + } + + ret = i915_gem_object_set_to_gtt_domain(obj, false); + if (ret) + goto out_unpin; + + user_data = to_user_ptr(data_ptr); + remain = size; + offset = data_offset; + + mutex_unlock(&dev->struct_mutex); + if (likely(!i915.prefault_disable)) { + ret = fault_in_multipages_writeable(user_data, remain); + if (ret) { + 
mutex_lock(&dev->struct_mutex); + goto out_unpin; + } + } + + while (remain > 0) { + /* Operation in this page +* +* page_base = page offset within aperture +* page_offset = offset within page +* page_leng
[Intel-gfx] [PATCH 05/10] drm/i915: Support for creating Stolen memory backed objects
From: Ankitprasad Sharma Extend the drm_i915_gem_create structure to add support for creating Stolen memory backed objects. Added a new flag through which user can specify the preference to allocate the object from stolen memory, which if set, an attempt will be made to allocate the object from stolen memory subject to the availability of free space in the stolen region. v2: Rebased to the latest drm-intel-nightly (Ankit) v3: Changed versioning of GEM_CREATE param, added new comments (Tvrtko) v4: Changed size from 32b to 64b to prevent userspace overflow (Tvrtko) Corrected function arguments ordering (Chris) v5: Corrected function name (Chris) v6: Updated datatype for flags to keep sizeof(drm_i915_gem_create) u64 aligned (Chris) v7: Use first 8 bits of gem_create flags for placement (Chris), Add helper function for object allocation from stolen region (Ankit) v8: Added comment explaining STOLEN placement flag (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_dma.c| 3 +++ drivers/gpu/drm/i915/i915_drv.h| 2 +- drivers/gpu/drm/i915/i915_gem.c| 45 +++--- drivers/gpu/drm/i915/i915_gem_stolen.c | 4 +-- include/uapi/drm/i915_drm.h| 41 +++ 5 files changed, 89 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index a42eb58..1aa2cb6 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -172,6 +172,9 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_EXEC_SOFTPIN: value = 1; break; + case I915_PARAM_CREATE_VERSION: + value = 2; + break; default: DRM_DEBUG("Unknown parameter %d\n", param->param); return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 1122e1b..55f2de9 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3301,7 +3301,7 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, 
int i915_gem_init_stolen(struct drm_device *dev); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_device *dev, u32 size); +i915_gem_object_create_stolen(struct drm_device *dev, u64 size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_device *dev, u32 stolen_offset, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 1aa4fc9..60d27fe 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -389,10 +389,36 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) kmem_cache_free(dev_priv->objects, obj); } +static struct drm_i915_gem_object * +i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) +{ + struct drm_i915_gem_object *obj; + int ret; + + mutex_lock(&dev->struct_mutex); + obj = i915_gem_object_create_stolen(dev, size); + if (!obj) { + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + /* Always clear fresh buffers before handing to userspace */ + ret = i915_gem_object_clear(obj); + if (ret) { + drm_gem_object_unreference(&obj->base); + mutex_unlock(&dev->struct_mutex); + return NULL; + } + + mutex_unlock(&dev->struct_mutex); + return obj; +} + static int i915_gem_create(struct drm_file *file, struct drm_device *dev, uint64_t size, + uint64_t flags, uint32_t *handle_p) { struct drm_i915_gem_object *obj; @@ -403,8 +429,21 @@ i915_gem_create(struct drm_file *file, if (size == 0) return -EINVAL; + if (flags & __I915_CREATE_UNKNOWN_FLAGS) + return -EINVAL; + /* Allocate the new object */ - obj = i915_gem_alloc_object(dev, size); + switch (flags & I915_CREATE_PLACEMENT_MASK) { + case I915_CREATE_PLACEMENT_NORMAL: + obj = i915_gem_alloc_object(dev, size); + break; + case I915_CREATE_PLACEMENT_STOLEN: + obj = i915_gem_alloc_object_stolen(dev, size); + break; + default: + return -EINVAL; + } + if (obj == NULL) return -ENOMEM; @@ -427,7 +466,7 @@ 
i915_gem_dumb_create(struct drm_file *file, args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64); args->size = args->pitch * args->height; return i915_gem_create(file, dev, - args->size, &args->handle); + args->size, 0, &args->handle);
[Intel-gfx] [PATCH 04/10] drm/i915: Clearing buffer objects via CPU/GTT
From: Ankitprasad Sharma This patch adds support for clearing buffer objects via CPU/GTT. This is particularly useful for clearing out the non shmem backed objects. Currently intend to use this only for buffers allocated from stolen region. v2: Added kernel doc for i915_gem_clear_object(), corrected/removed variable assignments (Tvrtko) v3: Map object page by page to the gtt if the pinning of the whole object to the ggtt fails, Corrected function name (Chris) v4: Clear the buffer page by page, and not map the whole object in the gtt aperture. Use i915 wrapper function in place of drm_mm_insert_node_in_range. v5: Use renamed wrapper function for drm_mm_insert_node_in_range, updated barrier positioning (Chris) v6: Use PAGE_SIZE instead of 4096, use get_pages call before pinning pages (Tvrtko) v7: Fixed the onion (undo operation in reverse order) (Chris) Testcase: igt/gem_stolen Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin Reviewed-by: Chris Wilson --- drivers/gpu/drm/i915/i915_drv.h | 1 + drivers/gpu/drm/i915/i915_gem.c | 47 + 2 files changed, 48 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e4c25c6..1122e1b 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2938,6 +2938,7 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj, int *needs_clflush); int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj); +int i915_gem_object_clear(struct drm_i915_gem_object *obj); static inline int __sg_page_count(struct scatterlist *sg) { diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 49a03f2..1aa4fc9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -5405,3 +5405,50 @@ fail: drm_gem_object_unreference(&obj->base); return ERR_PTR(ret); } + +/** + * i915_gem_object_clear() - Clear buffer object via CPU/GTT + * @obj: Buffer object to be cleared + * + * Return: 0 - success, 
non-zero - failure + */ +int i915_gem_object_clear(struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct drm_mm_node node; + char __iomem *base; + uint64_t size = obj->base.size; + int ret, i; + + lockdep_assert_held(&obj->base.dev->struct_mutex); + ret = insert_mappable_node(i915, &node, PAGE_SIZE); + if (ret) + return ret; + + ret = i915_gem_object_get_pages(obj); + if (ret) + goto err_remove_node; + + i915_gem_object_pin_pages(obj); + base = io_mapping_map_wc(i915->gtt.mappable, node.start); + + for (i = 0; i < size/PAGE_SIZE; i++) { + i915->gtt.base.insert_page(&i915->gtt.base, + i915_gem_object_get_dma_address(obj, i), + node.start, + I915_CACHE_NONE, 0); + wmb(); /* flush modifications to the GGTT (insert_page) */ + memset_io(base, 0, PAGE_SIZE); + wmb(); /* flush the write before we modify the GGTT */ + } + + io_mapping_unmap(base); + i915->gtt.base.clear_range(&i915->gtt.base, + node.start, node.size, + true); + i915_gem_object_unpin_pages(obj); + +err_remove_node: + remove_mappable_node(&node); + return ret; +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 10/10] drm/i915: Disable use of stolen area by User when Intel RST is present
From: Ankitprasad Sharma The BIOS RapidStartTechnology may corrupt the stolen memory across S3 suspend due to unalarmed hibernation, in which case we will not be able to preserve the User data stored in the stolen region. Hence this patch tries to identify presence of the RST device on the ACPI bus, and disables use of stolen memory (for persistent data) if found. v2: Updated comment, updated/corrected new functions private to driver (Chris/Tvrtko) v3: Disabling stolen by default, wait till required acpi changes to detect device presence are pulled in (Ankit) v4: Enabled stolen by default as required acpi changes are merged (Ankit) v5: renamed variable, is IS_ENABLED() in place of #ifdef, use char* instead of structures (Lukas) Signed-off-by: Ankitprasad Sharma Cc: Lukas Wunner --- drivers/gpu/drm/i915/i915_drv.h| 11 +++ drivers/gpu/drm/i915/i915_gem.c| 8 drivers/gpu/drm/i915/i915_gem_stolen.c | 12 drivers/gpu/drm/i915/intel_acpi.c | 7 +++ 4 files changed, 38 insertions(+) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 16f2f94..75e6935 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1349,6 +1349,16 @@ struct i915_gem_mm { */ bool busy; + /** +* Stolen will be lost upon hibernate (as the memory is unpowered). +* Across resume, we expect stolen to be intact - however, it may +* also be utililised by third parties (e.g. Intel RapidStart +* Technology) and if so we have to assume that any data stored in +* stolen across resume is lost and we set this flag to indicate that +* the stolen memory is volatile. 
+*/ + bool volatile_stolen; + /* the indicator for dispatch video commands on two BSD rings */ unsigned int bsd_ring_dispatch_index; @@ -3465,6 +3475,7 @@ intel_opregion_notify_adapter(struct drm_device *dev, pci_power_t state) #endif /* intel_acpi.c */ +bool intel_detect_acpi_rst(void); #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); extern void intel_unregister_dsm_handler(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 587beea..8e5fce4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -396,8 +396,16 @@ static struct drm_i915_gem_object * i915_gem_alloc_object_stolen(struct drm_device *dev, size_t size) { struct drm_i915_gem_object *obj; + struct drm_i915_private *dev_priv = dev->dev_private; int ret; + if (dev_priv->mm.volatile_stolen) { + /* Stolen may be overwritten by external parties +* so unsuitable for persistent user data. +*/ + return ERR_PTR(-ENODEV); + } + mutex_lock(&dev->struct_mutex); obj = i915_gem_object_create_stolen(dev, size); if (IS_ERR(obj)) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 335a1ef..88ee036 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -482,6 +482,18 @@ int i915_gem_init_stolen(struct drm_device *dev) */ drm_mm_init(&dev_priv->mm.stolen, 0, dev_priv->gtt.stolen_usable_size); + /* If the stolen region can be modified behind our backs upon suspend, +* then we cannot use it to store nonvolatile contents (i.e user data) +* as it will be corrupted upon resume. +*/ + dev_priv->mm.volatile_stolen = false; + if (IS_ENABLED(CONFIG_SUSPEND)) { + /* BIOSes using RapidStart Technology have been reported +* to overwrite stolen across S3, not just S4. 
+*/ + dev_priv->mm.volatile_stolen = intel_detect_acpi_rst(); + } + return 0; } diff --git a/drivers/gpu/drm/i915/intel_acpi.c b/drivers/gpu/drm/i915/intel_acpi.c index eb638a1..05fd67f 100644 --- a/drivers/gpu/drm/i915/intel_acpi.c +++ b/drivers/gpu/drm/i915/intel_acpi.c @@ -23,6 +23,8 @@ static const u8 intel_dsm_guid[] = { 0x0f, 0x13, 0x17, 0xb0, 0x1c, 0x2c }; +static const char *irst_id = "INT3392"; + static char *intel_dsm_port_name(u8 id) { switch (id) { @@ -162,3 +164,8 @@ void intel_register_dsm_handler(void) void intel_unregister_dsm_handler(void) { } + +bool intel_detect_acpi_rst(void) +{ + return acpi_dev_present(irst_id); +} -- 1.9.1 ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 09/10] drm/i915: Migrate stolen objects before hibernation
From: Chris Wilson Ville reminded us that stolen memory is not preserved across hibernation, and a result of this was that context objects now being allocated from stolen were being corrupted on S4 and promptly hanging the GPU on resume. We want to utilise stolen for as much as possible (nothing else will use that wasted memory otherwise), so we need a strategy for handling general objects allocated from stolen across hibernation. A simple solution is to do a CPU copy through the GTT of the stolen object into a fresh shmemfs backing store and thenceforth treat it as a normal object. This can be refined in future to use a GPU copy to avoid the slow uncached reads (though it's hibernation!) and to recreate stolen objects upon resume/first-use. For now, a simple approach should suffice for testing the object migration. v2: Swap PTE for pinned bindings over to the shmemfs. This adds a complicated dance, but is required as many stolen objects are likely to be pinned for use by the hardware. Swapping the PTEs should not result in externally visible behaviour, as each PTE update should be atomic and the two pages identical. (danvet) safe-by-default, or the principle of least surprise. We need a new flag to mark objects that we can wilfully discard and recreate across hibernation. (danvet) Just use the global_list rather than invent a new stolen_list. This is the slowpath hibernate and so adding a new list and the associated complexity isn't worth it. 
v3: Rebased on drm-intel-nightly (Ankit) v4: Use insert_page to map stolen memory backed pages for migration to shmem (Chris) v5: Acquire mutex lock while copying stolen buffer objects to shmem (Chris) v6: Handled file leak, Splitted object migration function, added kerneldoc for migrate_stolen_to_shmemfs() function (Tvrtko) Use i915 wrapper function for drm_mm_insert_node_in_range() v7: Keep the object in cpu domain after get_pages, remove the object from the unbound list only when marked PURGED, Corrected split of object migration function (Chris) v8: Split i915_gem_freeze(), removed redundant use of barrier, corrected use of set_to_cpu_domain() (Chris) v9: Replaced WARN_ON by BUG_ON and added a comment explaining it (Daniel/Tvrtko) v10: Document use of barriers (Chris) Signed-off-by: Chris Wilson Signed-off-by: Ankitprasad Sharma Reviewed-by: Tvrtko Ursulin --- drivers/gpu/drm/i915/i915_drv.c | 17 ++- drivers/gpu/drm/i915/i915_drv.h | 10 ++ drivers/gpu/drm/i915/i915_gem.c | 198 ++-- drivers/gpu/drm/i915/i915_gem_stolen.c | 49 drivers/gpu/drm/i915/intel_display.c| 3 + drivers/gpu/drm/i915/intel_fbdev.c | 6 + drivers/gpu/drm/i915/intel_pm.c | 2 + drivers/gpu/drm/i915/intel_ringbuffer.c | 6 + 8 files changed, 279 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 11d8414..cfa44af 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -996,6 +996,21 @@ static int i915_pm_suspend(struct device *dev) return i915_drm_suspend(drm_dev); } +static int i915_pm_freeze(struct device *dev) +{ + int ret; + + ret = i915_gem_freeze(pci_get_drvdata(to_pci_dev(dev))); + if (ret) + return ret; + + ret = i915_pm_suspend(dev); + if (ret) + return ret; + + return 0; +} + static int i915_pm_suspend_late(struct device *dev) { struct drm_device *drm_dev = dev_to_i915(dev)->dev; @@ -1643,7 +1658,7 @@ static const struct dev_pm_ops i915_pm_ops = { * @restore, @restore_early : called after 
rebooting and restoring the *hibernation image [PMSG_RESTORE] */ - .freeze = i915_pm_suspend, + .freeze = i915_pm_freeze, .freeze_late = i915_pm_suspend_late, .thaw_early = i915_pm_resume_early, .thaw = i915_pm_resume, diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 943b301..16f2f94 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2137,6 +2137,12 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ unsigned int madv:2; + /** +* Whereas madv is for userspace, there are certain situations +* where we want I915_MADV_DONTNEED behaviour on internal objects +* without conflating the userspace setting. +*/ + unsigned int internal_volatile:1; /** * Current tiling mode for the object. @@ -3093,6 +3099,9 @@ int i915_gem_l3_remap(struct drm_i915_gem_request *req, int slice); void i915_gem_init_swizzling(struct drm_device *dev); void i915_gem_cleanup_ringbuffer(struct drm_device *dev); int __must_check i915_gpu_idle(struct drm_device *dev); +int __must_check i915_gem_freeze(struct drm_device *dev); +int __must_check +i915_gem_object_migrate_stol