[RFC PATCH] drm/ttm, drm/vmwgfx: Have TTM support AMD SEV encryption
From: Thomas Hellstrom With SEV encryption, all DMA memory must be marked decrypted (AKA "shared") for devices to be able to read it. In the future we might want to be able to switch normal (encrypted) memory to decrypted in exactly the same way as we handle caching states, and that would require additional memory pools. But for now, rely on memory allocated with dma_alloc_coherent() which is already decrypted with SEV enabled. Set up the page protection accordingly. Drivers must detect SEV enabled and switch to the dma page pool. This patch has not yet been tested. As a follow-up, we might want to cache decrypted pages in the dma page pool regardless of their caching state. Cc: Christian König Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/ttm/ttm_bo_util.c| 17 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 -- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 +++ drivers/gpu/drm/vmwgfx/vmwgfx_blit.c | 6 -- include/drm/ttm/ttm_bo_driver.h | 8 +--- include/drm/ttm/ttm_tt.h | 1 + 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 895d77d799e4..1d6643bd0b01 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -419,11 +419,13 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo, page = i * dir + add; if (old_iomap == NULL) { pgprot_t prot = ttm_io_prot(old_mem->placement, + ttm->page_flags, PAGE_KERNEL); ret = ttm_copy_ttm_io_page(ttm, new_iomap, page, prot); } else if (new_iomap == NULL) { pgprot_t prot = ttm_io_prot(new_mem->placement, + ttm->page_flags, PAGE_KERNEL); ret = ttm_copy_io_ttm_page(ttm, old_iomap, page, prot); @@ -526,11 +528,11 @@ static int ttm_buffer_object_transfer(struct ttm_buffer_object *bo, return 0; } -pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp) +pgprot_t ttm_io_prot(u32 caching_flags, u32 tt_page_flags, pgprot_t tmp) { /* Cached mappings need no adjustment */ if (caching_flags & TTM_PL_FLAG_CACHED) - return tmp; + goto check_encryption; #if defined(__i386__) || defined(__x86_64__) if (caching_flags & TTM_PL_FLAG_WC) @@ -548,6 +550,11 @@ pgprot_t ttm_io_prot(uint32_t caching_flags, pgprot_t tmp) #if defined(__sparc__) || defined(__mips__) tmp = pgprot_noncached(tmp); #endif + +check_encryption: + if (tt_page_flags & TTM_PAGE_FLAG_DECRYPTED) + tmp = pgprot_decrypted(tmp); + return tmp; } EXPORT_SYMBOL(ttm_io_prot); @@ -594,7 +601,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, if (ret) return ret; - if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED)) { + if (num_pages == 1 && (mem->placement & TTM_PL_FLAG_CACHED) && + !(ttm->page_flags & TTM_PAGE_FLAG_DECRYPTED)) { /* * We're mapping a single page, and the desired * page protection is consistent with the bo. @@ -608,7 +616,8 @@ static int ttm_bo_kmap_ttm(struct ttm_buffer_object *bo, * We need to use vmap to get the desired page protection * or to make the buffer object look contiguous. 
*/ - prot = ttm_io_prot(mem->placement, PAGE_KERNEL); + prot = ttm_io_prot(mem->placement, ttm->page_flags, + PAGE_KERNEL); map->bo_kmap_type = ttm_bo_map_vmap; map->virtual = vmap(ttm->pages + start_page, num_pages, 0, prot); diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 2d9862fcf6fd..e12247edd243 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -245,7 +245,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, goto out_io_unlock; } - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot); if (!bo->mem.bus.is_iomem) { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -255,13 +254,16 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, }; ttm = bo->ttm; + cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, + ttm->page_flags, prot); if (ttm_tt_populate(bo->ttm, &ctx)) { ret = VM_
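For illustration only: the commit message above asks drivers to detect SEV and switch to the dma page pool themselves. Under that assumption, a driver's ttm_tt_populate() callback could look roughly like the sketch below. struct my_driver_tt and my_driver_tt_populate() are hypothetical; sev_active(), ttm_dma_populate() and ttm_pool_populate() are the existing interfaces of this kernel era.

#include <linux/mem_encrypt.h>		/* sev_active() */
#include <drm/ttm/ttm_page_alloc.h>	/* ttm_pool_populate(), ttm_dma_populate() */

/* Hypothetical driver tt wrapper around struct ttm_dma_tt. */
struct my_driver_tt {
	struct ttm_dma_tt dma_tt;
	struct device *dev;
};

static int my_driver_tt_populate(struct ttm_tt *ttm,
				 struct ttm_operation_ctx *ctx)
{
	struct my_driver_tt *gtt =
		container_of(ttm, struct my_driver_tt, dma_tt.ttm);

	/* With SEV active, only dma_alloc_coherent() pages are decrypted. */
	if (sev_active())
		return ttm_dma_populate(&gtt->dma_tt, gtt->dev, ctx);

	return ttm_pool_populate(ttm, ctx);
}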
[PATCH v4 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources
Emulate coherency by tracking vm accesses. * @backup: The backup buffer if any. Protected by resource reserved. * @backup_offset: Offset into the backup buffer if any. Protected by resource * reserved. Note that only a few resource types can have a @backup_offset @@ -151,14 +154,16 @@ struct vmw_res_func; * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ +struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; int id; u32 used_prio; unsigned long backup_size; - bool res_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; @@ -166,6 +171,7 @@ struct vmw_resource { struct list_head lru_head; struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -606,6 +612,9 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + /* VM operations */ + struct vm_operations_struct vm_ops; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) 
\ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33533d126277..319c1ca35663 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index ..8d154f90bdc0 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/****** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAG
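The actual tracking machinery lives in the new vmwgfx_page_dirty.c, which is cut off above. As a conceptual sketch only, and not the code of this patch, "tracking vm accesses" boils down to intercepting the first write to a page via page_mkwrite and recording it in a per-buffer-object bitmap, roughly:

static vm_fault_t track_bo_write(struct vm_fault *vmf,
				 unsigned long *dirty_bitmap,
				 pgoff_t bo_start_pgoff)
{
	/* Page offset of the fault relative to the buffer object start. */
	pgoff_t page = vmf->pgoff - bo_start_pgoff;

	__set_bit(page, dirty_bitmap);	/* remember that this page was written */

	/* Returning 0 lets the core make the PTE writable (and dirty). */
	return 0;
}

The range accumulated this way is later transferred to the resource before the GPU consumes it, see vmw_bo_dirty_transfer_to_res() among the declarations above.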
[PATCH v4 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks
From: Thomas Hellstrom Add the callbacks necessary to implement emulated coherent memory for surfaces. Add a flag to the gb_surface_create ioctl to indicate that surface memory should be coherent. Also bump the drm minor version to signal the availability of coherent surfaces. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- .../device_include/svga3d_surfacedefs.h | 233 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395 +- include/uapi/drm/vmwgfx_drm.h | 4 +- 4 files changed, 629 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. 
+ */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->block_size.width) * + desc->bytes_per_block * num_samples; + if (!mip->row_stride) + goto invalid_di
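A quick worked illustration of the helpers above, with made-up numbers rather than anything from the patch:

/* A cubemap with 10 mip levels: face (layer) 2, mip level 3 maps to
 * subresource 2 * 10 + 3 == 23. */
struct svga3dsurface_cache cache = { .num_mip_levels = 10 };
u32 subres = svga3dsurface_subres(&cache, 3 /* mip_level */, 2 /* layer */);

/* Row stride of a 256-pixel-wide, single-sampled mip of an uncompressed
 * 32bpp format (1x1 blocks, 4 bytes per block), following the formula in
 * svga3dsurface_setup_cache() above:
 * __KERNEL_DIV_ROUND_UP(256, 1) * 4 * 1 == 1024 bytes per block row. */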
[PATCH v4 0/9] Emulated coherent graphics memory
Planning to merge this through the drm/vmwgfx tree soon, so if there are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation has signaled. Paravirtual drivers that otherwise require explicit synchronization need to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. This is a first attempt to do that for the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the feedback I got, except a possible softdirty breakage. But although the dirty-tracking and softdirty may write-protect PTEs both care about, that shouldn't really cause any operational interference, in particular since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

The TTM changes are hopefully in line with the long-term strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies, of course; a typical number is around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range().
- Re-evaluated the decision to use apply_to_pfn_range() rather than modifying pagewalk.c. It still looks like generically handling the transparent huge page cases requires the mmap_sem to be held at least in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of a pgprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86.
- Fixed image offset computation on multisample images.

Cc: Andrew Morton
Cc: Matthew Wilcox
Cc: Will Deacon
Cc: Peter Zijlstra
Cc: Rik van Riel
Cc: Minchan Kim
Cc: Michal Hocko
Cc: Huang Ying
Cc: Souptick Joarder
Cc: "Jérôme Glisse"
Cc: "Christian König"
Cc: linux...@kvack.org

[PATCH v4 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
From: Thomas Hellstrom With emulated coherent memory we need to be able to quickly look up a resource from the MOB offset. Instead of traversing a linked list with O(n) worst case, use an RBtree with O(log n) worst case complexity. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 10 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +--- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 90ca866640fe..e8bc7a7ac031 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; - - INIT_LIST_HEAD(&vmw_bo->res_list); + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 27c259395790..9b347923196f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -89,7 +89,7 @@ struct vmw_fpriv { /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object - * @res_list: List of resources using this buffer object as a backing MOB + * @res_tree: RB tree of resources using this buffer object as a backing MOB * @pin_count: pin depth * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB * @map: Kmap object for semi-persistent mappings @@ -98,7 +98,7 @@ struct vmw_fpriv { */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; @@ -146,8 +146,8 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. 
@@ -168,8 +168,8 @@ struct vmw_resource { unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); @@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, */ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) { - return !list_empty(&res->mob_head); + return !RB_EMPTY_NODE(&res->mob_node); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index da9afa83fb4f..b84dd5953886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -41,11 +41,24 @@ void vmw_resource_mob_attach(struct vmw_resource *res) { struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; lockdep_assert_held(&backup->base.resv->lock.base); res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->func->prio; - list_add_tail(&res->mob_head, &backup->res_list); + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + new = (res->backup_offset < this->backup_offset) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&res->mob_node, parent, new); +
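To show what the new data structure buys (this helper is not among the hunks above and its name is illustrative), looking up the first resource at or above a given MOB offset becomes an O(log n) descent of the rbtree keyed on @backup_offset:

static struct vmw_resource *
vmw_resource_first_at_or_above(struct vmw_buffer_object *vbo,
			       unsigned long offset)
{
	struct rb_node *node = vbo->res_tree.rb_node;
	struct vmw_resource *found = NULL;

	while (node) {
		struct vmw_resource *res =
			container_of(node, struct vmw_resource, mob_node);

		if (res->backup_offset >= offset) {
			found = res;		/* best candidate so far */
			node = node->rb_left;	/* look for a lower match */
		} else {
			node = node->rb_right;
		}
	}

	return found;
}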
[PATCH v4 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources
From: Thomas Hellstrom Similar to write-coherent resources, make sure that from the user-space point of view, GPU rendered contents is automatically available for reading by the CPU. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c| 73 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 103 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c| 3 +- 5 files changed, 177 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 9b347923196f..dae3a39bf402 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, +bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); void vmw_bo_dirty_clear_res(struct vmw_resource *res); void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 8d154f90bdc0..730c51e397dd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) } } - /** * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty * tracking structure @@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) vmw_bo_dirty_scan_mkwrite(vbo); } +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. 
+ */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + apply_as_wrprotect(mapping, start + offset, end - start); + apply_as_clean(mapping, start + offset, end - start, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vbo, start, end); + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, + (loff_t) (end - start) << PAGE_SHIFT); +} + /** * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object * @vbo: The buffer object @@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru
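As an illustrative usage sketch, not a hunk from this patch, this is roughly how a caller would drop the CPU mappings covering a single resource's backup range, so that the next CPU access faults and the read-coherency path can first pull in GPU-rendered content:

	pgoff_t start = res->backup_offset >> PAGE_SHIFT;
	pgoff_t end = PAGE_ALIGN(res->backup_offset + res->backup_size) >>
		PAGE_SHIFT;

	vmw_bo_dirty_unmap(res->backup, start, end);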
[PATCH v4 5/9] drm/ttm: TTM fault handler helpers
From: Thomas Hellstrom With the vmwgfx dirty tracking, the default TTM fault handler is not completely sufficient (vmwgfx need to modify the vma->vm_flags member, and also needs to restrict the number of prefaults). We also want to replicate the new ttm_bo_vm_reserve() functionality So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved() and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other drivers to use. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: "Christian König" #v1 --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++- include/drm/ttm/ttm_bo_api.h| 10 ++ 2 files changed, 113 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 196e13a0adad..2d9862fcf6fd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include #include -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + *0 on success and the bo was reserved. + *VM_FAULT_RETRY if blocking wait. + *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, +struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - - /* -* Work around locking order reversal in fault / nopfn -* between mmap_sem and bo_reserve: Perform a trylock operation -* for reserve, and if it fails, retry the fault after waiting -* for the buffer to become unreserved. -*/ if (unlikely(!reservation_object_trylock(bo->resv))) { if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { @@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. 
The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct p
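Putting the two helpers together, a driver-specific fault handler ends up looking roughly like the sketch below. Error handling is abbreviated, the prefault count and the my_ name are the driver's choice:

static vm_fault_t my_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	ret = ttm_bo_vm_fault_reserved(vmf, vm_get_page_prot(vma->vm_flags),
				       16 /* driver-chosen prefault count */);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* reservation already released on this path */

	reservation_object_unlock(bo->resv);

	return ret;
}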
[PATCH v4 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct
From: Thomas Hellstrom Add a pointer to the struct vm_operations_struct in the bo_device, and assign that pointer to the default value currently used. The driver can then optionally modify that pointer and the new value can be used for each new vma created. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c| 1 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++--- include/drm/ttm/ttm_bo_driver.h | 6 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7de667d482a..6953dd264172 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, mutex_lock(&ttm_global_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); + bdev->vm_ops = &ttm_bo_vm_ops; return 0; out_no_sys: diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 6dacff49c1cc..196e13a0adad 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } -static const struct vm_operations_struct ttm_bo_vm_ops = { +const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, @@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, if (unlikely(ret != 0)) goto out_unref; - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bdev->vm_ops; /* * Note: We're transferring the bo reference to @@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) ttm_bo_get(bo); - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bo->bdev->vm_ops; vma->vm_private_data = bo; vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index c9b8ba492f24..a2d810a2504d 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -442,6 +442,9 @@ extern struct ttm_bo_global { * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @vma_manager: Address space manager + * @vm_ops: Pointer to the struct vm_operations_struct used for this + * device's VM operations. The driver may override this before the first + * mmap() call. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -460,6 +463,7 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + const struct vm_operations_struct *vm_ops; /* * Protected by internal locks. @@ -488,6 +492,8 @@ struct ttm_bo_device { bool no_retry; }; +extern const struct vm_operations_struct ttm_bo_vm_ops; + /** * struct ttm_lru_bulk_move_pos * -- 2.20.1
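As a usage sketch (the my_ names are hypothetical), a driver that wants its own fault handling would copy the TTM defaults, override what it needs (for instance a fault handler like the my_bo_vm_fault() sketch above), and point the device at the copy before the first mmap():

static void my_driver_init_vm_ops(struct my_private *dev_priv)
{
	struct ttm_bo_device *bdev = &dev_priv->bdev;

	dev_priv->vm_ops = *bdev->vm_ops;	/* starts out as ttm_bo_vm_ops */
	dev_priv->vm_ops.fault = my_bo_vm_fault;
	dev_priv->vm_ops.page_mkwrite = my_bo_vm_mkwrite;
	bdev->vm_ops = &dev_priv->vm_ops;
}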
Re: [PATCH v4 3/9] mm: Add write-protect and clean utilities for address space ranges
Hi, Nadav, On 6/11/19 7:21 PM, Nadav Amit wrote: On Jun 11, 2019, at 5:24 AM, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom [ snip ] +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero. + */ +static int apply_pt_wrprotect(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + pte_t ptent = *pte; + + if (pte_write(ptent)) { + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_wrprotect(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + } + + return 0; +} + +/** + * struct apply_as_clean - Closure structure for apply_as_clean + * @base: struct apply_as we derive from + * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap + * @bitmap: Bitmap with one bit for each page offset in the address_space range + * covered. + * @start: Address_space page offset of first modified pte relative + * to @bitmap_pgoff + * @end: Address_space page offset of last modified pte relative + * to @bitmap_pgoff + */ +struct apply_as_clean { + struct apply_as base; + pgoff_t bitmap_pgoff; + unsigned long *bitmap; + pgoff_t start; + pgoff_t end; +}; + +/** + * apply_pt_clean - Leaf pte callback to clean a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as_clean + * + * The function cleans a pte and records the range in + * virtual address space of touched ptes for efficient TLB flushes. + * It also records dirty ptes in a bitmap representing page offsets + * in the address_space, as well as the first and last of the bits + * touched. + * + * Return: Always zero. + */ +static int apply_pt_clean(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + struct apply_as_clean *clean = container_of(aas, typeof(*clean), base); + pte_t ptent = *pte; + + if (pte_dirty(ptent)) { + pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) + + aas->vma->vm_pgoff - clean->bitmap_pgoff; + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_mkclean(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + + __set_bit(pgoff, clean->bitmap); + clean->start = min(clean->start, pgoff); + clean->end = max(clean->end, pgoff + 1); + } + + return 0; Usually, when a PTE is write-protected, or when a dirty-bit is cleared, the TLB flush must be done while the page-table lock for that specific table is taken (i.e., within apply_pt_clean() and apply_pt_wrprotect() in this case). Otherwise, in the case of apply_pt_clean() for example, another core might shortly after (before the TLB flush) write to the same page whose PTE was changed. 
The dirty-bit in such case might not be set, and the change get lost. Hmm. Let's assume that was the case, we have two possible situations: A: pt_clean 1. That core's TLB entry is invalid. It will set the PTE dirty bit and continue. The dirty bit will probably remain set after the TLB flush. 2. That core's TLB entry is valid. It will just continue. The dirty bit will remain clear after the TLB flush. But I fail to see how having the TLB flush within the page table lock would help in this case. Since the writing core will never attempt to take it? In any case, if such a race occurs, the corresponding bit in the bitmap would have been set and we've recorded that the page is dirty. B: wrprotect situation, the situation is a bit different: 1. That core's TLB entry is invalid. It will read the PTE, cause a fault and block in mkwrite() on an external address space lock which is held over this operation. (Is
Re: [PATCH v4 3/9] mm: Add write-protect and clean utilities for address space ranges
On 6/11/19 9:10 PM, Nadav Amit wrote: On Jun 11, 2019, at 11:26 AM, Thomas Hellström (VMware) wrote: Hi, Nadav, On 6/11/19 7:21 PM, Nadav Amit wrote: On Jun 11, 2019, at 5:24 AM, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom [ snip ] +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero. + */ +static int apply_pt_wrprotect(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + pte_t ptent = *pte; + + if (pte_write(ptent)) { + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_wrprotect(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + } + + return 0; +} + +/** + * struct apply_as_clean - Closure structure for apply_as_clean + * @base: struct apply_as we derive from + * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap + * @bitmap: Bitmap with one bit for each page offset in the address_space range + * covered. + * @start: Address_space page offset of first modified pte relative + * to @bitmap_pgoff + * @end: Address_space page offset of last modified pte relative + * to @bitmap_pgoff + */ +struct apply_as_clean { + struct apply_as base; + pgoff_t bitmap_pgoff; + unsigned long *bitmap; + pgoff_t start; + pgoff_t end; +}; + +/** + * apply_pt_clean - Leaf pte callback to clean a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as_clean + * + * The function cleans a pte and records the range in + * virtual address space of touched ptes for efficient TLB flushes. + * It also records dirty ptes in a bitmap representing page offsets + * in the address_space, as well as the first and last of the bits + * touched. + * + * Return: Always zero. + */ +static int apply_pt_clean(pte_t *pte, pgtable_t token, + unsigned long addr, + struct pfn_range_apply *closure) +{ + struct apply_as *aas = container_of(closure, typeof(*aas), base); + struct apply_as_clean *clean = container_of(aas, typeof(*clean), base); + pte_t ptent = *pte; + + if (pte_dirty(ptent)) { + pgoff_t pgoff = ((addr - aas->vma->vm_start) >> PAGE_SHIFT) + + aas->vma->vm_pgoff - clean->bitmap_pgoff; + pte_t old_pte = ptep_modify_prot_start(aas->vma, addr, pte); + + ptent = pte_mkclean(old_pte); + ptep_modify_prot_commit(aas->vma, addr, pte, old_pte, ptent); + + aas->total++; + aas->start = min(aas->start, addr); + aas->end = max(aas->end, addr + PAGE_SIZE); + + __set_bit(pgoff, clean->bitmap); + clean->start = min(clean->start, pgoff); + clean->end = max(clean->end, pgoff + 1); + } + + return 0; Usually, when a PTE is write-protected, or when a dirty-bit is cleared, the TLB flush must be done while the page-table lock for that specific table is taken (i.e., within apply_pt_clean() and apply_pt_wrprotect() in this case). 
Otherwise, in the case of apply_pt_clean() for example, another core might shortly after (before the TLB flush) write to the same page whose PTE was changed. The dirty-bit in such case might not be set, and the change get lost. Hmm. Let's assume that was the case, we have two possible situations: A: pt_clean 1. That core's TLB entry is invalid. It will set the PTE dirty bit and continue. The dirty bit will probably remain set after the TLB flush. I guess you mean the PTE is not cached in the TLB. Yes. 2. That core's TLB entry is valid. It will just continue. The dirty bit will remain clear after the TLB flush. But I fail to see how having the TLB flush within the page table lock would help in this case. Since the writing core will never attempt to take it? In any case, if such a race occurs, the corresponding bit in the bitmap would have been set and we've recorded that the page is dirty. I don’t understand. What do you mean “recorded that the page is dirty”?
[PATCH 1/2] drm/vmwgfx: Use the backdoor port if the HB port is not available
From: Thomas Hellstrom The HB port may not be available for various reasons. Either it has been disabled by a config option or by the hypervisor for other reasons. In that case, make sure we have a backup plan and use the backdoor port instead with a performance penalty. Cc: sta...@vger.kernel.org Fixes: 89da76fde68d ("drm/vmwgfx: Add VMWare host messaging capability") Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 146 ++-- 1 file changed, 117 insertions(+), 29 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index 8b9270f31409..e4e09d47c5c0 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -136,6 +136,114 @@ static int vmw_close_channel(struct rpc_channel *channel) return 0; } +/** + * vmw_port_hb_out - Send the message payload either through the + * high-bandwidth port if available, or through the backdoor otherwise. + * @channel: The rpc channel. + * @msg: NULL-terminated message. + * @hb: Whether the high-bandwidth port is available. + * + * Return: The port status. + */ +static unsigned long vmw_port_hb_out(struct rpc_channel *channel, +const char *msg, bool hb) +{ + unsigned long si, di, eax, ebx, ecx, edx; + unsigned long msg_len = strlen(msg); + + if (hb) { + unsigned long bp = channel->cookie_high; + + si = (uintptr_t) msg; + di = channel->cookie_low; + + VMW_PORT_HB_OUT( + (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, + msg_len, si, di, + VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, bp, + eax, ebx, ecx, edx, si, di); + + return ebx; + } + + /* HB port not available. Send the message 4 bytes at a time. */ + ecx = MESSAGE_STATUS_SUCCESS << 16; + while (msg_len && (HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS)) { + unsigned int bytes = min_t(size_t, msg_len, 4); + unsigned long word = 0; + + memcpy(&word, msg, bytes); + msg_len -= bytes; + msg += bytes; + si = channel->cookie_high; + di = channel->cookie_low; + + VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_SENDPAYLOAD << 16), +word, si, di, +VMW_HYPERVISOR_PORT | (channel->channel_id << 16), +VMW_HYPERVISOR_MAGIC, +eax, ebx, ecx, edx, si, di); + } + + return ecx; +} + +/** + * vmw_port_hb_in - Receive the message payload either through the + * high-bandwidth port if available, or through the backdoor otherwise. + * @channel: The rpc channel. + * @reply: Pointer to buffer holding reply. + * @reply_len: Length of the reply. + * @hb: Whether the high-bandwidth port is available. + * + * Return: The port status. + */ +static unsigned long vmw_port_hb_in(struct rpc_channel *channel, char *reply, + unsigned long reply_len, bool hb) +{ + unsigned long si, di, eax, ebx, ecx, edx; + + if (hb) { + unsigned long bp = channel->cookie_low; + + si = channel->cookie_high; + di = (uintptr_t) reply; + + VMW_PORT_HB_IN( + (MESSAGE_STATUS_SUCCESS << 16) | VMW_PORT_CMD_HB_MSG, + reply_len, si, di, + VMW_HYPERVISOR_HB_PORT | (channel->channel_id << 16), + VMW_HYPERVISOR_MAGIC, bp, + eax, ebx, ecx, edx, si, di); + + return ebx; + } + + /* HB port not available. Retrieve the message 4 bytes at a time. 
*/ + ecx = MESSAGE_STATUS_SUCCESS << 16; + while (reply_len) { + unsigned int bytes = min_t(unsigned long, reply_len, 4); + + si = channel->cookie_high; + di = channel->cookie_low; + + VMW_PORT(VMW_PORT_CMD_MSG | (MSG_TYPE_RECVPAYLOAD << 16), +MESSAGE_STATUS_SUCCESS, si, di, +VMW_HYPERVISOR_PORT | (channel->channel_id << 16), +VMW_HYPERVISOR_MAGIC, +eax, ebx, ecx, edx, si, di); + + if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) + break; + + memcpy(reply, &ebx, bytes); + reply_len -= bytes; + reply += bytes; + } + + return ecx; +} /** @@ -148,11 +256,10 @@ static int vmw_close_channel(struct rpc_channel *channel) */ static int vmw_send_msg(struct rpc_channel *c
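Both helpers return the relevant status register, with the message status carried in its high 16 bits (see the MESSAGE_STATUS_SUCCESS << 16 seeding above). As an illustrative sketch of the caller side only (the real caller is vmw_send_msg(), cut off above, and its error handling differs):

	unsigned long status = vmw_port_hb_out(channel, msg, hb);

	if ((HIGH_WORD(status) & MESSAGE_STATUS_SUCCESS) == 0)
		return -EINVAL;		/* hypothetical error handling */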
[PATCH 2/2] drm/vmwgfx: Honor the sg list segment size limitation
From: Thomas Hellstrom

When building sg tables, honor the device sg list segment size
limitation.

Signed-off-by: Thomas Hellstrom
Reviewed-by: Deepak Rawat
---
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index a6ea75b58a83..d8ea3dd10af0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -441,11 +441,11 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	if (unlikely(ret != 0))
 		return ret;
 
-	ret = sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
-					vsgt->num_pages, 0,
-					(unsigned long)
-					vsgt->num_pages << PAGE_SHIFT,
-					GFP_KERNEL);
+	ret = __sg_alloc_table_from_pages
+		(&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
+		 (unsigned long) vsgt->num_pages << PAGE_SHIFT,
+		 dma_get_max_seg_size(dev_priv->dev->dev),
+		 GFP_KERNEL);
 	if (unlikely(ret != 0))
 		goto out_sg_alloc_fail;
 
-- 
2.20.1
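For background (not part of the patch): the limit consulted by dma_get_max_seg_size() above comes from the device's dma_parms and falls back to a conservative 64 KiB when unset, so a device that can handle larger segments typically raises it once at init time, roughly:

#include <linux/dma-mapping.h>
#include <linux/kernel.h>

/* Hypothetical init-time helper; the value chosen is device-specific. */
static int my_raise_dma_seg_limit(struct device *dev)
{
	/* Fails with -EIO if the device has no dma_parms structure. */
	return dma_set_max_seg_size(dev, UINT_MAX);
}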
[PATCH v5 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources
Emulate coherency by tracking vm accesses. * @backup: The backup buffer if any. Protected by resource reserved. * @backup_offset: Offset into the backup buffer if any. Protected by resource * reserved. Note that only a few resource types can have a @backup_offset @@ -151,14 +154,16 @@ struct vmw_res_func; * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ +struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; int id; u32 used_prio; unsigned long backup_size; - bool res_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; @@ -166,6 +171,7 @@ struct vmw_resource { struct list_head lru_head; struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -606,6 +612,9 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + /* VM operations */ + struct vm_operations_struct vm_ops; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) 
\ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33533d126277..319c1ca35663 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index ..8d154f90bdc0 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/****** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAG
[PATCH v5 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources
From: Thomas Hellstrom Similar to write-coherent resources, make sure that from the user-space point of view, GPU rendered contents is automatically available for reading by the CPU. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c| 73 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 103 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c| 3 +- 5 files changed, 177 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 9b347923196f..dae3a39bf402 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, +bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); void vmw_bo_dirty_clear_res(struct vmw_resource *res); void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 8d154f90bdc0..730c51e397dd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) } } - /** * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty * tracking structure @@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) vmw_bo_dirty_scan_mkwrite(vbo); } +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. 
+ */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + apply_as_wrprotect(mapping, start + offset, end - start); + apply_as_clean(mapping, start + offset, end - start, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vbo, start, end); + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, + (loff_t) (end - start) << PAGE_SHIFT); +} + /** * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object * @vbo: The buffer object @@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru
[PATCH v5 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks
From: Thomas Hellstrom Add the callbacks necessary to implement emulated coherent memory for surfaces. Add a flag to the gb_surface_create ioctl to indicate that surface memory should be coherent. Also bump the drm minor version to signal the availability of coherent surfaces. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- .../device_include/svga3d_surfacedefs.h | 233 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395 +- include/uapi/drm/vmwgfx_drm.h | 4 +- 4 files changed, 629 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. 
+ */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->block_size.width) * + desc->bytes_per_block * num_samples; + if (!mip->row_stride) + goto invalid_di
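For reference, the layout bookkeeping that svga3dsurface_setup_cache() prepares boils down to simple arithmetic. The standalone sketch below is not part of the patch: the struct names are simplified stand-ins, and it assumes a layout in which each layer stores its full mip chain contiguously, which is what the mip_chain_bytes accounting above suggests; the device-specific layout may differ in detail.

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the cached layout data; illustration only. */
struct mip_info {
	size_t bytes;			/* backing-store bytes of one mip level */
};

struct surface_cache {
	struct mip_info mip[16];
	size_t mip_chain_bytes;		/* sum of mip[i].bytes for one layer */
	unsigned int num_mip_levels;
	unsigned int num_layers;
};

/* Subresource index: layer * num_mip_levels + mip_level, as in the patch. */
static unsigned int subres(const struct surface_cache *c,
			   unsigned int mip_level, unsigned int layer)
{
	return c->num_mip_levels * layer + mip_level;
}

/* Byte offset of image (layer, mip): whole layers first, then earlier mips. */
static size_t image_offset(const struct surface_cache *c,
			   unsigned int mip_level, unsigned int layer)
{
	size_t offset = (size_t)layer * c->mip_chain_bytes;
	unsigned int i;

	for (i = 0; i < mip_level; i++)
		offset += c->mip[i].bytes;
	return offset;
}

int main(void)
{
	struct surface_cache c = {
		.mip = { { 65536 }, { 16384 }, { 4096 } },
		.mip_chain_bytes = 65536 + 16384 + 4096,
		.num_mip_levels = 3,
		.num_layers = 6,	/* e.g. a cubemap */
	};

	printf("subres(mip=1, layer=2) = %u\n", subres(&c, 1, 2));
	printf("byte offset(mip=1, layer=2) = %zu\n", image_offset(&c, 1, 2));
	return 0;
}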
[PATCH v5 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
From: Thomas Hellstrom With emulated coherent memory we need to be able to quickly look up a resource from the MOB offset. Instead of traversing a linked list with O(n) worst case, use an RBtree with O(log n) worst case complexity. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 10 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +--- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 90ca866640fe..e8bc7a7ac031 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; - - INIT_LIST_HEAD(&vmw_bo->res_list); + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 27c259395790..9b347923196f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -89,7 +89,7 @@ struct vmw_fpriv { /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object - * @res_list: List of resources using this buffer object as a backing MOB + * @res_tree: RB tree of resources using this buffer object as a backing MOB * @pin_count: pin depth * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB * @map: Kmap object for semi-persistent mappings @@ -98,7 +98,7 @@ struct vmw_fpriv { */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; @@ -146,8 +146,8 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. 
@@ -168,8 +168,8 @@ struct vmw_resource { unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); @@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, */ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) { - return !list_empty(&res->mob_head); + return !RB_EMPTY_NODE(&res->mob_node); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index da9afa83fb4f..b84dd5953886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -41,11 +41,24 @@ void vmw_resource_mob_attach(struct vmw_resource *res) { struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; lockdep_assert_held(&backup->base.resv->lock.base); res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->func->prio; - list_add_tail(&res->mob_head, &backup->res_list); + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + new = (res->backup_offset < this->backup_offset) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&res->mob_node, parent, new); +
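The hunk above only shows the insertion side of the tree. For completeness, a lookup walking the same ordering might look like the sketch below. This is not code from the series and vmw_resource_mob_lookup() is a made-up name; it returns the attached resource with the smallest backup_offset at or above a given MOB offset, which is the O(log n) query the commit message is after.

/*
 * Hypothetical lookup mirroring the insertion order used in
 * vmw_resource_mob_attach(): nodes are keyed on @backup_offset.
 * Caller is assumed to hold the backup buffer object reservation.
 */
static struct vmw_resource *
vmw_resource_mob_lookup(struct vmw_buffer_object *backup,
			unsigned long offset)
{
	struct rb_node *node = backup->res_tree.rb_node;
	struct vmw_resource *found = NULL;

	while (node) {
		struct vmw_resource *cur =
			container_of(node, struct vmw_resource, mob_node);

		if (cur->backup_offset >= offset) {
			/* Candidate; keep looking for a smaller offset. */
			found = cur;
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}

	return found;
}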
[PATCH v5 2/9] mm: Add an apply_to_pfn_range interface
From: Thomas Hellstrom This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 mm/memory.c| 135 ++--- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, +
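As a usage illustration only (not code from the series, and note the function is deliberately not exported, so real callers live inside mm/), the pattern is to embed struct pfn_range_apply in a private closure, recover it in the leaf callback with container_of(), and leave ->alloc at zero when missing page-table levels should simply be skipped rather than allocated:

/* Hypothetical example: count present PTEs in [addr, addr + size). */
struct count_present {
	struct pfn_range_apply base;
	unsigned long count;
};

static int count_present_ptefn(pte_t *pte, pgtable_t token,
			       unsigned long addr,
			       struct pfn_range_apply *closure)
{
	struct count_present *cp =
		container_of(closure, struct count_present, base);

	if (pte_present(*pte))
		cp->count++;

	return 0;
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long addr,
					unsigned long size)
{
	struct count_present cp = {
		.base = {
			.mm = mm,
			.ptefn = count_present_ptefn,
			.alloc = 0,	/* never allocate, never -ENOMEM */
		},
	};

	/* With ->alloc == 0 the walk can only fail if the callback fails. */
	if (apply_to_pfn_range(&cp.base, addr, size))
		return 0;

	return cp.count;
}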
[PATCH v5 3/9] mm: Add write-protect and clean utilities for address space ranges
From: Thomas Hellstrom Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. The added file "as_dirty_helpers.c" is initially listed as maintained by VMware under our DRM driver. If somebody would like it elsewhere, that's of course no problem. Notable changes since RFC: - Added comments to help avoid the usage of these function for VMAs it's not intended for. We also do advisory checks on the vm_flags and warn on illegal usage. - Perform the pte modifications the same way softdirty does. - Add mmu_notifier range invalidation calls. - Add a config option so that this code is not unconditionally included. - Tell the mmu_gather code about pending tlb flushes. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- MAINTAINERS | 1 + include/linux/mm.h| 9 +- mm/Kconfig| 3 + mm/Makefile | 1 + mm/as_dirty_helpers.c | 300 ++ 5 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 mm/as_dirty_helpers.c diff --git a/MAINTAINERS b/MAINTAINERS index 7a2f487ea49a..a55d4ef91b0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5179,6 +5179,7 @@ T:git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d06ce2a64af..a0bc2a82917e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2685,7 +2685,14 @@ struct pfn_range_apply { }; extern int apply_to_pfn_range(struct pfn_range_apply *closure, unsigned long address, unsigned long size); - +unsigned long apply_as_wrprotect(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr, +pgoff_t bitmap_pgoff, +unsigned long *bitmap, +pgoff_t *start, +pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS +bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index ..0da7a8eed51a --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include 
+#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero.
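To illustrate the calling convention of the two exported helpers (a hedged sketch, not code from the series; the example_* names are made up and the exact in/out convention of the start/end pointers follows the helper's kerneldoc), a driver could arm write-fault tracking on a mapped object and, separately, harvest hardware pte dirty bits into a bitmap:

/*
 * @mapping is the device address space, @offset the first page offset of
 * the object within it, @nr its size in pages, and @bitmap holds at least
 * @nr bits. Bit i corresponds to page @offset + i because @bitmap_pgoff
 * is passed as @offset.
 */
static void example_start_dirty_tracking(struct address_space *mapping,
					 pgoff_t offset, pgoff_t nr)
{
	/* Write-protect the range so the next CPU write faults into mkwrite(). */
	apply_as_wrprotect(mapping, offset, nr);
}

static void example_collect_dirty(struct address_space *mapping,
				  pgoff_t offset, pgoff_t nr,
				  unsigned long *bitmap)
{
	/* The helper narrows [start, end) to the dirty subrange it found. */
	pgoff_t start = nr, end = 0;
	pgoff_t i;

	/* Clear pte dirty bits, recording which pages were dirty in @bitmap. */
	apply_as_clean(mapping, offset, nr, offset, bitmap, &start, &end);

	for (i = start; i < end; i++)
		if (test_bit(i, bitmap))
			pr_info("page %lu of the object is dirty\n",
				(unsigned long)i);
}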
[PATCH v5 5/9] drm/ttm: TTM fault handler helpers
From: Thomas Hellstrom With the vmwgfx dirty tracking, the default TTM fault handler is not completely sufficient (vmwgfx need to modify the vma->vm_flags member, and also needs to restrict the number of prefaults). We also want to replicate the new ttm_bo_vm_reserve() functionality So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved() and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other drivers to use. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: "Christian König" #v1 --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++- include/drm/ttm/ttm_bo_api.h| 10 ++ 2 files changed, 113 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 196e13a0adad..2d9862fcf6fd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include #include -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + *0 on success and the bo was reserved. + *VM_FAULT_RETRY if blocking wait. + *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, +struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - - /* -* Work around locking order reversal in fault / nopfn -* between mmap_sem and bo_reserve: Perform a trylock operation -* for reserve, and if it fails, retry the fault after waiting -* for the buffer to become unreserved. -*/ if (unlikely(!reservation_object_trylock(bo->resv))) { if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { @@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. 
The caller may want to + * specify this based on madvise settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct p
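A minimal consumer of the two helpers, roughly what the default TTM fault handler ends up looking like after this split (sketch only; a driver such as vmwgfx would add its dirty tracking and prefault policy in the middle):

static vm_fault_t example_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo =
		(struct ttm_buffer_object *)vma->vm_private_data;
	vm_fault_t ret;

	/* May drop the mmap_sem and ask the core to retry the fault. */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* Insert up to 16 PTEs using the vma's page protection. */
	ret = ttm_bo_vm_fault_reserved(vmf, vma->vm_page_prot, 16);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		/* On this path the helper has already released the reservation. */
		return ret;

	reservation_object_unlock(bo->resv);

	return ret;
}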
[PATCH v5 0/9] Emulated coherent graphics memory
Planning to merge this through the drm/vmwgfx tree soon, so if there are any objections, please speak up.

Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation has signaled.

Paravirtual drivers that otherwise require explicit synchronization need to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. This is a first attempt to do that for the vmwgfx driver.

The mm patches have been out for RFC. I think I have addressed all the feedback I got, except a possible softdirty breakage. But although dirty-tracking and softdirty may write-protect PTEs that both care about, that shouldn't really cause any operational interference, in particular since we use the hardware dirty PTE bits and softdirty uses other PTE bits.

As for the TTM changes, they are hopefully in line with the long-term strategy of making helpers out of what's left of TTM.

The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%.

Changes v1-v2:
- Addressed a number of typos and formatting issues.
- Added a usage warning for apply_to_pfn_range() and apply_to_page_range().
- Re-evaluated the decision to use apply_to_pfn_range() rather than modifying the pagewalk.c. It still looks like generically handling the transparent huge page cases requires the mmap_sem to be held at least in read mode, so sticking with apply_to_pfn_range() for now.
- The TTM page-fault helper vma copy argument was scratched in favour of a pgprot_t argument.
Changes v3:
- Adapted to upstream API changes.
Changes v4:
- Adapted to upstream mmu_notifier changes. (Jerome?)
- Fixed a couple of warnings on 32-bit x86.
- Fixed image offset computation on multisample images.
Changes v5:
- Updated usage warning in patch 3/9 after review comments from Nadav Amit.

Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: "Christian König" Cc: linux...@kvack.org
___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v5 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem
From: Thomas Hellstrom Driver fault callbacks are allowed to drop the mmap_sem when expecting long hardware waits to avoid blocking other mm users. Allow the mkwrite callbacks to do the same by returning early on VM_FAULT_RETRY. In particular we want to be able to drop the mmap_sem when waiting for a reservation object lock on a GPU buffer object. These locks may be held while waiting for the GPU. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell --- mm/memory.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ddf20bd0c317..168f546af1ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) return ret; return finish_mkwrite_fault(vmf); } @@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } @@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } -- 2.20.1
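The pattern this enables on the driver side looks roughly like the sketch below (illustration only; example_object and its mutex are placeholders for whatever long-held lock, such as a buffer reservation, the handler needs):

struct example_object {
	struct mutex lock;
	/* ... dirty-tracking state ... */
};

static vm_fault_t example_pfn_mkwrite(struct vm_fault *vmf)
{
	struct example_object *obj = vmf->vma->vm_private_data;

	if (!mutex_trylock(&obj->lock)) {
		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
				/* Drop mmap_sem while blocking; the core retries. */
				up_read(&vmf->vma->vm_mm->mmap_sem);
			return VM_FAULT_RETRY;
		}
		/* Retry not allowed: block with the mmap_sem held. */
		mutex_lock(&obj->lock);
	}

	/* ... record the write, mark pages dirty ... */

	mutex_unlock(&obj->lock);
	return 0;	/* let the core make the pte writable */
}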
[PATCH v5 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct
From: Thomas Hellstrom Add a pointer to the struct vm_operations_struct in the bo_device, and assign that pointer to the default value currently used. The driver can then optionally modify that pointer and the new value can be used for each new vma created. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c| 1 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++--- include/drm/ttm/ttm_bo_driver.h | 6 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7de667d482a..6953dd264172 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, mutex_lock(&ttm_global_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); + bdev->vm_ops = &ttm_bo_vm_ops; return 0; out_no_sys: diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 6dacff49c1cc..196e13a0adad 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } -static const struct vm_operations_struct ttm_bo_vm_ops = { +const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, @@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, if (unlikely(ret != 0)) goto out_unref; - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bdev->vm_ops; /* * Note: We're transferring the bo reference to @@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) ttm_bo_get(bo); - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bo->bdev->vm_ops; vma->vm_private_data = bo; vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index c9b8ba492f24..a2d810a2504d 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -442,6 +442,9 @@ extern struct ttm_bo_global { * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @vma_manager: Address space manager + * @vm_ops: Pointer to the struct vm_operations_struct used for this + * device's VM operations. The driver may override this before the first + * mmap() call. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -460,6 +463,7 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + const struct vm_operations_struct *vm_ops; /* * Protected by internal locks. @@ -488,6 +492,8 @@ struct ttm_bo_device { bool no_retry; }; +extern const struct vm_operations_struct ttm_bo_vm_ops; + /** * struct ttm_lru_bulk_move_pos * -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
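What a driver is expected to do with the new pointer, sketched below with made-up example_* names (vmwgfx does the equivalent with its own fault and mkwrite handlers, and the fault handler could be the example_vm_fault sketched after patch 5/9 above): clone the defaults after ttm_bo_device_init() has installed them, override selected callbacks, and point bdev->vm_ops at the copy before the first mmap().

/* Hypothetical driver handlers, defined elsewhere in the driver. */
vm_fault_t example_vm_fault(struct vm_fault *vmf);
vm_fault_t example_vm_mkwrite(struct vm_fault *vmf);

static struct vm_operations_struct example_vm_ops;

static void example_setup_vm_ops(struct ttm_bo_device *bdev)
{
	example_vm_ops = *bdev->vm_ops;		/* start from ttm_bo_vm_ops */
	example_vm_ops.fault = example_vm_fault;
	example_vm_ops.page_mkwrite = example_vm_mkwrite;
	example_vm_ops.pfn_mkwrite = example_vm_mkwrite;
	bdev->vm_ops = &example_vm_ops;
}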
[PATCH v4 2/9] mm: Add an apply_to_pfn_range interface
From: Thomas Hellstrom This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 mm/memory.c| 135 ++--- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, +
[PATCH v4 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem
From: Thomas Hellstrom Driver fault callbacks are allowed to drop the mmap_sem when expecting long hardware waits to avoid blocking other mm users. Allow the mkwrite callbacks to do the same by returning early on VM_FAULT_RETRY. In particular we want to be able to drop the mmap_sem when waiting for a reservation object lock on a GPU buffer object. These locks may be held while waiting for the GPU. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell --- mm/memory.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ddf20bd0c317..168f546af1ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) return ret; return finish_mkwrite_fault(vmf); } @@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } @@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 3/9] mm: Add write-protect and clean utilities for address space ranges
From: Thomas Hellstrom Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. The added file "as_dirty_helpers.c" is initially listed as maintained by VMware under our DRM driver. If somebody would like it elsewhere, that's of course no problem. Notable changes since RFC: - Added comments to help avoid the usage of these function for VMAs it's not intended for. We also do advisory checks on the vm_flags and warn on illegal usage. - Perform the pte modifications the same way softdirty does. - Add mmu_notifier range invalidation calls. - Add a config option so that this code is not unconditionally included. - Tell the mmu_gather code about pending tlb flushes. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- MAINTAINERS | 1 + include/linux/mm.h| 9 +- mm/Kconfig| 3 + mm/Makefile | 1 + mm/as_dirty_helpers.c | 298 ++ 5 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 mm/as_dirty_helpers.c diff --git a/MAINTAINERS b/MAINTAINERS index 7a2f487ea49a..a55d4ef91b0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5179,6 +5179,7 @@ T:git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d06ce2a64af..a0bc2a82917e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2685,7 +2685,14 @@ struct pfn_range_apply { }; extern int apply_to_pfn_range(struct pfn_range_apply *closure, unsigned long address, unsigned long size); - +unsigned long apply_as_wrprotect(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr, +pgoff_t bitmap_pgoff, +unsigned long *bitmap, +pgoff_t *start, +pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS +bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index ..ccaad41b70bc --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-2.0 +#include 
+#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero.
Re: [PATCH v5 3/9] mm: Add write-protect and clean utilities for address space ranges
On 6/12/19 1:23 PM, Christoph Hellwig wrote:
> On Wed, Jun 12, 2019 at 08:42:37AM +0200, Thomas Hellström (VMware) wrote:
> > From: Thomas Hellstrom Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions.
>
> Please use EXPORT_SYMBOL_GPL, just like for apply_to_page_range and friends.

Sounds reasonable if this uses already EXPORT_SYMBOL_GPL'd functionality. I'll respin.

> Also in general new core functionality like this should go along with the actual user, we don't need to repeat the hmm disaster.

I see in your later message that you noticed the other patches. There's also user-space functionality in mesa that exercises this.

/Thomas
___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH v5 2/9] mm: Add an apply_to_pfn_range interface
On 6/12/19 2:16 PM, Christoph Hellwig wrote: On Wed, Jun 12, 2019 at 08:42:36AM +0200, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 mm/memory.c| 135 ++--- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, -
[PATCH v6 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct
From: Thomas Hellstrom Add a pointer to the struct vm_operations_struct in the bo_device, and assign that pointer to the default value currently used. The driver can then optionally modify that pointer and the new value can be used for each new vma created. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c| 1 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++--- include/drm/ttm/ttm_bo_driver.h | 6 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7de667d482a..6953dd264172 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, mutex_lock(&ttm_global_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); + bdev->vm_ops = &ttm_bo_vm_ops; return 0; out_no_sys: diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 6dacff49c1cc..196e13a0adad 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } -static const struct vm_operations_struct ttm_bo_vm_ops = { +const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, @@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, if (unlikely(ret != 0)) goto out_unref; - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bdev->vm_ops; /* * Note: We're transferring the bo reference to @@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) ttm_bo_get(bo); - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bo->bdev->vm_ops; vma->vm_private_data = bo; vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index c9b8ba492f24..a2d810a2504d 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -442,6 +442,9 @@ extern struct ttm_bo_global { * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @vma_manager: Address space manager + * @vm_ops: Pointer to the struct vm_operations_struct used for this + * device's VM operations. The driver may override this before the first + * mmap() call. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -460,6 +463,7 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + const struct vm_operations_struct *vm_ops; /* * Protected by internal locks. @@ -488,6 +492,8 @@ struct ttm_bo_device { bool no_retry; }; +extern const struct vm_operations_struct ttm_bo_vm_ops; + /** * struct ttm_lru_bulk_move_pos * -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v6 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
From: Thomas Hellstrom With emulated coherent memory we need to be able to quickly look up a resource from the MOB offset. Instead of traversing a linked list with O(n) worst case, use an RBtree with O(log n) worst case complexity. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 10 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +--- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 90ca866640fe..e8bc7a7ac031 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; - - INIT_LIST_HEAD(&vmw_bo->res_list); + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 27c259395790..9b347923196f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -89,7 +89,7 @@ struct vmw_fpriv { /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object - * @res_list: List of resources using this buffer object as a backing MOB + * @res_tree: RB tree of resources using this buffer object as a backing MOB * @pin_count: pin depth * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB * @map: Kmap object for semi-persistent mappings @@ -98,7 +98,7 @@ struct vmw_fpriv { */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; @@ -146,8 +146,8 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. 
@@ -168,8 +168,8 @@ struct vmw_resource { unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); @@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, */ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) { - return !list_empty(&res->mob_head); + return !RB_EMPTY_NODE(&res->mob_node); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index da9afa83fb4f..b84dd5953886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -41,11 +41,24 @@ void vmw_resource_mob_attach(struct vmw_resource *res) { struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; lockdep_assert_held(&backup->base.resv->lock.base); res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->func->prio; - list_add_tail(&res->mob_head, &backup->res_list); + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + new = (res->backup_offset < this->backup_offset) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&res->mob_node, parent, new); +
[PATCH v6 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources
Emulate coherency by tracking vm accesses. * @backup: The backup buffer if any. Protected by resource reserved. * @backup_offset: Offset into the backup buffer if any. Protected by resource * reserved. Note that only a few resource types can have a @backup_offset @@ -151,14 +154,16 @@ struct vmw_res_func; * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ +struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; int id; u32 used_prio; unsigned long backup_size; - bool res_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; @@ -166,6 +171,7 @@ struct vmw_resource { struct list_head lru_head; struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -606,6 +612,9 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + /* VM operations */ + struct vm_operations_struct vm_ops; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) 
\ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33533d126277..319c1ca35663 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index ..8d154f90bdc0 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/****** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAG
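Whichever tracking method is chosen, the bookkeeping behind it is the same: a per-object bitmap of dirty pages plus a narrowed [start, end) window so that scans and flushes only touch the dirty subrange. The standalone example below is a deliberately simplified illustration of that idea and not the driver code; the real struct vmw_bo_dirty also records the tracking method and a reference count.

#include <stdio.h>
#include <string.h>

#define NUM_PAGES 64

/* Simplified dirty-tracking state: bitmap plus a narrowed [start, end). */
struct dirty_state {
	unsigned char bitmap[NUM_PAGES];
	unsigned int start;	/* first dirty page */
	unsigned int end;	/* last dirty page + 1 */
};

static void dirty_init(struct dirty_state *d)
{
	memset(d->bitmap, 0, sizeof(d->bitmap));
	d->start = NUM_PAGES;	/* empty window: start > end */
	d->end = 0;
}

/* Called when a write fault, or a set pte dirty bit, hits @page. */
static void dirty_mark(struct dirty_state *d, unsigned int page)
{
	d->bitmap[page] = 1;
	if (page < d->start)
		d->start = page;
	if (page + 1 > d->end)
		d->end = page + 1;
}

/* Flush only the touched subrange, then reset the window. */
static void dirty_flush(struct dirty_state *d)
{
	unsigned int i;

	for (i = d->start; i < d->end; i++)
		if (d->bitmap[i]) {
			printf("flush page %u\n", i);
			d->bitmap[i] = 0;
		}
	d->start = NUM_PAGES;
	d->end = 0;
}

int main(void)
{
	struct dirty_state d;

	dirty_init(&d);
	dirty_mark(&d, 3);
	dirty_mark(&d, 17);
	dirty_flush(&d);	/* only pages 3..17 are scanned */
	return 0;
}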
[PATCH v6 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources
From: Thomas Hellstrom Similar to write-coherent resources, make sure that from the user-space point of view, GPU rendered contents is automatically available for reading by the CPU. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c| 73 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 103 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c| 3 +- 5 files changed, 177 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 9b347923196f..dae3a39bf402 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, +bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); void vmw_bo_dirty_clear_res(struct vmw_resource *res); void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 8d154f90bdc0..730c51e397dd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) } } - /** * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty * tracking structure @@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) vmw_bo_dirty_scan_mkwrite(vbo); } +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. 
+ */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + apply_as_wrprotect(mapping, start + offset, end - start); + apply_as_clean(mapping, start + offset, end - start, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vbo, start, end); + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, + (loff_t) (end - start) << PAGE_SHIFT); +} + /** * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object * @vbo: The buffer object @@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru
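[Editorial note, not part of the posted patch] To make the page-range arithmetic in vmw_bo_dirty_unmap() easier to follow, here is a minimal sketch of a caller. The wrapper function and its use case are illustrative only; the callers pass buffer-object-relative page numbers and the helper adds the drm_vma offset before shifting to bytes.

/*
 * Illustrative only: drop the CPU mappings of pages [start, end) of a
 * buffer object so that the next CPU access faults and can be made
 * coherent with GPU-written contents.
 */
static void example_invalidate_bo_range(struct vmw_buffer_object *vbo,
					pgoff_t start, pgoff_t end)
{
	/*
	 * Page offset of the BO within bdev->dev_mapping; the helper adds
	 * this before shifting by PAGE_SHIFT, so callers pass BO-relative
	 * page numbers.
	 */
	unsigned long offset = drm_vma_node_start(&vbo->base.vma_node);

	pr_debug("dropping ptes for bytes [%lx, %lx)\n",
		 (offset + start) << PAGE_SHIFT,
		 (offset + end) << PAGE_SHIFT);

	/* Harvests dirty bits first (pagetable scan method only), then
	 * calls unmap_shared_mapping_range() on the byte range above. */
	vmw_bo_dirty_unmap(vbo, start, end);
}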
[PATCH v6 5/9] drm/ttm: TTM fault handler helpers
From: Thomas Hellstrom With the vmwgfx dirty tracking, the default TTM fault handler is not completely sufficient (vmwgfx need to modify the vma->vm_flags member, and also needs to restrict the number of prefaults). We also want to replicate the new ttm_bo_vm_reserve() functionality So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved() and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other drivers to use. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: "Christian König" #v1 --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 175 +++- include/drm/ttm/ttm_bo_api.h| 10 ++ 2 files changed, 113 insertions(+), 72 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 196e13a0adad..2d9862fcf6fd 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include #include -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,31 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + *0 on success and the bo was reserved. + *VM_FAULT_RETRY if blocking wait. + *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, +struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - - /* -* Work around locking order reversal in fault / nopfn -* between mmap_sem and bo_reserve: Perform a trylock operation -* for reserve, and if it fails, retry the fault after waiting -* for the buffer to become unreserved. -*/ if (unlikely(!reservation_object_trylock(bo->resv))) { if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) { if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) { @@ -151,14 +148,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. 
The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. + * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct p
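[Editorial note, not part of the posted patch] A sketch of the kind of fault handler a driver can now assemble from the two helpers introduced above. It is not taken from the patch: the prefault count here is a hypothetical per-driver constant (the series turns the old local TTM_BO_VM_NUM_PREFAULT define in ttm_bo_vm.c into a parameter), and error handling is condensed.

#define MY_DRIVER_NUM_PREFAULT 16	/* hypothetical per-driver choice */

static vm_fault_t my_driver_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	vm_fault_t ret;

	/* Trylock the reservation, or drop the mmap_sem and ask for a retry. */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* Insert PTEs using the vma's page protection. */
	ret = ttm_bo_vm_fault_reserved(vmf, vma->vm_page_prot,
				       MY_DRIVER_NUM_PREFAULT);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;	/* mmap_sem already dropped for the retry */

	reservation_object_unlock(bo->resv);

	return ret;
}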
[PATCH v6 0/9] Emulated coherent graphics memory
Planning to merge this through the drm/vmwgfx tree soon, so if there are any objections, please speak up. Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation has signaled. Paravirtual drivers that otherwise require explicit synchronization needs to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. This is a first attempt to do that for the vmwgfx driver. The mm patches has been out for RFC. I think I have addressed all the feedback I got, except a possible softdirty breakage. But although the dirty-tracking and softdirty may write-protect PTEs both care about, that shouldn't really cause any operation interference. In particular since we use the hardware dirty PTE bits and softdirty uses other PTE bits. For the TTM changes they are hopefully in line with the long-term strategy of making helpers out of what's left of TTM. The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%. Changes v1-v2: - Addressed a number of typos and formatting issues. - Added a usage warning for apply_to_pfn_range() and apply_to_page_range() - Re-evaluated the decision to use apply_to_pfn_range() rather than modifying the pagewalk.c. It still looks like generically handling the transparent huge page cases requires the mmap_sem to be held at least in read mode, so sticking with apply_to_pfn_range() for now. - The TTM page-fault helper vma copy argument was scratched in favour of a pageprot_t argument. Changes v3: - Adapted to upstream API changes. Changes v4: - Adapted to upstream mmu_notifier changes. (Jerome?) - Fixed a couple of warnings on 32-bit x86 - Fixed image offset computation on multisample images. Changes v5: - Updated usage warning in patch 3/9 after review comments from Nadav Amit. Changes v6: - Updated exports of new functionality in patch 3/9 to EXPORT_SYMBOL_GPL after review comments from Christoph Hellwig. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: "Christian König" Cc: linux...@kvack.org ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v6 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks
From: Thomas Hellstrom Add the callbacks necessary to implement emulated coherent memory for surfaces. Add a flag to the gb_surface_create ioctl to indicate that surface memory should be coherent. Also bump the drm minor version to signal the availability of coherent surfaces. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- .../device_include/svga3d_surfacedefs.h | 233 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395 +- include/uapi/drm/vmwgfx_drm.h | 4 +- 4 files changed, 629 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. 
+ */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->block_size.width) * + desc->bytes_per_block * num_samples; + if (!mip->row_stride) + goto invalid_di
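[Editorial note, not part of the posted patch] A quick sanity check on the layout math above, using made-up numbers: an uncompressed 4-byte-per-pixel format (1x1 blocks), single-sampled, base level 256 pixels wide, three mip levels. None of these values come from the patch.

/* Hypothetical surface parameters, for illustration only. */
static void example_surface_math(void)
{
	u32 block_width = 1, bytes_per_block = 4, num_samples = 1;
	u32 base_width = 256, num_mip_levels = 3;

	/* Base-level block-row stride: DIV_ROUND_UP(256, 1) * 4 * 1 = 1024 bytes. */
	size_t row_stride = __KERNEL_DIV_ROUND_UP(base_width, block_width) *
			    bytes_per_block * num_samples;

	/* Subresource index of layer 2, mip 1: 3 * 2 + 1 = 7. */
	u32 sub_resource = num_mip_levels * 2 + 1;

	(void)row_stride;
	(void)sub_resource;
}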
[PATCH v6 2/9] mm: Add an apply_to_pfn_range interface
From: Thomas Hellstrom This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 mm/memory.c| 135 ++--- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, +
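[Editorial note, not part of the posted patch] To show how the new interface is meant to be consumed (within mm only, since apply_to_pfn_range() is deliberately not exported), here is a sketch of a closure deriving from struct pfn_range_apply. The callback and its purpose are hypothetical; patch 3/9 contains the real users.

/* Hypothetical example: count present PTEs in a range without
 * allocating missing page-table levels (closure->alloc == 0). */
struct count_closure {
	struct pfn_range_apply base;
	unsigned long present;
};

static int count_pte(pte_t *pte, pgtable_t token, unsigned long addr,
		     struct pfn_range_apply *closure)
{
	struct count_closure *c =
		container_of(closure, struct count_closure, base);

	if (pte_present(*pte))
		c->present++;

	return 0;
}

static unsigned long count_present_ptes(struct mm_struct *mm,
					unsigned long addr,
					unsigned long size)
{
	struct count_closure c = {
		.base = { .mm = mm, .ptefn = count_pte, .alloc = 0 },
		.present = 0,
	};

	/* With .alloc == 0 this cannot fail due to page-table allocation. */
	if (apply_to_pfn_range(&c.base, addr, size))
		return 0;

	return c.present;
}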
[PATCH v6 3/9] mm: Add write-protect and clean utilities for address space ranges
From: Thomas Hellstrom Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. The added file "as_dirty_helpers.c" is initially listed as maintained by VMware under our DRM driver. If somebody would like it elsewhere, that's of course no problem. Notable changes since RFC: - Added comments to help avoid the usage of these function for VMAs it's not intended for. We also do advisory checks on the vm_flags and warn on illegal usage. - Perform the pte modifications the same way softdirty does. - Add mmu_notifier range invalidation calls. - Add a config option so that this code is not unconditionally included. - Tell the mmu_gather code about pending tlb flushes. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- MAINTAINERS | 1 + include/linux/mm.h| 9 +- mm/Kconfig| 3 + mm/Makefile | 1 + mm/as_dirty_helpers.c | 300 ++ 5 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 mm/as_dirty_helpers.c diff --git a/MAINTAINERS b/MAINTAINERS index 7a2f487ea49a..a55d4ef91b0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5179,6 +5179,7 @@ T:git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d06ce2a64af..a0bc2a82917e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2685,7 +2685,14 @@ struct pfn_range_apply { }; extern int apply_to_pfn_range(struct pfn_range_apply *closure, unsigned long address, unsigned long size); - +unsigned long apply_as_wrprotect(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr, +pgoff_t bitmap_pgoff, +unsigned long *bitmap, +pgoff_t *start, +pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS +bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index ..f600e31534fb --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include 
+#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero.
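[Editorial note, not part of the posted patch] A condensed sketch of how the two exported helpers are meant to be combined by a driver such as vmwgfx (compare vmw_bo_dirty_pre_unmap() in patch 8/9). The wrapper below and its locals are illustrative, not part of the patch.

/*
 * Illustrative only: write-protect an nr-page window of a device
 * address space and harvest the pages the CPU has dirtied so far.
 * @offset is the first page of the window within @mapping; the bitmap
 * is indexed relative to @offset, and @start / @end are narrowed to
 * the dirty subrange in bitmap coordinates.
 */
static void example_harvest_dirty(struct address_space *mapping,
				  pgoff_t offset, pgoff_t nr,
				  unsigned long *bitmap)
{
	pgoff_t start = nr, end = 0;

	/* Make further CPU writes trigger page_mkwrite() / pfn_mkwrite(). */
	apply_as_wrprotect(mapping, offset, nr);

	/* Clear hardware dirty bits and record dirty pages in @bitmap. */
	apply_as_clean(mapping, offset, nr, offset, bitmap, &start, &end);
}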
[PATCH v6 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem
From: Thomas Hellstrom Driver fault callbacks are allowed to drop the mmap_sem when expecting long hardware waits to avoid blocking other mm users. Allow the mkwrite callbacks to do the same by returning early on VM_FAULT_RETRY. In particular we want to be able to drop the mmap_sem when waiting for a reservation object lock on a GPU buffer object. These locks may be held while waiting for the GPU. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell --- mm/memory.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ddf20bd0c317..168f546af1ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) return ret; return finish_mkwrite_fault(vmf); } @@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } @@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
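[Editorial note, not part of the posted patch] For context, the pattern this change enables in a driver's mkwrite callback is sketched below, using the ttm_bo_vm_reserve() helper added later in the series. The handler itself is illustrative, not the driver's actual code.

static vm_fault_t my_driver_pfn_mkwrite(struct vm_fault *vmf)
{
	struct ttm_buffer_object *bo = vmf->vma->vm_private_data;
	vm_fault_t ret;

	/*
	 * May drop the mmap_sem and return VM_FAULT_RETRY; with this
	 * patch the core mm passes that straight back to the fault
	 * handler instead of treating it as an error.
	 */
	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	/* ... mark the written page or range dirty here ... */

	reservation_object_unlock(bo->resv);

	return 0;
}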
[PATCH v7 0/9] Emulated coherent graphics memory
Planning to merge this through the drm/vmwgfx tree soon, so if there are any objections, please speak up. Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation has signaled. Paravirtual drivers that otherwise require explicit synchronization needs to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. This is a first attempt to do that for the vmwgfx driver. The mm patches has been out for RFC. I think I have addressed all the feedback I got, except a possible softdirty breakage. But although the dirty-tracking and softdirty may write-protect PTEs both care about, that shouldn't really cause any operation interference. In particular since we use the hardware dirty PTE bits and softdirty uses other PTE bits. For the TTM changes they are hopefully in line with the long-term strategy of making helpers out of what's left of TTM. The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%. Changes v1-v2: - Addressed a number of typos and formatting issues. - Added a usage warning for apply_to_pfn_range() and apply_to_page_range() - Re-evaluated the decision to use apply_to_pfn_range() rather than modifying the pagewalk.c. It still looks like generically handling the transparent huge page cases requires the mmap_sem to be held at least in read mode, so sticking with apply_to_pfn_range() for now. - The TTM page-fault helper vma copy argument was scratched in favour of a pageprot_t argument. Changes v3: - Adapted to upstream API changes. Changes v4: - Adapted to upstream mmu_notifier changes. (Jerome?) - Fixed a couple of warnings on 32-bit x86 - Fixed image offset computation on multisample images. Changes v5: - Updated usage warning in patch 3/9 after review comments from Nadav Amit. Changes v6: - Updated exports of new functionality in patch 3/9 to EXPORT_SYMBOL_GPL after review comments from Christoph Hellwig. Changes v7: - Re-added removed comment in ttm_bo_vm.c (Review by Hillf Danton) - Fixed an error path regression in ttm_bo_vm.c Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: "Christian König" Cc: linux...@kvack.org ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v7 9/9] drm/vmwgfx: Add surface dirty-tracking callbacks
From: Thomas Hellstrom Add the callbacks necessary to implement emulated coherent memory for surfaces. Add a flag to the gb_surface_create ioctl to indicate that surface memory should be coherent. Also bump the drm minor version to signal the availability of coherent surfaces. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- .../device_include/svga3d_surfacedefs.h | 233 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395 +- include/uapi/drm/vmwgfx_drm.h | 4 +- 4 files changed, 629 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. + * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. 
+ */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, + desc->block_size.width) * + desc->bytes_per_block * num_samples; + if (!mip->row_stride) + goto invalid_di
[PATCH v7 3/9] mm: Add write-protect and clean utilities for address space ranges
From: Thomas Hellstrom Add two utilities to a) write-protect and b) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. The added file "as_dirty_helpers.c" is initially listed as maintained by VMware under our DRM driver. If somebody would like it elsewhere, that's of course no problem. Notable changes since RFC: - Added comments to help avoid the usage of these function for VMAs it's not intended for. We also do advisory checks on the vm_flags and warn on illegal usage. - Perform the pte modifications the same way softdirty does. - Add mmu_notifier range invalidation calls. - Add a config option so that this code is not unconditionally included. - Tell the mmu_gather code about pending tlb flushes. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- MAINTAINERS | 1 + include/linux/mm.h| 9 +- mm/Kconfig| 3 + mm/Makefile | 1 + mm/as_dirty_helpers.c | 300 ++ 5 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 mm/as_dirty_helpers.c diff --git a/MAINTAINERS b/MAINTAINERS index 7a2f487ea49a..a55d4ef91b0b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5179,6 +5179,7 @@ T:git git://people.freedesktop.org/~thomash/linux S: Supported F: drivers/gpu/drm/vmwgfx/ F: include/uapi/drm/vmwgfx_drm.h +F: mm/as_dirty_helpers.c DRM DRIVERS M: David Airlie diff --git a/include/linux/mm.h b/include/linux/mm.h index 3d06ce2a64af..a0bc2a82917e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2685,7 +2685,14 @@ struct pfn_range_apply { }; extern int apply_to_pfn_range(struct pfn_range_apply *closure, unsigned long address, unsigned long size); - +unsigned long apply_as_wrprotect(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr); +unsigned long apply_as_clean(struct address_space *mapping, +pgoff_t first_index, pgoff_t nr, +pgoff_t bitmap_pgoff, +unsigned long *bitmap, +pgoff_t *start, +pgoff_t *end); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); diff --git a/mm/Kconfig b/mm/Kconfig index f0c76ba47695..5006d0e6a5c7 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -765,4 +765,7 @@ config GUP_BENCHMARK config ARCH_HAS_PTE_SPECIAL bool +config AS_DIRTY_HELPERS +bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index ac5e5ba78874..f5d412bbc2f7 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -104,3 +104,4 @@ obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_HMM) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_AS_DIRTY_HELPERS) += as_dirty_helpers.o diff --git a/mm/as_dirty_helpers.c b/mm/as_dirty_helpers.c new file mode 100644 index ..f600e31534fb --- /dev/null +++ b/mm/as_dirty_helpers.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +#include 
+#include +#include +#include +#include +#include +#include + +/** + * struct apply_as - Closure structure for apply_as_range + * @base: struct pfn_range_apply we derive from + * @start: Address of first modified pte + * @end: Address of last modified pte + 1 + * @total: Total number of modified ptes + * @vma: Pointer to the struct vm_area_struct we're currently operating on + */ +struct apply_as { + struct pfn_range_apply base; + unsigned long start; + unsigned long end; + unsigned long total; + struct vm_area_struct *vma; +}; + +/** + * apply_pt_wrprotect - Leaf pte callback to write-protect a pte + * @pte: Pointer to the pte + * @token: Page table token, see apply_to_pfn_range() + * @addr: The virtual page address + * @closure: Pointer to a struct pfn_range_apply embedded in a + * struct apply_as + * + * The function write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + * + * Return: Always zero.
[PATCH v7 8/9] drm/vmwgfx: Implement an infrastructure for read-coherent resources
From: Thomas Hellstrom Similar to write-coherent resources, make sure that from the user-space point of view, GPU rendered contents is automatically available for reading by the CPU. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c| 73 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 103 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c| 3 +- 5 files changed, 177 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 9b347923196f..dae3a39bf402 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -689,7 +689,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, +bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -733,6 +734,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1427,6 +1430,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); void vmw_bo_dirty_clear_res(struct vmw_resource *res); void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 8d154f90bdc0..730c51e397dd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -153,7 +153,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) } } - /** * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty * tracking structure @@ -171,6 +170,51 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) vmw_bo_dirty_scan_mkwrite(vbo); } +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. 
+ */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + apply_as_wrprotect(mapping, start + offset, end - start); + apply_as_clean(mapping, start + offset, end - start, offset, + &dirty->bitmap[0], &dirty->start, &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vbo, start, end); + unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT, + (loff_t) (end - start) << PAGE_SHIFT); +} + /** * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object * @vbo: The buffer object @@ -389,21 +433,40 @@ vm_fault_t vmw_bo_vm_fault(stru
[PATCH v7 4/9] drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct
From: Thomas Hellstrom Add a pointer to the struct vm_operations_struct in the bo_device, and assign that pointer to the default value currently used. The driver can then optionally modify that pointer and the new value can be used for each new vma created. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c| 1 + drivers/gpu/drm/ttm/ttm_bo_vm.c | 6 +++--- include/drm/ttm/ttm_bo_driver.h | 6 ++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index c7de667d482a..6953dd264172 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -1739,6 +1739,7 @@ int ttm_bo_device_init(struct ttm_bo_device *bdev, mutex_lock(&ttm_global_mutex); list_add_tail(&bdev->device_list, &glob->device_list); mutex_unlock(&ttm_global_mutex); + bdev->vm_ops = &ttm_bo_vm_ops; return 0; out_no_sys: diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 6dacff49c1cc..196e13a0adad 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -395,7 +395,7 @@ static int ttm_bo_vm_access(struct vm_area_struct *vma, unsigned long addr, return ret; } -static const struct vm_operations_struct ttm_bo_vm_ops = { +const struct vm_operations_struct ttm_bo_vm_ops = { .fault = ttm_bo_vm_fault, .open = ttm_bo_vm_open, .close = ttm_bo_vm_close, @@ -448,7 +448,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma, if (unlikely(ret != 0)) goto out_unref; - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bdev->vm_ops; /* * Note: We're transferring the bo reference to @@ -480,7 +480,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo) ttm_bo_get(bo); - vma->vm_ops = &ttm_bo_vm_ops; + vma->vm_ops = bo->bdev->vm_ops; vma->vm_private_data = bo; vma->vm_flags |= VM_MIXEDMAP; vma->vm_flags |= VM_IO | VM_DONTEXPAND; diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index c9b8ba492f24..a2d810a2504d 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -442,6 +442,9 @@ extern struct ttm_bo_global { * @driver: Pointer to a struct ttm_bo_driver struct setup by the driver. * @man: An array of mem_type_managers. * @vma_manager: Address space manager + * @vm_ops: Pointer to the struct vm_operations_struct used for this + * device's VM operations. The driver may override this before the first + * mmap() call. * lru_lock: Spinlock that protects the buffer+device lru lists and * ddestroy lists. * @dev_mapping: A pointer to the struct address_space representing the @@ -460,6 +463,7 @@ struct ttm_bo_device { struct ttm_bo_global *glob; struct ttm_bo_driver *driver; struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES]; + const struct vm_operations_struct *vm_ops; /* * Protected by internal locks. @@ -488,6 +492,8 @@ struct ttm_bo_device { bool no_retry; }; +extern const struct vm_operations_struct ttm_bo_vm_ops; + /** * struct ttm_lru_bulk_move_pos * -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
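[Editorial note, not part of the posted patch] A sketch of the intended use from a driver's point of view: copy the now-exported default table, override the callbacks that need driver behaviour, and install the copy before the first mmap(). The my_* names are placeholders; patch 6/9 does this for vmwgfx with vmw_bo_vm_fault() / vmw_bo_vm_mkwrite().

/* Placeholder driver handlers, implemented elsewhere in the driver. */
extern vm_fault_t my_bo_vm_fault(struct vm_fault *vmf);
extern vm_fault_t my_bo_vm_mkwrite(struct vm_fault *vmf);

static struct vm_operations_struct my_vm_ops;	/* driver-private copy */

static void my_driver_init_vm_ops(struct ttm_bo_device *bdev)
{
	my_vm_ops = ttm_bo_vm_ops;		/* start from the TTM defaults */
	my_vm_ops.fault = my_bo_vm_fault;
	my_vm_ops.page_mkwrite = my_bo_vm_mkwrite;
	my_vm_ops.pfn_mkwrite = my_bo_vm_mkwrite;

	/* Must happen before the first mmap() call on this device. */
	bdev->vm_ops = &my_vm_ops;
}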
[PATCH v7 5/9] drm/ttm: TTM fault handler helpers
From: Thomas Hellstrom With the vmwgfx dirty tracking, the default TTM fault handler is not completely sufficient (vmwgfx need to modify the vma->vm_flags member, and also needs to restrict the number of prefaults). We also want to replicate the new ttm_bo_vm_reserve() functionality So start turning the TTM vm code into helpers: ttm_bo_vm_fault_reserved() and ttm_bo_vm_reserve(), and provide a default TTM fault handler for other drivers to use. Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: "Christian König" #v1 --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 163 include/drm/ttm/ttm_bo_api.h| 10 ++ 2 files changed, 111 insertions(+), 62 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 196e13a0adad..0c4576cbafcf 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -42,8 +42,6 @@ #include #include -#define TTM_BO_VM_NUM_PREFAULT 16 - static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo, struct vm_fault *vmf) { @@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo, + page_offset; } -static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) +/** + * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback + * @bo: The buffer object + * @vmf: The fault structure handed to the callback + * + * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped + * during long waits, and after the wait the callback will be restarted. This + * is to allow other threads using the same virtual memory space concurrent + * access to map(), unmap() completely unrelated buffer objects. TTM buffer + * object reservations sometimes wait for GPU and should therefore be + * considered long waits. This function reserves the buffer object interruptibly + * taking this into account. Starvation is avoided by the vm system not + * allowing too many repeated restarts. + * This function is intended to be used in customized fault() and _mkwrite() + * handlers. + * + * Return: + *0 on success and the bo was reserved. + *VM_FAULT_RETRY if blocking wait. + *VM_FAULT_NOPAGE if blocking wait and retrying was not allowed. + */ +vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, +struct vm_fault *vmf) { - struct vm_area_struct *vma = vmf->vma; - struct ttm_buffer_object *bo = (struct ttm_buffer_object *) - vma->vm_private_data; - struct ttm_bo_device *bdev = bo->bdev; - unsigned long page_offset; - unsigned long page_last; - unsigned long pfn; - struct ttm_tt *ttm = NULL; - struct page *page; - int err; - int i; - vm_fault_t ret = VM_FAULT_NOPAGE; - unsigned long address = vmf->address; - struct ttm_mem_type_manager *man = - &bdev->man[bo->mem.mem_type]; - struct vm_area_struct cvma; - /* * Work around locking order reversal in fault / nopfn * between mmap_sem and bo_reserve: Perform a trylock operation @@ -151,14 +154,55 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) return VM_FAULT_NOPAGE; } + return 0; +} +EXPORT_SYMBOL(ttm_bo_vm_reserve); + +/** + * ttm_bo_vm_fault_reserved - TTM fault helper + * @vmf: The struct vm_fault given as argument to the fault callback + * @prot: The page protection to be used for this memory area. + * @num_prefault: Maximum number of prefault pages. The caller may want to + * specify this based on madvice settings and the size of the GPU object + * backed by the memory. 
+ * + * This function inserts one or more page table entries pointing to the + * memory backing the buffer object, and then returns a return code + * instructing the caller to retry the page access. + * + * Return: + * VM_FAULT_NOPAGE on success or pending signal + * VM_FAULT_SIGBUS on unspecified error + * VM_FAULT_OOM on out-of-memory + * VM_FAULT_RETRY if retryable wait + */ +vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, + pgprot_t prot, + pgoff_t num_prefault) +{ + struct vm_area_struct *vma = vmf->vma; + struct vm_area_struct cvma = *vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct ttm_bo_device *bdev = bo->bdev; + unsigned long page_offset; + unsigned long page_last; + unsigned long pfn; + struct ttm_tt *ttm = NULL; + struct page *page; + int err; + pgoff_t i; + vm_fault_t ret = VM_FAULT_NOPAGE; + unsigned long address = vmf->address; + struct ttm_mem_type_manager *man = + &bdev->man[bo->mem.mem_type]; + /* * Refuse to fault imported pages. This should be handled * (if at all
[PATCH v7 1/9] mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem
From: Thomas Hellstrom Driver fault callbacks are allowed to drop the mmap_sem when expecting long hardware waits to avoid blocking other mm users. Allow the mkwrite callbacks to do the same by returning early on VM_FAULT_RETRY. In particular we want to be able to drop the mmap_sem when waiting for a reservation object lock on a GPU buffer object. These locks may be held while waiting for the GPU. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell --- mm/memory.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index ddf20bd0c317..168f546af1ad 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2238,7 +2238,7 @@ static vm_fault_t do_page_mkwrite(struct vm_fault *vmf) ret = vmf->vma->vm_ops->page_mkwrite(vmf); /* Restore original flags so that caller is not surprised */ vmf->flags = old_flags; - if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) + if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY))) return ret; if (unlikely(!(ret & VM_FAULT_LOCKED))) { lock_page(page); @@ -2515,7 +2515,7 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); vmf->flags |= FAULT_FLAG_MKWRITE; ret = vma->vm_ops->pfn_mkwrite(vmf); - if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE)) + if (ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY)) return ret; return finish_mkwrite_fault(vmf); } @@ -2536,7 +2536,8 @@ static vm_fault_t wp_page_shared(struct vm_fault *vmf) pte_unmap_unlock(vmf->pte, vmf->ptl); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || (tmp & - (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } @@ -3601,7 +3602,8 @@ static vm_fault_t do_shared_fault(struct vm_fault *vmf) unlock_page(vmf->page); tmp = do_page_mkwrite(vmf); if (unlikely(!tmp || - (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE { + (tmp & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | + VM_FAULT_RETRY { put_page(vmf->page); return tmp; } -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v7 7/9] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
From: Thomas Hellstrom With emulated coherent memory we need to be able to quickly look up a resource from the MOB offset. Instead of traversing a linked list with O(n) worst case, use an RBtree with O(log n) worst case complexity. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 10 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +--- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 90ca866640fe..e8bc7a7ac031 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -464,6 +464,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -480,6 +481,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -515,8 +517,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; - - INIT_LIST_HEAD(&vmw_bo->res_list); + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 27c259395790..9b347923196f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -89,7 +89,7 @@ struct vmw_fpriv { /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object - * @res_list: List of resources using this buffer object as a backing MOB + * @res_tree: RB tree of resources using this buffer object as a backing MOB * @pin_count: pin depth * @dx_query_ctx: DX context if this buffer object is used as a DX query MOB * @map: Kmap object for semi-persistent mappings @@ -98,7 +98,7 @@ struct vmw_fpriv { */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; @@ -146,8 +146,8 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. 
@@ -168,8 +168,8 @@ struct vmw_resource { unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); @@ -742,7 +742,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, */ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) { - return !list_empty(&res->mob_head); + return !RB_EMPTY_NODE(&res->mob_node); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index da9afa83fb4f..b84dd5953886 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -41,11 +41,24 @@ void vmw_resource_mob_attach(struct vmw_resource *res) { struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; lockdep_assert_held(&backup->base.resv->lock.base); res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->func->prio; - list_add_tail(&res->mob_head, &backup->res_list); + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + new = (res->backup_offset < this->backup_offset) ? + &((*new)->rb_left) : &((*new)->rb_right); + } + + rb_link_node(&res->mob_node, parent, new); +
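[Editorial note, not part of the posted patch] To illustrate the O(log n) claim, here is the kind of lower-bound lookup the driver needs when going from a MOB offset to the resources using it. The tree is kept sorted on backup_offset by vmw_resource_mob_attach() above; the function below is illustrative and not the patch's own helper.

/*
 * Illustrative only: find the resource with the lowest backup_offset
 * that is >= @offset, or NULL if none.
 */
static struct vmw_resource *
example_res_lower_bound(struct vmw_buffer_object *vbo, unsigned long offset)
{
	struct rb_node *node = vbo->res_tree.rb_node;
	struct vmw_resource *found = NULL;

	while (node) {
		struct vmw_resource *res =
			container_of(node, struct vmw_resource, mob_node);

		if (res->backup_offset >= offset) {
			found = res;		/* candidate; try to go lower */
			node = node->rb_left;
		} else {
			node = node->rb_right;
		}
	}

	return found;
}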
[PATCH v7 6/9] drm/vmwgfx: Implement an infrastructure for write-coherent resources
Emulate coherency by tracking vm accesses. * @backup: The backup buffer if any. Protected by resource reserved. * @backup_offset: Offset into the backup buffer if any. Protected by resource * reserved. Note that only a few resource types can have a @backup_offset @@ -151,14 +154,16 @@ struct vmw_res_func; * @hw_destroy: Callback to destroy the resource on the device, as part of * resource destruction. */ +struct vmw_resource_dirty; struct vmw_resource { struct kref kref; struct vmw_private *dev_priv; int id; u32 used_prio; unsigned long backup_size; - bool res_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; @@ -166,6 +171,7 @@ struct vmw_resource { struct list_head lru_head; struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -606,6 +612,9 @@ struct vmw_private { /* Validation memory reservation */ struct vmw_validation_mem vvm; + + /* VM operations */ + struct vm_operations_struct vm_ops; }; static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res) @@ -722,6 +731,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1410,6 +1421,15 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) 
\ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * Inline helper functions */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index 33533d126277..319c1ca35663 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index ..8d154f90bdc0 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,409 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/****** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAG
[PATCH v7 2/9] mm: Add an apply_to_pfn_range interface
From: Thomas Hellstrom This is basically apply_to_page_range with added functionality: Allocating missing parts of the page table becomes optional, which means that the function can be guaranteed not to error if allocation is disabled. Also passing of the closure struct and callback function becomes different and more in line with how things are done elsewhere. Finally we keep apply_to_page_range as a wrapper around apply_to_pfn_range The reason for not using the page-walk code is that we want to perform the page-walk on vmas pointing to an address space without requiring the mmap_sem to be held rather than on vmas belonging to a process with the mmap_sem held. Notable changes since RFC: Don't export apply_to_pfn range. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Souptick Joarder Cc: "Jérôme Glisse" Cc: linux...@kvack.org Cc: linux-ker...@vger.kernel.org Signed-off-by: Thomas Hellstrom Reviewed-by: Ralph Campbell #v1 --- include/linux/mm.h | 10 mm/memory.c| 135 ++--- 2 files changed, 113 insertions(+), 32 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0e8834ac32b7..3d06ce2a64af 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2675,6 +2675,16 @@ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); +struct pfn_range_apply; +typedef int (*pter_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, +struct pfn_range_apply *closure); +struct pfn_range_apply { + struct mm_struct *mm; + pter_fn_t ptefn; + unsigned int alloc; +}; +extern int apply_to_pfn_range(struct pfn_range_apply *closure, + unsigned long address, unsigned long size); #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); diff --git a/mm/memory.c b/mm/memory.c index 168f546af1ad..462aa47f8878 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2032,18 +2032,17 @@ int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long } EXPORT_SYMBOL(vm_iomap_memory); -static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pte_range(struct pfn_range_apply *closure, pmd_t *pmd, + unsigned long addr, unsigned long end) { pte_t *pte; int err; pgtable_t token; spinlock_t *uninitialized_var(ptl); - pte = (mm == &init_mm) ? + pte = (closure->mm == &init_mm) ? 
pte_alloc_kernel(pmd, addr) : - pte_alloc_map_lock(mm, pmd, addr, &ptl); + pte_alloc_map_lock(closure->mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; @@ -2054,86 +2053,109 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd, token = pmd_pgtable(*pmd); do { - err = fn(pte++, token, addr, data); + err = closure->ptefn(pte++, token, addr, closure); if (err) break; } while (addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - if (mm != &init_mm) + if (closure->mm != &init_mm) pte_unmap_unlock(pte-1, ptl); return err; } -static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pmd_range(struct pfn_range_apply *closure, pud_t *pud, + unsigned long addr, unsigned long end) { pmd_t *pmd; unsigned long next; - int err; + int err = 0; BUG_ON(pud_huge(*pud)); - pmd = pmd_alloc(mm, pud, addr); + pmd = pmd_alloc(closure->mm, pud, addr); if (!pmd) return -ENOMEM; + do { next = pmd_addr_end(addr, end); - err = apply_to_pte_range(mm, pmd, addr, next, fn, data); + if (!closure->alloc && pmd_none_or_clear_bad(pmd)) + continue; + err = apply_to_pte_range(closure, pmd, addr, next); if (err) break; } while (pmd++, addr = next, addr != end); return err; } -static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, -unsigned long addr, unsigned long end, -pte_fn_t fn, void *data) +static int apply_to_pud_range(struct pfn_range_apply *closure, p4d_t *p4d, +
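A minimal usage sketch, not part of the patch, may make the closure convention clearer: the caller embeds struct pfn_range_apply in its own state, recovers that state in the pte callback via container_of(), and runs with alloc == 0 so the walk cannot fail with -ENOMEM. All names below are hypothetical, and production code would use the ptep_modify_prot_start()/commit() helpers plus a TLB flush instead of a bare set_pte_at().

struct my_wp_walk {
        struct pfn_range_apply base;
        unsigned long wp_count;
};

static int my_wp_pte(pte_t *pte, pgtable_t token, unsigned long addr,
                     struct pfn_range_apply *closure)
{
        struct my_wp_walk *walk = container_of(closure, struct my_wp_walk, base);

        if (pte_write(*pte)) {
                /* Illustration only; see the note above about modify-prot + TLB. */
                set_pte_at(closure->mm, addr, pte, pte_wrprotect(*pte));
                walk->wp_count++;
        }

        return 0;
}

static unsigned long my_wp_range(struct mm_struct *mm, unsigned long start,
                                 unsigned long size)
{
        struct my_wp_walk walk = {
                .base = { .mm = mm, .ptefn = my_wp_pte, .alloc = 0 },
        };

        /* With alloc == 0, missing page-table levels are simply skipped. */
        apply_to_pfn_range(&walk.base, start, size);

        return walk.wp_count;
}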
[PATCH 2/4] drm/vmwgfx: Add debug message for layout change ioctl
From: Deepak Rawat Add debug code to check user-space layout change request. Signed-off-by: Deepak Rawat Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 8 drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 8 2 files changed, 16 insertions(+) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 5971c0d47507..b4c8817dc267 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1424,6 +1424,14 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) \ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/** + * VMW_DEBUG_KMS - Debug output for kernel mode-setting + * + * This macro is for debugging vmwgfx mode-setting code. + */ +#define VMW_DEBUG_KMS(fmt, ...) \ + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) + /* Resource dirtying - vmwgfx_page_dirty.c */ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index b97bc8e5944b..7f9264a72e1d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -2347,6 +2347,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, if (!arg->num_outputs) { struct drm_rect def_rect = {0, 0, 800, 600}; + VMW_DEBUG_KMS("Default layout x1 = %d y1 = %d x2 = %d y2 = %d\n", + def_rect.x1, def_rect.y1, + def_rect.x2, def_rect.y2); vmw_du_update_layout(dev_priv, 1, &def_rect); return 0; } @@ -2367,6 +2370,7 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, drm_rects = (struct drm_rect *)rects; + VMW_DEBUG_KMS("Layout count = %u\n", arg->num_outputs); for (i = 0; i < arg->num_outputs; i++) { struct drm_vmw_rect curr_rect; @@ -2383,6 +2387,10 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, drm_rects[i].x2 = curr_rect.x + curr_rect.w; drm_rects[i].y2 = curr_rect.y + curr_rect.h; + VMW_DEBUG_KMS(" x1 = %d y1 = %d x2 = %d y2 = %d\n", + drm_rects[i].x1, drm_rects[i].y1, + drm_rects[i].x2, drm_rects[i].y2); + /* * Currently this check is limiting the topology within * mode_config->max (which actually is max texture size -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 1/4] drm/vmwgfx: Assign eviction priorities to resources
From: Thomas Hellstrom TTM provides a means to assign eviction priorities to buffer object. This means that all buffer objects with a lower priority will be evicted first on memory pressure. Use this to make sure surfaces and in particular non-dirty surfaces are evicted first. Evicting in particular shaders, cotables and contexts imply a significant performance hit on vmwgfx, so make sure these resources are evicted last. Some buffer objects are sub-allocated in user-space which means we can have many resources attached to a single buffer object or resource. In that case the buffer object is given the highest priority of the attached resources. Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c| 2 + drivers/gpu/drm/vmwgfx/vmwgfx_context.c | 4 ++ drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c | 13 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 72 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 56 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_shader.c| 8 ++- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 4 ++ 8 files changed, 141 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 5d5c2bce01f3..c0829d50eecc 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -510,6 +510,8 @@ int vmw_bo_init(struct vmw_private *dev_priv, acc_size = vmw_bo_acc_size(dev_priv, size, user); memset(vmw_bo, 0, sizeof(*vmw_bo)); + BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); + vmw_bo->base.priority = 3; INIT_LIST_HEAD(&vmw_bo->res_list); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c index 63f111068a44..a56c9d802382 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c @@ -88,6 +88,8 @@ static const struct vmw_res_func vmw_gb_context_func = { .res_type = vmw_res_context, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "guest backed contexts", .backup_placement = &vmw_mob_placement, .create = vmw_gb_context_create, @@ -100,6 +102,8 @@ static const struct vmw_res_func vmw_dx_context_func = { .res_type = vmw_res_dx_context, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "dx contexts", .backup_placement = &vmw_mob_placement, .create = vmw_dx_context_create, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c index b4f6e1217c9d..8c699cb2565b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c @@ -116,6 +116,8 @@ static const struct vmw_res_func vmw_cotable_func = { .res_type = vmw_res_cotable, .needs_backup = true, .may_evict = true, + .prio = 3, + .dirty_prio = 3, .type_name = "context guest backed object tables", .backup_placement = &vmw_mob_placement, .create = vmw_cotable_create, @@ -307,7 +309,7 @@ static int vmw_cotable_unbind(struct vmw_resource *res, struct ttm_buffer_object *bo = val_buf->bo; struct vmw_fence_obj *fence; - if (list_empty(&res->mob_head)) + if (!vmw_resource_mob_attached(res)) return 0; WARN_ON_ONCE(bo->mem.mem_type != VMW_PL_MOB); @@ -453,6 +455,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) goto out_wait; } + vmw_resource_mob_detach(res); res->backup = buf; res->backup_size = new_size; vcotbl->size_read_back = cur_size_read_back; @@ -467,12 +470,12 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) res->backup = 
old_buf; res->backup_size = old_size; vcotbl->size_read_back = old_size_read_back; + vmw_resource_mob_attach(res); goto out_wait; } + vmw_resource_mob_attach(res); /* Let go of the old mob. */ - list_del(&res->mob_head); - list_add_tail(&res->mob_head, &buf->res_list); vmw_bo_unreference(&old_buf); res->id = vcotbl->type; @@ -496,7 +499,7 @@ static int vmw_cotable_resize(struct vmw_resource *res, size_t new_size) * is called before bind() in the validation sequence is instead used for two * things. * 1) Unscrub the cotable if it is scrubbed and still attached to a backup - *buffer, that is, if @res->mob_head is non-empty. + *buffer. * 2) Resize the cotable if needed. */ static int vmw_cotable_create(struct vmw_resource *res) @@ -512,7 +515,7 @@ static int vmw_cotable_create(struct vmw_resource *res) new_size *= 2; if
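The last paragraph of the commit message, about sub-allocated buffer objects taking the highest priority of their attached resources, boils down to something like the sketch below. The helper is hypothetical; field names follow the driver structures used elsewhere in this series (res_list linking resources through mob_head, and used_prio holding the priority a resource was attached with).

static void vmw_bo_refresh_priority(struct vmw_buffer_object *vbo)
{
        struct vmw_resource *res;
        u32 prio = 0;

        /* The backing MOB inherits the highest priority of its users. */
        list_for_each_entry(res, &vbo->res_list, mob_head)
                prio = max(prio, res->used_prio);

        vbo->base.priority = prio;
}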
[PATCH 3/4] drm/vmwgfx: Use VMW_DEBUG_KMS for vmwgfx mode-setting user errors
From: Deepak Rawat For errors during layout change ioctl use VMW_DEBUG_KMS instead of DRM_ERROR. Signed-off-by: Deepak Rawat Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_kms.c | 15 ++- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 7f9264a72e1d..e7222fa2cfdf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1462,7 +1462,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, if (dev_priv->active_display_unit == vmw_du_screen_target && (drm_rect_width(&rects[i]) > dev_priv->stdu_max_width || drm_rect_height(&rects[i]) > dev_priv->stdu_max_height)) { - DRM_ERROR("Screen size not supported.\n"); + VMW_DEBUG_KMS("Screen size not supported.\n"); return -EINVAL; } @@ -1486,7 +1486,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, * limit on primary bounding box */ if (pixel_mem > dev_priv->prim_bb_mem) { - DRM_ERROR("Combined output size too large.\n"); + VMW_DEBUG_KMS("Combined output size too large.\n"); return -EINVAL; } @@ -1496,7 +1496,7 @@ static int vmw_kms_check_display_memory(struct drm_device *dev, bb_mem = (u64) bounding_box.x2 * bounding_box.y2 * 4; if (bb_mem > dev_priv->prim_bb_mem) { - DRM_ERROR("Topology is beyond supported limits.\n"); + VMW_DEBUG_KMS("Topology is beyond supported limits.\n"); return -EINVAL; } } @@ -1645,6 +1645,7 @@ static int vmw_kms_check_topology(struct drm_device *dev, struct vmw_connector_state *vmw_conn_state; if (!du->pref_active && new_crtc_state->enable) { + VMW_DEBUG_KMS("Enabling a disabled display unit\n"); ret = -EINVAL; goto clean; } @@ -1701,8 +1702,10 @@ vmw_kms_atomic_check_modeset(struct drm_device *dev, return ret; ret = vmw_kms_check_implicit(dev, state); - if (ret) + if (ret) { + VMW_DEBUG_KMS("Invalid implicit state\n"); return ret; + } if (!state->allow_modeset) return ret; @@ -2401,7 +2404,9 @@ int vmw_kms_update_layout_ioctl(struct drm_device *dev, void *data, if (drm_rects[i].x1 < 0 || drm_rects[i].y1 < 0 || drm_rects[i].x2 > mode_config->max_width || drm_rects[i].y2 > mode_config->max_height) { - DRM_ERROR("Invalid GUI layout.\n"); + VMW_DEBUG_KMS("Invalid layout %d %d %d %d\n", + drm_rects[i].x1, drm_rects[i].y1, + drm_rects[i].x2, drm_rects[i].y2); ret = -EINVAL; goto out_free; } -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 4/4] drm/vmwgfx: Kill unneeded legacy security features
From: Thomas Hellstrom At one point, the GPU command verifier and user-space handle manager couldn't properly protect GPU clients from accessing each other's data. Instead there was an elaborate mechanism to make sure only the active master's primary clients could render. The other clients were either put to sleep or even killed (if the master had exited). VRAM was evicted on master switch. With the advent of render-node functionality, we relaxed the VRAM eviction, but the other mechanisms stayed in place. Now that the GPU command verifier and ttm object manager properly isolates primary clients from different master realms we can remove the master switch related code and drop those legacy features. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/ttm_lock.c | 100 --- drivers/gpu/drm/vmwgfx/ttm_lock.h | 30 - drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 162 +--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 16 +-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 6 - 5 files changed, 3 insertions(+), 311 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/ttm_lock.c b/drivers/gpu/drm/vmwgfx/ttm_lock.c index 16b2083cb9d4..5971c72e6d10 100644 --- a/drivers/gpu/drm/vmwgfx/ttm_lock.c +++ b/drivers/gpu/drm/vmwgfx/ttm_lock.c @@ -29,7 +29,6 @@ * Authors: Thomas Hellstrom */ -#include #include #include #include @@ -49,8 +48,6 @@ void ttm_lock_init(struct ttm_lock *lock) init_waitqueue_head(&lock->queue); lock->rw = 0; lock->flags = 0; - lock->kill_takers = false; - lock->signal = SIGKILL; } void ttm_read_unlock(struct ttm_lock *lock) @@ -66,11 +63,6 @@ static bool __ttm_read_lock(struct ttm_lock *lock) bool locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw >= 0 && lock->flags == 0) { ++lock->rw; locked = true; @@ -98,11 +90,6 @@ static bool __ttm_read_trylock(struct ttm_lock *lock, bool *locked) *locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw >= 0 && lock->flags == 0) { ++lock->rw; block = false; @@ -147,11 +134,6 @@ static bool __ttm_write_lock(struct ttm_lock *lock) bool locked = false; spin_lock(&lock->lock); - if (unlikely(lock->kill_takers)) { - send_sig(lock->signal, current, 0); - spin_unlock(&lock->lock); - return false; - } if (lock->rw == 0 && ((lock->flags & ~TTM_WRITE_LOCK_PENDING) == 0)) { lock->rw = -1; lock->flags &= ~TTM_WRITE_LOCK_PENDING; @@ -182,88 +164,6 @@ int ttm_write_lock(struct ttm_lock *lock, bool interruptible) return ret; } -static int __ttm_vt_unlock(struct ttm_lock *lock) -{ - int ret = 0; - - spin_lock(&lock->lock); - if (unlikely(!(lock->flags & TTM_VT_LOCK))) - ret = -EINVAL; - lock->flags &= ~TTM_VT_LOCK; - wake_up_all(&lock->queue); - spin_unlock(&lock->lock); - - return ret; -} - -static void ttm_vt_lock_remove(struct ttm_base_object **p_base) -{ - struct ttm_base_object *base = *p_base; - struct ttm_lock *lock = container_of(base, struct ttm_lock, base); - int ret; - - *p_base = NULL; - ret = __ttm_vt_unlock(lock); - BUG_ON(ret != 0); -} - -static bool __ttm_vt_lock(struct ttm_lock *lock) -{ - bool locked = false; - - spin_lock(&lock->lock); - if (lock->rw == 0) { - lock->flags &= ~TTM_VT_LOCK_PENDING; - lock->flags |= TTM_VT_LOCK; - locked = true; - } else { - lock->flags |= TTM_VT_LOCK_PENDING; - } - spin_unlock(&lock->lock); - return locked; -} - -int ttm_vt_lock(struct 
ttm_lock *lock, - bool interruptible, - struct ttm_object_file *tfile) -{ - int ret = 0; - - if (interruptible) { - ret = wait_event_interruptible(lock->queue, - __ttm_vt_lock(lock)); - if (unlikely(ret != 0)) { - spin_lock(&lock->lock); - lock->flags &= ~TTM_VT_LOCK_PENDING; - wake_up_all(&lock->queue); - spin_unlock(&lock->lock); - return ret; - } - } else - wait_event(lock->queue, __ttm_vt_lock(lock)); - - /* -* Add a base-object, the destructor of which will -* make sure the lock is released i
[git pull] vmwgfx-fixes-5.2
Dave, Daniel,

A couple of fixes for vmwgfx. Two fixes for a DMA sg-list debug warning message. These are not cc'd stable since there is no evidence of actual breakage. One fix for the high-bandwidth backdoor port, which is cc'd stable due to upcoming hardware on which the code would otherwise break.

The following changes since commit 5ed7f4b5eca11c3c69e7c8b53e4321812bc1ee1e:

  drm/vmwgfx: integer underflow in vmw_cmd_dx_set_shader() leading to an invalid read (2019-05-21 10:23:10 +0200)

are available in the Git repository at:

  git://people.freedesktop.org/~thomash/linux vmwgfx-fixes-5.2

for you to fetch changes up to 39916897cd815a0ee07ba1f6820cf88a63e459fc:

  drm/vmwgfx: fix a warning due to missing dma_parms (2019-06-11 17:00:53 +0200)

Qian Cai (1):
      drm/vmwgfx: fix a warning due to missing dma_parms

Thomas Hellstrom (2):
      drm/vmwgfx: Use the backdoor port if the HB port is not available
      drm/vmwgfx: Honor the sg list segment size limitation

 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c        |   3 +
 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c        | 146 +++--
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c |  10 +-
 3 files changed, 125 insertions(+), 34 deletions(-)
[git pull] vmwgfx-next
Dave, Daniel - The coherent memory changes including mm changes. - Some vmwgfx debug fixes. - Removal of vmwgfx legacy security checks. The following changes since commit 561564bea3248293398dc32ec36da40fb71faed0: Merge tag 'omapdrm-5.3' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux into drm-next (2019-06-11 13:29:33 +0200) are available in the Git repository at: git://people.freedesktop.org/~thomash/linux/ vmwgfx-next for you to fetch changes up to 9bbfda544ed79e8e9abde27bfe2c85428d582e7b: drm/vmwgfx: Kill unneeded legacy security features (2019-06-18 15:22:48 +0200) Deepak Rawat (2): drm/vmwgfx: Add debug message for layout change ioctl drm/vmwgfx: Use VMW_DEBUG_KMS for vmwgfx mode-setting user errors Thomas Hellstrom (11): drm/vmwgfx: Assign eviction priorities to resources mm: Allow the [page|pfn]_mkwrite callbacks to drop the mmap_sem mm: Add an apply_to_pfn_range interface mm: Add write-protect and clean utilities for address space ranges drm/ttm: Allow the driver to provide the ttm struct vm_operations_struct drm/ttm: TTM fault handler helpers drm/vmwgfx: Implement an infrastructure for write-coherent resources drm/vmwgfx: Use an RBtree instead of linked list for MOB resources drm/vmwgfx: Implement an infrastructure for read-coherent resources drm/vmwgfx: Add surface dirty-tracking callbacks drm/vmwgfx: Kill unneeded legacy security features MAINTAINERS| 1 + drivers/gpu/drm/ttm/ttm_bo.c | 1 + drivers/gpu/drm/ttm/ttm_bo_vm.c| 169 +--- drivers/gpu/drm/vmwgfx/Kconfig | 1 + drivers/gpu/drm/vmwgfx/Makefile| 2 +- .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 233 +- drivers/gpu/drm/vmwgfx/ttm_lock.c | 100 - drivers/gpu/drm/vmwgfx/ttm_lock.h | 30 -- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 12 +- drivers/gpu/drm/vmwgfx/vmwgfx_context.c| 4 + drivers/gpu/drm/vmwgfx/vmwgfx_cotable.c| 13 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 167 +--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 139 -- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c| 1 - drivers/gpu/drm/vmwgfx/vmwgfx_kms.c| 23 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 472 + drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 245 +-- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 15 + drivers/gpu/drm/vmwgfx/vmwgfx_shader.c | 8 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c| 405 +- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 74 +++- drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 16 +- include/drm/ttm/ttm_bo_api.h | 10 + include/drm/ttm/ttm_bo_driver.h| 6 + include/linux/mm.h | 19 +- include/uapi/drm/vmwgfx_drm.h | 4 +- mm/Kconfig | 3 + mm/Makefile| 1 + mm/as_dirty_helpers.c | 300 + mm/memory.c| 145 +-- 30 files changed, 2136 insertions(+), 483 deletions(-) create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c create mode 100644 mm/as_dirty_helpers.c ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH v2 00/18] drm/ttm: make ttm bo a gem bo subclass
On 6/21/19 1:57 PM, Gerd Hoffmann wrote:

Aargh. Please don't do this. Multiple reasons:

1) I think it's bad to dump all buffer object functionality we can possibly think of in a single struct and force that on all (well, at least most) users. It's better to isolate functionality in structs, have utility functions for those and let the drivers derive their buffer objects from whatever functionality they actually need.

2) vmwgfx is not using gem and we don't want to carry that extra payload in the buffer object.

3) TTM historically hasn't been using the various drm layers except for later when common helpers have been used (like the vma manager and the cache utilities). It's desirable to keep that layer distinction (which is really what I'm saying in 1).

Now if more and more functionality that originated in TTM is moving into GEM, we need to find a better way to do that without duplicating functionality. I suggest adding pointers in the TTM structs and defaulting those pointers to the member in the TTM struct, or optionally to the member in the GEM struct. If we need to migrate those members out of the TTM struct, vmwgfx would have to provide them in its own buffer class.

NAK from the vmwgfx side.

Thanks,
Thomas
Re: drm pull for v5.3-rc1
Hi, All. On 7/15/19 8:00 PM, Linus Torvalds wrote: On Mon, Jul 15, 2019 at 10:37 AM Linus Torvalds wrote: I'm not pulling this. Why did you merge it into your tree, when apparently you were aware of how questionable it is judging by the drm pull request. Looking at some of the fallout, I also see that you then added that "adjust apply_to_pfn_range interface for dropped token" patch that seems to be for easier merging of this all. But you remove the 'token' entirely in one place, and in another you keep it and just say "whatever, it's unused, pass in NULL". WHAA? As part of looking at this all, I also note that some of this is also very non-kernely. The whole thing with trying to implement a "closure" in C is simply not how we do things in the kernel (although I've admittedly seen signs of it in some drivers). If this should be done at all (and that's questionable), at least do it in the canonical kernel way: pass in a separate "actor" function pointer and an argument block, don't try to mix function pointers and argument data and call it a "closure". We try to keep data and functions separate. It's not even for security concerns (although those have caused some splits in the past - avoid putting function pointers in structures that you then can't mark read-only!), it's a more generic issue of just keeping arguments as arguments - even if you then make a structure of them in order to not make the calling convention very complicated. (Yes, we do have the pattern of sometimes mixing function pointers with "describing data", ie the "struct file_operations" structure isn't _just_ actual function pointers, it also contains the module owner, for example. But those aren't about mixing function pointers with their arguments, it's about basically "describing" an object interface with more than just the operation pointers). So some of this code is stuff that I would have let go if it was in some individual driver ("Closures? C doesn't have closures! But whatever - that driver writer came from some place that taught lamda calculus before techning C"). But in the core mm code, I want reviews. And I want the code to follow normal kernel conventions. Sorry for creating this mess, I guess I need to take another spin at this, but first I'd like to straighten out a few details: - I've never had any kernel code more reviewed than this. It's been out on LKML and mm-list and maintainers I think 8 times including the RFC. The last time I was explicitly asking if anybody had any objections because I wanted to get it merged. It's not an internally-reviewed-only thing. There have been a number of people looking at the code and leaving comments and requesting fixes including Ralph Campbell, Jerome Glisse, Souptick Joarder, Nadav Amit and Christoph Hellwig. Perhaps I should have been more explicit in requesting R-Bs after fixing up all review comments, but I didn't. None of them had any issues similar to the ones you describe above. - The combined callback / argument struct: It was strongly inspired by the struct mm_walk (mm.h), the page walk code being quite similar in functionality. "Closure" is perhaps a bad name. Originates in X server code. Thanks, Thomas Linus ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v7 4/8] mm: Add write-protect and clean utilities for address space ranges
From: Thomas Hellstrom Add two utilities to 1) write-protect and 2) clean all ptes pointing into a range of an address space. The utilities are intended to aid in tracking dirty pages (either driver-allocated system memory or pci device memory). The write-protect utility should be used in conjunction with page_mkwrite() and pfn_mkwrite() to trigger write page-faults on page accesses. Typically one would want to use this on sparse accesses into large memory regions. The clean utility should be used to utilize hardware dirtying functionality and avoid the overhead of page-faults, typically on large accesses into small memory regions. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Acked-by: Andrew Morton --- include/linux/mm.h | 13 +- mm/Kconfig | 3 + mm/Makefile| 1 + mm/mapping_dirty_helpers.c | 315 + 4 files changed, 331 insertions(+), 1 deletion(-) create mode 100644 mm/mapping_dirty_helpers.c diff --git a/include/linux/mm.h b/include/linux/mm.h index cc292273e6ba..4bc93477375e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2637,7 +2637,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, unsigned long size, pte_fn_t fn, void *data); - #ifdef CONFIG_PAGE_POISONING extern bool page_poisoning_enabled(void); extern void kernel_poison_pages(struct page *page, int numpages, int enable); @@ -2878,5 +2877,17 @@ static inline int pages_identical(struct page *page1, struct page *page2) return !memcmp_pages(page1, page2); } +#ifdef CONFIG_MAPPING_DIRTY_HELPERS +unsigned long clean_record_shared_mapping_range(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr, + pgoff_t bitmap_pgoff, + unsigned long *bitmap, + pgoff_t *start, + pgoff_t *end); + +unsigned long wp_shared_mapping_range(struct address_space *mapping, + pgoff_t first_index, pgoff_t nr); +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/Kconfig b/mm/Kconfig index a5dae9a7eb51..550f7aceb679 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL config ARCH_HAS_HUGEPD bool +config MAPPING_DIRTY_HELPERS +bool + endmenu diff --git a/mm/Makefile b/mm/Makefile index d996846697ef..1937cc251883 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o obj-$(CONFIG_ZONE_DEVICE) += memremap.o obj-$(CONFIG_HMM_MIRROR) += hmm.o obj-$(CONFIG_MEMFD_CREATE) += memfd.o +obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c new file mode 100644 index ..71070dda9643 --- /dev/null +++ b/mm/mapping_dirty_helpers.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include + +/** + * struct wp_walk - Private struct for pagetable walk callbacks + * @range: Range for mmu notifiers + * @tlbflush_start: Address of first modified pte + * @tlbflush_end: Address of last modified pte + 1 + * @total: Total number of modified ptes + */ +struct wp_walk { + struct mmu_notifier_range range; + unsigned long tlbflush_start; + unsigned long tlbflush_end; + unsigned long total; +}; + +/** + * wp_pte - Write-protect a pte + * @pte: Pointer to the pte + * @addr: The virtual page address + * @walk: pagetable walk callback argument + * + * The function 
write-protects a pte and records the range in + * virtual address space of touched ptes for efficient range TLB flushes. + */ +static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct wp_walk *wpwalk = walk->private; + pte_t ptent = *pte; + + if (pte_write(ptent)) { + pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte); + + ptent = pte_wrprotect(old_pte); + ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent); + wpwalk->total++; + wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr); + wpwalk->tlbflush_end = max(wpwalk->tlbflush_end, + addr + PAGE_SIZE); + } + + return 0; +} + +/** + * struct clean_walk - Private struct for the clean_record_pte function. + * @base: struct wp_walk we derive from + * @bitmap_pgoff:
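To make the intended call pattern concrete, here is a hypothetical driver-side sketch (not taken from the patch): one path arms write faults for sparse accesses, the other harvests hardware dirty bits for bulk accesses. The bitmap_pgoff argument tells the helper which page offset corresponds to bit 0 of the caller's bitmap.

/* Sparse-access path: write-protect so page_mkwrite()/pfn_mkwrite() fire. */
static void my_arm_write_faults(struct address_space *mapping,
                                pgoff_t offset, pgoff_t num_pages)
{
        wp_shared_mapping_range(mapping, offset, num_pages);
}

/* Bulk path: pick up and clear hardware dirty bits into a per-object bitmap. */
static void my_harvest_dirty(struct address_space *mapping,
                             pgoff_t offset, pgoff_t num_pages,
                             unsigned long *bitmap, /* at least num_pages bits */
                             pgoff_t *start, pgoff_t *end)
{
        /* bitmap_pgoff == offset, so bit 0 of @bitmap is the first page. */
        clean_record_shared_mapping_range(mapping, offset, num_pages,
                                          offset, bitmap, start, end);
}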
[PATCH v7 1/8] mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock()
From: Thomas Hellstrom The caller needs to make sure that the vma is not torn down during the lock operation and can also use the i_mmap_rwsem for file-backed vmas. Remove the BUG_ON. We could, as an alternative, add a test that either vma->vm_mm->mmap_sem or vma->vm_file->f_mapping->i_mmap_rwsem are held. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Acked-by: Kirill A. Shutemov --- include/linux/huge_mm.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 93d5cf0bc716..0b84e13e88e2 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else @@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { - VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma); if (pud_trans_huge(*pud) || pud_devmap(*pud)) return __pud_trans_huge_lock(pud, vma); else -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
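The alternative test mentioned in the commit message could look roughly like the sketch below (illustrative only, not part of the patch): assert that at least one of the two locks that can keep the vma stable is held.

static inline void assert_vma_stable(struct vm_area_struct *vma)
{
        VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem) &&
                      !(vma->vm_file &&
                        rwsem_is_locked(&vma->vm_file->f_mapping->i_mmap_rwsem)),
                      vma);
}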
[PATCH v7 3/8] mm: Add a walk_page_mapping() function to the pagewalk code
From: Thomas Hellstrom For users that want to travers all page table entries pointing into a region of a struct address_space mapping, introduce a walk_page_mapping() function. The walk_page_mapping() function will be initially be used for dirty- tracking in virtual graphics drivers. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Reviewed-by: Andrew Morton --- include/linux/pagewalk.h | 9 mm/pagewalk.c| 94 +++- 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h index bddd9759bab9..6ec82e92c87f 100644 --- a/include/linux/pagewalk.h +++ b/include/linux/pagewalk.h @@ -24,6 +24,9 @@ struct mm_walk; * "do page table walk over the current vma", returning * a negative value means "abort current page table walk * right now" and returning 1 means "skip the current vma" + * @pre_vma:if set, called before starting walk on a non-null vma. + * @post_vma: if set, called after a walk on a non-null vma, provided + * that @pre_vma and the vma walk succeeded. */ struct mm_walk_ops { int (*pud_entry)(pud_t *pud, unsigned long addr, @@ -39,6 +42,9 @@ struct mm_walk_ops { struct mm_walk *walk); int (*test_walk)(unsigned long addr, unsigned long next, struct mm_walk *walk); + int (*pre_vma)(unsigned long start, unsigned long end, + struct mm_walk *walk); + void (*post_vma)(struct mm_walk *walk); }; /** @@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start, void *private); int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, void *private); +int walk_page_mapping(struct address_space *mapping, pgoff_t first_index, + pgoff_t nr, const struct mm_walk_ops *ops, + void *private); #endif /* _LINUX_PAGEWALK_H */ diff --git a/mm/pagewalk.c b/mm/pagewalk.c index c5fa42cab14f..ea0b9e606ad1 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -254,13 +254,23 @@ static int __walk_page_range(unsigned long start, unsigned long end, { int err = 0; struct vm_area_struct *vma = walk->vma; + const struct mm_walk_ops *ops = walk->ops; + + if (vma && ops->pre_vma) { + err = ops->pre_vma(start, end, walk); + if (err) + return err; + } if (vma && is_vm_hugetlb_page(vma)) { - if (walk->ops->hugetlb_entry) + if (ops->hugetlb_entry) err = walk_hugetlb_range(start, end, walk); } else err = walk_pgd_range(start, end, walk); + if (vma && ops->post_vma) + ops->post_vma(walk); + return err; } @@ -291,6 +301,11 @@ static int __walk_page_range(unsigned long start, unsigned long end, * its vm_flags. walk_page_test() and @ops->test_walk() are used for this * purpose. * + * If operations need to be staged before and committed after a vma is walked, + * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(), + * since it is intended to handle commit-type operations, can't return any + * errors. + * * struct mm_walk keeps current values of some common data like vma and pmd, * which are useful for the access from callbacks. If you want to pass some * caller-specific data to callbacks, @private should be helpful. @@ -377,3 +392,80 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops, return err; return __walk_page_range(vma->vm_start, vma->vm_end, &walk); } + +/** + * walk_page_mapping - walk all memory areas mapped into a struct address_space. 
+ * @mapping: Pointer to the struct address_space + * @first_index: First page offset in the address_space + * @nr: Number of incremental page offsets to cover + * @ops: operation to call during the walk + * @private: private data for callbacks' usage + * + * This function walks all memory areas mapped into a struct address_space. + * The walk is limited to only the given page-size index range, but if + * the index boundaries cross a huge page-table entry, that entry will be + * included. + * + * Also see walk_page_range() for additional information. + * + * Locking: + * This function can't require that the struct mm_struct::mmap_sem is held, + * since @mapping may be mapped by multiple processes. Instead + * @mapping->i_mmap_rwsem must be held. This might have implications in the + * callbacks, and it's up tho the caller to ensure that the + * struct mm_struct::mmap_sem is not needed. + * + *
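A minimal caller, not part of the patch, might look like the sketch below: it counts writable ptes mapping a page range of an address_space. All names are hypothetical; per the locking comment above, the caller takes i_mmap_rwsem (read mode is sufficient here) instead of any mmap_sem.

struct my_count {
        unsigned long writable;
};

static int my_pte_entry(pte_t *pte, unsigned long addr, unsigned long next,
                        struct mm_walk *walk)
{
        struct my_count *count = walk->private;

        if (pte_write(*pte))
                count->writable++;

        return 0;
}

static const struct mm_walk_ops my_walk_ops = {
        .pte_entry = my_pte_entry,
};

static unsigned long my_count_writable(struct address_space *mapping,
                                       pgoff_t first_index, pgoff_t nr)
{
        struct my_count count = { 0 };

        i_mmap_lock_read(mapping);
        walk_page_mapping(mapping, first_index, nr, &my_walk_ops, &count);
        i_mmap_unlock_read(mapping);

        return count.writable;
}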
[PATCH v7 2/8] mm: pagewalk: Take the pagetable lock in walk_pte_range()
From: Thomas Hellstrom Without the lock, anybody modifying a pte from within this function might have it concurrently modified by someone else. Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Suggested-by: Linus Torvalds Signed-off-by: Thomas Hellstrom Acked-by: Kirill A. Shutemov --- mm/pagewalk.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/pagewalk.c b/mm/pagewalk.c index d48c2a986ea3..c5fa42cab14f 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -10,8 +10,9 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t *pte; int err = 0; const struct mm_walk_ops *ops = walk->ops; + spinlock_t *ptl; - pte = pte_offset_map(pmd, addr); + pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); for (;;) { err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk); if (err) @@ -22,7 +23,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte++; } - pte_unmap(pte); + pte_unmap_unlock(pte, ptl); return err; } -- 2.20.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v7 7/8] drm/vmwgfx: Implement an infrastructure for read-coherent resources
From: Thomas Hellstrom Similar to write-coherent resources, make sure that from the user-space point of view, GPU rendered contents is automatically available for reading by the CPU. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 +- drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c| 77 - drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 103 +- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 2 + drivers/gpu/drm/vmwgfx/vmwgfx_validation.c| 3 +- 5 files changed, 181 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index d5fa9b72c8ff..7773952f81f8 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -684,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res); extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res); extern struct vmw_resource * vmw_resource_reference_unless_doomed(struct vmw_resource *res); -extern int vmw_resource_validate(struct vmw_resource *res, bool intr); +extern int vmw_resource_validate(struct vmw_resource *res, bool intr, +bool dirtying); extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); @@ -728,6 +729,8 @@ void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, pgoff_t end); +int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start, + pgoff_t end, pgoff_t *num_prefault); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1421,6 +1424,8 @@ int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); void vmw_bo_dirty_clear_res(struct vmw_resource *res); void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 060c1e492f25..f07aa857587c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -155,7 +155,6 @@ static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo) } } - /** * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty * tracking structure @@ -173,6 +172,53 @@ void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo) vmw_bo_dirty_scan_mkwrite(vbo); } +/** + * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before + * an unmap_mapping_range operation. + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * If we're using the _PAGETABLE scan method, we may leak dirty pages + * when calling unmap_mapping_range(). This function makes sure we pick + * up all dirty pages. 
+ */ +static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + struct vmw_bo_dirty *dirty = vbo->dirty; + unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end) + return; + + wp_shared_mapping_range(mapping, start + offset, end - start); + clean_record_shared_mapping_range(mapping, start + offset, + end - start, offset, + &dirty->bitmap[0], &dirty->start, + &dirty->end); +} + +/** + * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo + * @vbo: The buffer object, + * @start: First page of the range within the buffer object. + * @end: Last page of the range within the buffer object + 1. + * + * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange. + */ +void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, + pgoff_t start, pgoff_t end) +{ + unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node); + struct address_space *mapping = vbo->base.bdev->dev_mapping; + + vmw_bo_dirty_pre_unmap(vb
[PATCH v7 6/8] drm/vmwgfx: Use an RBtree instead of linked list for MOB resources
From: Thomas Hellstrom With emulated coherent memory we need to be able to quickly look up a resource from the MOB offset. Instead of traversing a linked list with O(n) worst case, use an RBtree with O(log n) worst case complexity. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 5 ++-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 10 +++ drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 33 +--- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 4f6a75d42d7f..8b71bf6b58ef 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -463,6 +463,7 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo) struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo); WARN_ON(vmw_bo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree)); vmw_bo_unmap(vmw_bo); kfree(vmw_bo); } @@ -479,6 +480,7 @@ static void vmw_user_bo_destroy(struct ttm_buffer_object *bo) struct vmw_buffer_object *vbo = &vmw_user_bo->vbo; WARN_ON(vbo->dirty); + WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree)); vmw_bo_unmap(vbo); ttm_prime_object_kfree(vmw_user_bo, prime); } @@ -514,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv, memset(vmw_bo, 0, sizeof(*vmw_bo)); BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->base.priority = 3; - - INIT_LIST_HEAD(&vmw_bo->res_list); + vmw_bo->res_tree = RB_ROOT; ret = ttm_bo_init(bdev, &vmw_bo->base, size, ttm_bo_type_device, placement, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 7f2cce016687..d5fa9b72c8ff 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -100,7 +100,7 @@ struct vmw_fpriv { /** * struct vmw_buffer_object - TTM buffer object with vmwgfx additions * @base: The TTM buffer object - * @res_list: List of resources using this buffer object as a backing MOB + * @res_tree: RB tree of resources using this buffer object as a backing MOB * @pin_count: pin depth * @cpu_writers: Number of synccpu write grabs. Protected by reservation when * increased. May be decreased without reservation. @@ -111,7 +111,7 @@ struct vmw_fpriv { */ struct vmw_buffer_object { struct ttm_buffer_object base; - struct list_head res_list; + struct rb_root res_tree; s32 pin_count; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ @@ -160,8 +160,8 @@ struct vmw_res_func; * pin-count greater than zero. It is not on the resource LRU lists and its * backup buffer is pinned. Hence it can't be evicted. * @func: Method vtable for this resource. Immutable. + * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved. * @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock. - * @mob_head: List head for the MOB backup list. Protected by @backup reserved. * @binding_head: List head for the context binding list. Protected by * the @dev_priv::binding_mutex * @res_free: The resource destructor. 
@@ -182,8 +182,8 @@ struct vmw_resource { unsigned long backup_offset; unsigned long pin_count; const struct vmw_res_func *func; + struct rb_node mob_node; struct list_head lru_head; - struct list_head mob_head; struct list_head binding_head; struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); @@ -737,7 +737,7 @@ void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, */ static inline bool vmw_resource_mob_attached(const struct vmw_resource *res) { - return !list_empty(&res->mob_head); + return !RB_EMPTY_NODE(&res->mob_node); } /** diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index b54d60bd845d..a18831e1d353 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -40,11 +40,24 @@ void vmw_resource_mob_attach(struct vmw_resource *res) { struct vmw_buffer_object *backup = res->backup; + struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL; dma_resv_assert_held(res->backup->base.base.resv); res->used_prio = (res->res_dirty) ? res->func->dirty_prio : res->func->prio; - list_add_tail(&res->mob_head, &backup->res_list); + + while (*new) { + struct vmw_resource *this = + container_of(*new, struct vmw_resource, mob_node); + + parent = *new; + ne
[PATCH v7 8/8] drm/vmwgfx: Add surface dirty-tracking callbacks
From: Thomas Hellstrom Add the callbacks necessary to implement emulated coherent memory for surfaces. Add a flag to the gb_surface_create ioctl to indicate that surface memory should be coherent. Also bump the drm minor version to signal the availability of coherent surfaces. Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov Signed-off-by: Thomas Hellstrom Reviewed-by: Deepak Rawat --- .../device_include/svga3d_surfacedefs.h | 233 ++- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 395 +- include/uapi/drm/vmwgfx_drm.h | 4 +- 4 files changed, 629 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h index f2bfd3d80598..61414f105c67 100644 --- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h +++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h @@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format, return offset; } - static inline u32 svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format, surf_size_struct baseLevelSize, @@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format) return svga3dsurface_is_dx_screen_target_format(format); } +/** + * struct svga3dsurface_mip - Mimpmap level information + * @bytes: Bytes required in the backing store of this mipmap level. + * @img_stride: Byte stride per image. + * @row_stride: Byte stride per block row. + * @size: The size of the mipmap. + */ +struct svga3dsurface_mip { + size_t bytes; + size_t img_stride; + size_t row_stride; + struct drm_vmw_size size; + +}; + +/** + * struct svga3dsurface_cache - Cached surface information + * @desc: Pointer to the surface descriptor + * @mip: Array of mipmap level information. Valid size is @num_mip_levels. + * @mip_chain_bytes: Bytes required in the backing store for the whole chain + * of mip levels. + * @sheet_bytes: Bytes required in the backing store for a sheet + * representing a single sample. + * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in + * a chain. + * @num_layers: Number of slices in an array texture or number of faces in + * a cubemap texture. + */ +struct svga3dsurface_cache { + const struct svga3d_surface_desc *desc; + struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS]; + size_t mip_chain_bytes; + size_t sheet_bytes; + u32 num_mip_levels; + u32 num_layers; +}; + +/** + * struct svga3dsurface_loc - Surface location + * @sub_resource: Surface subresource. Defined as layer * num_mip_levels + + * mip_level. + * @x: X coordinate. + * @y: Y coordinate. + * @z: Z coordinate. + */ +struct svga3dsurface_loc { + u32 sub_resource; + u32 x, y, z; +}; + +/** + * svga3dsurface_subres - Compute the subresource from layer and mipmap. + * @cache: Surface layout data. + * @mip_level: The mipmap level. + * @layer: The surface layer (face or array slice). + * + * Return: The subresource. + */ +static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache, + u32 mip_level, u32 layer) +{ + return cache->num_mip_levels * layer + mip_level; +} + +/** + * svga3dsurface_setup_cache - Build a surface cache entry + * @size: The surface base level dimensions. + * @format: The surface format. + * @num_mip_levels: Number of mipmap levels. + * @num_layers: Number of layers. 
+ * @cache: Pointer to a struct svga3dsurface_cach object to be filled in. + * + * Return: Zero on success, -EINVAL on invalid surface layout. + */ +static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size, + SVGA3dSurfaceFormat format, + u32 num_mip_levels, + u32 num_layers, + u32 num_samples, + struct svga3dsurface_cache *cache) +{ + const struct svga3d_surface_desc *desc; + u32 i; + + memset(cache, 0, sizeof(*cache)); + cache->desc = desc = svga3dsurface_get_desc(format); + cache->num_mip_levels = num_mip_levels; + cache->num_layers = num_layers; + for (i = 0; i < cache->num_mip_levels; i++) { + struct svga3dsurface_mip *mip = &cache->mip[i]; + + mip->size = svga3dsurface_get_mip_size(*size, i); + mip->bytes = svga3dsurface_get_image_buffer_size + (desc, &mip->size, 0); + mip->row_stride = + __KERNEL_DIV_ROUND_UP(mip->size.width, +
[PATCH v7 5/8] drm/vmwgfx: Implement an infrastructure for write-coherent resources
_dirty; - bool backup_dirty; + u32 res_dirty : 1; + u32 backup_dirty : 1; + u32 coherent : 1; struct vmw_buffer_object *backup; unsigned long backup_offset; unsigned long pin_count; @@ -180,6 +185,7 @@ struct vmw_resource { struct list_head lru_head; struct list_head mob_head; struct list_head binding_head; + struct vmw_resource_dirty *dirty; void (*res_free) (struct vmw_resource *res); void (*hw_destroy) (struct vmw_resource *res); }; @@ -720,6 +726,8 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv); extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo); void vmw_resource_mob_attach(struct vmw_resource *res); void vmw_resource_mob_detach(struct vmw_resource *res); +void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start, + pgoff_t end); /** * vmw_resource_mob_attached - Whether a resource currently has a mob attached @@ -1407,6 +1415,15 @@ int vmw_host_log(const char *log); #define VMW_DEBUG_USER(fmt, ...) \ DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) +/* Resource dirtying - vmwgfx_page_dirty.c */ +void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo); +int vmw_bo_dirty_add(struct vmw_buffer_object *vbo); +void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res); +void vmw_bo_dirty_clear_res(struct vmw_resource *res); +void vmw_bo_dirty_release(struct vmw_buffer_object *vbo); +vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); +vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); + /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c index ff86d49dc5e8..934ad7c0c342 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c @@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv, offsetof(typeof(*cmd), sid)); cmd = container_of(header, typeof(*cmd), header); - return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface, VMW_RES_DIRTY_NONE, user_surface_converter, &cmd->sid, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c new file mode 100644 index ..060c1e492f25 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -0,0 +1,421 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/****** + * + * Copyright 2019 VMware, Inc., Palo Alto, CA., USA + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **/ +#include "vmwgfx_drv.h" + +/* + * Different methods for tracking dirty: + * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits + * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write- + * accesses in the VM mkwrite() callback + */ +enum vmw_bo_dirty_method { + VMW_BO_DIRTY_PAGETABLE, + VMW_BO_DIRTY_MKWRITE, +}; + +/* + * No dirtied pages at scan trigger a transition to the _MKWRITE method, + * similarly a certain percentage of dirty pages trigger a transition to + * the _PAGETABLE method. How many triggers should we wait for before + * changing method? + */ +#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2 + +/* Percentage to trigger a transition to the _PAGETABLE method */ +#define VMW_DIRTY_PERCENTAGE 10 + +/** + * struct vmw_bo_dirty - Dirty information for buffer objects + * @start: First currently dirty bit + * @end: Last currently dirty bit + 1 + * @method: The currently used dirty method + * @change_count: Number of con
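To make the scan-time transition described in the comments above concrete, here is a minimal sketch of how such a heuristic could be wired up. The struct and helper below are illustrative only; they reuse the two methods and the two constants quoted above but are not the code of the (truncated) patch:

    struct bo_dirty_sketch {
            enum vmw_bo_dirty_method method;
            unsigned int change_count;      /* consecutive triggers seen */
            unsigned long num_dirty;        /* pages dirtied since last scan */
            unsigned long num_pages;        /* total pages tracked */
    };

    static void bo_dirty_pick_method(struct bo_dirty_sketch *d)
    {
            if (d->method == VMW_BO_DIRTY_PAGETABLE && d->num_dirty == 0) {
                    /* Nothing was dirtied: pagetable scans are wasted work. */
                    if (++d->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
                            d->method = VMW_BO_DIRTY_MKWRITE;
                            d->change_count = 0;
                    }
            } else if (d->method == VMW_BO_DIRTY_MKWRITE &&
                       d->num_dirty * 100 > VMW_DIRTY_PERCENTAGE * d->num_pages) {
                    /* Many pages are dirtied: write faults become the cost. */
                    if (++d->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
                            d->method = VMW_BO_DIRTY_PAGETABLE;
                            d->change_count = 0;
                    }
            } else {
                    d->change_count = 0;
            }
    }

The point is simply that repeated empty scans favour recording writes in mkwrite(), while a high dirty percentage favours scanning the page table for hardware dirty bits.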
[PATCH v7 0/8] Emulated coherent graphics memory take 2
From: Thomas Hellström Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation is signaled. Paravirtual drivers that otherwise require explicit synchronization needs to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. Provide mm helpers needed for this and that also allow for huge pmd- and pud entries (patch 1-3), and the associated vmwgfx code (patch 4-7). The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%. I would like to merge this code through the DRM tree, so an ack to include the new mm helpers in that merge would be greatly appreciated. Changes since RFC: - Merge conflict changes moved to the correct patch. Fixes intra-patchset compile errors. - Be more aggressive when turning ttm vm code into helpers. This makes sure we can use a const qualifier on the vmwgfx vm_ops. - Reinstate a lost comment an fix an error path that was broken when turning the ttm vm code into helpers. - Remove explicit type-casts of struct vm_area_struct::vm_private_data - Clarify the locking inversion that makes us not being able to use the mm pagewalk code. Changes since v1: - Removed the vmwgfx maintainer entry for as_dirty_helpers.c, updated commit message accordingly - Removed the TTM patches from the series as they are merged separately through DRM. Changes since v2: - Split out the pagewalk code from as_dirty_helpers.c and document locking. - Add pre_vma and post_vma callbacks to the pagewalk code. - Remove huge pmd and -pud asserts that would trip when we protect vmas with struct address_space::i_mmap_rwsem rather than with struct vm_area_struct::mmap_sem. - Do some naming cleanup in as_dirty_helpers.c Changes since v3: - Extensive renaming of the dirty helpers including the filename. - Update walk_page_mapping() doc. - Update the pagewalk code to not unconditionally split pmds if a pte_entry() callback is present. Update the dirty helper pmd_entry accordingly. - Use separate walk ops for the dirty helpers. - Update the pagewalk code to take the pagetable lock in walk_pte_range. Changes since v4: - Fix pte pointer confusion in patch 2/8 - Skip the pagewalk code conditional split patch for now, and update the mapping_dirty_helper accordingly. That problem will be solved in a cleaner way in a follow-up patchset. Changes since v5: - Fix tlb flushing when we have other pending tlb flushes. Changes since v6: - Added Andrew Morton R-Bs and acks. Series is now approved to merge through DRM: https://lore.kernel.org/r/20191105195114.f75be5e76763da5546121...@linux-foundation.org/ - Removed an redundant assignment in patch #8 (Colin Ian King) Cc: Andrew Morton Cc: Matthew Wilcox Cc: Will Deacon Cc: Peter Zijlstra Cc: Rik van Riel Cc: Minchan Kim Cc: Michal Hocko Cc: Huang Ying Cc: Jérôme Glisse Cc: Kirill A. Shutemov ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
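As a rough illustration of what hooking dirty tracking into the pagefault handlers means in practice, a write-notification callback can simply record which page of the mapping is about to be written and then let the write proceed. All names below are made up for the sketch and do not appear in the series:

    #include <linux/mm.h>
    #include <linux/spinlock.h>

    struct dirty_sketch {
            unsigned long *bitmap;          /* one bit per page of the object */
            spinlock_t lock;
    };

    /* Record the page about to be written, then allow the write. */
    static vm_fault_t sketch_mkwrite(struct vm_fault *vmf)
    {
            struct dirty_sketch *d = vmf->vma->vm_private_data;

            spin_lock(&d->lock);
            __set_bit(vmf->pgoff, d->bitmap);   /* page offset within the mapping */
            spin_unlock(&d->lock);

            return 0;       /* the core mm makes the PTE writable again */
    }

The write-protect and clean utilities added in patches 1-4 then let the driver arm such a mapping again once the recorded dirty state has been transferred to the resource.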
[PATCH] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions
From: Thomas Hellstrom We're gradually moving towards using DMA coherent memory in most situations, although TTM interactions with the DMA layers is still a work-in-progress. Meanwhile, use coherent memory when there are size restrictions meaning that there is a chance that streaming dma mapping of large buffer objects may fail. Also move DMA mask settings to the vmw_dma_select_mode function, since it's important that we set the correct DMA masks before calling the dma_max_mapping_size() function. Cc: Christoph Hellwig Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 31 +++-- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index fc0283659c41..1e1de83908fe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -569,7 +569,10 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - if (vmw_force_coherent) + (void) dma_set_mask_and_coherent(dev_priv->dev->dev, DMA_BIT_MASK(64)); + + if (vmw_force_coherent || + dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; @@ -582,30 +585,15 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) return -EINVAL; DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); - return 0; -} -/** - * vmw_dma_masks - set required page- and dma masks - * - * @dev: Pointer to struct drm-device - * - * With 32-bit we can only handle 32 bit PFNs. Optionally set that - * restriction also for 64-bit systems. - */ -static int vmw_dma_masks(struct vmw_private *dev_priv) -{ - struct drm_device *dev = dev_priv->dev; - int ret = 0; - - ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); if (dev_priv->map_mode != vmw_dma_phys && (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev_priv->dev->dev, +DMA_BIT_MASK(44)); } - return ret; + return 0; } static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) @@ -674,7 +662,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->capabilities2 = vmw_read(dev_priv, SVGA_REG_CAP2); } - ret = vmw_dma_select_mode(dev_priv); if (unlikely(ret != 0)) { DRM_INFO("Restricting capabilities due to IOMMU setup.\n"); @@ -746,10 +733,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER) vmw_print_capabilities2(dev_priv->capabilities2); - ret = vmw_dma_masks(dev_priv); - if (unlikely(ret != 0)) - goto out_err0; - dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK, SCATTERLIST_MAX_SEGMENT)); -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
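A minimal ordering sketch of the point made in the commit message (the DMA mask must be configured before dma_max_mapping_size() is queried), using a generic struct device pointer and a made-up helper name:

    #include <linux/dma-mapping.h>

    static int sketch_setup_dma(struct device *dev, bool force_coherent)
    {
            int ret;

            /* The mapping-size query below depends on the mask, so set it first. */
            ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
            if (ret)
                    return ret;

            /*
             * A restricted mapping size (typically SWIOTLB) means streaming
             * mappings of large buffer objects may fail; prefer
             * dma_alloc_coherent()-backed pages in that case.
             */
            if (force_coherent || dma_max_mapping_size(dev) != SIZE_MAX)
                    return 1;       /* caller selects the coherent map mode */

            return 0;               /* streaming DMA mappings are fine */
    }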
[PATCH] drm/vmwgfx: remove set but not used variable 'srf'
From: YueHaibing drivers/gpu/drm/vmwgfx/vmwgfx_surface.c:339:22: warning: variable srf set but not used [-Wunused-but-set-variable] 'srf' is never used, so can be removed. Signed-off-by: YueHaibing Reviewed-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index 29d8794f0421..de0530b4dc1b 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -336,7 +336,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) { struct vmw_private *dev_priv = res->dev_priv; - struct vmw_surface *srf; void *cmd; if (res->func->destroy == vmw_gb_surface_destroy) { @@ -360,7 +359,6 @@ static void vmw_hw_surface_destroy(struct vmw_resource *res) */ mutex_lock(&dev_priv->cmdbuf_mutex); - srf = vmw_res_to_srf(res); dev_priv->used_memory_size -= res->backup_size; mutex_unlock(&dev_priv->cmdbuf_mutex); } -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH] drm/vmwgfx: Use dma-coherent memory for high-bandwidth port messaging
From: Thomas Hellstrom With AMD-SEV high-bandwidth port messaging runs into trouble since the message content is encrypted using the vm-specific key, and the hypervisor is unable to read it. So use unencrypted dma-coherent bounce buffers for temporary message storage space. Allocating that memory is expensive so a future optimization might include a static unencrypted memory area for messages. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 4 +-- drivers/gpu/drm/vmwgfx/vmwgfx_msg.c | 45 + 3 files changed, 31 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 81a95651643f..fc0283659c41 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -908,13 +908,13 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) snprintf(host_log, sizeof(host_log), "vmwgfx: %s-%s", VMWGFX_REPO, VMWGFX_GIT_VERSION); - vmw_host_log(host_log); + vmw_host_log(dev->dev, host_log); memset(host_log, 0, sizeof(host_log)); snprintf(host_log, sizeof(host_log), "vmwgfx: Module Version: %d.%d.%d", VMWGFX_DRIVER_MAJOR, VMWGFX_DRIVER_MINOR, VMWGFX_DRIVER_PATCHLEVEL); - vmw_host_log(host_log); + vmw_host_log(dev->dev, host_log); if (dev_priv->enable_fb) { vmw_fifo_resource_inc(dev_priv); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b18842f73081..a77bf72cb9ac 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1389,9 +1389,9 @@ int vmw_bo_cpu_blit(struct ttm_buffer_object *dst, struct vmw_diff_cpy *diff); /* Host messaging -vmwgfx_msg.c: */ -int vmw_host_get_guestinfo(const char *guest_info_param, +int vmw_host_get_guestinfo(struct device *dev, const char *guest_info_param, char *buffer, size_t *length); -int vmw_host_log(const char *log); +int vmw_host_log(struct device *dev, const char *log); /* VMW logging */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c index b6c5e4c2ac3c..f439b7afa3a5 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c @@ -304,8 +304,8 @@ STACK_FRAME_NON_STANDARD(vmw_send_msg); * @msg: [OUT] message received from the host * @msg_len: message length */ -static int vmw_recv_msg(struct rpc_channel *channel, void **msg, - size_t *msg_len) +static int vmw_recv_msg(struct device *dev, struct rpc_channel *channel, + void **msg, size_t *msg_len, dma_addr_t *dma_handle) { unsigned long eax, ebx, ecx, edx, si, di; char *reply; @@ -339,7 +339,8 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, return 0; reply_len = ebx; - reply = kzalloc(reply_len + 1, GFP_KERNEL); + reply = dma_alloc_coherent(dev, reply_len + 1, dma_handle, + GFP_KERNEL); if (!reply) { DRM_ERROR("Cannot allocate memory for host message reply.\n"); return -ENOMEM; @@ -350,7 +351,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, ebx = vmw_port_hb_in(channel, reply, reply_len, !!(HIGH_WORD(ecx) & MESSAGE_STATUS_HB)); if ((HIGH_WORD(ebx) & MESSAGE_STATUS_SUCCESS) == 0) { - kfree(reply); + dma_free_coherent(dev, reply_len + 1, reply, *dma_handle); reply = NULL; if ((HIGH_WORD(ebx) & MESSAGE_STATUS_CPT) != 0) { /* A checkpoint occurred. Retry. 
*/ @@ -374,7 +375,7 @@ static int vmw_recv_msg(struct rpc_channel *channel, void **msg, eax, ebx, ecx, edx, si, di); if ((HIGH_WORD(ecx) & MESSAGE_STATUS_SUCCESS) == 0) { - kfree(reply); + dma_free_coherent(dev, reply_len + 1, reply, *dma_handle); reply = NULL; if ((HIGH_WORD(ecx) & MESSAGE_STATUS_CPT) != 0) { /* A checkpoint occurred. Retry. */ @@ -404,18 +405,21 @@ STACK_FRAME_NON_STANDARD(vmw_recv_msg); * Gets the value of a GuestInfo.* parameter. The value returned will be in * a string, and it is up to the caller to post-process. * + * @dev: Pointer to struct device used for coherent memory allocation * @guest_info_param: Parameter to get, e.g. GuestInfo.svga.gl3 * @buffer: if NULL, *reply_len will contain reply size. * @length: size of the reply_buf. Set to s
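For reference, the allocate/use/free pattern that the patch switches the reply buffer to looks roughly like the sketch below (hypothetical helper, transfer details omitted):

    #include <linux/dma-mapping.h>

    static int sketch_handle_reply(struct device *dev, size_t reply_len)
    {
            dma_addr_t handle;
            void *reply;

            /* Coherent memory is decrypted under SEV, so the host can read it. */
            reply = dma_alloc_coherent(dev, reply_len + 1, &handle, GFP_KERNEL);
            if (!reply)
                    return -ENOMEM;

            /* ... run the high-bandwidth port transfer through the buffer ... */

            dma_free_coherent(dev, reply_len + 1, reply, handle);
            return 0;
    }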
[git pull] vmwgfx-coherent
Dave, Daniel, Take 2 of the coherent memory patches are now ready for pulling. Following the discussion we had after the last merge attempt that had to be reverted, it might make sense to send this to Linus as a pull request separate from other DRM stuff. This time Linus has been heavily involved in the outcome of the -mm patches, and Andrew Morton has acked them for merging through DRM: https://lore.kernel.org/r/20191105195114.f75be5e76763da5546121...@linux-foundation.org/ If you think it's too late for the 5.5 merge window, deferring to 5.6 is not a problem. There is a dependency on drm-misc: This needs to be merged after drm-misc caa478af4812, to avoid a compilation error. Cover message: Graphics APIs like OpenGL 4.4 and Vulkan require the graphics driver to provide coherent graphics memory, meaning that the GPU sees any content written to the coherent memory on the next GPU operation that touches that memory, and the CPU sees any content written by the GPU to that memory immediately after any fence object trailing the GPU operation is signaled. Paravirtual drivers that otherwise require explicit synchronization needs to do this by hooking up dirty tracking to pagefault handlers and buffer object validation. Provide mm helpers needed for this and that also allow for huge pmd- and pud entries (patch 1-3), and the associated vmwgfx code (patch 4-7). The code has been tested and exercised by a tailored version of mesa where we disable all explicit synchronization and assume graphics memory is coherent. The performance loss varies of course; a typical number is around 5%. The following changes since commit 7aef29f4d4613570f413301b0807ea66a21f5e3b: drm/ttm: Convert vm callbacks to helpers (2019-11-06 13:02:00 +0100) are available in the Git repository at: git://people.freedesktop.org/~thomash/linux vmwgfx-coherent for you to fetch changes up to 9ca7d19ff8ba6207bccab46536814fe4839df80a: drm/vmwgfx: Add surface dirty-tracking callbacks (2019-11-06 15:45:32 +0100) Thomas Hellstrom (8): mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock() mm: pagewalk: Take the pagetable lock in walk_pte_range() mm: Add a walk_page_mapping() function to the pagewalk code mm: Add write-protect and clean utilities for address space ranges drm/vmwgfx: Implement an infrastructure for write-coherent resources drm/vmwgfx: Use an RBtree instead of linked list for MOB resources drm/vmwgfx: Implement an infrastructure for read-coherent resources drm/vmwgfx: Add surface dirty-tracking callbacks drivers/gpu/drm/vmwgfx/Kconfig | 1 + drivers/gpu/drm/vmwgfx/Makefile| 2 +- .../drm/vmwgfx/device_include/svga3d_surfacedefs.h | 233 +- drivers/gpu/drm/vmwgfx/vmwgfx_bo.c | 10 +- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 44 +- drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c| 1 - drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 488 + drivers/gpu/drm/vmwgfx/vmwgfx_resource.c | 193 +++- drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h | 13 + drivers/gpu/drm/vmwgfx/vmwgfx_surface.c| 395 - drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 15 +- drivers/gpu/drm/vmwgfx/vmwgfx_validation.c | 74 +++- drivers/gpu/drm/vmwgfx/vmwgfx_validation.h | 16 +- include/linux/huge_mm.h| 2 - include/linux/mm.h | 13 +- include/linux/pagewalk.h | 9 + include/uapi/drm/vmwgfx_drm.h | 4 +- mm/Kconfig | 3 + mm/Makefile| 1 + mm/mapping_dirty_helpers.c | 315 + mm/pagewalk.c | 99 - 21 files changed, 1875 insertions(+), 56 deletions(-) create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c create mode 100644 mm/mapping_dirty_helpers.c ___ dri-devel mailing list 
Re: [PATCH] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions
Hi, On 11/13/19 3:16 PM, Christoph Hellwig wrote: On Wed, Nov 13, 2019 at 10:51:43AM +0100, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom We're gradually moving towards using DMA coherent memory in most situations, although TTM interactions with the DMA layers is still a work-in-progress. Meanwhile, use coherent memory when there are size restrictions meaning that there is a chance that streaming dma mapping of large buffer objects may fail. Also move DMA mask settings to the vmw_dma_select_mode function, since it's important that we set the correct DMA masks before calling the dma_max_mapping_size() function. Cc: Christoph Hellwig Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 31 +++-- 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index fc0283659c41..1e1de83908fe 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -569,7 +569,10 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - if (vmw_force_coherent) + (void) dma_set_mask_and_coherent(dev_priv->dev->dev, DMA_BIT_MASK(64)); Please don't use void cast on returns. Also this genercally can't fail, so if it fails anyway it is fatal, and you should actually return an error. OK. Will fix. + if (vmw_force_coherent || + dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX) I don't think this is the right check to see if swiotlb would be used. You probably want to call dma_addressing_limited(). Please also add a comment explaining the call here. If I understand things correctly, dma_addressing_limited() would always return false on vmwgfx, at least if the "restrict to 44 bit" option is removed. The "odd" modes we want to catch is someone setting swiotlb=force, or someone enabling SEV. In both cases, vmwgfx would break down due to limited DMA space even if streaming DMA was fixed with appropriate sync calls. The same holds for vmw_pvscsi (which we discussed before) where the large queue depth will typically fill the SWIOTLB causing excessive non-fatal error logging. dma_max_mapping_size() currently catch these modes, but best I guess would be dma_swiotlb_forced() function that could be used in cases like this? if (dev_priv->map_mode != vmw_dma_phys && AFAIK vmw_dma_phys can't even be set in current mainline and is dead code. Can you check if I'm missing something? Certainly all three branches above don't set it. I'll remove that dead code for v2. (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); - return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); + return dma_set_mask_and_coherent(dev_priv->dev->dev, +DMA_BIT_MASK(44)); Can you keep setting the DMA mask together? E.g. make the whole thing something like: static int vmw_dma_select_mode(struct vmw_private *dev_priv) { if (dma_addressing_limited(dev_priv->dev->dev) || vmw_force_coherent) { /* * XXX: what about AMD iommu? virtio-iommu? Also * swiotlb should be always available where we can * address more than 32-bit of memory.. */ if (!IS_ENABLED(CONFIG_SWIOTLB) && !IS_ENABLED(CONFIG_INTEL_IOMMU)) return -EINVAL; The above check is only to see if coherent memory functionality is really compiled in into TTM. We have a patchset that got lost in the last merge window to fix this properly and avoid confusion about this. 
I'll rebase on that. dev_priv->map_mode = vmw_dma_alloc_coherent; } else if (vmw_restrict_iommu) { dev_priv->map_mode = vmw_dma_map_bind; } else { dev_priv->map_mode = vmw_dma_map_populate; } /* * On 32-bit systems we can only handle 32 bit PFNs. Optionally set * that restriction also for 64-bit systems. */ return dma_set_mask_and_coherent(dev->dev, (IS_ENABLED(CONFIG_64BIT) && !vmw_restrict_dma_mask) ? 64 : 44); } Thanks, Thomas ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
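The dma_swiotlb_forced() helper floated above does not exist in the kernel; purely as a speculative sketch, using the swiotlb_force and sev_active() interfaces as they looked around v5.4, such a check might boil down to:

    #include <linux/swiotlb.h>
    #include <linux/mem_encrypt.h>

    /* Speculative sketch only; not an existing or proposed API. */
    static bool sketch_dma_swiotlb_forced(void)
    {
            /* swiotlb=force on the command line, or an SEV-encrypted guest. */
            return swiotlb_force == SWIOTLB_FORCE || sev_active();
    }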
Re: drm/vmwgfx: Use dma-coherent memory for high-bandwidth port messaging
Hi, Nathan, On 11/13/19 9:01 PM, Nathan Chancellor wrote: On Wed, Nov 13, 2019 at 10:51:42AM +0100, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom With AMD-SEV high-bandwidth port messaging runs into trouble since the message content is encrypted using the vm-specific key, and the hypervisor is unable to read it. So use unencrypted dma-coherent bounce buffers for temporary message storage space. Allocating that memory is expensive so a future optimization might include a static unencrypted memory area for messages. Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul Hi Thomas, The 0day team has been doing clang builds for us and sending the results to our mailing list for triage; this patch causes the following warning. Seems legitimate, mind taking a look at it and resolving it how you see fit? Cheers, Nathan This should be harmless as dma_free_coherent() with reply == NULL is a nop, but anyway I'll respin to silence the warning. Thanks, Thomas On Thu, Nov 14, 2019 at 03:36:44AM +0800, kbuild test robot wrote: CC: kbuild-...@lists.01.org In-Reply-To: <20191113095144.2981-1-thomas...@shipmail.org> References: <20191113095144.2981-1-thomas...@shipmail.org> TO: "Thomas Hellström (VMware)" CC: dri-devel@lists.freedesktop.org, Thomas Hellstrom , Brian Paul , Thomas Hellstrom , Brian Paul CC: Thomas Hellstrom , Brian Paul Hi "Thomas, I love your patch! Perhaps something to improve: [auto build test WARNING on linus/master] [also build test WARNING on v5.4-rc7 next-20191113] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system. BTW, we also suggest to use '--base' option to specify the base tree in git format-patch, please see https://stackoverflow.com/a/37406982] url: https://github.com/0day-ci/linux/commits/Thomas-Hellstr-m-VMware/drm-vmwgfx-Use-dma-coherent-memory-for-high-bandwidth-port-messaging/20191114-020818 base: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git 0e3f1ad80fc8cb0c517fd9a9afb22752b741fa76 config: x86_64-rhel-7.6 (attached as .config) compiler: clang version 10.0.0 (git://gitmirror/llvm_project 335ac2eb662ce5f1888e2a50310b01fba2d40d68) reproduce: # save the attached .config to linux build tree make ARCH=x86_64 If you fix the issue, kindly add following tag Reported-by: kbuild test robot All warnings (new ones prefixed by >>): drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:441:6: warning: variable 'reply_handle' is used uninitialized whenever '||' condition is true [-Wsometimes-uninitialized] if (vmw_send_msg(&channel, msg) || ^~~ drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:467:47: note: uninitialized use occurs here dma_free_coherent(dev, reply_len + 1, reply, reply_handle); ^~~~ drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:441:6: note: remove the '||' if its condition is always false if (vmw_send_msg(&channel, msg) || ^~ drivers/gpu/drm/vmwgfx/vmwgfx_msg.c:421:37: note: initialize the variable 'reply_handle' to silence this warning dma_addr_t req_handle, reply_handle; ^ = 0 1 warning generated. vim +441 drivers/gpu/drm/vmwgfx/vmwgfx_msg.c 89da76fde68de1 Sinclair Yeh 2016-04-27 400 89da76fde68de1 Sinclair Yeh 2016-04-27 401 89da76fde68de1 Sinclair Yeh 2016-04-27 402 /** 89da76fde68de1 Sinclair Yeh 2016-04-27 403 * vmw_host_get_guestinfo: Gets a GuestInfo parameter 89da76fde68de1 Sinclair Yeh 2016-04-27 404 * 89da76fde68de1 Sinclair Yeh 2016-04-27 405 * Gets the value of a GuestInfo.* parameter. The value returned will be in 89da76fde68de1 Sinclair Yeh 2016-04-27 406 * a string, and it is up to the caller to post-process. 
89da76fde68de1 Sinclair Yeh 2016-04-27 407 * 6bdb21230a2a01 Thomas Hellstrom 2019-11-13 408 * @dev: Pointer to struct device used for coherent memory allocation 89da76fde68de1 Sinclair Yeh 2016-04-27 409 * @guest_info_param: Parameter to get, e.g. GuestInfo.svga.gl3 89da76fde68de1 Sinclair Yeh 2016-04-27 410 * @buffer: if NULL, *reply_len will contain reply size. 89da76fde68de1 Sinclair Yeh 2016-04-27 411 * @length: size of the reply_buf. Set to size of reply upon return 89da76fde68de1 Sinclair Yeh 2016-04-27 412 * 89da76fde68de1 Sinclair Yeh 2016-04-27 413 * Returns: 0 on success 89da76fde68de1 Sinclair Yeh 2016-04-27 414 */ 6bdb21230a2a01 Thomas Hellstrom 2019-11-13 415 int vmw_host_get_guestinfo(struct device *dev, const char *guest_info_param, 89da76fde68de1 Sinclair Yeh 2016-04-27 416 char *buffer,
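A sketch of the kind of respin promised here: initialize the handle at declaration (dma_addr_t reply_handle = 0;) and only hand a (buffer, handle) pair to dma_free_coherent() when the allocation actually happened. Illustrative helper, not the actual follow-up patch:

    /* Free the reply bounce buffer only if it was allocated. */
    static void sketch_free_reply(struct device *dev, void *reply,
                                  size_t reply_len, dma_addr_t reply_handle)
    {
            if (reply)
                    dma_free_coherent(dev, reply_len + 1, reply, reply_handle);
    }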
[PATCH 3/3] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions
From: Thomas Hellstrom We're gradually moving towards using DMA coherent memory in most situations, although TTM interactions with the DMA layers is still a work-in-progress. Meanwhile, use coherent memory when there are size restrictions meaning that there is a chance that streaming dma mapping of large buffer objects may fail. Cc: Christoph Hellwig Signed-off-by: Thomas Hellstrom Reviewed-by: Brian Paul --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 20 +++- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 8d479a411cdd..24f8d88d4b28 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -565,7 +565,15 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv) [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; - if (vmw_force_coherent) + /* +* dma_max_mapping_size() != SIZE_MAX means something is going +* on in the dma layer that the dma_map_bind or dma_map_populate modes +* are not working well with, or haven't been tested with. +* This typically happens when the SWIOTLB is active. Fall back to +* coherent memory in those cases. +*/ + if (dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX || + vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; @@ -668,10 +676,8 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) dev_priv->capabilities2 = vmw_read(dev_priv, SVGA_REG_CAP2); } - - ret = vmw_dma_select_mode(dev_priv); - if (unlikely(ret != 0)) { - DRM_INFO("Restricting capabilities due to IOMMU setup.\n"); + if (vmw_dma_masks(dev_priv) || vmw_dma_select_mode(dev_priv)) { + DRM_WARN("Refusing DMA due to lack of DMA support."); refuse_dma = true; } @@ -740,10 +746,6 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset) if (dev_priv->capabilities & SVGA_CAP_CAP2_REGISTER) vmw_print_capabilities2(dev_priv->capabilities2); - ret = vmw_dma_masks(dev_priv); - if (unlikely(ret != 0)) - goto out_err0; - dma_set_max_seg_size(dev->dev, min_t(unsigned int, U32_MAX & PAGE_MASK, SCATTERLIST_MAX_SEGMENT)); -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 1/3] drm/vmwgfx: Remove the vmw_dma_phys mode code
From: Thomas Hellstrom This mode is not used anymore. Remove the dead code. Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 4 +--- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 1 - drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 9 - 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e962048f65d2..60ef03120917 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -564,7 +564,6 @@ static void vmw_get_initial_size(struct vmw_private *dev_priv) static int vmw_dma_select_mode(struct vmw_private *dev_priv) { static const char *names[vmw_dma_map_max] = { - [vmw_dma_phys] = "Using physical TTM page addresses.", [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; @@ -598,8 +597,7 @@ static int vmw_dma_masks(struct vmw_private *dev_priv) int ret = 0; ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); - if (dev_priv->map_mode != vmw_dma_phys && - (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask)) { + if (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index b18842f73081..761dbd424749 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -281,7 +281,6 @@ struct vmw_res_cache_entry { * enum vmw_dma_map_mode - indicate how to perform TTM page dma mappings. */ enum vmw_dma_map_mode { - vmw_dma_phys, /* Use physical page addresses */ vmw_dma_alloc_coherent, /* Use TTM coherent pages */ vmw_dma_map_populate, /* Unmap from DMA just after unpopulate */ vmw_dma_map_bind, /* Unmap from DMA just before unbind */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index d8ea3dd10af0..8c0135f1c346 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -295,11 +295,6 @@ static struct page *__vmw_piter_non_sg_page(struct vmw_piter *viter) * pointed to by @viter. Functions are selected depending on the * current mapping mode. */ -static dma_addr_t __vmw_piter_phys_addr(struct vmw_piter *viter) -{ - return page_to_phys(viter->pages[viter->i]); -} - static dma_addr_t __vmw_piter_dma_addr(struct vmw_piter *viter) { return viter->addrs[viter->i]; @@ -329,10 +324,6 @@ void vmw_piter_start(struct vmw_piter *viter, const struct vmw_sg_table *vsgt, viter->page = &__vmw_piter_non_sg_page; viter->pages = vsgt->pages; switch (vsgt->mode) { - case vmw_dma_phys: - viter->next = &__vmw_piter_non_sg_next; - viter->dma_address = &__vmw_piter_phys_addr; - break; case vmw_dma_alloc_coherent: viter->next = &__vmw_piter_non_sg_next; viter->dma_address = &__vmw_piter_dma_addr; -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 2/3] drm/vmwgfx: Remove the restrict_dma_mask module parameter
From: Thomas Hellstrom The restrict_dma_mask was mainly used for iommu functionality debugging and has no use anymore. Remove it. Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index 60ef03120917..8d479a411cdd 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -255,7 +255,6 @@ static int enable_fbdev = IS_ENABLED(CONFIG_DRM_VMWGFX_FBCON); static int vmw_force_iommu; static int vmw_restrict_iommu; static int vmw_force_coherent; -static int vmw_restrict_dma_mask; static int vmw_assume_16bpp; static int vmw_probe(struct pci_dev *, const struct pci_device_id *); @@ -270,8 +269,6 @@ MODULE_PARM_DESC(restrict_iommu, "Try to limit IOMMU usage for TTM pages"); module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600); MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages"); module_param_named(force_coherent, vmw_force_coherent, int, 0600); -MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU"); -module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600); MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes"); module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600); @@ -597,7 +594,7 @@ static int vmw_dma_masks(struct vmw_private *dev_priv) int ret = 0; ret = dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)); - if (sizeof(unsigned long) == 4 || vmw_restrict_dma_mask) { + if (sizeof(unsigned long) == 4) { DRM_INFO("Restricting DMA addresses to 44 bits.\n"); return dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(44)); } -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH 0/3] drm/vmwgfx: Clean- and fix up DMA mode selection
From: Thomas Hellstrom (VMware) Clean up and fix dma mode selection. We remove an unused mode, a largely unused module option and finally add a check to enable dma coherent memory in those cases streaming dma mappings are undesired or won't work. Note that this series is intended to be applied on top of git://people.freedesktop.org/~thomash/linux branch vmwgfx-next meaning that the vmw_dma_select mode function would, after applying the patches look like static int vmw_dma_select_mode(struct vmw_private *dev_priv) { static const char *names[vmw_dma_map_max] = { [vmw_dma_alloc_coherent] = "Using coherent TTM pages.", [vmw_dma_map_populate] = "Caching DMA mappings.", [vmw_dma_map_bind] = "Giving up DMA mappings early."}; /* * dma_max_mapping_size() != SIZE_MAX means something is going * on in the dma layer that the dma_map_bind or dma_map_populate modes * are not working well with, or haven't been tested with. * This typically happens when the SWIOTLB is active. Fall back to * coherent memory in those cases. */ if (dma_max_mapping_size(dev_priv->dev->dev) != SIZE_MAX || vmw_force_coherent) dev_priv->map_mode = vmw_dma_alloc_coherent; else if (vmw_restrict_iommu) dev_priv->map_mode = vmw_dma_map_bind; else dev_priv->map_mode = vmw_dma_map_populate; if (!IS_ENABLED(CONFIG_DRM_TTM_DMA_PAGE_POOL) && (dev_priv->map_mode == vmw_dma_alloc_coherent)) return -EINVAL; DRM_INFO("DMA map mode: %s\n", names[dev_priv->map_mode]); return 0; } Cc: Christoph Hellwig ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 3/3] drm/vmwgfx: Use coherent memory if there are dma mapping size restrictions
On 11/14/19 1:40 PM, Christoph Hellwig wrote: On Thu, Nov 14, 2019 at 11:56:45AM +0100, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom We're gradually moving towards using DMA coherent memory in most situations, although TTM interactions with the DMA layers is still a work-in-progress. Meanwhile, use coherent memory when there are size restrictions meaning that there is a chance that streaming dma mapping of large buffer objects may fail. Unfortunately that dma mapping size check really is completely broken. For example the sparc32 iommus have mapping size limitations (which we just haven't wired up yet), but will never bounce buffer. Let me cook up a real API for you instead. dma_addressing_limited() is fundamentally the right call for this, we just need to make it handle the corner cases you mentioned in reply to the last version of your patch. Sounds great! Thanks, Thomas
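For context, the direction suggested here would boil the mode selection down to something like the sketch below, reusing the driver's vmw_dma_map_mode values for illustration (not the final code):

    #include <linux/dma-mapping.h>

    static enum vmw_dma_map_mode sketch_pick_map_mode(struct device *dev,
                                                      bool force_coherent,
                                                      bool restrict_iommu)
    {
            /* Bounce-prone setups cannot directly address all of memory. */
            if (force_coherent || dma_addressing_limited(dev))
                    return vmw_dma_alloc_coherent;
            if (restrict_iommu)
                    return vmw_dma_map_bind;
            return vmw_dma_map_populate;
    }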
[git pull] vmwgfx-next
Dave, Daniel Two minor cleanups / fixes for -next. The following changes since commit 3ca3a9eab7085b3c938b5d088c3020269cfecdc8: Merge tag 'drm-misc-next-fixes-2019-11-06' of git://anongit.freedesktop.org/drm/drm-misc into drm-next (2019-11-08 16:48:22 +1000) are available in the Git repository at: git://people.freedesktop.org/~thomash/linux vmwgfx-next for you to fetch changes up to b4011644b03c04b1a42068313c5b894da41d0698: drm/vmwgfx: remove set but not used variable 'srf' (2019-11-14 08:41:36 +0100) Thomas Hellstrom (1): drm/ttm, drm/vmwgfx: Use a configuration option for the TTM dma page pool YueHaibing (1): drm/vmwgfx: remove set but not used variable 'srf' drivers/gpu/drm/Kconfig | 7 +++ drivers/gpu/drm/ttm/Makefile | 4 ++-- drivers/gpu/drm/ttm/ttm_page_alloc_dma.c | 3 --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.c | 3 +-- drivers/gpu/drm/vmwgfx/vmwgfx_surface.c | 2 -- include/drm/ttm/ttm_page_alloc.h | 2 +- 6 files changed, 11 insertions(+), 10 deletions(-) ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 2/2] mm, drm/ttm: Fix vm page protection handling
From: Thomas Hellstrom TTM graphics buffer objects may, transparently to user-space, move between IO and system memory. When that happens, all PTEs pointing to the old location are zapped before the move and then faulted in again if needed. When that happens, the page protection caching mode- and encryption bits may change and be different from those of struct vm_area_struct::vm_page_prot. We were using an ugly hack to set the page protection correctly. Fix that and instead export and use vmf_insert_mixed_prot() or use vmf_insert_pfn_prot(). Also get the default page protection from struct vm_area_struct::vm_page_prot rather than using vm_get_page_prot(). This way we catch modifications done by the vm system for drivers that want write-notification. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Signed-off-by: Thomas Hellstrom Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c | 22 +++--- mm/memory.c | 1 + 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 3e8c3de91ae4..78bfab81cf04 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -173,7 +173,6 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pgoff_t num_prefault) { struct vm_area_struct *vma = vmf->vma; - struct vm_area_struct cvma = *vma; struct ttm_buffer_object *bo = vma->vm_private_data; struct ttm_bo_device *bdev = bo->bdev; unsigned long page_offset; @@ -244,7 +243,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, goto out_io_unlock; } - cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot); + prot = ttm_io_prot(bo->mem.placement, prot); if (!bo->mem.bus.is_iomem) { struct ttm_operation_ctx ctx = { .interruptible = false, @@ -260,7 +259,7 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, } } else { /* Iomem should not be marked encrypted */ - cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot); + prot = pgprot_decrypted(prot); } /* @@ -283,11 +282,20 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, pfn = page_to_pfn(page); } + /* +* Note that the value of @prot at this point may differ from +* the value of @vma->vm_page_prot in the caching- and +* encryption bits. This is because the exact location of the +* data may not be known at mmap() time and may also change +* at arbitrary times while the data is mmap'ed. +* See vmf_insert_mixed_prot() for a discussion. 
+*/ if (vma->vm_flags & VM_MIXEDMAP) - ret = vmf_insert_mixed(&cvma, address, - __pfn_to_pfn_t(pfn, PFN_DEV)); + ret = vmf_insert_mixed_prot(vma, address, + __pfn_to_pfn_t(pfn, PFN_DEV), + prot); else - ret = vmf_insert_pfn(&cvma, address, pfn); + ret = vmf_insert_pfn_prot(vma, address, pfn, prot); /* Never error on prefaulted PTEs */ if (unlikely((ret & VM_FAULT_ERROR))) { @@ -319,7 +327,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf) if (ret) return ret; - prot = vm_get_page_prot(vma->vm_flags); + prot = vma->vm_page_prot; ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT); if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) return ret; diff --git a/mm/memory.c b/mm/memory.c index 269a8a871e83..0f262acd1018 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1798,6 +1798,7 @@ vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, { return __vm_insert_mixed(vma, addr, pfn, pgprot, false); } +EXPORT_SYMBOL(vmf_insert_mixed_prot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
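A condensed usage sketch of the interface after this change: the base protection comes from vma->vm_page_prot (so write-notification bits set by the core vm are preserved) and only the caching and encryption bits are adjusted before insertion. Hypothetical driver code; vmf_insert_mixed_prot() is the helper added in patch 1/2:

    #include <linux/mm.h>
    #include <linux/pfn_t.h>

    static vm_fault_t sketch_insert(struct vm_fault *vmf, unsigned long pfn,
                                    bool is_iomem)
    {
            struct vm_area_struct *vma = vmf->vma;
            pgprot_t prot = vma->vm_page_prot;      /* keeps write-notify bits */

            if (is_iomem)
                    prot = pgprot_decrypted(prot);  /* iomem is never encrypted */

            if (vma->vm_flags & VM_MIXEDMAP)
                    return vmf_insert_mixed_prot(vma, vmf->address,
                                                 __pfn_to_pfn_t(pfn, PFN_DEV),
                                                 prot);

            return vmf_insert_pfn_prot(vma, vmf->address, pfn, prot);
    }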
[PATCH v4 0/2] mm, drm/ttm: Fix pte insertion with customized protection
From: Thomas Hellstrom The drm/ttm module is using a modified on-stack copy of the struct vm_area_struct to be able to set a page protection with customized caching. Fix that by adding a vmf_insert_mixed_prot() function similar to the existing vmf_insert_pfn_prot() for use with drm/ttm. I'd like to merge this through a drm tree. Changes since v1: *) Formatting fixes in patch 1 *) Updated commit message of patch 2. Changes since v2: *) Moved vmf_insert_mixed_prot() export to patch 2 (Michal Hocko) *) Documented under which conditions it's safe to use a page protection different from struct vm_area_struct::vm_page_prot. (Michal Hocko) Changes since v3: *) More documentation regarding under which conditions it's safe to use a page protection different from struct vm_area_struct::vm_page_prot. This time also in core vm. (Michal Hocko) Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" -- 2.21.0 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 1/2] mm: Add a vmf_insert_mixed_prot() function
From: Thomas Hellstrom The TTM module today uses a hack to be able to set a different page protection than struct vm_area_struct::vm_page_prot. To be able to do this properly, add the needed vm functionality as vmf_insert_mixed_prot(). Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Signed-off-by: Thomas Hellstrom Acked-by: Christian König --- include/linux/mm.h | 2 ++ include/linux/mm_types.h | 7 ++- mm/memory.c | 43 3 files changed, 47 insertions(+), 5 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index cc292273e6ba..29575d3c1e47 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2548,6 +2548,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, + pfn_t pfn, pgprot_t pgprot); vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn); int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index fa795284..ac96afdbb4bc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -307,7 +307,12 @@ struct vm_area_struct { /* Second cache line starts here. */ struct mm_struct *vm_mm;/* The address space we belong to. */ - pgprot_t vm_page_prot; /* Access permissions of this VMA. */ + + /* +* Access permissions of this VMA. +* See vmf_insert_mixed() for discussion. +*/ + pgprot_t vm_page_prot; unsigned long vm_flags; /* Flags, see mm.h. */ /* diff --git a/mm/memory.c b/mm/memory.c index b1ca51a079f2..269a8a871e83 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1646,6 +1646,9 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, * vmf_insert_pfn_prot should only be used if using multiple VMAs is * impractical. * + * See vmf_insert_mixed_prot() for a discussion of the implication of using + * a value of @pgprot different from that of @vma->vm_page_prot. + * * Context: Process context. May allocate using %GFP_KERNEL. * Return: vm_fault_t value. */ @@ -1719,9 +1722,9 @@ static bool vm_mixed_ok(struct vm_area_struct *vma, pfn_t pfn) } static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, - unsigned long addr, pfn_t pfn, bool mkwrite) + unsigned long addr, pfn_t pfn, pgprot_t pgprot, + bool mkwrite) { - pgprot_t pgprot = vma->vm_page_prot; int err; BUG_ON(!vm_mixed_ok(vma, pfn)); @@ -1764,10 +1767,42 @@ static vm_fault_t __vm_insert_mixed(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +/** + * vmf_insert_mixed_prot - insert single pfn into user vma with specified pgprot + * @vma: user vma to map to + * @addr: target user address of this page + * @pfn: source kernel pfn + * @pgprot: pgprot flags for the inserted page + * + * This is exactly like vmf_insert_mixed(), except that it allows drivers to + * to override pgprot on a per-page basis. + * + * Typically this function should be used by drivers to set caching- and + * encryption bits different than those of @vma->vm_page_prot, because + * the caching- or encryption mode may not be known at mmap() time. + * This is ok as long as @vma->vm_page_prot is not used by the core vm + * to set caching and encryption bits for those vmas (except for COW pages). 
+ * This is ensured by core vm only modifying these page table entries using + * functions that don't touch caching- or encryption bits, using pte_modify() + * if needed. (See for example mprotect()). + * Also when new page-table entries are created, this is only done using the + * fault() callback, and never using the value of vma->vm_page_prot, + * except for page-table entries that point to anonymous pages as the result + * of COW. + * + * Context: Process context. May allocate using %GFP_KERNEL. + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr, +pfn_t pfn, pgprot_t pgprot) +{ + return __vm_insert_mixed(vma, addr, pfn, pgprot, false); +} + vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr, pfn_t pfn) { - return __vm_insert_mixed(vma, addr, pfn, false); + return __vm_insert_mixed(vma, addr, pfn, vma->vm_page_prot, false); } EXPORT_SYMBOL(vmf_insert_mixed); @@ -1779,7 +1814,7 @@ EXPORT_SYMBOL(vmf_insert_mixed); vm_fault_t vmf_insert_mi
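A tiny illustration of the "not known at mmap() time" point made in the kernel-doc above; the in_vram flag stands in for wherever the buffer currently happens to live (hypothetical helper):

    static pgprot_t sketch_fault_prot(struct vm_area_struct *vma, bool in_vram)
    {
            pgprot_t prot = vma->vm_page_prot;      /* keep the core-vm bits */

            /* The caching mode follows the current placement, known only here. */
            return in_vram ? pgprot_writecombine(prot) : prot;
    }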
Ack to merge through DRM tree? WAS [PATCH v4 0/2] mm, drm/ttm: Fix pte insertion with customized protection
Andrew, On 12/12/19 9:47 AM, Thomas Hellström (VMware) wrote: From: Thomas Hellstrom The drm/ttm module is using a modified on-stack copy of the struct vm_area_struct to be able to set a page protection with customized caching. Fix that by adding a vmf_insert_mixed_prot() function similar to the existing vmf_insert_pfn_prot() for use with drm/ttm. I'd like to merge this through a drm tree. Changes since v1: *) Formatting fixes in patch 1 *) Updated commit message of patch 2. Changes since v2: *) Moved vmf_insert_mixed_prot() export to patch 2 (Michal Hocko) *) Documented under which conditions it's safe to use a page protection different from struct vm_area_struct::vm_page_prot. (Michal Hocko) Changes since v3: *) More documentation regarding under which conditions it's safe to use a page protection different from struct vm_area_struct::vm_page_prot. This time also in core vm. (Michal Hocko) Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Seems all concerns with this series have been addressed. Could I have an ack to merge this through a DRM tree? Thanks, Thomas ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH 2/3] drm/ttm: always keep BOs on the LRU
Hi, Christian, On 10/16/19 11:30 AM, Christian König wrote: On 25.09.19 at 14:10, Christian König wrote: On 25.09.19 at 14:06, Thomas Hellström (VMware) wrote: On 9/25/19 12:55 PM, Christian König wrote: This allows blocking for BOs to become available in the memory management. Amdgpu has been doing this for quite a while now during CS. Now apply the new behavior to all drivers using TTM. Signed-off-by: Christian König I need to test this to see that there are no regressions. Did you get time to test this, or did I just miss your response? Thanks in advance, Christian. Sorry for the delay. A lot on my plate currently. I'll get around to this later today or on Monday. Thanks, Thomas
Re: [PATCH 2/3] drm/ttm: always keep BOs on the LRU
On 10/18/19 3:49 PM, Thomas Hellström (VMware) wrote: Hi, Christian, On 10/16/19 11:30 AM, Christian König wrote: On 25.09.19 at 14:10, Christian König wrote: On 25.09.19 at 14:06, Thomas Hellström (VMware) wrote: On 9/25/19 12:55 PM, Christian König wrote: This allows blocking for BOs to become available in the memory management. Amdgpu has been doing this for quite a while now during CS. Now apply the new behavior to all drivers using TTM. Signed-off-by: Christian König I need to test this to see that there are no regressions. Did you get time to test this, or did I just miss your response? Thanks in advance, Christian. Sorry for the delay. A lot on my plate currently. I'll get around to this later today or on Monday. Thanks, Thomas Hi, Christian! This patch is Acked-by: Thomas Hellstrom
Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2
On 2/17/20 6:55 PM, Daniel Vetter wrote: On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote: Implement the importer side of unpinned DMA-buf handling. v2: update page tables immediately Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 770baba621b3..48de7624d49c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) return ERR_PTR(ret); } +/** + * amdgpu_dma_buf_move_notify - &attach.move_notify implementation + * + * @attach: the DMA-buf attachment + * + * Invalidate the DMA-buf attachment, making sure that the we re-create the + * mapping before the next use. + */ +static void +amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ + struct drm_gem_object *obj = attach->importer_priv; + struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv); + struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_operation_ctx ctx = { false, false }; + struct ttm_placement placement = {}; + struct amdgpu_vm_bo_base *bo_base; + int r; + + if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + return; + + r = ttm_bo_validate(&bo->tbo, &placement, &ctx); + if (r) { + DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r); + return; + } + + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { + struct amdgpu_vm *vm = bo_base->vm; + struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; + + if (ticket) { Yeah so this is kinda why I've been a total pain about the exact semantics of the move_notify hook. I think we should flat-out require that importers _always_ have a ticket attach when they call this, and that they can cope with additional locks being taken (i.e. full EDEADLCK) handling. Simplest way to force that contract is to add a dummy 2nd ww_mutex lock to the dma_resv object, which we then can take #ifdef CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus mabye a WARN_ON(!ticket). Now the real disaster is how we handle deadlocks. Two issues: - Ideally we'd keep any lock we've taken locked until the end, it helps needless backoffs. I've played around a bit with that but not even poc level, just an idea: https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582 Idea is essentially to track a list of objects we had to lock as part of the ttm_bo_validate of the main object. - Second one is if we get a EDEADLCK on one of these sublocks (like the one here). We need to pass that up the entire callchain, including a temporary reference (we have to drop locks to do the ww_mutex_lock_slow call), and need a custom callback to drop that temporary reference (since that's all driver specific, might even be internal ww_mutex and not anything remotely looking like a normal dma_buf). This probably needs the exec util helpers from ttm, but at the dma_resv level, so that we can do something like this: struct dma_resv_ticket { struct ww_acquire_ctx base; /* can be set by anyone (including other drivers) that got hold of * this ticket and had to acquire some new lock. This lock might * protect anything, including driver-internal stuff, and isn't * required to be a dma_buf or even just a dma_resv. 
*/ struct ww_mutex *contended_lock; /* callback which the driver (which might be a dma-buf exporter * and not matching the driver that started this locking ticket) * sets together with @contended_lock, for the main driver to drop * when it calls dma_resv_unlock on the contended_lock. */ void (drop_ref*)(struct ww_mutex *contended_lock); }; This is all supremely nasty (also ttm_bo_validate would need to be improved to handle these sublocks and random new objects that could force a ww_mutex_lock_slow). Just a short comment on this: Neither the currently used wait-die or the wound-wait algorithm *strictly* requires a slow lock on the contended lock. For wait-die it's just very convenient since it makes us sleep instead of spinning with -EDEADLK on the contended lock. For wound-wait IIRC one could just immediately restart the whole locking transaction after an -EDEADLK, and the transaction would automatically end up waiting on the contended lock, provided the mutex lock stealing is not allowed. There is however a possibility that the transaction will be wounded again on another lock, taken
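For readers following the wait-die/wound-wait discussion, the standard ww_mutex acquire/backoff pattern being referred to is sketched below as generic example code (not taken from any driver in this thread); the ww_mutex_lock_slow() call is the "slow lock on the contended lock" convenience mentioned here:

    #include <linux/ww_mutex.h>

    static DEFINE_WW_CLASS(sketch_ww_class);

    struct sketch_obj {
            struct ww_mutex lock;   /* ww_mutex_init(&obj->lock, &sketch_ww_class) */
    };

    static void sketch_lock_pair(struct sketch_obj *a, struct sketch_obj *b)
    {
            struct sketch_obj *contended = NULL;
            struct ww_acquire_ctx ctx;
            int ret;

            ww_acquire_init(&ctx, &sketch_ww_class);
    retry:
            if (contended) {
                    /*
                     * Sleep on the contended lock instead of spinning on
                     * -EDEADLK; a convenience rather than a strict
                     * requirement of wait-die or wound-wait.
                     */
                    ww_mutex_lock_slow(&contended->lock, &ctx);
            }

            if (contended != a) {
                    ret = ww_mutex_lock(&a->lock, &ctx);
                    if (ret == -EDEADLK) {
                            if (contended)
                                    ww_mutex_unlock(&contended->lock);
                            contended = a;
                            goto retry;
                    }
            }

            if (contended != b) {
                    ret = ww_mutex_lock(&b->lock, &ctx);
                    if (ret == -EDEADLK) {
                            ww_mutex_unlock(&a->lock);
                            contended = b;
                            goto retry;
                    }
            }

            ww_acquire_done(&ctx);

            /* ... touch both objects ... */

            ww_mutex_unlock(&a->lock);
            ww_mutex_unlock(&b->lock);
            ww_acquire_fini(&ctx);
    }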
Re: [PATCH 5/5] drm/amdgpu: implement amdgpu_gem_prime_move_notify v2
On 2/18/20 10:01 PM, Daniel Vetter wrote: On Tue, Feb 18, 2020 at 9:17 PM Thomas Hellström (VMware) wrote: On 2/17/20 6:55 PM, Daniel Vetter wrote: On Mon, Feb 17, 2020 at 04:45:09PM +0100, Christian König wrote: Implement the importer side of unpinned DMA-buf handling. v2: update page tables immediately Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c | 66 - drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 6 ++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 770baba621b3..48de7624d49c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -453,7 +453,71 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf) return ERR_PTR(ret); } +/** + * amdgpu_dma_buf_move_notify - &attach.move_notify implementation + * + * @attach: the DMA-buf attachment + * + * Invalidate the DMA-buf attachment, making sure that the we re-create the + * mapping before the next use. + */ +static void +amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) +{ +struct drm_gem_object *obj = attach->importer_priv; +struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv); +struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj); +struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); +struct ttm_operation_ctx ctx = { false, false }; +struct ttm_placement placement = {}; +struct amdgpu_vm_bo_base *bo_base; +int r; + +if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) +return; + +r = ttm_bo_validate(&bo->tbo, &placement, &ctx); +if (r) { +DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r); +return; +} + +for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { +struct amdgpu_vm *vm = bo_base->vm; +struct dma_resv *resv = vm->root.base.bo->tbo.base.resv; + +if (ticket) { Yeah so this is kinda why I've been a total pain about the exact semantics of the move_notify hook. I think we should flat-out require that importers _always_ have a ticket attach when they call this, and that they can cope with additional locks being taken (i.e. full EDEADLCK) handling. Simplest way to force that contract is to add a dummy 2nd ww_mutex lock to the dma_resv object, which we then can take #ifdef CONFIG_WW_MUTEX_SLOWPATH_DEBUG. Plus mabye a WARN_ON(!ticket). Now the real disaster is how we handle deadlocks. Two issues: - Ideally we'd keep any lock we've taken locked until the end, it helps needless backoffs. I've played around a bit with that but not even poc level, just an idea: https://cgit.freedesktop.org/~danvet/drm/commit/?id=b1799c5a0f02df9e1bb08d27be37331255ab7582 Idea is essentially to track a list of objects we had to lock as part of the ttm_bo_validate of the main object. - Second one is if we get a EDEADLCK on one of these sublocks (like the one here). We need to pass that up the entire callchain, including a temporary reference (we have to drop locks to do the ww_mutex_lock_slow call), and need a custom callback to drop that temporary reference (since that's all driver specific, might even be internal ww_mutex and not anything remotely looking like a normal dma_buf). This probably needs the exec util helpers from ttm, but at the dma_resv level, so that we can do something like this: struct dma_resv_ticket { struct ww_acquire_ctx base; /* can be set by anyone (including other drivers) that got hold of * this ticket and had to acquire some new lock. 
This lock might * protect anything, including driver-internal stuff, and isn't * required to be a dma_buf or even just a dma_resv. */ struct ww_mutex *contended_lock; /* callback which the driver (which might be a dma-buf exporter * and not matching the driver that started this locking ticket) * sets together with @contended_lock, for the main driver to drop * when it calls dma_resv_unlock on the contended_lock. */ void (drop_ref*)(struct ww_mutex *contended_lock); }; This is all supremely nasty (also ttm_bo_validate would need to be improved to handle these sublocks and random new objects that could force a ww_mutex_lock_slow). Just a short comment on this: Neither the currently used wait-die or the wound-wait algorithm *strictly* requires a slow lock on the contended lock. For wait-die it's just very convenient since it makes us sleep instead of spinning with -EDEADLK on the contended lock. For wound-wait IIRC one could just immediately restart the whole locking transaction after an -EDEADLK, and the transaction would automatically end up waiting on the contended lock, provided
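Written out as plain C, the ticket structure floated in the discussion above would look roughly like the sketch below. This is a proposal under discussion, not an existing dma_resv API, and the callback declaration is given here with valid function-pointer syntax:

#include <linux/ww_mutex.h>

/* Discussion sketch only; not an existing kernel structure. */
struct dma_resv_ticket {
	struct ww_acquire_ctx base;

	/*
	 * Can be set by anyone (including other drivers) that got hold of
	 * this ticket and had to acquire some new lock. This lock might
	 * protect anything, including driver-internal stuff, and isn't
	 * required to be a dma_buf or even just a dma_resv.
	 */
	struct ww_mutex *contended_lock;

	/*
	 * Callback which the driver (which might be a dma-buf exporter and
	 * not matching the driver that started this locking ticket) sets
	 * together with @contended_lock, for the main driver to drop when
	 * it calls dma_resv_unlock() on the contended lock.
	 */
	void (*drop_ref)(struct ww_mutex *contended_lock);
};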
[PATCH v4 2/9] mm: Introduce vma_is_special_huge
From: Thomas Hellstrom For VM_PFNMAP and VM_MIXEDMAP vmas that want to support transhuge pages and -page table entries, introduce vma_is_special_huge() that takes the same codepaths as vma_is_dax(). The use of "special" follows the definition in memory.c, vm_normal_page(): "Special" mappings do not wish to be associated with a "struct page" (either it doesn't exist, or it exists but they don't want to touch it) For PAGE_SIZE pages, "special" is determined per page table entry to be able to deal with COW pages. But since we don't have huge COW pages, we can classify a vma as either "special huge" or "normal huge". Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Acked-by: Christian König --- include/linux/mm.h | 17 + mm/huge_memory.c | 6 +++--- 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 0157d293935f..d370ce2932a1 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2822,6 +2822,23 @@ extern long copy_huge_page_from_user(struct page *dst_page, const void __user *usr_src, unsigned int pages_per_huge_page, bool allow_pagefault); + +/** + * vma_is_special_huge - Are transhuge page-table entries considered special? + * @vma: Pointer to the struct vm_area_struct to consider + * + * Whether transhuge page-table entries are considered "special" following + * the definition in vm_normal_page(). + * + * Return: true if transhuge page-table entries should be considered special, + * false otherwise. + */ +static inline bool vma_is_special_huge(const struct vm_area_struct *vma) +{ + return vma_is_dax(vma) || (vma->vm_file && + (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))); +} + #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ #ifdef CONFIG_DEBUG_PAGEALLOC diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 41a0fbddc96b..f8d24fc3f4df 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1789,7 +1789,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd, tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); - if (vma_is_dax(vma)) { + if (vma_is_special_huge(vma)) { if (arch_needs_pgtable_deposit()) zap_deposited_table(tlb->mm, pmd); spin_unlock(ptl); @@ -2053,7 +2053,7 @@ int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, */ pudp_huge_get_and_clear_full(tlb->mm, addr, pud, tlb->fullmm); tlb_remove_pud_tlb_entry(tlb, pud, addr); - if (vma_is_dax(vma)) { + if (vma_is_special_huge(vma)) { spin_unlock(ptl); /* No zero page support yet */ } else { @@ -2162,7 +2162,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, */ if (arch_needs_pgtable_deposit()) zap_deposited_table(mm, pmd); - if (vma_is_dax(vma)) + if (vma_is_special_huge(vma)) return; page = pmd_page(_pmd); if (!PageDirty(page) && pmd_dirty(_pmd)) -- 2.21.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
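To illustrate the per-entry versus per-vma distinction made above, a minimal sketch; the wrapper name is hypothetical, vma_is_special_huge() and pte_special() are the real helpers, and pte_special() is only meaningful on architectures that provide it:

#include <linux/mm.h>

/*
 * Hypothetical illustration: for PAGE_SIZE entries the "special"
 * property is read from the pte itself, while for huge entries it is
 * derived from the vma, since huge COW pages do not exist.
 */
static inline bool entry_is_special(const struct vm_area_struct *vma,
				    pte_t pte, bool is_huge_entry)
{
	return is_huge_entry ? vma_is_special_huge(vma) : pte_special(pte);
}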
[PATCH v4 6/9] drm/vmwgfx: Support huge page faults
From: Thomas Hellstrom With vmwgfx dirty-tracking we need a specialized huge_fault callback. Implement and hook it up. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Acked-by: Christian König --- drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 4 ++ drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 74 +- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c | 5 +- 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a31e726d6d71..82d86f2d2569 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1428,6 +1428,10 @@ void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo, pgoff_t start, pgoff_t end); vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf); vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size); +#endif /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c index 17a5dca7b921..cde3e07ebaf7 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c @@ -473,7 +473,7 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) * a lot of unnecessary write faults. */ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE) - prot = vma->vm_page_prot; + prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); else prot = vm_get_page_prot(vma->vm_flags); @@ -486,3 +486,75 @@ vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf) return ret; } + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, + enum page_entry_size pe_size) +{ + struct vm_area_struct *vma = vmf->vma; + struct ttm_buffer_object *bo = (struct ttm_buffer_object *) + vma->vm_private_data; + struct vmw_buffer_object *vbo = + container_of(bo, struct vmw_buffer_object, base); + pgprot_t prot; + vm_fault_t ret; + pgoff_t fault_page_size; + bool write = vmf->flags & FAULT_FLAG_WRITE; + bool is_cow_mapping = + (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; + + switch (pe_size) { + case PE_SIZE_PMD: + fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT; + break; +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + case PE_SIZE_PUD: + fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT; + break; +#endif + default: + WARN_ON_ONCE(1); + return VM_FAULT_FALLBACK; + } + + /* Always do write dirty-tracking and COW on PTE level. */ + if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping)) + return VM_FAULT_FALLBACK; + + ret = ttm_bo_vm_reserve(bo, vmf); + if (ret) + return ret; + + if (vbo->dirty) { + pgoff_t allowed_prefault; + unsigned long page_offset; + + page_offset = vmf->pgoff - + drm_vma_node_start(&bo->base.vma_node); + if (page_offset >= bo->num_pages || + vmw_resources_clean(vbo, page_offset, + page_offset + PAGE_SIZE, + &allowed_prefault)) { + ret = VM_FAULT_SIGBUS; + goto out_unlock; + } + + /* +* Write protect, so we get a new fault on write, and can +* split. 
+*/ + prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED); + } else { + prot = vm_get_page_prot(vma->vm_flags); + } + + ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size); + if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) + return ret; + +out_unlock: + dma_resv_unlock(bo->base.resv); + + return ret; +} +#endif diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c index ce288756531b..34100d1f5a9d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c @@ -34,7 +34,10 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma) .page_mkwrite = vmw_bo_vm_mkwrite, .fault = vmw_bo_vm_fault, .open = ttm_bo_vm_open, - .close = ttm_bo_vm_close + .close = ttm_bo_vm_close, +#ifdef
[PATCH v4 1/9] fs: Constify vma argument to vma_is_dax
From: Thomas Hellstrom The vma argument is only dereferenced for reading. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Acked-by: Christian König --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/fs.h b/include/linux/fs.h index 98e0349adb52..4f41fdbf402f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3367,7 +3367,7 @@ static inline bool io_is_direct(struct file *filp) return (filp->f_flags & O_DIRECT) || IS_DAX(filp->f_mapping->host); } -static inline bool vma_is_dax(struct vm_area_struct *vma) +static inline bool vma_is_dax(const struct vm_area_struct *vma) { return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } -- 2.21.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 7/9] drm: Add a drm_get_unmapped_area() helper
From: Thomas Hellstrom Unaligned virtual addresses makes it unlikely that huge page-table entries can be used. So align virtual buffer object address huge page boundaries to the underlying physical address huge page boundaries taking buffer object sizes into account to determine when it might be possible to use huge page-table entries. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Acked-by: Christian König --- drivers/gpu/drm/drm_file.c | 136 + include/drm/drm_file.h | 5 ++ 2 files changed, 141 insertions(+) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 92d16724f949..40fae356d202 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -39,10 +39,13 @@ #include #include +#include + #include #include #include #include +#include #include "drm_crtc_internal.h" #include "drm_internal.h" @@ -796,3 +799,136 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags) return file; } EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile); + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/* + * drm_addr_inflate() attempts to construct an aligned area by inflating + * the area size and skipping the unaligned start of the area. + * adapted from shmem_get_unmapped_area() + */ +static unsigned long drm_addr_inflate(unsigned long addr, + unsigned long len, + unsigned long pgoff, + unsigned long flags, + unsigned long huge_size) +{ + unsigned long offset, inflated_len; + unsigned long inflated_addr; + unsigned long inflated_offset; + + offset = (pgoff << PAGE_SHIFT) & (huge_size - 1); + if (offset && offset + len < 2 * huge_size) + return addr; + if ((addr & (huge_size - 1)) == offset) + return addr; + + inflated_len = len + huge_size - PAGE_SIZE; + if (inflated_len > TASK_SIZE) + return addr; + if (inflated_len < len) + return addr; + + inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len, + 0, flags); + if (IS_ERR_VALUE(inflated_addr)) + return addr; + if (inflated_addr & ~PAGE_MASK) + return addr; + + inflated_offset = inflated_addr & (huge_size - 1); + inflated_addr += offset - inflated_offset; + if (inflated_offset > offset) + inflated_addr += huge_size; + + if (inflated_addr > TASK_SIZE - len) + return addr; + + return inflated_addr; +} + +/** + * drm_get_unmapped_area() - Get an unused user-space virtual memory area + * suitable for huge page table entries. + * @file: The struct file representing the address space being mmap()'d. + * @uaddr: Start address suggested by user-space. + * @len: Length of the area. + * @pgoff: The page offset into the address space. + * @flags: mmap flags + * @mgr: The address space manager used by the drm driver. This argument can + * probably be removed at some point when all drivers use the same + * address space manager. + * + * This function attempts to find an unused user-space virtual memory area + * that can accommodate the size we want to map, and that is properly + * aligned to facilitate huge page table entries matching actual + * huge pages or huge page aligned memory in buffer objects. Buffer objects + * are assumed to start at huge page boundary pfns (io memory) or be + * populated by huge pages aligned to the start of the buffer object + * (system- or coherent memory). Adapted from shmem_get_unmapped_area. + * + * Return: aligned user-space address. 
+ */ +unsigned long drm_get_unmapped_area(struct file *file, + unsigned long uaddr, unsigned long len, + unsigned long pgoff, unsigned long flags, + struct drm_vma_offset_manager *mgr) +{ + unsigned long addr; + unsigned long inflated_addr; + struct drm_vma_offset_node *node; + + if (len > TASK_SIZE) + return -ENOMEM; + + /* +* @pgoff is the file page-offset the huge page boundaries of +* which typically aligns to physical address huge page boundaries. +* That's not true for DRM, however, where physical address huge +* page boundaries instead are aligned with the offset from +* buffer object start. So adjust @pgoff to be the offset from +* buffer object start. +*/ + drm_vma_offset_lock_lookup(mgr); + node = drm_vma_offset_lookup_locked(mgr, pgoff, 1); + if (node) +
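The address selection goal described in the kernel-doc above boils down to a simple condition; the check below is an illustrative sketch rather than code from the patch, and assumes PMD-sized (typically 2 MiB) huge pages:

#include <linux/huge_mm.h>

/*
 * Illustrative only: a PMD-sized page-table entry can be used when the
 * user-space address and the offset into the buffer object agree on
 * their position within a huge page; drm_get_unmapped_area() tries to
 * return addresses for which this holds.
 */
static inline bool addr_allows_pmd_entries(unsigned long addr,
					   unsigned long bo_offset)
{
	return (addr & (HPAGE_PMD_SIZE - 1)) ==
	       (bo_offset & (HPAGE_PMD_SIZE - 1));
}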
[PATCH v4 9/9] drm/vmwgfx: Hook up the helpers to align buffer objects
From: Thomas Hellstrom Start using the helpers that align buffer object user-space addresses and buffer object vram addresses to huge page boundaries. This is to improve the chances of allowing huge page-table entries. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Acked-by: Christian König --- drivers/gpu/drm/drm_file.c | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.c| 13 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h| 1 + drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 2 +- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 40fae356d202..1df2fca608c3 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -932,3 +932,4 @@ unsigned long drm_get_unmapped_area(struct file *file, return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +EXPORT_SYMBOL_GPL(drm_get_unmapped_area); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c index e962048f65d2..5452cabb4a2e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c @@ -1215,6 +1215,18 @@ static void vmw_remove(struct pci_dev *pdev) drm_put_dev(dev); } +static unsigned long +vmw_get_unmapped_area(struct file *file, unsigned long uaddr, + unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct drm_file *file_priv = file->private_data; + struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev); + + return drm_get_unmapped_area(file, uaddr, len, pgoff, flags, +&dev_priv->vma_manager); +} + static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val, void *ptr) { @@ -1386,6 +1398,7 @@ static const struct file_operations vmwgfx_driver_fops = { .compat_ioctl = vmw_compat_ioctl, #endif .llseek = noop_llseek, + .get_unmapped_area = vmw_get_unmapped_area, }; static struct drm_driver driver = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 06267184aa0a..9ea145cffa3d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -929,6 +929,7 @@ extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma); extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, size_t gran); + /** * TTM buffer object driver - vmwgfx_ttm_buffer.c */ diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index d8ea3dd10af0..34c721ab3ff3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -754,7 +754,7 @@ static int vmw_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, break; case TTM_PL_VRAM: /* "On-card" video ram */ - man->func = &ttm_bo_manager_func; + man->func = &vmw_thp_func; man->gpu_offset = 0; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_CACHED; -- 2.21.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 8/9] drm/vmwgfx: Introduce a huge page aligning TTM range manager
From: Thomas Hellstrom Using huge page-table entries requires that the physical address of the start of a buffer object is huge page size aligned. Make a special version of the TTM range manager that accomplishes this, but falls back to a smaller page size alignment (PUD->PMD, PMD->NORMAL) to avoid eviction. If other drivers want to use it in the future, it can be made a TTM generic helper. Note that drivers can force eviction for a certain alignment by assigning the TTM GPU alignment correspondingly. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Acked-by: Christian König --- drivers/gpu/drm/vmwgfx/Makefile | 1 + drivers/gpu/drm/vmwgfx/vmwgfx_drv.h | 7 ++ drivers/gpu/drm/vmwgfx/vmwgfx_thp.c | 166 3 files changed, 174 insertions(+) create mode 100644 drivers/gpu/drm/vmwgfx/vmwgfx_thp.c diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile index c877a21a0739..421dd2a497a5 100644 --- a/drivers/gpu/drm/vmwgfx/Makefile +++ b/drivers/gpu/drm/vmwgfx/Makefile @@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \ vmwgfx_validation.o vmwgfx_page_dirty.o \ ttm_object.o ttm_lock.o +vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index 82d86f2d2569..06267184aa0a 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1433,6 +1433,13 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf, enum page_entry_size pe_size); #endif +/* Transparent hugepage support - vmwgfx_thp.c */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +extern const struct ttm_mem_type_manager_func vmw_thp_func; +#else +#define vmw_thp_func ttm_bo_manager_func +#endif + /** * VMW_DEBUG_KMS - Debug output for kernel mode-setting * diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c new file mode 100644 index ..b7c816ba7166 --- /dev/null +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_thp.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Huge page-table-entry support for IO memory. + * + * Copyright (C) 2007-2019 Vmware, Inc. All rights reservedd. + */ +#include "vmwgfx_drv.h" +#include +#include +#include + +/** + * struct vmw_thp_manager - Range manager implementing huge page alignment + * + * @mm: The underlying range manager. Protected by @lock. + * @lock: Manager lock. 
+ */ +struct vmw_thp_manager { + struct drm_mm mm; + spinlock_t lock; +}; + +static int vmw_thp_insert_aligned(struct drm_mm *mm, struct drm_mm_node *node, + unsigned long align_pages, + const struct ttm_place *place, + struct ttm_mem_reg *mem, + unsigned long lpfn, + enum drm_mm_insert_mode mode) +{ + if (align_pages >= mem->page_alignment && + (!mem->page_alignment || align_pages % mem->page_alignment == 0)) { + return drm_mm_insert_node_in_range(mm, node, + mem->num_pages, + align_pages, 0, + place->fpfn, lpfn, mode); + } + + return -ENOSPC; +} + +static int vmw_thp_get_node(struct ttm_mem_type_manager *man, + struct ttm_buffer_object *bo, + const struct ttm_place *place, + struct ttm_mem_reg *mem) +{ + struct vmw_thp_manager *rman = (struct vmw_thp_manager *) man->priv; + struct drm_mm *mm = &rman->mm; + struct drm_mm_node *node; + unsigned long align_pages; + unsigned long lpfn; + enum drm_mm_insert_mode mode = DRM_MM_INSERT_BEST; + int ret; + + node = kzalloc(sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + + lpfn = place->lpfn; + if (!lpfn) + lpfn = man->size; + + mode = DRM_MM_INSERT_BEST; + if (place->flags & TTM_PL_FLAG_TOPDOWN) + mode = DRM_MM_INSERT_HIGH; + + spin_lock(&rman->lock); + if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) { + align_pages = (HPAGE_PUD_SIZE >> PAGE_SHIFT); + if (mem->num_pages >= align_pages) { + ret = vmw_thp_insert_aligned(mm, node, align_pages, +
[PATCH v4 3/9] mm: Split huge pages on write-notify or COW
From: Thomas Hellstrom We currently only do COW and write-notify on the PTE level, so if the huge_fault() handler returns VM_FAULT_FALLBACK on wp faults, split the huge pages and page-table entries. Also do this for huge PUDs if there is no huge_fault() handler and the vma is not anonymous, similar to how it's done for PMDs. Note that fs/dax.c does the splitting in the huge_fault() handler, but as huge_fault() is implemented by modules we need to consider whether to export the splitting functions for use in the modules or whether to try to keep calls in the core. Opt for the latter. A follow-up patch can remove the dax.c split_huge_pmd() if needed. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Acked-by: Christian König --- mm/memory.c | 27 +++ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 17aadc751e5c..4c49fe963e5c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3880,11 +3880,14 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd) { if (vma_is_anonymous(vmf->vma)) return do_huge_pmd_wp_page(vmf, orig_pmd); - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); + if (vmf->vma->vm_ops->huge_fault) { + vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD); - /* COW handled on pte level: split pmd */ - VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } + + /* COW or write-notify handled on pte level: split pmd. */ __split_huge_pmd(vmf->vma, vmf->pmd, vmf->address, false, NULL); return VM_FAULT_FALLBACK; @@ -3897,12 +3900,20 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) static vm_fault_t create_huge_pud(struct vm_fault *vmf) { -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&\ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) /* No support for anonymous transparent PUD pages yet */ if (vma_is_anonymous(vmf->vma)) - return VM_FAULT_FALLBACK; - if (vmf->vma->vm_ops->huge_fault) - return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); + goto split; + if (vmf->vma->vm_ops->huge_fault) { + vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD); + + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } +split: + /* COW or write-notify not handled on PUD level: split pud.*/ + __split_huge_pud(vmf->vma, vmf->pud, vmf->address); #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ return VM_FAULT_FALLBACK; } -- 2.21.1 ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
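From a driver's perspective the new behavior means a huge_fault() implementation can simply decline write faults it cannot handle at the huge level and let the core split the entry and retry at PTE level. A hedged sketch with hypothetical my_* helpers (the real vmwgfx handler follows later in the series):

#include <linux/mm.h>

/* Hypothetical driver huge_fault(); the my_* helpers are illustrative. */
static vm_fault_t my_huge_fault(struct vm_fault *vmf,
				enum page_entry_size pe_size)
{
	/*
	 * Write-notify and COW are handled at PTE level only, so returning
	 * VM_FAULT_FALLBACK makes the core split the huge page-table entry
	 * and retry the fault with small pages.
	 */
	if ((vmf->flags & FAULT_FLAG_WRITE) && my_needs_dirty_tracking(vmf))
		return VM_FAULT_FALLBACK;

	return my_insert_huge_entry(vmf, pe_size);
}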
[PATCH v4 4/9] mm: Add vmf_insert_pfn_xxx_prot() for huge page-table entries
From: Thomas Hellstrom For graphics drivers needing to modify the page-protection, add huge page-table entries counterparts to vmf_insert_pfn_prot(). Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Acked-by: Christian König --- include/linux/huge_mm.h | 41 +++-- mm/huge_memory.c| 38 -- 2 files changed, 71 insertions(+), 8 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 0b84e13e88e2..a95d1bc8ffe8 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -47,8 +47,45 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write); -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write); +vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write); + +/** + * vmf_insert_pfn_pmd - insert a pmd size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pmd size pfn. See vmf_insert_pfn() for additional info. + * + * Return: vm_fault_t value. + */ +static inline vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, + bool write) +{ + return vmf_insert_pfn_pmd_prot(vmf, pfn, vmf->vma->vm_page_prot, write); +} +vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write); + +/** + * vmf_insert_pfn_pud - insert a pud size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pud size pfn. See vmf_insert_pfn() for additional info. + * + * Return: vm_fault_t value. + */ +static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, + bool write) +{ + return vmf_insert_pfn_pud_prot(vmf, pfn, vmf->vma->vm_page_prot, write); +} + enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f8d24fc3f4df..b2ec62cca3ae 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -811,11 +811,24 @@ static void insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, pte_free(mm, pgtable); } -vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) +/** + * vmf_insert_pfn_pmd_prot - insert a pmd size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pmd size pfn. See vmf_insert_pfn() for additional info and + * also consult the vmf_insert_mixed_prot() documentation when + * @pgprot != @vmf->vma->vm_page_prot. + * + * Return: vm_fault_t value. 
+ */ +vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write) { unsigned long addr = vmf->address & PMD_MASK; struct vm_area_struct *vma = vmf->vma; - pgprot_t pgprot = vma->vm_page_prot; pgtable_t pgtable = NULL; /* @@ -843,7 +856,7 @@ vm_fault_t vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn, bool write) insert_pfn_pmd(vma, addr, vmf->pmd, pfn, pgprot, write, pgtable); return VM_FAULT_NOPAGE; } -EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd_prot); #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) @@ -889,11 +902,24 @@ static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, spin_unlock(ptl); } -vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, bool write) +/** + * vmf_insert_pfn_pud_prot - insert a pud size pfn + * @vmf: Structure describing the fault + * @pfn: pfn to insert + * @pgprot: page protection to use + * @write: whether it's a write fault + * + * Insert a pud size pfn. See vmf_insert_pfn() for additional info and + * also consult the vmf_insert_mixed_prot() documentation when + * @pgprot != @vmf->vma->vm_page_prot. + * + * Return: vm_fault_t value. + */ +vm_fault_t vmf_insert_pfn_pud_prot(struct vm_fault *vmf, pfn_t pfn, + pgprot_t pgprot, bool write) { unsigned long addr = vmf->address & PUD_MASK; struct vm_area_struct *vma = vmf->vma; - pgprot_t pgpro
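As a usage illustration of the new interface (not taken from the patch), a hypothetical huge_fault step that inserts a PMD-sized pfn with write-combined protection instead of vma->vm_page_prot:

#include <linux/huge_mm.h>
#include <linux/pfn_t.h>

/* Hypothetical helper; the function name is illustrative only. */
static vm_fault_t my_huge_fault_pmd(struct vm_fault *vmf, unsigned long pfn)
{
	/* A caching-mode change like this is what the _prot variants enable. */
	pgprot_t prot = pgprot_writecombine(vmf->vma->vm_page_prot);

	return vmf_insert_pfn_pmd_prot(vmf, __pfn_to_pfn_t(pfn, PFN_DEV),
				       prot, vmf->flags & FAULT_FLAG_WRITE);
}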
[PATCH v4 0/9] Huge page-table entries for TTM
In order to reduce TLB misses and CPU usage this patchset enables huge- and giant page-table entries for TTM and TTM-enabled graphics drivers. Patch 1 and 2 introduce a vma_is_special_huge() function to make the mm code take the same path as DAX when splitting huge- and giant page table entries, (which currently means zapping the page-table entry and rely on re-faulting). Patch 3 makes the mm code split existing huge page-table entries on huge_fault fallbacks. Typically on COW or on buffer-objects that want write-notify. COW and write-notification is always done on the lowest page-table level. See the patch log message for additional considerations. Patch 4 introduces functions to allow the graphics drivers to manipulate the caching- and encryption flags of huge page-table entries without ugly hacks. Patch 5 implements the huge_fault handler in TTM. This enables huge page-table entries, provided that the kernel is configured to support transhuge pages, either by default or using madvise(). However, they are unlikely to be inserted unless the kernel buffer object pfns and user-space addresses align perfectly. There are various options here, but since buffer objects that reside in system pages typically start at huge page boundaries if they are backed by huge pages, we try to enforce buffer object starting pfns and user-space addresses to be huge page-size aligned if their size exceeds a huge page-size. If pud-size transhuge ("giant") pages are enabled by the arch, the same holds for those. Patch 6 implements a specialized huge_fault handler for vmwgfx. The vmwgfx driver may perform dirty-tracking and needs some special code to handle that correctly. Patch 7 implements a drm helper to align user-space addresses according to the above scheme, if possible. Patch 8 implements a TTM range manager for vmwgfx that does the same for graphics IO memory. This may later be reused by other graphics drivers if necessary. Patch 9 finally hooks up the helpers of patch 7 and 8 to the vmwgfx driver. A similar change is needed for graphics drivers that want a reasonable likelyhood of actually using huge page-table entries. If a buffer object size is not huge-page or giant-page aligned, its size will NOT be inflated by this patchset. This means that the buffer object tail will use smaller size page-table entries and thus no memory overhead occurs. Drivers that want to pay the memory overhead price need to implement their own scheme to inflate buffer-object sizes. PMD size huge page-table-entries have been tested with vmwgfx and found to work well both with system memory backed and IO memory backed buffer objects. PUD size giant page-table-entries have seen limited (fault and COW) testing using a modified kernel (to support 1GB page allocations) and a fake vmwgfx TTM memory type. The vmwgfx driver does otherwise not support 1GB-size IO memory resources. Comments and suggestions welcome. Thomas Changes since RFC: * Check for buffer objects present in contigous IO Memory (Christian König) * Rebased on the vmwgfx emulated coherent memory functionality. That rebase adds patch 5. Changes since v1: * Make the new TTM range manager vmwgfx-specific. (Christian König) * Minor fixes for configs that don't support or only partially support transhuge pages. Changes since v2: * Minor coding style and doc fixes in patch 5/9 (Christian König) * Patch 5/9 doesn't touch mm. Remove from the patch title. 
Changes since v3: * Added reviews and acks * Implemented ugly but generic ttm_pgprot_is_wrprotecting() instead of arch specific code. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
[PATCH v4 5/9] drm/ttm, drm/vmwgfx: Support huge TTM pagefaults
From: Thomas Hellstrom Support huge (PMD-size and PUD-size) page-table entries by providing a huge_fault() callback. We still support private mappings and write-notify by splitting the huge page-table entries on write-access. Note that for huge page-faults to occur, either the kernel needs to be compiled with trans-huge-pages always enabled, or the kernel needs to be compiled with trans-huge-pages enabled using madvise, and the user-space app needs to call madvise() to enable trans-huge pages on a per-mapping basis. Furthermore huge page-faults will not succeed unless buffer objects and user-space addresses are aligned on huge page size boundaries. Cc: Andrew Morton Cc: Michal Hocko Cc: "Matthew Wilcox (Oracle)" Cc: "Kirill A. Shutemov" Cc: Ralph Campbell Cc: "Jérôme Glisse" Cc: "Christian König" Cc: Dan Williams Signed-off-by: Thomas Hellstrom Reviewed-by: Roland Scheidegger Reviewed-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo_vm.c| 161 - drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c | 2 +- include/drm/ttm/ttm_bo_api.h | 3 +- 3 files changed, 161 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c index 389128b8c4dd..0af14835504c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_vm.c +++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c @@ -156,6 +156,89 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_vm_reserve); +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +/** + * ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults + * @vmf: Fault data + * @bo: The buffer object + * @page_offset: Page offset from bo start + * @fault_page_size: The size of the fault in pages. + * @pgprot: The page protections. + * Does additional checking whether it's possible to insert a PUD or PMD + * pfn and performs the insertion. + * + * Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if + * a huge fault was not possible, or on insertion error. + */ +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + pgoff_t i; + vm_fault_t ret; + unsigned long pfn; + pfn_t pfnt; + struct ttm_tt *ttm = bo->ttm; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + /* Fault should not cross bo boundary. */ + page_offset &= ~(fault_page_size - 1); + if (page_offset + fault_page_size > bo->num_pages) + goto out_fallback; + + if (bo->mem.bus.is_iomem) + pfn = ttm_bo_io_mem_pfn(bo, page_offset); + else + pfn = page_to_pfn(ttm->pages[page_offset]); + + /* pfn must be fault_page_size aligned. */ + if ((pfn & (fault_page_size - 1)) != 0) + goto out_fallback; + + /* Check that memory is contiguous. 
*/ + if (!bo->mem.bus.is_iomem) { + for (i = 1; i < fault_page_size; ++i) { + if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i) + goto out_fallback; + } + } else if (bo->bdev->driver->io_mem_pfn) { + for (i = 1; i < fault_page_size; ++i) { + if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i) + goto out_fallback; + } + } + + pfnt = __pfn_to_pfn_t(pfn, PFN_DEV); + if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT)) + ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write); +#endif + else + WARN_ON_ONCE(ret = VM_FAULT_FALLBACK); + + if (ret != VM_FAULT_NOPAGE) + goto out_fallback; + + return VM_FAULT_NOPAGE; +out_fallback: + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; +} +#else +static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf, + struct ttm_buffer_object *bo, + pgoff_t page_offset, + pgoff_t fault_page_size, + pgprot_t pgprot) +{ + return VM_FAULT_FALLBACK; +} +#endif + /** * ttm_bo_vm_fault_reserved - TTM fault helper * @vmf: The struct vm_fault given as argument to the fault callback @@ -163,6 +246,7 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve); * @num_prefault: Maximum number of prefault pages. The caller may want to * specify this based on madvice settings and the size of the GPU object * backed by the memory. + * @fault_page_size: The size of the fault in p
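As the commit message notes, when the kernel has CONFIG_TRANSPARENT_HUGEPAGE set to "madvise" the application must opt in per mapping before these huge faults can occur. A minimal user-space sketch (illustrative, not part of the patch; obtaining the mmap offset for the buffer object is driver-specific and omitted):

#include <sys/mman.h>

/*
 * Hint that an existing buffer-object mapping may be backed by
 * transparent huge page-table entries; this only has an effect if the
 * mapping and the underlying memory are suitably aligned.
 */
static int enable_thp_for_bo(void *bo_map, size_t bo_size)
{
	return madvise(bo_map, bo_size, MADV_HUGEPAGE);
}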
Re: [PATCH 3/8] drm/vmwgfx: don't use ttm bo->offset
On 2/19/20 2:53 PM, Nirmoy Das wrote: Calculate the GPU offset within the vmwgfx driver itself without depending on bo->offset. Signed-off-by: Nirmoy Das Acked-by: Christian König Tested-by: Thomas Hellstrom Acked-by: Thomas Hellstrom ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
Re: [PATCH v3 0/8] do not store GPU address in TTM
Hi, On 2/19/20 2:53 PM, Nirmoy Das wrote: With this patch series I am trying to remove the GPU address dependency in TTM and move GPU address calculation into the individual drm drivers. For future reference, could you please add a motivation for the series? For example: cleanup, needed because..., simplifies... etc. Thanks, Thomas ___ dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel